#!/usr/bin/env python3
"""
Analyze video transcript to identify topics and create chapter divisions.
"""

import json
import re
from datetime import timedelta

# Transcript analysed when analyze_video() is called without an argument.
DEFAULT_TRANSCRIPT_PATH = (
    "/Users/dwindown/CascadeProjects/MeetDwindiCom/access-hub/"
    "Live Zoom - Diskusi Cara Jual Jasa via Online.json"
)

# Topic label -> lowercase substrings whose presence signals that topic.
# Built once at import time instead of on every extract_keywords() call.
TOPIC_KEYWORDS = {
    'Market & Community': ['market', 'pasar', 'grup', 'komunitas', 'telegram', 'facebook', 'forum'],
    'Problem Finding': ['masalah', 'problem', 'kesulitan', 'permasalahan', 'error', 'bermasalah'],
    'Exploration': ['explor', 'coba', 'trial', 'nyoba', 'eksplor', 'explore'],
    'Personal Branding': ['branding', 'personal branding', 'show off', 'image', 'eksistensi'],
    'AIDA/Funnel': ['aida', 'awareness', 'interest', 'desire', 'action', 'funel', 'funnel'],
    'Trust': ['trust', 'percaya', 'kepercayaan'],
    'Clients': ['klien', 'client', 'pelanggan', 'customer'],
    'Pricing': ['harga', 'price', 'bayar', 'budget', 'rp', 'juta', 'ribu', 'dibayar'],
    'Negotiation': ['tawar', 'negosiasi', 'deal'],
    'Services': ['jasa', 'service', 'website', 'plugin', 'elementor', 'instal'],
    'Cold/Warm/Hot Market': ['cold market', 'warm market', 'hot market', 'dingin', 'hangat'],
    'Network': ['network', 'jaringan', 'koneksi', 'hubungan'],
    'Sharing': ['sharing', 'share', 'bagi'],
    'Products': ['produk', 'product', 'template'],
    'Japri': ['japri', 'private', 'chat pribadi'],
}


def seconds_to_timestamp(seconds):
    """Convert seconds to a zero-padded ``HH:MM:SS`` timestamp.

    ``seconds`` may be anything ``float()`` accepts (number or numeric
    string); the fractional part is truncated, not rounded.
    """
    total_seconds = int(float(seconds))
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"


def load_transcript(file_path):
    """Load the JSON transcript at ``file_path`` and return the parsed data."""
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def extract_segments(data):
    """Extract transcript segments with timestamps.

    Reads ``data[0]['tracks']`` and, for every track that has a
    ``'transcript'`` list, turns each item into a dict with ``'start'``,
    ``'end'`` (``start + dur``) and ``'text'`` keys.  Items whose text is
    empty after stripping whitespace are skipped.

    Returns the segments sorted by start time.
    """
    segments = []
    for track in data[0]['tracks']:
        if 'transcript' not in track:
            continue
        for item in track['transcript']:
            # `or ''` tolerates an explicit null text; strip() already
            # reduces a bare newline to the empty string.
            text = (item.get('text') or '').strip()
            if not text:
                continue
            start = float(item.get('start', 0))
            dur = float(item.get('dur', 0))
            segments.append({
                'start': start,
                'end': start + dur,
                'text': text,
            })

    # Sort by start time
    segments.sort(key=lambda seg: seg['start'])
    return segments


def extract_keywords(text):
    """Extract key topics from text.

    Returns a list of ``(topic, count)`` tuples sorted by ``count`` in
    descending order (ties keep the order of ``TOPIC_KEYWORDS``).  Topics
    with no match are omitted.

    Note: ``count`` is the number of *distinct keyword variants* of the topic
    that occur as a case-insensitive substring of ``text`` - not the number
    of occurrences.
    """
    text_lower = text.lower()
    found = []
    for topic, kw_list in TOPIC_KEYWORDS.items():
        count = sum(1 for kw in kw_list if kw.lower() in text_lower)
        if count > 0:
            found.append((topic, count))
    return sorted(found, key=lambda pair: pair[1], reverse=True)


def _iter_windows(segments, window, total_duration):
    """Yield ``(start, end, segments_in_window)`` for consecutive windows.

    Windows are ``window`` seconds long, begin at 0 and are clipped to
    ``total_duration``.  A segment belongs to the window containing its
    start time.  Windows are half-open ``[start, end)`` except the last one,
    which also takes segments starting exactly at ``total_duration`` so a
    zero-length trailing segment is not lost.
    """
    current = 0
    while current < total_duration:
        end = min(current + window, total_duration)
        is_last = end >= total_duration
        in_window = [
            seg for seg in segments
            if seg['start'] >= current and (is_last or seg['start'] < end)
        ]
        yield current, end, in_window
        current = end


def analyze_video(file_path=DEFAULT_TRANSCRIPT_PATH):
    """Analyze the video transcript and print a report to stdout.

    Args:
        file_path: Path of the JSON transcript to analyse.  Defaults to
            ``DEFAULT_TRANSCRIPT_PATH``.

    The report contains a breakdown by 5-minute intervals (text preview plus
    up to seven detected topics) followed by suggested chapters based on
    15-minute intervals (top three topics plus preview).  Intervals without
    any segment are skipped and do not consume a section/chapter number.
    """
    print("="*80)
    print("VIDEO TRANSCRIPT ANALYSIS")
    print("Cara Jual Jasa via Online (How to Sell Services Online)")
    print("="*80)
    print()

    data = load_transcript(file_path)
    segments = extract_segments(data)
    print(f"Total segments: {len(segments)}")

    if not segments:
        print("No segments found!")
        return

    # Segments are ordered by start time, so the last one does not
    # necessarily end last; take the latest end over all of them.
    total_duration = max(seg['end'] for seg in segments)
    print(f"Total duration: {seconds_to_timestamp(total_duration)} ({total_duration/60:.1f} minutes)\n")

    # Create time-based groups every 5 minutes
    print("="*80)
    print("CONTENT BREAKDOWN BY 5-MINUTE INTERVALS")
    print("="*80)
    print()

    window = 300  # 5 minutes
    section_num = 1
    for window_start, window_end, window_segments in _iter_windows(segments, window, total_duration):
        if not window_segments:
            continue

        # Combine text
        combined_text = ' '.join(seg['text'] for seg in window_segments)

        # Extract keywords
        keywords = extract_keywords(combined_text)

        print(f"Section {section_num}: {seconds_to_timestamp(window_start)} - {seconds_to_timestamp(window_end)}")
        print("-" * 80)

        # Show first 400 characters as preview
        preview = combined_text[:400]
        print(f"Content: {preview}...")
        print()

        if keywords:
            print("Key topics detected:")
            for topic, count in keywords[:7]:
                print(f" • {topic}: {count} mentions")
        else:
            print("Key topics: (transition/break section)")

        print()
        print()
        section_num += 1

    # Now create suggested chapters based on content analysis
    print("\n")
    print("="*80)
    print("SUGGESTED CHAPTER STRUCTURE")
    print("="*80)
    print()

    # Create larger 15-minute groups for chapter suggestions
    chapter_window = 900  # 15 minutes
    chapter_num = 1
    for chapter_start, chapter_end, chapter_segments in _iter_windows(segments, chapter_window, total_duration):
        if not chapter_segments:
            continue

        combined_text = ' '.join(seg['text'] for seg in chapter_segments)
        keywords = extract_keywords(combined_text)

        # Get top 3 keywords for chapter title
        main_topics = [kw[0] for kw in keywords[:3]]

        print(f"Chapter {chapter_num}: {seconds_to_timestamp(chapter_start)} - {seconds_to_timestamp(chapter_end)}")
        print(f"Main topics: {', '.join(main_topics)}")

        # Show first 300 chars
        preview = combined_text[:300].replace('\n', ' ')
        print(f"Preview: {preview}...")
        print()
        print()
        chapter_num += 1


if __name__ == "__main__":
    analyze_video()