- Add .trim() checks to all video source conditions - Prevents rendering empty youtube_url as valid video - Fixes double embed card display issue - Update sidebar icon check to use optional chaining with trim 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
180 lines
6.0 KiB
Python
180 lines
6.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Analyze video transcript to identify topics and create chapter divisions.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
from datetime import timedelta
|
|
|
|
def seconds_to_timestamp(seconds):
    """Render a second count (number or numeric string) as an HH:MM:SS string."""
    total = int(float(seconds))  # accept "12.5", 12.5, or 12
    hrs = total // 3600
    mins = (total % 3600) // 60
    secs = total % 60
    return f"{hrs:02d}:{mins:02d}:{secs:02d}"
|
|
|
|
def load_transcript(file_path):
    """Parse the JSON transcript at *file_path* and return the decoded object."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)
|
|
|
|
def extract_segments(data):
    """Flatten every track's transcript into {'start', 'end', 'text'} records.

    Entries with blank text are dropped; the result is sorted by start time.
    """
    collected = []

    for track in data[0]['tracks']:
        for entry in track.get('transcript', []):
            begin = float(entry.get('start', 0))
            length = float(entry.get('dur', 0))
            caption = entry.get('text', '').strip()

            # Skip blank captions (a bare newline strips down to '' anyway).
            if not caption or caption == '\n':
                continue

            collected.append({
                'start': begin,
                'end': begin + length,
                'text': caption,
            })

    collected.sort(key=lambda seg: seg['start'])
    return collected
|
|
|
|
def extract_keywords(text):
    """Score topic buckets by how many of their keywords occur in *text*.

    Matching is case-insensitive substring containment. Returns a list of
    (topic, hit_count) tuples, highest count first (ties keep dict order).
    """
    keywords = {
        'Market & Community': ['market', 'pasar', 'grup', 'komunitas', 'telegram', 'facebook', 'forum'],
        'Problem Finding': ['masalah', 'problem', 'kesulitan', 'permasalahan', 'error', 'bermasalah'],
        'Exploration': ['explor', 'coba', 'trial', 'nyoba', 'eksplor', 'explore'],
        'Personal Branding': ['branding', 'personal branding', 'show off', 'image', 'eksistensi'],
        'AIDA/Funnel': ['aida', 'awareness', 'interest', 'desire', 'action', 'funel', 'funnel'],
        'Trust': ['trust', 'percaya', 'kepercayaan'],
        'Clients': ['klien', 'client', 'pelanggan', 'customer'],
        'Pricing': ['harga', 'price', 'bayar', 'budget', 'rp', 'juta', 'ribu', 'dibayar'],
        'Negotiation': ['tawar', 'negosiasi', 'deal'],
        'Services': ['jasa', 'service', 'website', 'plugin', 'elementor', 'instal'],
        'Cold/Warm/Hot Market': ['cold market', 'warm market', 'hot market', 'dingin', 'hangat'],
        'Network': ['network', 'jaringan', 'koneksi', 'hubungan'],
        'Sharing': ['sharing', 'share', 'bagi'],
        'Products': ['produk', 'product', 'template'],
        'Japri': ['japri', 'private', 'chat pribadi'],
    }

    haystack = text.lower()
    hits = []

    for topic, terms in keywords.items():
        matches = sum(term.lower() in haystack for term in terms)
        if matches:
            hits.append((topic, matches))

    hits.sort(key=lambda pair: pair[1], reverse=True)
    return hits
|
|
|
|
def analyze_video(file_path=None, window=300, chapter_window=900):
    """Print a topic analysis and suggested chapters for a transcript JSON file.

    Previously the transcript path and interval sizes were hard-coded; they are
    now parameters with backward-compatible defaults, so ``analyze_video()``
    behaves exactly as before.

    Args:
        file_path: Path to the transcript JSON file. Defaults to the original
            hard-coded transcript location.
        window: Interval size (seconds) for the fine-grained breakdown.
        chapter_window: Interval size (seconds) for chapter suggestions.
    """
    if file_path is None:
        # Original hard-coded default, kept for backward compatibility.
        file_path = ("/Users/dwindown/CascadeProjects/MeetDwindiCom/access-hub/"
                     "Live Zoom - Diskusi Cara Jual Jasa via Online.json")

    print("="*80)
    print("VIDEO TRANSCRIPT ANALYSIS")
    print("Cara Jual Jasa via Online (How to Sell Services Online)")
    print("="*80)
    print()

    data = load_transcript(file_path)
    segments = extract_segments(data)

    print(f"Total segments: {len(segments)}")

    if not segments:
        print("No segments found!")
        return

    total_duration = segments[-1]['end']
    print(f"Total duration: {seconds_to_timestamp(total_duration)} ({total_duration/60:.1f} minutes)\n")

    print("="*80)
    print("CONTENT BREAKDOWN BY 5-MINUTE INTERVALS")
    print("="*80)
    print()
    _print_sections(segments, total_duration, window)

    print("\n")
    print("="*80)
    print("SUGGESTED CHAPTER STRUCTURE")
    print("="*80)
    print()
    _print_chapters(segments, total_duration, chapter_window)


def _window_text(segments, start, end):
    """Join the text of all segments whose start time falls in [start, end)."""
    return ' '.join(s['text'] for s in segments if start <= s['start'] < end)


def _print_sections(segments, total_duration, window):
    """Print a preview and topic counts for each fixed-size time interval."""
    current_time = 0
    section_num = 1

    while current_time < total_duration:
        window_end = min(current_time + window, total_duration)
        # Segment text is always non-empty, so an empty join means no segments
        # started inside this window — skip it, as the original did.
        combined_text = _window_text(segments, current_time, window_end)

        if combined_text:
            keywords = extract_keywords(combined_text)

            print(f"Section {section_num}: {seconds_to_timestamp(current_time)} - {seconds_to_timestamp(window_end)}")
            print("-" * 80)

            # Show first 400 characters as preview.
            print(f"Content: {combined_text[:400]}...")
            print()

            if keywords:
                print("Key topics detected:")
                for topic, count in keywords[:7]:
                    print(f"  • {topic}: {count} mentions")
            else:
                print("Key topics: (transition/break section)")

            print()
            print()

        section_num += 1
        current_time = window_end


def _print_chapters(segments, total_duration, chapter_window):
    """Print suggested chapter boundaries with their dominant topics."""
    current_time = 0
    chapter_num = 1

    while current_time < total_duration:
        chapter_end = min(current_time + chapter_window, total_duration)
        combined_text = _window_text(segments, current_time, chapter_end)

        if combined_text:
            keywords = extract_keywords(combined_text)

            # Top 3 topics become the suggested chapter title.
            main_topics = [kw[0] for kw in keywords[:3]]

            print(f"Chapter {chapter_num}: {seconds_to_timestamp(current_time)} - {seconds_to_timestamp(chapter_end)}")
            print(f"Main topics: {', '.join(main_topics)}")

            # Show first 300 chars, newlines flattened.
            preview = combined_text[:300].replace('\n', ' ')
            print(f"Preview: {preview}...")
            print()
            print()

        chapter_num += 1
        current_time = chapter_end
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the full transcript analysis.
    analyze_video()
|