Fix duplicate video embed when youtube_url is empty string
- Add .trim() checks to all video source conditions
- Prevents rendering empty youtube_url as valid video
- Fixes double embed card display issue
- Update sidebar icon check to use optional chaining with trim

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
162
analyze_transcript2.py
Normal file
162
analyze_transcript2.py
Normal file
@@ -0,0 +1,162 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Analyze video transcript to identify topics and create chapter divisions.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import timedelta
|
||||
|
||||
def seconds_to_timestamp(seconds):
    """Convert a number of seconds to a zero-padded ``HH:MM:SS`` string.

    Args:
        seconds: Anything ``float()`` accepts (int, float, numeric string).
            Fractional seconds are truncated toward zero after the
            ``timedelta`` round-trip.

    Returns:
        The formatted timestamp. Hours are not capped at 24, so long
        durations render as e.g. ``"100:00:00"``. Negative durations are
        rendered with a leading minus sign (``"-00:00:01"``).
    """
    total_seconds = int(timedelta(seconds=float(seconds)).total_seconds())

    # divmod on a negative total would yield a negative hour and wrapped
    # minutes/seconds (e.g. "-1:59:59"), so format the magnitude and
    # prepend the sign instead.
    sign = "-" if total_seconds < 0 else ""
    hours, remainder = divmod(abs(total_seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{secs:02d}"
|
||||
|
||||
def load_transcript(file_path):
    """Load and parse a JSON transcript file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but also strips a leading
    # byte-order mark, which json.load would otherwise reject.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        return json.load(f)
|
||||
|
||||
def extract_transcript_segments(data):
    """Extract all non-empty ASR transcript segments with timestamps.

    Only the first element of ``data`` is inspected. Within its ``'tracks'``
    list, tracks whose ``'kind'`` is ``'asr'`` (Automatic Speech Recognition)
    are read; every other track is ignored.

    Args:
        data: Parsed transcript document; indexed as ``data[0]['tracks']``.

    Returns:
        A list of dicts with keys ``'start'`` and ``'end'`` (seconds, derived
        from the millisecond fields ``tStartMs`` / ``dDurationMs``) and
        ``'text'`` (the event's ``utf8`` pieces joined with single spaces).
        Events whose text is empty or whitespace-only are skipped. An empty
        payload, or one without a ``'tracks'`` key, yields an empty list.
    """
    segments = []

    # An empty payload has no first element to inspect.
    if not data:
        return segments

    root = data[0]
    if 'tracks' not in root:
        return segments

    for track in root['tracks']:
        # Tracks lacking a 'kind' are treated like any other non-ASR track.
        if track.get('kind') != 'asr':
            continue

        for event in track.get('events') or []:
            start_time = event.get('tStartMs', 0) / 1000
            duration = event.get('dDurationMs', 0) / 1000

            text = ' '.join(
                seg['utf8']
                for seg in event.get('segs') or []
                if 'utf8' in seg
            )
            if not text.strip():
                continue

            segments.append({
                'start': start_time,
                'end': start_time + duration,
                'text': text,
            })

    return segments
|
||||
|
||||
def group_by_time_window(segments, window_seconds=600):
    """Group segments into consecutive fixed-size time windows.

    Windows start at 0 and are ``window_seconds`` long. A segment belongs to
    the window that contains its ``'start'`` (start inclusive, end exclusive).
    Windows that contain no segment are omitted from the result.

    Args:
        segments: List of dicts with ``'start'``, ``'end'`` and ``'text'``.
        window_seconds: Window length in seconds; must be positive.

    Returns:
        A list of dicts, one per non-empty window, with keys ``'start'`` and
        ``'end'`` (window bounds), ``'segments'`` (members in input order) and
        ``'text'`` (member texts joined with single spaces). Empty input
        yields an empty list.

    Raises:
        ValueError: If ``window_seconds`` is not positive (the window cursor
            would never advance).
    """
    if window_seconds <= 0:
        raise ValueError("window_seconds must be positive")
    if not segments:
        return []

    # Bound the scan by the latest start so that every segment is covered,
    # including a zero-duration segment sitting exactly on a window boundary
    # and input that is not sorted by time.
    last_start = max(s['start'] for s in segments)

    groups = []
    current_time = 0

    while current_time <= last_start:
        window_end = current_time + window_seconds
        window_segments = [
            s for s in segments
            if current_time <= s['start'] < window_end
        ]

        if window_segments:
            groups.append({
                'start': current_time,
                'end': window_end,
                'segments': window_segments,
                'text': ' '.join(s['text'] for s in window_segments),
            })

        current_time = window_end

    return groups
|
||||
|
||||
# Topic label -> lowercase keyword fragments searched for as substrings.
# Kept at module level so the table is built once, not on every call.
_TOPIC_KEYWORDS = {
    'Market & Community': ['market', 'pasar', 'grup', 'komunitas', 'telegram', 'facebook', 'forum'],
    'Problem Finding': ['masalah', 'problem', 'kesulitan', 'permasalahan', 'error'],
    'Exploration': ['explor', 'coba', 'trial', 'nyoba', 'eksplor'],
    'Personal Branding': ['branding', 'personal branding', 'show off', 'image'],
    'AIDA/Funnel': ['aida', 'awareness', 'interest', 'desire', 'action', 'funel', 'funnel'],
    'Trust': ['trust', 'percaya', 'kepercayaan'],
    'Clients': ['klien', 'client', 'pelanggan', 'customer'],
    'Pricing': ['harga', 'price', 'bayar', 'budget', 'rp', 'juta', 'ribu'],
    'Negotiation': ['tawar', 'negosiasi', 'deal'],
    'Services': ['jasa', 'service', 'website', 'plugin', 'elementor', 'instal'],
    'Cold/Warm/Hot Market': ['cold', 'warm', 'hot', 'dingin', 'hangat'],
    'Network': ['network', 'jaringan', 'koneksi', 'hubungan'],
    'Sharing': ['sharing', 'share', 'bagi'],
    'Products': ['produk', 'product', 'template'],
}


def extract_keywords(text):
    """Score a text against the fixed topic keyword table.

    Matching is a case-insensitive substring test, so a fragment also
    matches inside longer words.

    Args:
        text: The text to scan.

    Returns:
        A list of ``(topic, count)`` tuples for every topic with at least
        one hit, sorted by ``count`` descending (ties keep table order).
        ``count`` is the number of *distinct keywords* of that topic found
        in the text, not the number of occurrences. Empty text yields an
        empty list.
    """
    if not text:
        return []

    text_lower = text.lower()

    found = []
    for topic, kw_list in _TOPIC_KEYWORDS.items():
        count = sum(1 for kw in kw_list if kw in text_lower)
        if count > 0:
            found.append((topic, count))

    return sorted(found, key=lambda item: item[1], reverse=True)
|
||||
|
||||
def identify_main_topics(file_path=None):
    """Print a topic overview of a transcript to stdout.

    Loads the transcript, extracts its segments, then prints:

    1. the segment count and total duration (end of the last segment),
    2. for each 10-minute window: a 500-character preview and up to five
       topics from ``extract_keywords``,
    3. for each 5-minute window of the first hour: a 300-character excerpt.

    Args:
        file_path: Path of the transcript JSON file. When omitted, the
            original hard-coded transcript path is used.

    Returns:
        None. Returns early (after printing a notice) if no segments are
        found.
    """
    if file_path is None:
        file_path = "/Users/dwindown/CascadeProjects/MeetDwindiCom/access-hub/Live Zoom - Diskusi Cara Jual Jasa via Online.json"

    print("Loading transcript...")
    data = load_transcript(file_path)

    print("Extracting segments...")
    segments = extract_transcript_segments(data)

    print(f"Total segments: {len(segments)}")

    if not segments:
        print("No segments found!")
        return

    total_duration = segments[-1]['end']
    print(f"Total duration: {seconds_to_timestamp(total_duration)} ({total_duration/60:.1f} minutes)")

    print("\n" + "="*80)
    print("ANALYZING CONTENT IN 10-MINUTE INTERVALS")
    print("="*80 + "\n")

    # Group by 10-minute windows
    groups = group_by_time_window(segments, window_seconds=600)

    for i, group in enumerate(groups, 1):
        print(f"\n{'='*80}")
        print(f"SECTION {i}: {seconds_to_timestamp(group['start'])} - {seconds_to_timestamp(group['end'])}")
        print(f"{'='*80}")

        # Get first 500 chars for preview
        preview = group['text'][:500]
        print(f"\nContent Preview:\n{preview}...")

        # Extract keywords
        keywords = extract_keywords(group['text'])
        if keywords:
            print("\nMain Topics:")
            for topic, count in keywords[:5]:
                print(f"  - {topic}: {count} mentions")

    print("\n" + "="*80)
    print("DETAILED BREAKDOWN (5-minute intervals for first hour)")
    print("="*80 + "\n")

    # Select the first hour by timestamp so the section matches its heading
    # regardless of how segments are distributed over the recording.
    first_hour = [s for s in segments if s['start'] < 3600]

    # Skip grouping entirely when nothing starts within the first hour.
    detailed_groups = (
        group_by_time_window(first_hour, window_seconds=300)
        if first_hour else []
    )

    for group in detailed_groups:
        print(f"\n--- {seconds_to_timestamp(group['start'])} - {seconds_to_timestamp(group['end'])} ---")

        # Get text summary
        text_summary = group['text'][:300]
        print(f"{text_summary}...")


if __name__ == "__main__":
    import sys

    # Optional first CLI argument overrides the default transcript path.
    identify_main_topics(sys.argv[1] if len(sys.argv) > 1 else None)
|
||||
Reference in New Issue
Block a user