Fix duplicate video embed when youtube_url is empty string
- Add .trim() checks to all video source conditions
- Prevents rendering empty youtube_url as valid video
- Fixes double embed card display issue
- Update sidebar icon check to use optional chaining with trim

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
167
analyze_transcript.py
Normal file
167
analyze_transcript.py
Normal file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Analyze video transcript to identify topics and create chapter divisions.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import timedelta
|
||||
|
||||
def seconds_to_timestamp(seconds):
    """Convert a duration in seconds to an ``HH:MM:SS`` timestamp.

    Args:
        seconds: Duration as a number or numeric string (anything ``float()``
            accepts). Any fractional part of a second is dropped.

    Returns:
        A zero-padded ``HH:MM:SS`` string. Hours are not wrapped at 24, so
        90000 seconds renders as ``"25:00:00"``. Negative durations are
        rendered with a leading ``-``.
    """
    # Use total_seconds() rather than ``td.seconds``: the latter excludes whole
    # days, which silently wraps any duration of 24 hours or more.
    total = int(timedelta(seconds=float(seconds)).total_seconds())
    sign = "-" if total < 0 else ""
    hours, remainder = divmod(abs(total), 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{secs:02d}"
|
||||
|
||||
def load_transcript(file_path):
    """Read the UTF-8 encoded JSON file at ``file_path`` and return the parsed data."""
    with open(file_path, mode='r', encoding='utf-8') as transcript_file:
        return json.load(transcript_file)
|
||||
|
||||
def extract_text_with_timestamps(data):
    """Flatten transcript events into a list of timestamped text segments.

    Args:
        data: Parsed transcript JSON. Either a list of objects that may each
            carry an ``events`` list, or a single such object.

    Returns:
        A list of ``{'start': <seconds as float>, 'text': <str>}`` dicts, one
        for every ``segs`` item that has a ``utf8`` key, in input order.
    """
    # A bare object would otherwise be iterated key by key (strings); the
    # 'events' membership test then becomes a substring match and indexing
    # the key with ['events'] raises TypeError.
    entries = [data] if isinstance(data, dict) else (data or [])

    segments = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        # ``or []`` also covers an explicit null for 'events' / 'segs'.
        for event in entry.get('events') or []:
            for seg in event.get('segs') or []:
                if 'utf8' in seg:
                    segments.append({
                        # tStartMs is divided by 1000 to give seconds;
                        # a missing value is treated as 0.
                        'start': float(event.get('tStartMs', 0)) / 1000,
                        'text': seg['utf8'],
                    })
    return segments
|
||||
|
||||
def clean_text(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Leading and trailing whitespace disappears as well, because the text is
    split on whitespace before being re-joined.
    """
    words = text.split()
    return ' '.join(words)
|
||||
|
||||
def identify_keywords(text):
    """Return the category -> keyword-list table for Indonesian business talk.

    NOTE: ``text`` is accepted but never inspected; the same static table is
    built and returned on every call.
    """
    return dict(
        market=['market', 'pasar', 'grup', 'komunitas', 'community'],
        problem=['masalah', 'problem', 'kesulitan', 'error', 'gagal'],
        branding=['branding', 'personal branding', 'image', 'citra'],
        funnel=['funel', 'funnel', 'awareness', 'desire', 'action'],
        client=['klien', 'client', 'pelanggan', 'customer'],
        price=['harga', 'price', 'bayar', 'paid', 'invoice'],
        negotiation=['tawar', 'negosiasi', 'deal'],
        service=['jasa', 'service', 'website', 'plugin'],
        exploration=['explore', 'eksplor', 'coba', 'trial'],
        network=['network', 'jaringan', 'koneksi'],
        sharing=['sharing', 'share', 'bagi'],
        product=['produk', 'product', 'template', 'plugin'],
        trust=['trust', 'percaya', 'kepercayaan'],
        sales=['jual', 'sales', 'closing'],
    )
|
||||
|
||||
# Transcript analysed when no explicit path is passed to analyze_structure().
_DEFAULT_TRANSCRIPT_PATH = "/Users/dwindown/CascadeProjects/MeetDwindiCom/access-hub/Live Zoom - Diskusi Cara Jual Jasa via Online.json"


def _print_interval_samples(segments, interval=300, last_point=9000, tolerance=60):
    """Print the segment nearest to each sampling point (0, interval, ..., last_point).

    A sample is printed only when the nearest segment starts less than
    ``tolerance`` seconds from the sampling point and its cleaned text is
    longer than 50 characters. ``segments`` must be non-empty.
    """
    for target_time in range(0, last_point + 1, interval):
        # min() keeps the first of several equally-near segments.
        closest = min(segments, key=lambda seg: abs(seg['start'] - target_time))
        if abs(closest['start'] - target_time) >= tolerance:
            continue

        text = clean_text(closest['text'])
        if len(text) > 50:  # Only meaningful segments
            print(f"[{seconds_to_timestamp(closest['start'])}]")
            print(f"{text[:200]}...")
            print()


def _print_period_summaries(segments, total_duration, period=600):
    """Print a short text excerpt for each ``period``-second window of the transcript."""
    period_start = 0
    # ``<=`` so that a final segment sitting exactly on a window boundary
    # (including a transcript whose only segment starts at 0) is still shown.
    while period_start <= total_duration:
        period_end = period_start + period
        period_segments = [s for s in segments if period_start <= s['start'] < period_end]

        if period_segments:
            # Excerpt: first 20 segments of the window, capped at 500 characters.
            period_text = ' '.join(clean_text(s['text']) for s in period_segments[:20])[:500]

            print(f"\n{'='*70}")
            print(f"PERIOD: {seconds_to_timestamp(period_start)} - {seconds_to_timestamp(period_end)}")
            print(f"{'='*70}")
            print(period_text)
            print()

        period_start = period_end


def analyze_structure(file_path=None):
    """Load a transcript and print an overview of its structure to stdout.

    Prints the segment count, the approximate duration, one sample segment
    near every 5-minute mark up to 2h30m, and a text excerpt for every
    10-minute window.

    Args:
        file_path: Path of the transcript JSON file. Defaults to the
            module's built-in transcript path when omitted.

    Raises:
        OSError: If the transcript file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    if file_path is None:
        file_path = _DEFAULT_TRANSCRIPT_PATH

    print("Loading transcript...")
    data = load_transcript(file_path)

    print("Extracting segments...")
    segments = extract_text_with_timestamps(data)

    print(f"\nTotal segments: {len(segments)}")

    # Without segments there is no duration to report and nothing to sample;
    # continuing would hit an unbound ``total_duration`` further down.
    if not segments:
        print("No segments found; nothing to analyze.")
        return

    # Start time of the last segment is used as the duration
    # (assumes segments are in chronological order -- TODO confirm).
    total_duration = segments[-1]['start']
    print(f"Total duration: {seconds_to_timestamp(total_duration)} ({total_duration/60:.1f} minutes)")

    # Sample segments at different intervals
    print("\n=== SAMPLING SEGMENTS AT KEY INTERVALS ===\n")
    _print_interval_samples(segments)

    # NOTE: the next two sections only print their headers; no phrase or
    # topic matching is implemented yet. The headers are kept so the
    # output layout stays the same.
    print("\n=== LOOKING FOR TRANSITION PHRASES ===\n")
    print("\n=== SEARCHING FOR TOPIC MENTIONS ===\n")

    # Find segments grouped by time periods
    print("\n=== CONTENT BY TIME PERIODS (every 10 minutes) ===\n")
    _print_period_summaries(segments, total_duration)
|
||||
|
||||
# Run the analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    analyze_structure()
|
||||
Reference in New Issue
Block a user