"""
Export utilities for transcripts and summaries.

Supports various formats depending on speaker diarization state.
"""

import json
import re
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple
from xml.sax.saxutils import escape, quoteattr

# (start_seconds, end_seconds, text)
Utterance = Tuple[float, float, str]
# (start_seconds, end_seconds, text, zero-based speaker id)
SpeakerUtterance = Tuple[float, float, str, int]

# format_type -> (fractional digits, fraction separator, minimum hour width)
_TIMESTAMP_STYLES = {
    "srt": (3, ",", 2),
    "vtt": (3, ".", 2),
    "ass": (2, ".", 1),
}
# Used for any format_type that is not listed above.
_FALLBACK_TIMESTAMP_STYLE = (1, ".", 2)

_ASS_HEADER = """[Script Info]
Title: VoxSum Transcript
ScriptType: v4.00+

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H80000000,0,0,0,0,100,100,0,0,1,2,0,2,10,10,10,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

# (metadata key, human-readable label) in the order they are rendered.
_SUMMARY_METADATA_FIELDS = (
    ("title", "Title"),
    ("duration", "Duration"),
    ("speakers", "Speakers"),
    ("date", "Date"),
)


def format_timestamp(seconds: float, format_type: str = "srt") -> str:
    """Format a time offset for different subtitle formats.

    The value is first rounded to the precision of the target format and only
    then split into hours/minutes/seconds, so a rounding carry propagates
    correctly (59.9996 s becomes ``00:01:00,000`` rather than the invalid
    ``00:00:60,000``).

    Args:
        seconds: Offset in seconds. Negative values are clamped to zero.
        format_type: ``"srt"`` (``HH:MM:SS,mmm``), ``"vtt"`` (``HH:MM:SS.mmm``),
            ``"ass"`` (``H:MM:SS.cc``); anything else yields ``HH:MM:SS.t``.

    Returns:
        The formatted timestamp.
    """
    digits, separator, hour_width = _TIMESTAMP_STYLES.get(
        format_type, _FALLBACK_TIMESTAMP_STYLE
    )
    scale = 10 ** digits
    # Work in integer units of the smallest displayed fraction.
    total_units = round(max(seconds, 0.0) * scale)
    whole_seconds, fraction = divmod(total_units, scale)
    total_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return (
        f"{hours:0{hour_width}d}:{minutes:02d}:{secs:02d}"
        f"{separator}{fraction:0{digits}d}"
    )


def _resolve_utterances(
    utterances: Optional[List[Utterance]],
    utterances_with_speakers: Optional[List[SpeakerUtterance]],
) -> Tuple[List[SpeakerUtterance], bool]:
    """Return ``(rows, has_speakers)``, preferring the diarized utterances.

    Plain utterances are padded with speaker id 0 so callers can always
    unpack four fields.
    """
    if utterances_with_speakers:
        return list(utterances_with_speakers), True
    plain = utterances or []
    return [(start, end, text, 0) for start, end, text in plain], False


def _speaker_label(speaker_id: int) -> str:
    """Return the one-based display label for a zero-based speaker id."""
    return f"Speaker {speaker_id + 1}"


def export_to_srt(
    utterances: List[Tuple[float, float, str]],
    utterances_with_speakers=None,
) -> str:
    """Export to SubRip (.srt) format.

    Args:
        utterances: ``(start, end, text)`` tuples, used when no diarized
            utterances are supplied.
        utterances_with_speakers: Optional ``(start, end, text, speaker_id)``
            tuples; when non-empty each cue is prefixed with ``Speaker N: ``.

    Returns:
        The SRT document; cues are numbered from 1 and each is followed by a
        blank line.
    """
    rows, has_speakers = _resolve_utterances(utterances, utterances_with_speakers)
    lines: List[str] = []
    for index, (start, end, text, speaker_id) in enumerate(rows, 1):
        prefix = f"{_speaker_label(speaker_id)}: " if has_speakers else ""
        lines.extend(
            [
                str(index),
                f"{format_timestamp(start, 'srt')} --> {format_timestamp(end, 'srt')}",
                f"{prefix}{text}",
                "",  # Empty line between entries
            ]
        )
    return "\n".join(lines)


def export_to_vtt(
    utterances: List[Tuple[float, float, str]],
    utterances_with_speakers=None,
) -> str:
    """Export to WebVTT (.vtt) format.

    Args:
        utterances: ``(start, end, text)`` tuples, used when no diarized
            utterances are supplied.
        utterances_with_speakers: Optional ``(start, end, text, speaker_id)``
            tuples; when non-empty each cue is prefixed with ``Speaker N: ``.

    Returns:
        The WebVTT document, starting with the ``WEBVTT`` header line.
    """
    rows, has_speakers = _resolve_utterances(utterances, utterances_with_speakers)
    lines = ["WEBVTT", ""]
    for start, end, text, speaker_id in rows:
        prefix = f"{_speaker_label(speaker_id)}: " if has_speakers else ""
        lines.extend(
            [
                f"{format_timestamp(start, 'vtt')} --> {format_timestamp(end, 'vtt')}",
                f"{prefix}{text}",
                "",  # Empty line between entries
            ]
        )
    return "\n".join(lines)


def export_to_ass(
    utterances: List[Tuple[float, float, str]],
    utterances_with_speakers=None,
) -> str:
    """Export to Advanced SubStation Alpha (.ass) format.

    Args:
        utterances: ``(start, end, text)`` tuples, used when no diarized
            utterances are supplied.
        utterances_with_speakers: Optional ``(start, end, text, speaker_id)``
            tuples; when non-empty each event is prefixed with ``Speaker N: ``.

    Returns:
        The ASS script: a fixed header followed by one ``Dialogue:`` event per
        utterance.
    """
    rows, has_speakers = _resolve_utterances(utterances, utterances_with_speakers)
    events = []
    for start, end, text, speaker_id in rows:
        prefix = f"{_speaker_label(speaker_id)}: " if has_speakers else ""
        # An event must stay on one physical line; ASS spells an in-cue line
        # break as the literal two characters "\N".
        event_text = text.replace("\r\n", "\n").replace("\n", "\\N")
        events.append(
            f"Dialogue: 0,{format_timestamp(start, 'ass')},"
            f"{format_timestamp(end, 'ass')},Default,,0,0,0,,{prefix}{event_text}"
        )
    return _ASS_HEADER + "\n".join(events)


def export_to_transcript_json(
    utterances: List[Tuple[float, float, str]],
    utterances_with_speakers=None,
    metadata=None,
) -> str:
    """Export to JSON format with detailed transcript data.

    Args:
        utterances: ``(start, end, text)`` tuples, used when no diarized
            utterances are supplied.
        utterances_with_speakers: Optional ``(start, end, text, speaker_id)``
            tuples.
        metadata: Optional mapping emitted verbatim under ``"metadata"``. When
            missing or empty, a default block with source, format version and
            the number of detected speakers is generated.

    Returns:
        An indented JSON string with ``"metadata"`` and ``"utterances"`` keys.
    """
    rows, has_speakers = _resolve_utterances(utterances, utterances_with_speakers)
    if not metadata:
        speaker_count = (
            len({speaker_id for _, _, _, speaker_id in rows}) if has_speakers else 1
        )
        metadata = {
            "source": "VoxSum",
            "format_version": "1.0",
            "speakers_detected": speaker_count,
        }
    transcript_data = {
        "metadata": metadata,
        "utterances": [
            {
                "start": start,
                "end": end,
                "duration": end - start,
                "text": text,
                "speaker_id": speaker_id,
                "speaker_label": _speaker_label(speaker_id),
            }
            for start, end, text, speaker_id in rows
        ],
    }
    return json.dumps(transcript_data, indent=2, ensure_ascii=False)


def export_to_elan_eaf(
    utterances: List[Tuple[float, float, str]],
    utterances_with_speakers=None,
) -> str:
    """Export to ELAN (.eaf) format for linguistic analysis.

    Each utterance gets two time slots (start and end, in milliseconds) and
    one alignable annotation on the tier of its speaker. Time-slot and
    annotation IDs are derived from the utterance's position in the input, so
    they are unique across the whole document and always refer to the right
    slots regardless of how utterances are distributed over tiers. Utterance
    text is XML-escaped.

    Args:
        utterances: ``(start, end, text)`` tuples, used when no diarized
            utterances are supplied (all text then goes to a single tier).
        utterances_with_speakers: Optional ``(start, end, text, speaker_id)``
            tuples; one tier is written per distinct speaker id.

    Returns:
        The EAF XML document as a string.
    """
    # NOTE(review): element/attribute names below follow the public EAF
    # schema; confirm tier IDs and header attributes against existing
    # consumers if they depend on the exact previous output.
    rows, _ = _resolve_utterances(utterances, utterances_with_speakers)
    speakers = sorted({speaker_id for _, _, _, speaker_id in rows})
    created = datetime.now().astimezone().isoformat(timespec="seconds")

    doc = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        f'<ANNOTATION_DOCUMENT AUTHOR="VoxSum" DATE={quoteattr(created)} '
        'FORMAT="3.0" VERSION="3.0" '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xsi:noNamespaceSchemaLocation="http://www.mpi.nl/tools/elan/EAFv3.0.xsd">',
        '    <HEADER MEDIA_FILE="" TIME_UNITS="milliseconds">',
        '        <PROPERTY NAME="URN">urn:nl-mpi-tools-elan-eaf:voxsum-transcript</PROPERTY>',
        f'        <PROPERTY NAME="lastUsedAnnotationId">{len(rows)}</PROPERTY>',
        "    </HEADER>",
        "    <TIME_ORDER>",
    ]

    # Time slots: utterance i (zero-based) owns ts(2i+1) and ts(2i+2).
    for index, (start, end, _, _) in enumerate(rows):
        first_slot = 2 * index + 1
        doc.append(
            f'        <TIME_SLOT TIME_SLOT_ID="ts{first_slot}" '
            f'TIME_VALUE="{max(0, round(start * 1000))}"/>'
        )
        doc.append(
            f'        <TIME_SLOT TIME_SLOT_ID="ts{first_slot + 1}" '
            f'TIME_VALUE="{max(0, round(end * 1000))}"/>'
        )
    doc.append("    </TIME_ORDER>")

    # Tiers for each speaker
    for speaker_id in speakers:
        tier_id = quoteattr(f"Speaker_{speaker_id + 1}")
        participant = quoteattr(_speaker_label(speaker_id))
        doc.append(
            f'    <TIER LINGUISTIC_TYPE_REF="default-lt" '
            f"PARTICIPANT={participant} TIER_ID={tier_id}>"
        )
        for index, (_, _, text, row_speaker) in enumerate(rows):
            if row_speaker != speaker_id:
                continue
            first_slot = 2 * index + 1
            doc.extend(
                [
                    "        <ANNOTATION>",
                    f'            <ALIGNABLE_ANNOTATION ANNOTATION_ID="a{index + 1}" '
                    f'TIME_SLOT_REF1="ts{first_slot}" '
                    f'TIME_SLOT_REF2="ts{first_slot + 1}">',
                    f"                <ANNOTATION_VALUE>{escape(text)}</ANNOTATION_VALUE>",
                    "            </ALIGNABLE_ANNOTATION>",
                    "        </ANNOTATION>",
                ]
            )
        doc.append("    </TIER>")

    doc.append(
        '    <LINGUISTIC_TYPE GRAPHIC_REFERENCES="false" '
        'LINGUISTIC_TYPE_ID="default-lt" TIME_ALIGNABLE="true"/>'
    )
    doc.append("</ANNOTATION_DOCUMENT>")
    return "\n".join(doc) + "\n"


def export_plain_text(
    utterances: List[Tuple[float, float, str]],
    utterances_with_speakers=None,
    include_timestamps=True,
) -> str:
    """Export to plain text format.

    Args:
        utterances: ``(start, end, text)`` tuples, used when no diarized
            utterances are supplied.
        utterances_with_speakers: Optional ``(start, end, text, speaker_id)``
            tuples; when non-empty a ``Speaker N:`` header is written each
            time the speaker changes.
        include_timestamps: Prefix every line with ``[HH:MM:SS.t] ``.

    Returns:
        The transcript, one utterance per line.
    """
    rows, has_speakers = _resolve_utterances(utterances, utterances_with_speakers)
    lines: List[str] = []
    current_speaker = None
    for start, _end, text, speaker_id in rows:
        # Add speaker header when speaker changes (for diarized content)
        if has_speakers and speaker_id != current_speaker:
            if lines:  # Blank line before every speaker block except the first
                lines.append("")
            lines.append(f"{_speaker_label(speaker_id)}:")
            current_speaker = speaker_id

        timestamp = (
            f"[{format_timestamp(start, 'default')}] " if include_timestamps else ""
        )
        lines.append(f"{timestamp}{text}")
    return "\n".join(lines)


def _summary_metadata_items(metadata: Dict[str, Any]) -> List[Tuple[str, Any]]:
    """Return ``(label, value)`` pairs for the truthy, known metadata fields."""
    return [
        (label, metadata[key])
        for key, label in _SUMMARY_METADATA_FIELDS
        if metadata.get(key)
    ]


def export_summary_markdown(summary: str, metadata=None) -> str:
    """Export summary in Markdown format with metadata.

    Args:
        summary: The summary body.
        metadata: Optional mapping; the ``title``, ``duration``, ``speakers``
            and ``date`` entries are rendered when present and truthy.

    Returns:
        The summary alone when no metadata is given, otherwise a ``# Summary``
        header block followed by a ``## Content`` section.
    """
    md_content: List[str] = []
    if metadata:
        md_content.extend(["# Summary", ""])
        md_content.extend(
            f"**{label}:** {value}"
            for label, value in _summary_metadata_items(metadata)
        )
        md_content.extend(["", "## Content", ""])
    md_content.append(summary)
    return "\n".join(md_content)


def export_summary_plain_text(summary: str, metadata=None) -> str:
    """Export summary in plain text format.

    Args:
        summary: The summary body.
        metadata: Optional mapping; the ``title``, ``duration``, ``speakers``
            and ``date`` entries are rendered when present and truthy.

    Returns:
        The summary alone when no metadata is given, otherwise a ``SUMMARY``
        header block followed by a ``CONTENT`` section.
    """
    content: List[str] = []
    if metadata:
        content.extend(["SUMMARY", "=" * 50, ""])
        content.extend(
            f"{label}: {value}" for label, value in _summary_metadata_items(metadata)
        )
        content.extend(["", "CONTENT", "-" * 50, ""])
    content.append(summary)
    return "\n".join(content)


# Export format definitions
SUBTITLE_FORMATS = {
    "SRT (SubRip)": {
        "extension": ".srt",
        "mime_type": "text/plain",
        "function": export_to_srt,
    },
    "VTT (WebVTT)": {
        "extension": ".vtt",
        "mime_type": "text/vtt",
        "function": export_to_vtt,
    },
    "ASS (Advanced SubStation Alpha)": {
        "extension": ".ass",
        "mime_type": "text/plain",
        "function": export_to_ass,
    },
}

TRANSCRIPT_FORMATS = {
    "Plain Text": {
        "extension": ".txt",
        "mime_type": "text/plain",
        "function": export_plain_text,
    },
    "JSON": {
        "extension": ".json",
        "mime_type": "application/json",
        "function": export_to_transcript_json,
    },
    "ELAN (EAF)": {
        "extension": ".eaf",
        "mime_type": "application/xml",
        "function": export_to_elan_eaf,
    },
}

SUMMARY_FORMATS = {
    "Markdown": {
        "extension": ".md",
        "mime_type": "text/markdown",
        "function": export_summary_markdown,
    },
    "Plain Text": {
        "extension": ".txt",
        "mime_type": "text/plain",
        "function": export_summary_plain_text,
    },
}