"""Render transcript and summary export requests into content, filename and MIME type."""

from __future__ import annotations

import re
from datetime import datetime
from typing import Tuple

from src.export_utils import (
    SUBTITLE_FORMATS,
    SUMMARY_FORMATS,
    TRANSCRIPT_FORMATS,
    export_plain_text,
)

from ..models.export import SummaryExportRequest, TranscriptExportRequest

# Upper bound on the sanitized title fragment embedded in export filenames.
_MAX_TITLE_LENGTH = 50


def _sanitize_filename(title: str) -> str:
    """Return a filename-safe fragment derived from *title*.

    Characters that are invalid in filenames are dropped, every remaining
    character that is not a word character, ``-``, ``_`` or ``.`` becomes an
    underscore, runs of underscores are collapsed, and the result is trimmed
    of surrounding underscores and capped at ``_MAX_TITLE_LENGTH`` characters.

    Returns an empty string when *title* is empty/``None`` or when nothing
    usable survives sanitization.
    """
    if not title:
        return ""

    # Drop characters that are outright invalid in filenames.
    sanitized = re.sub(r'[<>:"/\\|?*]', '', title)
    # Replace whitespace and other problematic characters with underscores,
    # while keeping Unicode letters and digits.
    sanitized = re.sub(r'[^\w\-_.]', '_', sanitized, flags=re.UNICODE)
    # Collapse runs of underscores and trim them from both ends.
    sanitized = re.sub(r'_+', '_', sanitized).strip('_')

    # Truncation can land right after a separator; strip again so the
    # fragment never ends with a dangling underscore.
    return sanitized[:_MAX_TITLE_LENGTH].rstrip('_')


def _build_filename(prefix: str, title, extension: str) -> str:
    """Compose ``<prefix>[_<sanitized title>]_<timestamp><extension>``.

    The title segment is omitted when *title* is missing or sanitizes to an
    empty string, so the filename never contains a doubled underscore from an
    empty title. The timestamp is the current local time formatted as
    ``YYYYMMDD_HHMMSS``.
    """
    safe_title = _sanitize_filename(title)
    title_part = f"_{safe_title}" if safe_title else ""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return f"{prefix}{title_part}_{timestamp}{extension}"


def _build_utterance_tuples(payload: TranscriptExportRequest):
    """Convert the payload's utterances into the tuple shapes the exporters take.

    Returns a pair ``(utterances, utterances_with_speakers)``:

    * ``utterances`` is a list of ``(start, end, text)`` tuples.
    * ``utterances_with_speakers`` is a list of ``(start, end, text, speaker)``
      tuples, or ``None`` when no utterance carries a speaker. Utterances
      without a speaker fall back to speaker ``0``.
    """
    utterances = [(u.start, u.end, u.text) for u in payload.utterances]

    if not any(u.speaker is not None for u in payload.utterances):
        return utterances, None

    utterances_with_speakers = [
        (u.start, u.end, u.text, u.speaker if u.speaker is not None else 0)
        for u in payload.utterances
    ]
    return utterances, utterances_with_speakers


def generate_transcript_export(payload: TranscriptExportRequest) -> Tuple[str, str, str]:
    """Render a transcript in the requested format.

    Subtitle formats are looked up in ``SUBTITLE_FORMATS`` first, then
    transcript formats in ``TRANSCRIPT_FORMATS``. The ``"Plain Text"``
    transcript format is rendered through ``export_plain_text`` so that
    ``payload.include_timestamps`` is honoured; every other format uses the
    ``"function"`` entry of its format descriptor.

    Returns:
        A ``(content, filename, mime_type)`` tuple.

    Raises:
        ValueError: If ``payload.format`` is in neither format table.
    """
    utterances, utterances_with_speakers = _build_utterance_tuples(payload)

    if payload.format in SUBTITLE_FORMATS:
        fmt = SUBTITLE_FORMATS[payload.format]
        content = fmt["function"](utterances, utterances_with_speakers)
    elif payload.format in TRANSCRIPT_FORMATS:
        fmt = TRANSCRIPT_FORMATS[payload.format]
        if payload.format == "Plain Text":
            # Plain text is the only format that takes the timestamp toggle.
            content = export_plain_text(
                utterances,
                utterances_with_speakers,
                include_timestamps=payload.include_timestamps,
            )
        else:
            content = fmt["function"](utterances, utterances_with_speakers)
    else:
        raise ValueError(f"Unsupported transcript export format: {payload.format}")

    filename = _build_filename("transcript", payload.title, fmt["extension"])
    return content, filename, fmt["mime_type"]


def generate_summary_export(payload: SummaryExportRequest) -> Tuple[str, str, str]:
    """Render a summary in the requested format.

    The format descriptor is taken from ``SUMMARY_FORMATS`` and its
    ``"function"`` entry is called with ``payload.summary`` and
    ``payload.metadata``.

    Returns:
        A ``(content, filename, mime_type)`` tuple.

    Raises:
        ValueError: If ``payload.format`` is not in ``SUMMARY_FORMATS``.
    """
    if payload.format not in SUMMARY_FORMATS:
        raise ValueError(f"Unsupported summary export format: {payload.format}")

    fmt = SUMMARY_FORMATS[payload.format]
    content = fmt["function"](payload.summary, payload.metadata)

    filename = _build_filename("summary", payload.title, fmt["extension"])
    return content, filename, fmt["mime_type"]