File size: 3,100 Bytes
ba4a241
 
 
 
d8028fb
ba4a241
 
 
 
 
 
 
 
 
 
 
d8028fb
 
 
 
 
 
72c7bbc
 
 
 
 
 
d8028fb
 
 
 
ba4a241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8028fb
 
ba4a241
 
 
 
 
 
 
 
 
 
d8028fb
 
ba4a241
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from __future__ import annotations

from datetime import datetime
from typing import Tuple
import re

from src.export_utils import (
    SUBTITLE_FORMATS,
    SUMMARY_FORMATS,
    TRANSCRIPT_FORMATS,
    export_plain_text,
)

from ..models.export import SummaryExportRequest, TranscriptExportRequest


def _sanitize_filename(title: str) -> str:
    """Sanitize title for use in filename"""
    if not title:
        return ""
    # Remove or replace invalid filename characters
    sanitized = re.sub(r'[<>:"/\\|?*]', '', title)
    # Replace spaces and other problematic characters with underscores, but keep Unicode letters/numbers
    sanitized = re.sub(r'[^\w\-_.]', '_', sanitized, flags=re.UNICODE)
    # Remove multiple consecutive underscores
    sanitized = re.sub(r'_+', '_', sanitized)
    # Remove leading/trailing underscores
    sanitized = sanitized.strip('_')
    # Limit length
    return sanitized[:50] if sanitized else ""


def _build_utterance_tuples(payload: TranscriptExportRequest):
    utterances = [(u.start, u.end, u.text) for u in payload.utterances]
    has_speakers = any(u.speaker is not None for u in payload.utterances)
    utterances_with_speakers = None
    if has_speakers:
        utterances_with_speakers = [
            (u.start, u.end, u.text, u.speaker if u.speaker is not None else 0)
            for u in payload.utterances
        ]
    return utterances, utterances_with_speakers


def generate_transcript_export(payload: TranscriptExportRequest) -> Tuple[str, str, str]:
    """Render a transcript in the requested format and build its filename.

    ``payload.format`` is looked up in ``SUBTITLE_FORMATS`` first and then in
    ``TRANSCRIPT_FORMATS``. The matching entry's ``"function"`` is called with
    the plain and speaker-aware utterance lists. The ``"Plain Text"``
    transcript format is the exception: it goes through ``export_plain_text``
    so that ``payload.include_timestamps`` can be forwarded.

    Args:
        payload: Export request providing ``utterances``, ``format``,
            ``title`` and ``include_timestamps``.

    Returns:
        A ``(content, filename, mime_type)`` tuple. The filename has the form
        ``transcript[_<sanitized title>]_<YYYYmmdd_HHMMSS><extension>``.

    Raises:
        ValueError: If ``payload.format`` is in neither format registry.
    """
    utterances, utterances_with_speakers = _build_utterance_tuples(payload)

    if payload.format in SUBTITLE_FORMATS:
        fmt = SUBTITLE_FORMATS[payload.format]
        content = fmt["function"](utterances, utterances_with_speakers)
    elif payload.format in TRANSCRIPT_FORMATS:
        fmt = TRANSCRIPT_FORMATS[payload.format]
        if payload.format == "Plain Text":
            # Plain text is the only format that honours include_timestamps,
            # so it bypasses the registry function.
            content = export_plain_text(
                utterances,
                utterances_with_speakers,
                include_timestamps=payload.include_timestamps,
            )
        else:
            content = fmt["function"](utterances, utterances_with_speakers)
    else:
        raise ValueError(f"Unsupported transcript export format: {payload.format}")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Gate on the sanitized title, not the raw one: a title made only of
    # characters the sanitizer removes would otherwise leave a dangling "_"
    # and produce "transcript__<timestamp>...".
    safe_title = _sanitize_filename(payload.title) if payload.title else ""
    title_part = f"_{safe_title}" if safe_title else ""
    # NOTE(review): the extension is appended verbatim, so registry entries
    # presumably include their own leading dot - confirm in src.export_utils.
    filename = f"transcript{title_part}_{timestamp}{fmt['extension']}"
    return content, filename, fmt["mime_type"]


def generate_summary_export(payload: SummaryExportRequest) -> Tuple[str, str, str]:
    """Render a summary in the requested format and build its filename.

    The entry for ``payload.format`` in ``SUMMARY_FORMATS`` supplies the
    rendering ``"function"`` (called with ``payload.summary`` and
    ``payload.metadata``), the file ``"extension"`` and the ``"mime_type"``.

    Args:
        payload: Export request providing ``summary``, ``metadata``,
            ``format`` and ``title``.

    Returns:
        A ``(content, filename, mime_type)`` tuple. The filename has the form
        ``summary[_<sanitized title>]_<YYYYmmdd_HHMMSS><extension>``.

    Raises:
        ValueError: If ``payload.format`` is not a key of ``SUMMARY_FORMATS``.
    """
    if payload.format not in SUMMARY_FORMATS:
        raise ValueError(f"Unsupported summary export format: {payload.format}")

    fmt = SUMMARY_FORMATS[payload.format]
    content = fmt["function"](payload.summary, payload.metadata)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Gate on the sanitized title, not the raw one: a title made only of
    # characters the sanitizer removes would otherwise leave a dangling "_"
    # and produce "summary__<timestamp>...".
    safe_title = _sanitize_filename(payload.title) if payload.title else ""
    title_part = f"_{safe_title}" if safe_title else ""
    # NOTE(review): the extension is appended verbatim, so registry entries
    # presumably include their own leading dot - confirm in src.export_utils.
    filename = f"summary{title_part}_{timestamp}{fmt['extension']}"
    return content, filename, fmt["mime_type"]