|
|
from __future__ import annotations |
|
|
|
|
|
from datetime import datetime |
|
|
from typing import Tuple |
|
|
import re |
|
|
|
|
|
from src.export_utils import ( |
|
|
SUBTITLE_FORMATS, |
|
|
SUMMARY_FORMATS, |
|
|
TRANSCRIPT_FORMATS, |
|
|
export_plain_text, |
|
|
) |
|
|
|
|
|
from ..models.export import SummaryExportRequest, TranscriptExportRequest |
|
|
|
|
|
|
|
|
def _sanitize_filename(title: str) -> str: |
|
|
"""Sanitize title for use in filename""" |
|
|
if not title: |
|
|
return "" |
|
|
|
|
|
sanitized = re.sub(r'[<>:"/\\|?*]', '', title) |
|
|
|
|
|
sanitized = re.sub(r'[^\w\-_.]', '_', sanitized, flags=re.UNICODE) |
|
|
|
|
|
sanitized = re.sub(r'_+', '_', sanitized) |
|
|
|
|
|
sanitized = sanitized.strip('_') |
|
|
|
|
|
return sanitized[:50] if sanitized else "" |
|
|
|
|
|
|
|
|
def _build_utterance_tuples(payload: TranscriptExportRequest): |
|
|
utterances = [(u.start, u.end, u.text) for u in payload.utterances] |
|
|
has_speakers = any(u.speaker is not None for u in payload.utterances) |
|
|
utterances_with_speakers = None |
|
|
if has_speakers: |
|
|
utterances_with_speakers = [ |
|
|
(u.start, u.end, u.text, u.speaker if u.speaker is not None else 0) |
|
|
for u in payload.utterances |
|
|
] |
|
|
return utterances, utterances_with_speakers |
|
|
|
|
|
|
|
|
def generate_transcript_export(payload: TranscriptExportRequest) -> Tuple[str, str, str]:
    """Render a transcript in the requested format and build its filename.

    The format named by ``payload.format`` is looked up first in
    ``SUBTITLE_FORMATS`` and then in ``TRANSCRIPT_FORMATS``. The matching
    entry's ``"function"`` is called with the utterance tuples, except for
    the ``"Plain Text"`` transcript format, which goes through
    ``export_plain_text`` so that ``payload.include_timestamps`` is honoured.

    Args:
        payload: Export request providing ``utterances``, ``format``,
            ``title`` and ``include_timestamps``.

    Returns:
        ``(content, filename, mime_type)`` where ``filename`` has the form
        ``transcript[_<title>]_<YYYYmmdd_HHMMSS><extension>``. The title
        segment is left out when there is no title or when the title
        sanitizes to an empty string.

    Raises:
        ValueError: If ``payload.format`` is in neither format table.
    """
    utterances, utterances_with_speakers = _build_utterance_tuples(payload)

    if payload.format in SUBTITLE_FORMATS:
        fmt = SUBTITLE_FORMATS[payload.format]
        content = fmt["function"](utterances, utterances_with_speakers)
    elif payload.format in TRANSCRIPT_FORMATS:
        fmt = TRANSCRIPT_FORMATS[payload.format]
        if payload.format == "Plain Text":
            # Plain text is the only format that takes the timestamp toggle.
            content = export_plain_text(
                utterances,
                utterances_with_speakers,
                include_timestamps=payload.include_timestamps,
            )
        else:
            content = fmt["function"](utterances, utterances_with_speakers)
    else:
        raise ValueError(f"Unsupported transcript export format: {payload.format}")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Decide on the sanitized title, not the raw one: a title made only of
    # stripped characters (e.g. "???") would otherwise leave a dangling "_"
    # and produce "transcript__<timestamp>".
    safe_title = _sanitize_filename(payload.title) if payload.title else ""
    title_part = f"_{safe_title}" if safe_title else ""

    filename = f"transcript{title_part}_{timestamp}{fmt['extension']}"
    return content, filename, fmt["mime_type"]
|
|
|
|
|
|
|
|
def generate_summary_export(payload: SummaryExportRequest) -> Tuple[str, str, str]:
    """Render a summary in the requested format and build its filename.

    The entry for ``payload.format`` in ``SUMMARY_FORMATS`` supplies the
    exporter (``"function"``), the file ``"extension"`` and the
    ``"mime_type"``. The exporter is called with ``payload.summary`` and
    ``payload.metadata``.

    Args:
        payload: Export request providing ``summary``, ``metadata``,
            ``format`` and ``title``.

    Returns:
        ``(content, filename, mime_type)`` where ``filename`` has the form
        ``summary[_<title>]_<YYYYmmdd_HHMMSS><extension>``. The title segment
        is left out when there is no title or when the title sanitizes to an
        empty string.

    Raises:
        ValueError: If ``payload.format`` is not a key of ``SUMMARY_FORMATS``.
    """
    if payload.format not in SUMMARY_FORMATS:
        raise ValueError(f"Unsupported summary export format: {payload.format}")

    fmt = SUMMARY_FORMATS[payload.format]
    content = fmt["function"](payload.summary, payload.metadata)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Decide on the sanitized title, not the raw one: a title made only of
    # stripped characters (e.g. "???") would otherwise leave a dangling "_"
    # and produce "summary__<timestamp>".
    safe_title = _sanitize_filename(payload.title) if payload.title else ""
    title_part = f"_{safe_title}" if safe_title else ""

    filename = f"summary{title_part}_{timestamp}{fmt['extension']}"
    return content, filename, fmt["mime_type"]
|
|
|