"""Model and processing configuration, exposed as a module-level ``config``."""

import os
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class ModelConfig:
    """Model identifiers and processing parameters.

    All fields have defaults, so ``ModelConfig()`` yields a usable
    configuration; any field can be overridden by keyword.

    Attributes set outside the dataclass fields:
        hf_token: Value of the ``HF_TOKEN`` environment variable with
            surrounding whitespace removed, or ``None`` when the variable is
            unset or blank. It is assigned in ``__post_init__`` and is not a
            dataclass field, so it does not appear in the generated
            ``__init__``, ``__repr__``, ``__eq__`` or ``dataclasses.asdict``.
    """

    # Whisper ASR
    whisper_model: str = "openai/whisper-medium"
    # NOTE(review): presumably a language code understood by Whisper
    # ("id" = Indonesian) -- confirm against the code that consumes it.
    whisper_language: str = "id"

    # Speaker Diarization
    diarization_model: str = "pyannote/speaker-diarization-3.1"
    min_speakers: int = 1
    max_speakers: int = 10

    # Text Processing
    summarization_model: str = "bert-base-multilingual-cased"
    ner_model: str = "cahya/bert-base-indonesian-NER"
    keyword_model: str = "paraphrase-multilingual-MiniLM-L12-v2"

    # Processing Parameters
    # NOTE(review): the unit of chunk_size / chunk_overlap (characters vs.
    # tokens) is not visible in this module -- confirm with the consumer.
    chunk_size: int = 3000
    chunk_overlap: int = 200
    summary_ratio: float = 0.3
    max_summary_sentences: int = 6

    # Output
    # None is a sentinel meaning "use the default formats"; it is replaced
    # with a fresh list per instance in __post_init__, so instances never
    # share one mutable default.
    output_formats: Optional[List[str]] = None

    def __post_init__(self) -> None:
        """Fill in the default output formats and read the HF token."""
        if self.output_formats is None:
            self.output_formats = ["markdown", "json", "html"]

        # Set HF token from environment. Strip whitespace (a stray newline
        # is common when the value comes from a file) and treat a blank
        # value the same as an unset variable.
        raw_token = os.environ.get("HF_TOKEN", "").strip()
        self.hf_token: Optional[str] = raw_token or None


# Global config instance
config = ModelConfig()