# Yermia's picture
# First init
# 5da9a16
import os
from dataclasses import dataclass
from typing import List, Optional
@dataclass
class ModelConfig:
    """Model identifiers and processing parameters gathered in one place.

    Every field has a default, so ``ModelConfig()`` yields a usable
    configuration and individual values can be overridden by keyword.

    Besides the dataclass fields, each instance carries ``hf_token``: the
    value of the ``HF_TOKEN`` environment variable read at construction
    time, or ``None`` when the variable is unset or blank. It is a plain
    attribute rather than a dataclass field, so it is not accepted by the
    constructor and does not appear in the generated ``repr``/``__eq__``
    or in ``dataclasses.asdict``.
    """

    # Whisper ASR
    whisper_model: str = "openai/whisper-medium"
    whisper_language: str = "id"

    # Speaker Diarization
    diarization_model: str = "pyannote/speaker-diarization-3.1"
    min_speakers: int = 1
    max_speakers: int = 10

    # Text Processing
    summarization_model: str = "bert-base-multilingual-cased"
    ner_model: str = "cahya/bert-base-indonesian-NER"
    keyword_model: str = "paraphrase-multilingual-MiniLM-L12-v2"

    # Processing Parameters
    # NOTE(review): units of chunk_size / chunk_overlap (characters vs.
    # tokens) are not visible from this file - confirm against the consumer.
    chunk_size: int = 3000
    chunk_overlap: int = 200
    summary_ratio: float = 0.3
    max_summary_sentences: int = 6

    # Output
    # ``None`` is a sentinel meaning "use the default formats"; it is
    # replaced by a fresh list in __post_init__ so instances never share one.
    output_formats: Optional[List[str]] = None

    def __post_init__(self) -> None:
        """Fill in the default output formats and read the HF token."""
        if self.output_formats is None:
            self.output_formats = ["markdown", "json", "html"]

        # Set HF token from environment. Surrounding whitespace is stripped
        # and a blank value is treated the same as an unset variable, so
        # ``hf_token is None`` reliably means "no token available".
        raw_token = os.environ.get("HF_TOKEN")
        cleaned = raw_token.strip() if raw_token is not None else ""
        self.hf_token = cleaned or None
# Module-level ModelConfig built with all defaults; created once at import.
config: ModelConfig = ModelConfig()