# NOTE: "Spaces: Sleeping / Sleeping" - stray page-header text captured with this file; not Python code.
import os
from dataclasses import dataclass
@dataclass
class ModelConfig:
    """Configuration values: model identifiers, processing parameters, outputs.

    Every field has a default, so ``ModelConfig()`` yields a usable instance,
    and any field can be overridden by keyword, e.g.
    ``ModelConfig(whisper_language="en")``.

    After initialisation the instance also carries ``hf_token``, read from the
    ``HF_TOKEN`` environment variable (``None`` when the variable is unset).
    It is deliberately a plain attribute rather than a dataclass field, so it
    does not appear in ``repr()`` or ``dataclasses.asdict()`` output.
    """

    # Whisper ASR
    whisper_model: str = "openai/whisper-medium"
    whisper_language: str = "id"

    # Speaker Diarization
    diarization_model: str = "pyannote/speaker-diarization-3.1"
    min_speakers: int = 1
    max_speakers: int = 10

    # Text Processing
    summarization_model: str = "bert-base-multilingual-cased"
    ner_model: str = "cahya/bert-base-indonesian-NER"
    keyword_model: str = "paraphrase-multilingual-MiniLM-L12-v2"

    # Processing Parameters
    chunk_size: int = 3000
    chunk_overlap: int = 200
    summary_ratio: float = 0.3
    max_summary_sentences: int = 6

    # Output
    # None is a sentinel meaning "use the default formats"; the real list is
    # created per instance in __post_init__ so instances never share one list.
    output_formats: list = None

    def __post_init__(self) -> None:
        """Fill in the default output formats and read the HF token."""
        if self.output_formats is None:
            self.output_formats = ["markdown", "json", "html"]

        # Set HF token from environment (None when HF_TOKEN is not defined).
        self.hf_token = os.environ.get("HF_TOKEN")
# Global config instance: one module-level ModelConfig built with no
# arguments, i.e. using the class's default values.
config = ModelConfig()