Spaces:
Paused
Paused
| # app/api/schemas.py | |
| from enum import Enum | |
| from typing import Optional, List, Dict, Any, Union | |
| from pydantic import BaseModel, Field | |
| # Voice options as a non-restrictive string | |
class Voice(str):
    """Free-form voice identifier for the CSM model.

    This is a plain ``str`` subclass that adds no validation or behaviour,
    so any string value is accepted.
    """
# Audio container/codec names accepted for the ``response_format`` field.
# Members are str-valued, so each one compares equal to its lowercase name.
class ResponseFormat(str, Enum):
    mp3 = "mp3"
    opus = "opus"
    aac = "aac"
    flac = "flac"
    wav = "wav"
| # Create SpeechRequest for compatibility with our new code | |
# Request schema for speech generation. Only ``input`` is required; every
# other field is optional and falls back to the default given in its Field.
class SpeechRequest(BaseModel):
    # --- General request parameters ---
    model: Optional[str] = Field(
        default="csm-1b",
        description="The TTS model to use",
    )
    input: str = Field(
        ...,  # required: no default value
        description="The text to generate audio for",
    )
    voice: Optional[str] = Field(
        default="alloy",
        description="The voice to use for generation",
    )
    response_format: Optional[ResponseFormat] = Field(
        default=ResponseFormat.mp3,
        description="The format of the audio response",
    )
    speed: Optional[float] = Field(
        default=1.0,
        description="The speed of the audio",
        ge=0.25,
        le=4.0,
    )

    # --- CSM-specific parameters ---
    max_audio_length_ms: Optional[float] = Field(
        default=90000,
        description="Maximum audio length in milliseconds",
    )
    temperature: Optional[float] = Field(
        default=0.9,
        description="Sampling temperature",
        ge=0.0,
        le=2.0,
    )
    topk: Optional[int] = Field(
        default=50,
        description="Top-k for sampling",
        ge=1,
        le=100,
    )

    class Config:
        # No field in this model declares an alias; the setting is kept as-is.
        populate_by_name = True
        # Allow extra fields without error
        extra = "ignore"
| # Maintain TTSRequest for backward compatibility | |
# Adds no fields or overrides of its own; everything is inherited from
# SpeechRequest so callers using the older name keep working.
class TTSRequest(SpeechRequest):
    """Legacy alias for SpeechRequest for backward compatibility"""
# Intentionally empty model: it declares no fields.
class TTSResponse(BaseModel):
    """Only used for API documentation"""