Spaces:

pets123
/

sesame_openai

Paused

01115c6 8 months ago

1.62 kB

	# app/api/schemas.py
	from enum import Enum
	from typing import Optional, List, Dict, Any, Union
	from pydantic import BaseModel, Field

	# Voice options as a non-restrictive string
	class Voice(str):
	    """Free-form voice identifier for the CSM model.

	    A plain ``str`` subclass that adds no validation or behaviour of its
	    own, so any string value is accepted as a voice name.
	    """

	class ResponseFormat(str, Enum):
	    """Audio output formats selectable through ``response_format``.

	    Members are ``str`` instances, so they compare equal to their plain
	    string values (``ResponseFormat.mp3 == "mp3"``) and can be looked up
	    by value (``ResponseFormat("mp3")``).
	    """

	    mp3 = "mp3"
	    opus = "opus"
	    aac = "aac"
	    flac = "flac"
	    wav = "wav"

	    def __str__(self) -> str:
	        """Return the bare format name (e.g. ``"mp3"``).

	        Without this override, ``str()`` always yields
	        ``"ResponseFormat.mp3"``, and on Python 3.11 and later
	        ``format()`` / f-strings do as well. That breaks code which
	        interpolates the member into a file extension or content type.
	        Returning the value keeps the result identical on every version.
	        """
	        return self.value

	# Create SpeechRequest for compatibility with our new code
	class SpeechRequest(BaseModel):
	    # Request body for speech generation. Only ``input`` is required;
	    # every other field falls back to the default given in its Field().
	    # (Kept as comments rather than a docstring so the generated schema
	    # description is unchanged.)

	    model: Optional[str] = Field(
	        default="csm-1b",
	        description="The TTS model to use",
	    )
	    input: str = Field(
	        ...,  # required: no default
	        description="The text to generate audio for",
	    )
	    voice: Optional[str] = Field(
	        default="alloy",
	        description="The voice to use for generation",
	    )
	    response_format: Optional[ResponseFormat] = Field(
	        default=ResponseFormat.mp3,
	        description="The format of the audio response",
	    )
	    speed: Optional[float] = Field(
	        default=1.0,
	        description="The speed of the audio",
	        ge=0.25,
	        le=4.0,
	    )

	    # CSM-specific generation parameters.
	    max_audio_length_ms: Optional[float] = Field(
	        default=90000,
	        description="Maximum audio length in milliseconds",
	    )
	    temperature: Optional[float] = Field(
	        default=0.9,
	        description="Sampling temperature",
	        ge=0.0,
	        le=2.0,
	    )
	    topk: Optional[int] = Field(
	        default=50,
	        description="Top-k for sampling",
	        ge=1,
	        le=100,
	    )

	    class Config:
	        # Accept values keyed by field name (no aliases are declared on
	        # this model's fields).
	        populate_by_name = True
	        # Unknown keys in the payload are dropped instead of raising.
	        extra = "ignore"

	# Maintain TTSRequest for backward compatibility
	class TTSRequest(SpeechRequest):
	    """Legacy name for SpeechRequest, kept for backward compatibility.

	    Adds no fields or configuration of its own.
	    """

	class TTSResponse(BaseModel):
	    """Placeholder model with no fields; only used for API documentation."""