"""Text-to-speech helper that renders text to a WAV file via the Gemini API."""

import os
import uuid
import wave
from typing import Optional

from google import genai
from google.genai import types

# Read once at import time; used as the default ``api_key`` of ``tts_gemini``.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")


def wave_file(
    filename: str,
    pcm: bytes,
    channels: int = 1,
    rate: int = 24000,
    sample_width: int = 2,
) -> None:
    """Write raw audio frames to *filename* as a WAV file.

    Args:
        filename: Destination path; opened in ``"wb"`` mode (overwritten).
        pcm: Raw frame bytes passed unchanged to ``writeframes``.
        channels: Number of audio channels written to the header.
        rate: Frame rate in Hz written to the header.
        sample_width: Bytes per sample written to the header.

    Raises:
        wave.Error / OSError: Propagated from the ``wave`` module.
    """
    # NOTE(review): the defaults (mono, 24 kHz, 2-byte samples) presumably
    # match the TTS model's output format -- confirm against the API docs.
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)


def _extract_audio(response) -> Optional[bytes]:
    """Return the first inline audio payload of the first candidate, or None."""
    candidates = response.candidates
    if not candidates:
        return None
    content = candidates[0].content
    if content is None or not content.parts:
        return None
    for part in content.parts:
        blob = part.inline_data
        if blob is not None and blob.data:
            return blob.data
    return None


def _discard_partial_file(path: str) -> None:
    """Best-effort removal of a half-written output file."""
    try:
        os.remove(path)
    except OSError:
        # Nothing was written, or the file is already gone; nothing to do.
        pass


def tts_gemini(text, api_key: Optional[str] = GEMINI_API_KEY) -> Optional[str]:
    """Synthesize *text* to speech and save it as a WAV file.

    The audio is written to ``<cwd>/tts_outputs/tts_<random hex>.wav``; the
    directory is created on demand.

    Args:
        text: Content to speak; forwarded as ``contents`` to the model.
        api_key: Key passed to ``genai.Client``. Defaults to the
            ``GEMINI_API_KEY`` environment variable as read at import time.

    Returns:
        The path of the written WAV file, or ``None`` when synthesis failed
        (the error is printed with a ``[Gemini TTS ERROR]`` prefix).
    """
    # Reject empty input locally instead of spending a network round-trip.
    if not text or (isinstance(text, str) and not text.strip()):
        print("[Gemini TTS ERROR] No text to synthesize")
        return None

    output_dir = os.path.join(os.getcwd(), "tts_outputs")
    os.makedirs(output_dir, exist_ok=True)
    # A random name avoids collisions between calls.
    file_name = os.path.join(output_dir, f"tts_{uuid.uuid4().hex}.wav")

    try:
        client = genai.Client(api_key=api_key)
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name='Kore',
                        )
                    )
                ),
            ),
        )

        data = _extract_audio(response)
        if not data:
            # Give a readable message instead of an AttributeError/IndexError
            # from blindly indexing into an audio-less response.
            raise ValueError("response contained no audio data")

        wave_file(file_name, data)
        return file_name
    except Exception as e:
        # Boundary handler: callers rely on ``None`` rather than an exception.
        print(f"[Gemini TTS ERROR] {e}")
        # The name is unique to this call, so any file there is ours and is
        # at best a truncated WAV; do not leave it behind.
        _discard_partial_file(file_name)
        return None