|
import os |
|
import wave |
|
import uuid |
|
from google import genai |
|
from google.genai import types |
|
|
|
# Default API key for Gemini calls, read once from the environment at import
# time; it is None when the GEMINI_API_KEY variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
|
|
|
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM frames to ``filename`` as an uncompressed WAV file.

    Args:
        filename: Destination path (or file object) handed to ``wave.open``.
        pcm: Bytes-like object holding the raw audio frames.
        channels: Number of audio channels stored in the header.
        rate: Sampling rate in Hz stored in the header.
        sample_width: Width of one sample in bytes.
    """
    # Frame count 0 lets the wave module derive the real count from the data
    # that is actually written; "NONE" is the only supported compression.
    header = (channels, sample_width, rate, 0, "NONE", "not compressed")
    with wave.open(filename, "wb") as writer:
        writer.setparams(header)
        writer.writeframes(pcm)
|
|
|
def tts_gemini(text, api_key=GEMINI_API_KEY):
    """Synthesize ``text`` with Gemini TTS and save the audio as a WAV file.

    The audio is requested from the ``gemini-2.5-flash-preview-tts`` model
    using the prebuilt ``Kore`` voice and written to a uniquely named file in
    a ``tts_outputs`` directory under the current working directory.

    Args:
        text: The text to convert to speech.
        api_key: Gemini API key; defaults to the module-level GEMINI_API_KEY.

    Returns:
        The path of the written WAV file, or None if the request, the
        response parsing or the file write failed (the error is printed).

    Raises:
        OSError: If the output directory cannot be created; this happens
            before the guarded section and is not converted to None.
    """
    output_dir = os.path.join(os.getcwd(), "tts_outputs")
    os.makedirs(output_dir, exist_ok=True)
    # A random hex name keeps concurrent or repeated calls from colliding.
    file_name = os.path.join(output_dir, f"tts_{uuid.uuid4().hex}.wav")

    try:
        client = genai.Client(api_key=api_key)
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name='Kore',
                        )
                    )
                ),
            )
        )
        data = response.candidates[0].content.parts[0].inline_data.data
        # Reject a missing or empty payload before touching the disk, so a
        # header-only WAV is never reported as a successful synthesis.
        if not data:
            raise ValueError("Gemini response contained no audio data")
        # wave_file's defaults (mono, 24 kHz, 16-bit) are assumed to match
        # the PCM format returned by the API -- TODO confirm if the model or
        # voice is changed.
        wave_file(file_name, data)
        return file_name
    except Exception as e:
        print(f"[Gemini TTS ERROR] {e}")
        # Best-effort cleanup: a failure during the write can leave a partial
        # file behind, and callers only ever learn about paths we return.
        try:
            os.remove(file_name)
        except OSError:
            pass  # nothing was written, or the file cannot be removed
        return None