Spaces:

the11
/

Voice-Activated-RAG-System

Running

File size: 1,411 Bytes

a704a0c

import os
import wave
import uuid
from google import genai
from google.genai import types

# Gemini API key, looked up in the process environment once at import time.
# Evaluates to None when the GEMINI_API_KEY variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM bytes to ``filename`` as an uncompressed WAV file.

    Args:
        filename: Destination path (or writable file object) handed to
            ``wave.open``.
        pcm: Bytes-like object holding the raw audio frames.
        channels: Number of audio channels recorded in the header.
        rate: Sampling frequency in Hz recorded in the header.
        sample_width: Bytes per sample recorded in the header.
    """
    # (nchannels, sampwidth, framerate, nframes, comptype, compname); the
    # frame count stays 0 so the writer derives it from the data written.
    header = (channels, sample_width, rate, 0, "NONE", "not compressed")
    with wave.open(filename, "wb") as writer:
        writer.setparams(header)
        writer.writeframes(pcm)

def tts_gemini(text, api_key=GEMINI_API_KEY, voice_name="Kore",
               model="gemini-2.5-flash-preview-tts"):
    """Synthesize ``text`` to speech with Gemini and save it as a WAV file.

    The audio is written to a uniquely named ``tts_<hex>.wav`` file inside a
    ``tts_outputs`` directory under the current working directory (created
    on demand), using ``wave_file``'s default header settings.

    Args:
        text: The content to speak; passed to the model as ``contents``.
        api_key: API key for the Gemini client. Defaults to the value of
            ``GEMINI_API_KEY`` captured when this module was imported.
        voice_name: Name of the prebuilt voice to request.
        model: Identifier of the TTS-capable model to call.

    Returns:
        The path of the written WAV file, or ``None`` if synthesis failed
        (the error is printed rather than raised).
    """
    # Blank input has nothing to speak; skip the network round-trip.
    if isinstance(text, str) and not text.strip():
        print("[Gemini TTS ERROR] empty text, nothing to synthesize")
        return None

    output_dir = os.path.join(os.getcwd(), "tts_outputs")
    os.makedirs(output_dir, exist_ok=True)
    file_name = os.path.join(output_dir, f"tts_{uuid.uuid4().hex}.wav")
    try:
        client = genai.Client(api_key=api_key)
        response = client.models.generate_content(
            model=model,
            contents=text,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=voice_name,
                        )
                    )
                ),
            )
        )

        # Walk the first candidate defensively: any link in the
        # candidates -> content -> parts -> inline_data chain may be missing
        # when the model returns no audio (e.g. a blocked or empty reply).
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content is not None else None) or []
        audio = None
        for part in parts:
            blob = part.inline_data
            if blob is not None and blob.data:
                audio = blob.data
                break
        if audio is None:
            raise ValueError("response contained no audio data")

        wave_file(file_name, audio)
        return file_name
    except Exception as e:
        # Best-effort boundary: callers rely on None to signal failure.
        print(f"[Gemini TTS ERROR] {e}")
        # Don't leave a partial/header-only WAV behind for a failed request.
        try:
            os.remove(file_name)
        except OSError:
            # Nothing was written (or it is already gone); nothing to clean.
            pass
        return None