Spaces:

the11
/

Voice-Activated-RAG-System

Running

Voice-Activated-RAG-System / tts_gemini.py

Upload 9 files

a704a0c verified 29 days ago

1.41 kB

	import os
	import wave
	import uuid
	from google import genai
	from google.genai import types

	# Gemini API key, looked up once when this module is imported; None when
	# the GEMINI_API_KEY environment variable is not set.
	GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

	def wave_file(filename, pcm: bytes, channels: int = 1, rate: int = 24000,
	              sample_width: int = 2) -> None:
	    """Write raw PCM audio frames to a WAV file.

	    Args:
	        filename: Destination. A ``str``, ``bytes`` or ``os.PathLike`` path
	            is opened (and truncated) for writing; any other object is
	            passed through to ``wave.open`` unchanged as a file object.
	        pcm: Raw PCM frame data to store, as a bytes-like object.
	        channels: Number of audio channels recorded in the header.
	        rate: Sampling rate in Hz recorded in the header.
	        sample_width: Width of one sample in bytes recorded in the header.

	    Raises:
	        wave.Error: If the header parameters are rejected by ``wave``.
	        OSError: If the destination cannot be opened for writing.
	    """
	    # wave.open() is documented to open the path itself only when it is
	    # given a string; normalise Path objects and bytes paths to str so they
	    # are not mistaken for already-open file objects.
	    if isinstance(filename, (bytes, os.PathLike)):
	        filename = os.fsdecode(filename)

	    with wave.open(filename, "wb") as wf:
	        wf.setnchannels(channels)
	        wf.setsampwidth(sample_width)
	        wf.setframerate(rate)
	        wf.writeframes(pcm)

	def _first_audio_payload(response):
	    """Return the first non-empty inline data payload in *response*, or None."""
	    for candidate in getattr(response, "candidates", None) or ():
	        content = getattr(candidate, "content", None)
	        for part in getattr(content, "parts", None) or ():
	            inline = getattr(part, "inline_data", None)
	            payload = getattr(inline, "data", None)
	            if payload:
	                return payload
	    return None


	def tts_gemini(text, api_key=GEMINI_API_KEY, *, voice_name="Kore",
	               model="gemini-2.5-flash-preview-tts"):
	    """Synthesize *text* to speech with Gemini and save it as a WAV file.

	    The audio is written to ``<cwd>/tts_outputs/tts_<random hex>.wav``; the
	    directory is created on demand.

	    Args:
	        text: Content to speak; passed to the model as ``contents``.
	        api_key: Gemini API key. When falsy, the ``GEMINI_API_KEY``
	            environment variable is re-read at call time.
	        voice_name: Name of the prebuilt voice to request.
	        model: Identifier of the TTS-capable model to call.

	    Returns:
	        The path of the written WAV file, or ``None`` when there is nothing
	        to synthesize or the request/write fails (the error is printed).
	    """
	    # Nothing to speak: skip the network round-trip entirely.
	    if not text or (isinstance(text, str) and not text.strip()):
	        print("[Gemini TTS ERROR] No text to synthesize")
	        return None

	    # The default argument was captured at import time; pick up a key that
	    # was exported to the environment after this module was loaded.
	    api_key = api_key or os.getenv("GEMINI_API_KEY")

	    output_dir = os.path.join(os.getcwd(), "tts_outputs")
	    os.makedirs(output_dir, exist_ok=True)
	    file_name = os.path.join(output_dir, f"tts_{uuid.uuid4().hex}.wav")

	    try:
	        client = genai.Client(api_key=api_key)
	        response = client.models.generate_content(
	            model=model,
	            contents=text,
	            config=types.GenerateContentConfig(
	                response_modalities=["AUDIO"],
	                speech_config=types.SpeechConfig(
	                    voice_config=types.VoiceConfig(
	                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
	                            voice_name=voice_name,
	                        )
	                    )
	                ),
	            ),
	        )
	        data = _first_audio_payload(response)
	        if data is None:
	            # Fail before the output file is created instead of leaving an
	            # empty WAV behind.
	            raise ValueError("response contained no audio data")
	        wave_file(file_name, data)
	        return file_name
	    except Exception as e:
	        # Best-effort boundary: callers only ever see a path or None.
	        print(f"[Gemini TTS ERROR] {e}")
	        # The caller never learns file_name on failure, so remove any
	        # partially written file rather than orphaning it.
	        try:
	            os.remove(file_name)
	        except OSError:
	            pass
	        return None