""" | |
FastRTC + Gemma-3 minimal voice chat app | |
Requirements: | |
pip install fastrtc transformers torch torchaudio | |
""" | |
import numpy as np
import torch
from fastrtc import ReplyOnPause, Stream, get_stt_model, get_tts_model
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# ------------------------------------------------------------------
# 1. Load Gemma-3 (4b-it) via transformers
# ------------------------------------------------------------------
# Note: gemma-3-4b-it is a multimodal checkpoint; loading it through
# AutoModelForCausalLM requires a recent transformers release. If the
# load fails, the text-only google/gemma-3-1b-it is a drop-in alternative.
MODEL_ID = "google/gemma-3-4b-it"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# ------------------------------------------------------------------
# 2. Build a simple chat pipeline
# ------------------------------------------------------------------
chat_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
)

# ------------------------------------------------------------------
# 3. Voice pipeline helpers (fastrtc bundles Moonshine STT and Kokoro TTS)
# ------------------------------------------------------------------
stt = get_stt_model("moonshine/tiny")
tts = get_tts_model("kokoro")
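# A minimal smoke test for the voice helpers (a sketch; assumes a 1-second,
# 16 kHz mono clip of silence, so the transcript should come back empty):
#
#   silence = (16000, np.zeros(16000, dtype=np.float32))
#   print(repr(stt.stt(silence)))          # expected: ''
#   for sr, samples in tts.stream_tts_sync("Hello!"):
#       print(sr, samples.shape)           # streamed (sample_rate, chunk) pairs
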
# ------------------------------------------------------------------
# 4. Response generator
# ------------------------------------------------------------------
def response_generator(prompt: str) -> str:
    """Feed the user prompt to Gemma-3 and return the assistant text."""
    messages = [{"role": "user", "content": prompt}]
    prompt_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    output = chat_pipeline(prompt_text)[0]["generated_text"]
    # The pipeline echoes the prompt, so strip it from the output.
    return output[len(prompt_text):].strip()
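
# Quick sanity check for the text path (a sketch; the reply varies from run
# to run because sampling is enabled):
#
#   print(response_generator("In one sentence, what is WebRTC?"))
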
# ------------------------------------------------------------------
# 5. FastRTC streaming handler
# ------------------------------------------------------------------
def chat_handler(audio: tuple[int, np.ndarray]):
    """Receive one user utterance, answer via Gemma-3, stream back TTS audio.

    ReplyOnPause invokes this once per detected pause, passing the whole
    utterance as a (sample_rate, samples) tuple.
    """
    user_text = stt.stt(audio)
    if not user_text.strip():
        return
    # Generate the text response
    reply_text = response_generator(user_text)
    # Stream TTS audio chunks back to the user
    yield from tts.stream_tts_sync(reply_text)

# ------------------------------------------------------------------
# 6. Launch the app
# ------------------------------------------------------------------
if __name__ == "__main__":
    stream = Stream(
        handler=ReplyOnPause(chat_handler),
        modality="audio",
        mode="send-receive",
    )
    stream.ui.launch(server_name="0.0.0.0", server_port=7860)
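
# Alternative deployment (a sketch; assumes fastapi and uvicorn are installed,
# and that this file is named app.py): mount the stream on a FastAPI app
# instead of launching the built-in Gradio UI:
#
#   from fastapi import FastAPI
#   app = FastAPI()
#   stream.mount(app)
#   # run with: uvicorn app:app --host 0.0.0.0 --port 7860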