# NOTE(review): the lines that preceded this file's imports were scrape
# artifacts (Space runtime status, git blame hashes, and a line-number
# gutter), not source code; they have been removed so the file parses.
import os
import time
import requests
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from elevenlabs import ElevenLabs
from fastapi import FastAPI
from fastrtc import (
AdditionalOutputs,
ReplyOnPause,
Stream,
get_stt_model,
get_twilio_turn_credentials,
)
from gradio.utils import get_space
from numpy.typing import NDArray
# Load environment variables from a local .env file (DeepSeek/ElevenLabs API
# keys and, presumably, the Twilio credentials read by fastrtc — confirm).
load_dotenv()
# Initialize DeepSeek client
class DeepSeekAPI:
    """Minimal client for the DeepSeek chat-completions HTTP API."""

    def __init__(self, api_key):
        # Key is sent as a Bearer token on every request.
        self.api_key = api_key

    def chat_completion(self, messages, temperature=0.7, max_tokens=512):
        """POST a chat completion request and return the parsed JSON body.

        Args:
            messages: list of {"role": ..., "content": ...} chat messages.
            temperature: sampling temperature forwarded to the API.
            max_tokens: response-length cap forwarded to the API.

        Returns:
            The decoded JSON response on success. On an HTTP error status
            or a network failure, a response-shaped dict carrying a fallback
            apology message, so callers can always read
            ``["choices"][0]["message"]["content"]`` without raising.
        """
        url = "https://api.deepseek.com/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }
        payload = {
            "model": "deepseek-chat",
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens
        }
        # Fallback mirrors the API's response shape so the caller's chained
        # ["choices"][0]["message"]["content"] access never raises.
        fallback = {"choices": [{"message": {"content": "I'm sorry, I encountered an error processing your request."}}]}
        try:
            # timeout= keeps the voice handler from hanging indefinitely if
            # the API stalls (the original call had no timeout at all).
            response = requests.post(url, json=payload, headers=headers, timeout=30)
        except requests.RequestException as exc:
            # Connection/timeout errors previously propagated and crashed
            # the handler; degrade to the same fallback as HTTP errors.
            print(f"DeepSeek API request failed: {exc}")
            return fallback
        if response.status_code != 200:
            print(f"DeepSeek API error: {response.status_code} - {response.text}")
            return fallback
        return response.json()
# Wire up the three backends: DeepSeek for chat completions, ElevenLabs for
# speech synthesis, and the local FastRTC speech-to-text model.
deepseek_client = DeepSeekAPI(api_key=os.getenv("DEEPSEEK_API_KEY"))
tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
stt_model = get_stt_model()

# TURN credentials for WebRTC. get_twilio_turn_credentials() takes no
# keyword arguments — it reads the Twilio env vars directly.
twilio_credentials = get_twilio_turn_credentials()

# Report whether TURN relaying is available for this run.
print(
    "Twilio TURN credentials successfully configured"
    if twilio_credentials
    else "No Twilio credentials found or invalid credentials"
)
def response(
    audio: tuple[int, NDArray[np.int16 | np.float32]],
    chatbot: list[dict] | None = None,
):
    """Handle one voice turn: transcribe, query DeepSeek, stream TTS audio.

    Args:
        audio: (sample_rate, samples) tuple delivered by FastRTC.
        chatbot: running Gradio message history; treated as empty if None.

    Yields:
        ``AdditionalOutputs(chatbot)`` after the user turn is recorded and
        again after the assistant turn, with ``(24000, pcm_chunk)`` audio
        tuples streamed in between for playback.
    """
    chatbot = chatbot or []
    messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
    start = time.time()
    text = stt_model.stt(audio)
    print("transcription", time.time() - start)
    print("prompt", text)
    # Guard: if the STT model produced nothing (silence/noise), skip the
    # LLM and TTS round-trips entirely — the original code would still
    # send an empty prompt to the API.
    if not text or not text.strip():
        yield AdditionalOutputs(chatbot)
        return
    chatbot.append({"role": "user", "content": text})
    yield AdditionalOutputs(chatbot)
    messages.append({"role": "user", "content": text})
    # DeepSeek replaces the previous Groq LLM backend.
    response_data = deepseek_client.chat_completion(
        messages=messages,
        max_tokens=512
    )
    response_text = response_data["choices"][0]["message"]["content"]
    chatbot.append({"role": "assistant", "content": response_text})
    # ElevenLabs streams raw 16-bit PCM at 24 kHz; reshape each chunk to
    # (1, n) so FastRTC receives mono frames.
    for chunk in tts_client.text_to_speech.convert_as_stream(
        text=response_text,
        voice_id="JBFqnCBsd6RMkjVDRZzb",
        model_id="eleven_multilingual_v2",
        output_format="pcm_24000",
    ):
        audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
        yield (24000, audio_array)
    yield AdditionalOutputs(chatbot)
# Chat history widget, shared as both an additional input and an additional
# output of the audio stream so the conversation persists across turns.
chatbot = gr.Chatbot(type="messages")

stream = Stream(
    handler=ReplyOnPause(response, input_sample_rate=16000),
    modality="audio",
    mode="send-receive",
    additional_inputs=[chatbot],
    additional_outputs=[chatbot],
    # Keep only the updated history; the stale value is discarded.
    additional_outputs_handler=lambda stale, updated: updated,
    rtc_configuration=twilio_credentials,  # always route via Twilio TURN
    # Tighter limits when hosted on a Space; unlimited locally.
    concurrency_limit=5 if get_space() else None,
    time_limit=90 if get_space() else None,
    ui_args={"title": "LLM Voice Chat (Powered by DeepSeek, ElevenLabs, and WebRTC ⚡️)"},
)
# Mount the stream's Gradio UI at the root path of a FastAPI application,
# so an ASGI server (e.g. uvicorn pointing at `app`) serves it over HTTP.
app = FastAPI()
app = gr.mount_gradio_app(app, stream.ui, path="/")
if __name__ == "__main__":
    # `os` is already imported at the top of the file; the redundant local
    # import has been removed.
    # Force-disable Gradio server-side rendering for script launches —
    # presumably to avoid SSR issues in this environment; confirm.
    os.environ["GRADIO_SSR_MODE"] = "false"
    # MODE selects the entry point: "PHONE" starts the FastRTC phone
    # endpoint; any other value — including "UI" or unset — launches the
    # Gradio UI (the original "UI" and fallback branches were identical,
    # so they are merged here).
    if os.getenv("MODE") == "PHONE":
        stream.fastphone(host="0.0.0.0", port=7860)
    else:
        stream.ui.launch(server_port=7860)