import gradio as gr
import pandas as pd
import edge_tts
import asyncio
import tempfile
import numpy as np
import soxr
from pydub import AudioSegment
import torch
import sentencepiece as spm
import onnxruntime as ort
from huggingface_hub import hf_hub_download
# Load Menu Data
def load_menu():
    menu_file = "menu.xlsx"
    try:
        return pd.read_excel(menu_file)
    except Exception as e:
        raise ValueError(f"Error loading menu file: {e}")
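
# Optional helper for local testing: writes a minimal menu.xlsx with the
# columns the rest of this app expects ("Dish Name", "Price ($)",
# "Description", "Ingredients"). This is a hypothetical sketch, not part of
# the original app; the rows below are illustrative placeholders only.
def create_sample_menu(path="menu.xlsx"):
    sample = pd.DataFrame({
        "Dish Name": ["Paneer Tikka", "Chicken Biryani"],
        "Price ($)": [9.99, 12.49],
        "Description": ["Grilled cottage cheese. Fat: 8g", "Spiced rice with chicken. Fat: 14g"],
        "Ingredients": ["Paneer, Spices", "Chicken, Rice, Spices"],
    })
    sample.to_excel(path, index=False)  # writing .xlsx requires openpyxl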
# Filter Menu Items
def filter_menu(preference):
    menu_data = load_menu()
    if preference == "Halal/Non-Veg":
        filtered_data = menu_data[menu_data["Ingredients"].str.contains("Chicken|Mutton|Fish|Prawns|Goat", case=False, na=False)]
    elif preference == "Vegetarian":
        filtered_data = menu_data[~menu_data["Ingredients"].str.contains("Chicken|Mutton|Fish|Prawns|Goat", case=False, na=False)]
    elif preference == "Guilt-Free":
        # Keep dishes whose description lists at most 10 g of fat; the
        # non-capturing group avoids pandas' "match groups" warning
        filtered_data = menu_data[menu_data["Description"].str.contains(r"Fat: (?:[0-9]|10)g", case=False, na=False)]
    else:
        filtered_data = menu_data
menu_html = """" # Prepare dynamic HTML for the menu | |
for _, item in filtered_data.iterrows(): | |
menu_html += f""" | |
<div> | |
<h3>{item['Dish Name']}</h3> | |
<p>Price: ${item['Price ($)']}</p> | |
<p>Description: {item['Description']}</p> | |
</div> | |
""" | |
return menu_html | |
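
# Usage: filter_menu("Vegetarian") returns an HTML string of <div> cards for
# every dish whose Ingredients column mentions no meat keyword;
# filter_menu("All") (or any other value) returns the full menu.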
# Speech Recognition Model Configuration
model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
sample_rate = 16000
preprocessor = torch.jit.load(hf_hub_download(model_name, "preprocessor.ts", subfolder="onnx"))
encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfolder="onnx"))
tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
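
# The three artifacts above form a CTC pipeline: the TorchScript preprocessor
# turns raw 16 kHz audio into mel-spectrogram features, the ONNX encoder maps
# those features to per-frame token logits, and the SentencePiece tokenizer
# converts decoded token ids back into text (see transcribe() below).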
async def respond(audio_path, preference):
    # Transcribe audio to text
    transcription = transcribe(audio_path)
    # Voice-based interaction logic
    if "vegetarian" in transcription.lower():
        preference = "Vegetarian"
    elif "non-veg" in transcription.lower() or "halal" in transcription.lower():
        preference = "Halal/Non-Veg"
    elif "guilt-free" in transcription.lower():
        preference = "Guilt-Free"
    elif "menu details" in transcription.lower():
        preference = "All"
    # Filter menu based on preference
    menu_html = filter_menu(preference)
    # Text-to-Speech Response
    reply = f"Here are some {preference} dishes available." if preference != "All" else "Here are all the menu details available."
    communicate = edge_tts.Communicate(reply)
    # edge-tts emits MP3 audio by default, so use a matching suffix
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path, menu_html
def transcribe(audio_path):
    audio_file = AudioSegment.from_file(audio_path)
    sr = audio_file.frame_rate
    audio_file = audio_file.set_channels(1)  # the model expects mono input
    audio_buffer = np.array(audio_file.get_array_of_samples())
    audio_fp32 = np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
    audio_16k = soxr.resample(audio_fp32, sr, sample_rate)
    input_signal = torch.tensor(audio_16k).unsqueeze(0)
    length = torch.tensor(len(audio_16k)).unsqueeze(0)
    processed_signal, processed_length = preprocessor.forward(input_signal=input_signal, length=length)
    # The ONNX encoder's length input expects feature-frame counts from the
    # preprocessor, not the raw sample count
    logits = encoder.run(None, {'audio_signal': processed_signal.numpy(), 'length': processed_length.numpy()})[0][0]
    blank_id = tokenizer.vocab_size()
    decoded_prediction = [p for p in logits.argmax(axis=1).tolist() if p != blank_id]
    text = tokenizer.decode_ids(decoded_prediction)
    return text
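
# Optional smoke test (hypothetical, not part of the original app): synthesize
# one second of silence with pydub and run it through transcribe(); silence
# should decode to an empty (or near-empty) string.
def _smoke_test_transcribe():
    silent = AudioSegment.silent(duration=1000, frame_rate=sample_rate)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        silent.export(f.name, format="wav")
        print(repr(transcribe(f.name)))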
with gr.Blocks() as demo:
    with gr.Row():
        # NOTE: /mnt/data/... is a local placeholder path and will not resolve
        # in a deployed Space; replace it with a bundled asset if needed.
        gr.Markdown("""
        <div style="text-align: right; margin-bottom: 10px;">
            <img src="/mnt/data/Screenshot%202024-12-28%20102122.png" alt="Microphone Icon" style="width: 30px; height: 30px; cursor: pointer;">
        </div>
        """)
    # Gradio 3.x API; Gradio 4+ renamed this parameter to sources=["microphone"]
    audio_input = gr.Audio(label="Speak your preference", source="microphone", type="filepath")
    preference = gr.Textbox(label="Current Preference", value="All")
    audio_output = gr.Audio(label="Assistant Response", autoplay=True)
    menu_output = gr.HTML(label="Menu Suggestions")
    audio_input.change(respond, inputs=[audio_input, preference], outputs=[audio_output, menu_output])

if __name__ == "__main__":
    demo.queue().launch()
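
# Suggested requirements.txt for this Space (package names inferred from the
# imports above; versions left unpinned as an assumption):
#   gradio
#   pandas
#   openpyxl         # needed by pd.read_excel for .xlsx files
#   edge-tts
#   numpy
#   soxr
#   pydub            # needs ffmpeg on the system for non-wav inputs
#   torch
#   sentencepiece
#   onnxruntime
#   huggingface_hub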