itsanurag committed on
Commit
29b0098
·
verified ·
1 Parent(s): ffef4ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -22
app.py CHANGED
@@ -13,18 +13,19 @@ client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
13
  # Initialize DuckDuckGo search tool
14
  duckduckgo_search = DuckDuckGoSearchRun()
15
 
 
16
  default_lang = "en"
17
  engines = { default_lang: Model(default_lang) }
18
 
19
  def transcribe(audio):
 
20
  lang = "en"
21
  model = engines[lang]
22
  text = model.stt_file(audio)[0]
23
  return text
24
 
25
- system_instructions1 = "<s>[SYSTEM] Answer as Real Jarvis JARVIS, Made by 'Tony Stark', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses as if You are the character Jarvis, made by 'Tony Stark.' The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
26
-
27
  def format_prompt(message, history):
 
28
  prompt = "<s>"
29
  for user_prompt, bot_response in history:
30
  prompt += f"[INST] {user_prompt} [/INST]"
@@ -33,6 +34,7 @@ def format_prompt(message, history):
33
  return prompt
34
 
35
  def generate(prompt, history, temperature=0.9, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
 
36
  temperature = float(temperature)
37
  if temperature < 1e-2:
38
  temperature = 1e-2
@@ -54,10 +56,8 @@ def generate(prompt, history, temperature=0.9, max_new_tokens=512, top_p=0.95, r
54
 
55
  for response in stream:
56
  output += response.token.text
57
- # Yield model's response first
58
  yield output
59
 
60
- # Now, perform DuckDuckGo search and yield results
61
  search_result = duckduckgo_search.run(prompt)
62
  if search_result:
63
  yield search_result
@@ -65,13 +65,26 @@ def generate(prompt, history, temperature=0.9, max_new_tokens=512, top_p=0.95, r
65
  yield "Sorry, I couldn't find any relevant information."
66
 
67
  async def respond(audio):
68
- user = transcribe(audio)
69
- reply = model(user)
70
- communicate = edge_tts.Communicate(reply)
71
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
72
- tmp_path = tmp_file.name
73
- await communicate.save(tmp_path)
74
- yield tmp_path
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  additional_inputs = [
77
  gr.Slider(
@@ -124,21 +137,11 @@ with gr.Blocks(css=customCSS) as demo:
124
  gr.Markdown("Getting real-time updated results for prompts is still proprietary in the face of GPT-4, Co-Pilot etc. This app serves as an open-source alternative for this! UPDATE: Previous version of this app i.e. RAG_FRIDAY_mark_2 has faced some technical issues due to rate limit errors. Problem and solution have been updated by me thanks to this community thread: https://github.com/joaomdmoura/crewAI/issues/136")
125
 
126
  with gr.Row():
127
- input_audio = gr.Audio(label="Voice Chat (BETA)", sources="microphone", type="filepath", waveform_options=False)
128
  output_audio = gr.Audio(label="JARVIS", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
129
  gr.Interface(fn=respond, inputs=[input_audio], outputs=[output_audio], live=True)
130
 
131
  gr.Markdown("## Additional Parameters")
132
- with gr.Row():
133
- user_input = gr.Textbox(label="Prompt", value="What is Wikipedia")
134
- input_text = gr.Textbox(label="Input Text", elem_id="important")
135
- output_audio = gr.Audio(label="JARVIS", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
136
-
137
- with gr.Row():
138
- translate_btn = gr.Button("Response")
139
- translate_btn.click(fn=generate, inputs=user_input, outputs=output_audio, api_name="translate")
140
-
141
- gr.Markdown("## Additional Settings")
142
  for slider in additional_inputs:
143
  slider.render()
144
 
 
13
  # Initialize DuckDuckGo search tool
14
  duckduckgo_search = DuckDuckGoSearchRun()
15
 
16
+ # Initialize ASR model
17
  default_lang = "en"
18
  engines = { default_lang: Model(default_lang) }
19
 
20
  def transcribe(audio):
21
+ """Transcribes the audio file to text."""
22
  lang = "en"
23
  model = engines[lang]
24
  text = model.stt_file(audio)[0]
25
  return text
26
 
 
 
27
  def format_prompt(message, history):
28
+ """Formats the prompt for the language model."""
29
  prompt = "<s>"
30
  for user_prompt, bot_response in history:
31
  prompt += f"[INST] {user_prompt} [/INST]"
 
34
  return prompt
35
 
36
  def generate(prompt, history, temperature=0.9, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
37
+ """Generates a response from the language model."""
38
  temperature = float(temperature)
39
  if temperature < 1e-2:
40
  temperature = 1e-2
 
56
 
57
  for response in stream:
58
  output += response.token.text
 
59
  yield output
60
 
 
61
  search_result = duckduckgo_search.run(prompt)
62
  if search_result:
63
  yield search_result
 
65
  yield "Sorry, I couldn't find any relevant information."
66
 
67
  async def respond(audio):
68
+ """Handles the full pipeline: transcribe, generate response, and TTS."""
69
+ try:
70
+ # Transcribe audio to text
71
+ user_text = transcribe(audio)
72
+
73
+ # Generate response using the language model
74
+ history = []
75
+ response_generator = generate(user_text, history)
76
+ response_text = ""
77
+ for response in response_generator:
78
+ response_text = response
79
+
80
+ # Convert the text response to speech
81
+ communicate = edge_tts.Communicate(response_text)
82
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
83
+ tmp_path = tmp_file.name
84
+ await communicate.save(tmp_path)
85
+ return tmp_path
86
+ except Exception as e:
87
+ return str(e)
88
 
89
  additional_inputs = [
90
  gr.Slider(
 
137
  gr.Markdown("Getting real-time updated results for prompts is still proprietary in the face of GPT-4, Co-Pilot etc. This app serves as an open-source alternative for this! UPDATE: Previous version of this app i.e. RAG_FRIDAY_mark_2 has faced some technical issues due to rate limit errors. Problem and solution have been updated by me thanks to this community thread: https://github.com/joaomdmoura/crewAI/issues/136")
138
 
139
  with gr.Row():
140
+ input_audio = gr.Audio(label="Voice Chat (BETA)", source="microphone", type="filepath", waveform_options=False)
141
  output_audio = gr.Audio(label="JARVIS", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
142
  gr.Interface(fn=respond, inputs=[input_audio], outputs=[output_audio], live=True)
143
 
144
  gr.Markdown("## Additional Parameters")
 
 
 
 
 
 
 
 
 
 
145
  for slider in additional_inputs:
146
  slider.render()
147