ALVHB95 committed on
Commit c7c56dc · 1 Parent(s): 18629c0
Files changed (1)
  1. app.py +10 -10
app.py CHANGED
@@ -122,24 +122,24 @@ qa_chain = ConversationalRetrievalChain.from_llm(
 import soundfile as sf
 import gradio as gr
 from gradio_client import Client
+import numpy as np
 
 # Load ASR pipeline
-asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large")
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-large")
 
 def chat_interface(question, audio_input=None, history=None):
     if audio_input is not None:
         # Function to transcribe the audio input
-        def transcribe_audio(audio):
-            sample_rate, audio_data = audio
-            file_name = "recorded_audio.wav"
-            # Write audio data to a file
-            sf.write(file_name, audio_data, sample_rate)
-            # Transcribe audio using ASR model
-            transcript = asr_pipe(file_name)["text"]
-            return transcript
+
+        def transcribe(audio):
+            sr, y = audio
+            y = y.astype(np.float32)
+            y /= np.max(np.abs(y))
+
+            return transcriber({"sampling_rate": sr, "raw": y})["text"]
 
         # Transcribe the audio input
-        transcribed_text = transcribe_audio(audio_input)
+        transcribed_text = transcribe(audio_input)
 
         # Call the chatbot API with the transcribed text
         client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
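
For reference, a minimal standalone sketch of the pattern this commit adopts: rather than writing the recording to a WAV file on disk and transcribing the file, the (sample_rate, samples) tuple that a Gradio microphone input yields is normalized and fed directly to the transformers ASR pipeline. The gr.Interface wiring, the Gradio 4 sources= keyword, and the peak > 0 guard are illustrative assumptions, not part of the commit; the guard avoids the division by zero that the committed code would hit on an all-silent clip.

import numpy as np
import gradio as gr
from transformers import pipeline

# Load the ASR pipeline once at startup (model name taken from the commit)
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-large")

def transcribe(audio):
    sr, y = audio                 # gr.Audio yields (sample_rate, np.ndarray)
    y = y.astype(np.float32)      # pipeline expects float samples
    peak = np.max(np.abs(y))
    if peak > 0:                  # assumption: skip normalization of silent clips
        y /= peak                 # scale into [-1, 1]
    # Pass the raw array and its sampling rate straight to the pipeline
    return transcriber({"sampling_rate": sr, "raw": y})["text"]

demo = gr.Interface(transcribe, gr.Audio(sources=["microphone"]), "text")
demo.launch()

The normalization matters because Gradio hands back integer PCM samples, while the Whisper pipeline expects float audio; skipping the intermediate file also removes one disk write per request along with the soundfile dependency on this path.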