mguven61 committed on
Commit
51bfbfb
·
verified ·
1 Parent(s): 9248fab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -36
app.py CHANGED
@@ -12,21 +12,20 @@ import soundfile as sf
12
  class RealTimeTranslator:
13
  def __init__(self):
14
  self.model = whisper.load_model("base")
15
-
16
  self.languages = {
17
  'en': 'English',
18
- 'fr': 'French',
19
  'hi': 'Hindi',
20
  'es': 'Spanish',
21
  'de': 'German',
22
  'ja': 'Japanese',
23
  'tr': 'Turkish'
24
  }
25
-
26
  def speech_to_text(self, audio_path, source_lang):
27
  try:
28
  result = self.model.transcribe(
29
- audio_path,
30
  language=source_lang,
31
  temperature=0.0,
32
  best_of=5,
@@ -35,74 +34,138 @@ class RealTimeTranslator:
35
  return result["text"]
36
  except Exception as e:
37
  return f"Error in speech-to-text: {str(e)}"
38
-
39
  def translate_text(self, text, source_lang, target_lang):
40
  try:
 
 
41
  translation = GoogleTranslator(source=source_lang, target=target_lang).translate(text)
42
  return translation
43
  except Exception as e:
44
  return f"Error in translation: {str(e)}"
45
-
46
  def text_to_speech(self, text, target_lang):
47
  try:
48
- with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as fp:
49
- tts = gTTS(text=text, lang=target_lang)
50
  tts.save(fp.name)
51
  return fp.name
52
  except Exception as e:
53
  return f"Error in text-to-speech: {str(e)}"
54
-
55
  def process_audio(self, audio, source_lang, target_lang):
56
  try:
57
  if audio is None:
58
  return None, "No audio input received", "Please provide audio input"
59
-
 
 
 
 
 
 
 
 
60
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as fp:
61
- sf.write(fp.name, audio[1], audio[0])
62
  audio_path = fp.name
63
-
 
64
  text = self.speech_to_text(audio_path, source_lang)
65
  if "Error" in text:
 
66
  return None, text, ""
67
-
 
68
  translated_text = self.translate_text(text, source_lang, target_lang)
69
  if "Error" in translated_text:
 
70
  return None, text, translated_text
71
-
 
72
  output_audio_path = self.text_to_speech(translated_text, target_lang)
73
  if "Error" in output_audio_path:
 
74
  return None, text, translated_text
75
-
76
- output_audio, sr = librosa.load(output_audio_path)
77
-
 
 
78
  os.unlink(audio_path)
79
  os.unlink(output_audio_path)
80
-
81
  return (sr, output_audio), text, translated_text
82
-
83
  except Exception as e:
84
  return None, f"Error: {str(e)}", f"Error: {str(e)}"
85
 
86
  def create_gradio_interface():
87
  translator = RealTimeTranslator()
88
-
89
- demo = gr.Interface(
90
- fn=translator.process_audio,
91
- inputs=[
92
- gr.Audio(sources=["microphone"], type="numpy", label="Audio Input", streaming=False),
93
- gr.Dropdown(choices=list(translator.languages.keys()), value="tr", label="Source Language"),
94
- gr.Dropdown(choices=list(translator.languages.keys()), value="en", label="Target Language")
95
- ],
96
- outputs=[
97
- gr.Audio(label="Translated Audio"),
98
- gr.Textbox(label="Original Text"),
99
- gr.Textbox(label="Translated Text")
100
- ],
101
- title="VoΔ±ceAI61",
102
- allow_flagging="never"
103
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  return demo
105
 
106
  if __name__ == "__main__":
107
  demo = create_gradio_interface()
108
- demo.launch(share=True, debug=True)
 
 
 
 
 
 
12
  class RealTimeTranslator:
13
  def __init__(self):
14
  self.model = whisper.load_model("base")
 
15
  self.languages = {
16
  'en': 'English',
17
+ 'fr': 'French',
18
  'hi': 'Hindi',
19
  'es': 'Spanish',
20
  'de': 'German',
21
  'ja': 'Japanese',
22
  'tr': 'Turkish'
23
  }
24
+
25
  def speech_to_text(self, audio_path, source_lang):
26
  try:
27
  result = self.model.transcribe(
28
+ audio_path,
29
  language=source_lang,
30
  temperature=0.0,
31
  best_of=5,
 
34
  return result["text"]
35
  except Exception as e:
36
  return f"Error in speech-to-text: {str(e)}"
37
+
38
  def translate_text(self, text, source_lang, target_lang):
39
  try:
40
+ if source_lang == target_lang:
41
+ return text
42
  translation = GoogleTranslator(source=source_lang, target=target_lang).translate(text)
43
  return translation
44
  except Exception as e:
45
  return f"Error in translation: {str(e)}"
46
+
47
  def text_to_speech(self, text, target_lang):
48
  try:
49
+ with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as fp:
50
+ tts = gTTS(text=text, lang=target_lang, slow=False)
51
  tts.save(fp.name)
52
  return fp.name
53
  except Exception as e:
54
  return f"Error in text-to-speech: {str(e)}"
55
+
56
  def process_audio(self, audio, source_lang, target_lang):
57
  try:
58
  if audio is None:
59
  return None, "No audio input received", "Please provide audio input"
60
+
61
+ # Handle different audio input formats
62
+ if isinstance(audio, tuple):
63
+ sample_rate, audio_data = audio
64
+ else:
65
+ # If audio is a file path (uploaded file)
66
+ audio_data, sample_rate = librosa.load(audio, sr=None)
67
+
68
+ # Save audio to temporary file
69
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as fp:
70
+ sf.write(fp.name, audio_data, sample_rate)
71
  audio_path = fp.name
72
+
73
+ # Speech to text
74
  text = self.speech_to_text(audio_path, source_lang)
75
  if "Error" in text:
76
+ os.unlink(audio_path)
77
  return None, text, ""
78
+
79
+ # Translate text
80
  translated_text = self.translate_text(text, source_lang, target_lang)
81
  if "Error" in translated_text:
82
+ os.unlink(audio_path)
83
  return None, text, translated_text
84
+
85
+ # Text to speech
86
  output_audio_path = self.text_to_speech(translated_text, target_lang)
87
  if "Error" in output_audio_path:
88
+ os.unlink(audio_path)
89
  return None, text, translated_text
90
+
91
+ # Load output audio
92
+ output_audio, sr = librosa.load(output_audio_path, sr=22050)
93
+
94
+ # Clean up temporary files
95
  os.unlink(audio_path)
96
  os.unlink(output_audio_path)
97
+
98
  return (sr, output_audio), text, translated_text
99
+
100
  except Exception as e:
101
  return None, f"Error: {str(e)}", f"Error: {str(e)}"
102
 
103
  def create_gradio_interface():
104
  translator = RealTimeTranslator()
105
+
106
+ with gr.Blocks(title="VoiceAI61 - Real-Time Voice Translator") as demo:
107
+ gr.Markdown("# 🎀 VoiceAI61 - Real-Time Voice Translator")
108
+ gr.Markdown("Record your voice or upload an audio file to translate between languages!")
109
+
110
+ with gr.Row():
111
+ with gr.Column():
112
+ # Audio input with both microphone and file upload
113
+ audio_input = gr.Audio(
114
+ sources=["microphone", "upload"],
115
+ type="filepath",
116
+ label="🎀 Record Audio or Upload File"
117
+ )
118
+
119
+ with gr.Row():
120
+ source_lang = gr.Dropdown(
121
+ choices=list(translator.languages.keys()),
122
+ value="tr",
123
+ label="πŸ—£οΈ Source Language"
124
+ )
125
+ target_lang = gr.Dropdown(
126
+ choices=list(translator.languages.keys()),
127
+ value="en",
128
+ label="🌍 Target Language"
129
+ )
130
+
131
+ translate_btn = gr.Button("πŸ”„ Translate", variant="primary", size="lg")
132
+
133
+ with gr.Column():
134
+ audio_output = gr.Audio(label="πŸ”Š Translated Audio")
135
+ original_text = gr.Textbox(label="πŸ“ Original Text", lines=3)
136
+ translated_text = gr.Textbox(label="🌐 Translated Text", lines=3)
137
+
138
+ # Event handlers
139
+ translate_btn.click(
140
+ fn=translator.process_audio,
141
+ inputs=[audio_input, source_lang, target_lang],
142
+ outputs=[audio_output, original_text, translated_text]
143
+ )
144
+
145
+ # Auto-translate when audio is recorded/uploaded
146
+ audio_input.change(
147
+ fn=translator.process_audio,
148
+ inputs=[audio_input, source_lang, target_lang],
149
+ outputs=[audio_output, original_text, translated_text]
150
+ )
151
+
152
+ # Examples
153
+ gr.Examples(
154
+ examples=[
155
+ [None, "en", "tr"],
156
+ [None, "tr", "en"],
157
+ [None, "fr", "en"],
158
+ ],
159
+ inputs=[audio_input, source_lang, target_lang],
160
+ )
161
+
162
  return demo
163
 
164
  if __name__ == "__main__":
165
  demo = create_gradio_interface()
166
+ demo.launch(
167
+ share=True,
168
+ debug=True,
169
+ server_name="0.0.0.0",
170
+ server_port=7860
171
+ )