mguven61 committed
Commit 671772e · verified · 1 Parent(s): 8cd1f2c

Upload 2 files

Files changed (2)
  1. app.py +113 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,113 @@
+import os
+import torch
+import gradio as gr
+import numpy as np
+import whisper
+from deep_translator import GoogleTranslator
+from gtts import gTTS
+import librosa
+import tempfile
+import soundfile as sf
+
+class RealTimeTranslator:
+    def __init__(self):
+        self.model = whisper.load_model("base")
+
+        self.languages = {
+            'en': 'English',
+            'fr': 'French',
+            'hi': 'Hindi',
+            'es': 'Spanish',
+            'de': 'German',
+            'ja': 'Japanese',
+            'tr': 'Turkish'
+        }
+
+    def speech_to_text(self, audio_path, source_lang):
+        try:
+            result = self.model.transcribe(
+                audio_path,
+                language=source_lang,
+                temperature=0.0,
+                best_of=5,
+                beam_size=5
+            )
+            return result["text"]
+        except Exception as e:
+            return f"Error in speech-to-text: {str(e)}"
+
+    def translate_text(self, text, source_lang, target_lang):
+        try:
+            translation = GoogleTranslator(source=source_lang, target=target_lang).translate(text)
+            return translation
+        except Exception as e:
+            return f"Error in translation: {str(e)}"
+
+    def text_to_speech(self, text, target_lang):
+        try:
+            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as fp:
+                tts = gTTS(text=text, lang=target_lang)
+                tts.save(fp.name)
+            return fp.name
+        except Exception as e:
+            return f"Error in text-to-speech: {str(e)}"
+
+    def process_audio(self, audio, source_lang, target_lang):
+        try:
+            if audio is None:
+                return None, "No audio input received", "Please provide audio input"
+
+            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as fp:
+                sf.write(fp.name, audio[1], audio[0])
+                audio_path = fp.name
+
+            text = self.speech_to_text(audio_path, source_lang)
+            if "Error" in text:
+                return None, text, ""
+
+            translated_text = self.translate_text(text, source_lang, target_lang)
+            if "Error" in translated_text:
+                return None, text, translated_text
+
+            output_audio_path = self.text_to_speech(translated_text, target_lang)
+            if "Error" in output_audio_path:
+                return None, text, translated_text
+
+            output_audio, sr = librosa.load(output_audio_path)
+
+            os.unlink(audio_path)
+            os.unlink(output_audio_path)
+
+            return (sr, output_audio), text, translated_text
+
+        except Exception as e:
+            return None, f"Error: {str(e)}", f"Error: {str(e)}"
+
+def create_gradio_interface():
+    translator = RealTimeTranslator()
+
+    demo = gr.Interface(
+        fn=translator.process_audio,
+        inputs=[
+            gr.Audio(sources=["microphone"], type="numpy", label="Audio Input"),
+            gr.Dropdown(choices=list(translator.languages.keys()), value="tr", label="Source Language"),
+            gr.Dropdown(choices=list(translator.languages.keys()), value="en", label="Target Language")
+        ],
+        outputs=[
+            gr.Audio(label="Translated Audio"),
+            gr.Textbox(label="Original Text"),
+            gr.Textbox(label="Translated Text")
+        ],
+        title="VoiceAI61",
+        examples=[
+            [None, "tr", "en"],
+            [None, "en", "tr"],
+            [None, "tr", "fr"],
+            [None, "es", "tr"]
+        ]
+    )
+    return demo
+
+if __name__ == "__main__":
+    demo = create_gradio_interface()
+    demo.launch(share=True, debug=True)
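
Not part of the commit, but a minimal sketch of how the pipeline in app.py can be exercised without the Gradio UI, assuming the packages in requirements.txt are installed and the machine has network access (Whisper downloads its "base" model on first use, and deep-translator and gTTS call external services):

# Sketch only: drive RealTimeTranslator.process_audio directly.
import numpy as np
from app import RealTimeTranslator

translator = RealTimeTranslator()

# Gradio's type="numpy" audio components pass audio as a (sample_rate, samples) tuple.
sample_rate = 16000
samples = np.zeros(sample_rate, dtype=np.float32)  # one second of silence as a stand-in input

audio_out, original_text, translated_text = translator.process_audio(
    (sample_rate, samples), source_lang="tr", target_lang="en"
)
print("Transcription:", original_text)
print("Translation:", translated_text)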
requirements.txt ADDED
@@ -0,0 +1,10 @@
+transformers
+torch
+gradio
+numpy
+deep-translator
+gtts
+librosa
+soundfile
+openai-whisper
+
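
After installing with pip install -r requirements.txt, a quick environment check (again a sketch, not part of the commit) is to confirm each package imports under its Python module name; note that deep-translator installs as deep_translator and openai-whisper installs as whisper:

# Sketch only: verify the listed dependencies are importable.
import importlib

modules = [
    "transformers", "torch", "gradio", "numpy",
    "deep_translator", "gtts", "librosa", "soundfile", "whisper",
]
for name in modules:
    importlib.import_module(name)  # raises ImportError if a package is missing
print("All dependencies importable.")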