File size: 4,710 Bytes
671772e 89242a2 671772e 313da01 66c0d4d 313da01 66c0d4d 313da01 4d86f0d 66c0d4d 4d86f0d 66c0d4d 313da01 66c0d4d 51bfbfb 89242a2 313da01 89242a2 66c0d4d 89242a2 313da01 66c0d4d 313da01 89242a2 66c0d4d 89242a2 66c0d4d 89242a2 313da01 bedf11b 00bdaed bedf11b 00bdaed bedf11b 313da01 4d86f0d 00bdaed 4d86f0d 313da01 00bdaed 313da01 00bdaed 313da01 00bdaed 313da01 89242a2 b93fe8f 313da01 00bdaed 313da01 bedf11b 4a733bb 313da01 671772e 4a733bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import gradio as gr
import whisper
from deep_translator import GoogleTranslator
from gtts import gTTS
import tempfile
import os
import warnings
# Silence every warning category for the whole process (equivalent to a
# catch-all "ignore" filter with no message/module restriction).
warnings.simplefilter("ignore")

# Load the Whisper "base" checkpoint once at import time so all requests
# handled by translate_audio share the same model instance.
model = whisper.load_model(name="base")
# The UI and Whisper use the bare "zh" code, but deep_translator's
# GoogleTranslator lists Chinese only as "zh-CN"/"zh-TW"; gTTS accepts
# "zh-CN" as well, so the same override is used for both services.
# NOTE(review): confirm against the installed deep_translator/gTTS versions.
_SERVICE_LANG_OVERRIDES = {"zh": "zh-CN"}


def _service_lang(code):
    """Return the language code to send to the translation/TTS services."""
    return _SERVICE_LANG_OVERRIDES.get(code, code)


def _translate_text(text, source_lang, target_lang):
    """Translate *text*, or return it unchanged when both languages match."""
    if source_lang == target_lang:
        return text
    translator = GoogleTranslator(
        source=_service_lang(source_lang),
        target=_service_lang(target_lang),
    )
    return translator.translate(text)


def _synthesize_speech(text, lang):
    """Render *text* to a temporary .mp3 file with gTTS and return its path."""
    # Reserve a unique file name, then close the handle before gTTS writes to
    # it: re-opening a still-open temporary file by name is not portable.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file:
        mp3_path = tmp_file.name
    try:
        gTTS(text=text, lang=_service_lang(lang), slow=False).save(mp3_path)
    except Exception:
        # Do not leave an empty/partial file behind when synthesis fails.
        try:
            os.remove(mp3_path)
        except OSError:
            pass  # best-effort cleanup; the original error is what matters
        raise
    return mp3_path


def translate_audio(text_input, upload_audio, mic_audio, source_lang="tr", target_lang="en"):
    """Translate speech or text and return the translation as spoken audio.

    Input priority is: microphone recording, then uploaded audio file, then
    typed text. Audio inputs are transcribed with the module-level Whisper
    ``model`` before being translated.

    Args:
        text_input: Text to translate; used only when no audio is supplied.
        upload_audio: Path of an uploaded audio file, or None/"" if absent.
        mic_audio: Path of a microphone recording, or None/"" if absent.
        source_lang: Language code of the input (default "tr").
        target_lang: Language code of the output (default "en").

    Returns:
        A 4-tuple ``(audio_path, original_msg, translated_msg, source_msg)``.
        ``audio_path`` is the path of the generated .mp3, or None when there
        was no usable input or any step failed; in that case the message
        fields describe the problem instead of carrying the texts.
    """
    if mic_audio:
        audio_file, audio_source = mic_audio, "Microphone"
        print(f"Using microphone audio: {mic_audio}")
    elif upload_audio:
        # Empty paths are treated as "no upload", matching the microphone check.
        audio_file, audio_source = upload_audio, "Upload"
        print(f"Using uploaded audio: {upload_audio}")
    elif text_input and text_input.strip():
        try:
            original_text = text_input.strip()
            translated_text = _translate_text(original_text, source_lang, target_lang)
            mp3_path = _synthesize_speech(translated_text, target_lang)
            return (
                mp3_path,
                f"Original: {original_text}",
                f"Translated: {translated_text}",
                " Source: Text Input",
            )
        except Exception as e:
            # UI boundary: report the failure in the output boxes instead of raising.
            return None, f" Error: {str(e)}", "Please try again", "Source: Text Input"
    else:
        return None, " No input provided", "Please upload audio, record with microphone, OR enter text", ""

    try:
        print(f"Processing {audio_source} audio: {audio_file}")
        # Whisper is given the original (unmapped) language code.
        result = model.transcribe(audio_file, language=source_lang, fp16=False)
        original_text = result["text"].strip()
        if not original_text:
            return None, " No speech detected", "Please try again with clearer audio", f"Source: {audio_source}"
        print(f"Transcribed: {original_text}")

        translated_text = _translate_text(original_text, source_lang, target_lang)
        print(f"Translated: {translated_text}")

        mp3_path = _synthesize_speech(translated_text, target_lang)
        return (
            mp3_path,
            f"Original: {original_text}",
            f" Translated: {translated_text}",
            f" Source: {audio_source}",
        )
    except Exception as e:
        # UI boundary: log and report the failure instead of raising.
        print(f"Error: {str(e)}")
        return None, f" Error: {str(e)}", "Please try again", f"Source: {audio_source}"
# Two-letter language codes offered in the UI, mapped to their English
# display names. Insertion order is the order shown in the dropdowns.
languages = dict(
    tr="Turkish",
    en="English",
    fr="French",
    es="Spanish",
    de="German",
    hi="Hindi",
    ja="Japanese",
    ar="Arabic",
    ru="Russian",
    zh="Chinese",
)
# Dropdown entries as (label, value) pairs: the user sees the language name,
# while translate_audio still receives the bare code (e.g. "tr").
_language_choices = [(f"{name} ({code})", code) for code, name in languages.items()]

# Gradio UI: three alternative inputs (text, uploaded audio, microphone) plus
# source/target language selectors, wired to translate_audio. The four outputs
# correspond, in order, to the 4-tuple that translate_audio returns.
demo = gr.Interface(
    fn=translate_audio,
    inputs=[
        gr.Textbox(
            label="Enter text to translate",
            placeholder="Type text here or leave empty to use audio input...",
            lines=2
        ),
        gr.Audio(
            type="filepath",
            sources=["upload"],
            label="Upload Audio File"
        ),
        gr.Audio(
            sources=["microphone"],
            label="OR Record with Microphone",
            type="filepath"
        ),
        gr.Dropdown(
            choices=_language_choices,
            value="tr",
            label="Source Language"
        ),
        gr.Dropdown(
            choices=_language_choices,
            value="en",
            label="Target Language"
        )
    ],
    outputs=[
        gr.Audio(label="Translated Audio Output"),
        gr.Textbox(label="Original Text"),
        gr.Textbox(label="Translated Text"),
        gr.Textbox(label="Audio Source Info")
    ],
    title="VoiceAI61",
    article="""
<div style="text-align: center; margin-top: 20px;">
<p>📱 <strong>dear users:</strong> trabzon champion. </p>
</div>
""",
    # Each example row follows the input order:
    # [text, uploaded audio, microphone audio, source language, target language].
    examples=[
        ["", None, None, "tr", "en"],
        ["", None, None, "en", "fr"],
        ["Hello world", None, None, "en", "es"],
    ],
    allow_flagging="never"
)
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script,
    # not when it is imported as a module.
    demo.launch()