Spaces:

Futuresony
/

Automatic-speech-recognition

Sleeping

App Files Files Community

Futuresony committed on Feb 9

Commit

3bb7afd

verified ·

1 Parent(s): 5019f14

Create app.py

Browse files

Files changed (1) hide show

app.py +49 -0

app.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import gradio as gr
+from ttsmms import download, TTS
+from langdetect import detect
+import os
+from pydub import AudioSegment
+from pydub.playback import play
+# Point pydub at an explicit ffmpeg binary so audio conversion works inside
+# Hugging Face Spaces.
+# NOTE(review): the path is hard-coded; confirm the binary exists at this
+# location in the deployment image.
+AudioSegment.converter = "/usr/bin/ffmpeg"
+# Download and load the TTS models. These calls run at import time, so the
+# module cannot be imported without them executing.
+# "swh" and "eng" are the model identifiers passed to ttsmms; the second
+# argument is the local target directory.
+# NOTE(review): presumably download() returns the directory the model was
+# stored in, which TTS() then loads — confirm against ttsmms.
+swahili_dir = download("swh", "./data/swahili")
+english_dir = download("eng", "./data/english")  # Ensure an English TTS model is available
+swahili_tts = TTS(swahili_dir)
+english_tts = TTS(english_dir)
+def text_to_speech(text):
+    """Synthesize mixed Swahili/English text into a single WAV file.
+
+    The text is split on whitespace and each word is synthesized on its own:
+    words that langdetect labels "sw" go to the Swahili model, everything
+    else (including words whose language cannot be detected) goes to the
+    English model. The per-word clips are concatenated in order.
+
+    Args:
+        text: The input string; may be empty or None.
+
+    Returns:
+        Path of the generated WAV file, or None when the input contains no
+        words (so there is nothing to synthesize).
+    """
+    # Function-scope imports keep this block self-contained; both modules
+    # are cheap to re-import after the first call.
+    import tempfile
+    from langdetect import LangDetectException
+
+    words = (text or "").split()
+    if not words:
+        # sum([]) would be the int 0, which has no .export(); bail out early.
+        return None
+
+    audio_clips = []
+    # Per-request scratch directory: file names are derived from the word
+    # index, never from user input, and the directory is removed even when
+    # synthesis fails part-way through.
+    with tempfile.TemporaryDirectory() as scratch_dir:
+        for index, word in enumerate(words):
+            try:
+                lang = detect(word)
+            except LangDetectException:
+                # Raised for tokens without usable features (e.g. digits or
+                # punctuation); fall back to the English model.
+                lang = None
+            tts = swahili_tts if lang == "sw" else english_tts
+            wav_path = os.path.join(scratch_dir, f"{index}.wav")
+            tts.synthesis(word, wav_path=wav_path)
+            # from_wav loads the audio into memory, so the clip stays valid
+            # after the scratch directory is deleted.
+            audio_clips.append(AudioSegment.from_wav(wav_path))
+
+    # Combine all audio clips in their original order.
+    final_audio = sum(audio_clips)
+
+    # Unique output file per call so concurrent requests cannot overwrite
+    # each other's result. The file is intentionally not deleted here because
+    # the caller reads it after this function returns.
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
+        output_path = out_file.name
+    final_audio.export(output_path, format="wav")
+    return output_path
+# Gradio UI: a single text box feeding text_to_speech, with the resulting
+# audio file shown in an audio player. The app is launched immediately.
+text_input = gr.Textbox(label="Enter Text")
+speech_output = gr.Audio(label="Generated Speech")
+demo = gr.Interface(
+    fn=text_to_speech,
+    inputs=text_input,
+    outputs=speech_output,
+    title="Swahili & English Text-to-Speech",
+    description="Type text in Swahili and English, and listen to the mixed-language speech.",
+)
+demo.launch()