sudoping01 committed on
Commit
fb7a6bf
·
verified ·
1 Parent(s): 7f7805a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +157 -76
app.py CHANGED
@@ -2,102 +2,183 @@ import gradio as gr
2
  from transformers import VitsModel, AutoTokenizer
3
  import torch
4
  import logging
 
 
 
5
 
6
  logging.basicConfig(level=logging.INFO)
7
  logger = logging.getLogger(__name__)
8
 
9
- languages = ["bambara", "boomu", "dogon", "pular", "songhoy", "tamasheq"]
 
 
 
 
 
 
 
 
10
 
11
- models = {}
12
- tokenizers = {}
13
 
14
  examples = {
15
- "bambara": "An filɛ ni ye yɔrɔ minna ni an ye an sigi ka a layɛ yala an bɛ ka baara min kɛ ɛsike a kɛlen don ka Ɲɛ wa ?",
16
- "boomu": "Vunurobe wozomɛ pɛɛ, Poli we zo woro han Deeɓenu wara li Deeɓenu faralo zuun. Lo we baba a lo wara yi see ɓa Zuwifera ma ɓa Gɛrɛkela wa.",
17
- "dogon": "Pɔɔlɔ, kubɔ lugo joo le, bana dɛin dɛin le, inɛw Ama titiyaanw le digɛu, Ama, emɛ babe bɛrɛ sɔɔ sɔi.",
18
- "pular": "Miɗo ndaarde saabe Laamɗo e saabe Iisaa Almasiihu caroyoowo wuurɓe e maayɓe oo, miɗo ndaardire saabe gartol makko ka num e Laamu makko",
19
- "songhoy": "Haya ka se beenediyo kokoyteraydi go hima nda huukoy foo ka fatta ja subaahi ka taasi goykoyyo ngu rezẽ faridi se",
20
- "tamasheq": "Toḍă tăfukt ɣas, issăɣră-dd măssi-s n-ašĕkrĕš ănaẓraf-net, inn'-as: 'Ǝɣĕr-dd inaxdimăn, tĕẓlĕd-asăn, sănt s-wi dd-ĕšrăynen har tĕkkĕd wi dd-ăzzarnen."
 
 
 
 
 
 
 
21
  }
22
 
23
- try:
24
- for lang in languages:
25
- logger.info(f"Loading model and tokenizer for {lang}...")
26
- models[lang] = VitsModel.from_pretrained("MALIBA-AI/malian-tts", subfolder=f"models/{lang}")
27
- tokenizers[lang] = AutoTokenizer.from_pretrained("MALIBA-AI/malian-tts", subfolder=f"models/{lang}")
28
- logger.info(f"Successfully loaded {lang}")
29
- except Exception as e:
30
- logger.error(f"Failed to load models: {str(e)}")
31
- raise Exception(f"Model loading failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- def generate_audio(language, text):
 
 
 
 
34
  if not text.strip():
35
  return None, "Please enter some text to synthesize."
36
 
37
  try:
38
- model = models[language]
39
- tokenizer = tokenizers[language]
 
 
40
 
41
- inputs = tokenizer(text, return_tensors="pt")
 
42
 
43
- with torch.no_grad():
44
- output = model(**inputs).waveform
45
-
46
- waveform = output.squeeze().cpu().numpy()
47
- sample_rate = model.config.sampling_rate
48
-
49
- return (sample_rate, waveform), None
50
  except Exception as e:
51
- logger.error(f"Error during inference for {language}: {str(e)}")
52
- return None, f"Error generating audio: {str(e)}"
53
 
54
- def load_example(language):
 
55
  return examples.get(language, "No example available")
56
 
57
- with gr.Blocks(title="MalianVoices") as demo:
58
- gr.Markdown(
59
- """
60
- # MalianVoices: 🇲🇱 Text-to-Speech in Six Malian Languages
61
-
62
- Lightweight TTS for six Malian languages: **Bambara, Boomu, Dogon, Pular, Songhoy, Tamasheq**.
63
-
64
- - Real-time TTS with fast response
65
- - ✅ Runs on CPU
66
-
67
- ## How to Use
68
- 1. Pick a language from the dropdown
69
- 2. Enter your text or load an example
70
- 3. Click **"Generate Audio"** to listen
71
- """
72
- )
73
-
74
- with gr.Row():
75
- language = gr.Dropdown(choices=languages, label="Language", value="bambara")
 
 
 
 
 
 
 
 
76
  with gr.Column():
77
- text = gr.Textbox(label="Input Text", lines=5, placeholder="Type your text here...")
78
- example_btn = gr.Button("Load Example")
79
-
80
- generate_btn = gr.Button("Generate Audio", variant="primary")
81
- audio_output = gr.Audio(label="Generated Audio", type="numpy")
82
- error_msg = gr.Textbox(label="Status", visible=False)
83
-
84
- # Footer at the bottom
85
- gr.Markdown(
86
- """
87
- By [sudoping01](https://huggingface.co/sudoping01), from [sudoping01/malian-tts](https://huggingface.co/sudoping01/malian-tts). Fine-tuned on Meta’s MMS, CC BY-NC 4.0, non-commercial.
88
- """
89
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- # Connect buttons to functions
92
- generate_btn.click(
93
- fn=generate_audio,
94
- inputs=[language, text],
95
- outputs=[audio_output, error_msg]
96
- )
97
- example_btn.click(
98
- fn=load_example,
99
- inputs=language,
100
- outputs=text
101
- )
102
 
103
- demo.launch()
 
 
 
 
 
2
  from transformers import VitsModel, AutoTokenizer
3
  import torch
4
  import logging
5
+ import spaces
6
+ from typing import Tuple, Optional
7
+ import numpy as np
8
 
9
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _select_device() -> str:
    """Return the torch device name to run inference on.

    Preference order is CUDA, then Apple MPS, then CPU.
    """
    if torch.cuda.is_available():
        return "cuda"
    # The MPS backend module does not exist on every PyTorch build, so look it
    # up defensively instead of assuming ``torch.backends.mps`` is present.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    return "cpu"


# Device every model and every tokenized input is moved to.
device = _select_device()
logger.info("Using %s for inference.", device.upper())
21
 
22
# Languages offered in the UI dropdown; each one is loaded from the
# ``models/<language>`` subfolder of the model repository.
languages = ["bambara", "boomu", "dogon", "pular", "songhoy", "tamasheq"]

# One sample sentence per language, returned by ``load_example``.
examples = {
    "bambara": "An filɛ ni ye yɔrɔ minna ni an ye an sigi ka a layɛ yala an ka baara min ɛsike a kɛlen don ka Ɲɛ wa ?",
    "boomu": "Vunurobe wozomɛ pɛɛ, Poli we zo woro han Deeɓenu wara li Deeɓenu faralo zuun. Lo we baba a lo wara yi see ɓa Zuwifera ma ɓa Gɛrɛkela wa.",
    "dogon": "Pɔɔlɔ, kubɔ lugo joo le, bana dɛin dɛin le, inɛw Ama titiyaanw le digɛu, Ama, emɛ babe bɛrɛ sɔɔ sɔi.",
    "pular": "Miɗo ndaarde saabe Laamɗo e saabe Iisaa Almasiihu caroyoowo wuurɓe e maayɓe oo, miɗo ndaardire saabe gartol makko ka num e Laamu makko",
    "songhoy": "Haya ka se beenediyo kokoyteraydi go hima nda huukoy foo ka fatta ja subaahi ka taasi goykoyyo ngu rezẽ faridi se",
    "tamasheq": "Toḍă tăfukt ɣas, issăɣră-dd măssi-s n-ašĕkrĕš ănaẓraf-net, inn'-as: 'Ǝɣĕr-dd inaxdimăn, tĕẓlĕd-asăn, sănt s-wi dd-ĕšrăynen har tĕkkĕd wi dd-ăzzarnen.",
}
39
 
40
class MalianTTS:
    """Text-to-speech engine holding one VITS model and tokenizer per language.

    Every language in the module-level ``languages`` list is loaded eagerly
    when the object is constructed, and each model is moved to the
    module-level ``device``.
    """

    def __init__(self, model_name: str = "MALIBA-AI/malian-tts"):
        """Load all language models from the repository ``model_name``.

        Raises:
            RuntimeError: if any model or tokenizer fails to load.
        """
        self.model_name = model_name
        self.models = {}
        self.tokenizers = {}
        self._load_models()

    def _load_models(self):
        """Load all language models and tokenizers.

        Raises:
            RuntimeError: if loading fails; the original error is chained
                as ``__cause__`` so the root cause stays visible.
        """
        try:
            for lang in languages:
                logger.info("Loading model and tokenizer for %s...", lang)
                subfolder = f"models/{lang}"
                self.models[lang] = VitsModel.from_pretrained(
                    self.model_name,
                    subfolder=subfolder,
                ).to(device)
                self.tokenizers[lang] = AutoTokenizer.from_pretrained(
                    self.model_name,
                    subfolder=subfolder,
                )
                logger.info("Successfully loaded %s", lang)
        except Exception as e:
            logger.error("Failed to load models: %s", e)
            # RuntimeError is still an Exception, so existing handlers keep working.
            raise RuntimeError(f"Model loading failed: {e}") from e

    def synthesize(self, language: str, text: str) -> Tuple[Optional[Tuple[int, np.ndarray]], Optional[str]]:
        """Generate audio from text for the specified language.

        Args:
            language: key of a loaded model (one of ``languages``).
            text: the text to synthesize.

        Returns:
            ``((sample_rate, waveform), None)`` on success, where ``waveform``
            is the squeezed model output as a NumPy array; otherwise
            ``(None, message)`` when the text is empty, the language is not
            loaded, or inference fails.
        """
        # Guard against None as well as blank strings before calling .strip().
        if not text or not text.strip():
            return None, "Please enter some text to synthesize."

        try:
            model = self.models[language]
            tokenizer = self.tokenizers[language]
        except (KeyError, TypeError):
            # Report an unknown language explicitly instead of a bare KeyError repr.
            return None, f"Unsupported language: {language!r}"

        try:
            inputs = tokenizer(text, return_tensors="pt").to(device)

            with torch.no_grad():
                output = model(**inputs).waveform

            waveform = output.squeeze().cpu().numpy()
            sample_rate = model.config.sampling_rate
            return (sample_rate, waveform), None
        except Exception as e:
            # Errors are returned as a message rather than raised; keep the traceback in the log.
            logger.exception("Error during inference for %s", language)
            return None, f"Error generating audio: {e}"
87
+
88
# Initialize the TTS system once at import time; the constructor loads every
# language model, and generate_audio reuses this single instance.
tts_system = MalianTTS()


@spaces.GPU()
def generate_audio(language: str, text: str) -> Tuple[Optional[Tuple[int, np.ndarray]], str]:
    """Generate audio from text using the specified language model.

    Args:
        language: language key passed through to ``tts_system.synthesize``.
        text: the text to synthesize.

    Returns:
        ``(audio, status)`` where ``audio`` is ``(sample_rate, waveform)`` on
        success or ``None`` on failure, and ``status`` is a message for the
        status box.
    """
    # Guard against None as well as blank strings before calling .strip().
    if not text or not text.strip():
        return None, "Please enter some text to synthesize."

    try:
        audio_output, error_msg = tts_system.synthesize(language, text)
    except Exception as e:
        # Last-resort boundary: report the failure in the status box instead of raising.
        logger.exception("Audio generation failed: %s", e)
        return None, f"Error: {e}"

    if error_msg:
        logger.error("TTS generation failed: %s", error_msg)
        return None, error_msg

    logger.info("Successfully generated audio for %s", language)
    return audio_output, "Audio generated successfully!"
111
 
112
def load_example(language: str) -> str:
    """Return the sample sentence for ``language``, or a fallback notice if none exists."""
    try:
        return examples[language]
    except KeyError:
        return "No example available"
115
 
116
def build_interface():
    """Assemble the Gradio Blocks UI for Malian TTS and return it without launching."""
    header_md = """
        # MalianVoices: 🇲🇱 Text-to-Speech in Six Malian Languages

        Lightweight TTS for six Malian languages: **Bambara, Boomu, Dogon, Pular, Songhoy, Tamasheq**.

        - Real-time TTS with fast response

        ## How to Use
        1. Pick a language from the dropdown
        2. Enter your text or load an example
        3. Click **"Generate Audio"** to listen
        """
    footer_md = """
        By [sudoping01](https://huggingface.co/sudoping01), from [sudoping01/malian-tts](https://huggingface.co/sudoping01/malian-tts).
        Fine-tuned on Meta's MMS, CC BY-NC 4.0, non-commercial.
        """

    # NOTE(review): the widget nesting below was reconstructed from a flattened
    # diff view that lost indentation; confirm it matches the intended layout.
    with gr.Blocks(title="MalianVoices") as demo:
        gr.Markdown(header_md)

        with gr.Row():
            language_dropdown = gr.Dropdown(choices=languages, label="Language", value="bambara")

        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Type your text here...")

            with gr.Row():
                example_btn = gr.Button("Load Example")
                generate_btn = gr.Button("Generate Audio", variant="primary")

            audio_output = gr.Audio(label="Generated Audio", type="numpy")
            status_msg = gr.Textbox(label="Status", interactive=False)

        # Footer
        gr.Markdown(footer_md)

        # Wire the buttons: synthesis fills the audio player and the status
        # box; the example button fills the input textbox.
        generate_btn.click(
            fn=generate_audio,
            inputs=[language_dropdown, text_input],
            outputs=[audio_output, status_msg],
        )
        example_btn.click(fn=load_example, inputs=language_dropdown, outputs=text_input)

    return demo
 
 
 
 
 
 
 
 
 
 
179
 
180
if __name__ == "__main__":
    logger.info("Starting the Gradio interface for MalianVoices TTS.")
    interface = build_interface()
    # launch() blocks the main thread when run as a script (prevent_thread_lock
    # defaults to False), so the line after it is only reached once the server
    # has stopped; log that state rather than claiming it is running.
    interface.launch()
    logger.info("Gradio interface stopped.")