"""Gradio demo UI for a multilingual, multi-speaker text-to-speech synthesizer."""

import os
import tempfile

import gradio as gr

import syn_hifigan as syn
# Alternative synthesizer modules previously bound to the same alias:
# import syn_vgan as syn
# import syn_k_univnet_multi as syn

# Markdown shown at the top of the page.
description_text = """
# Multilingual TTS for Sámi languages (+ Finnish and Estonian)

Welcome! This is a demonstration of a multi-lingual and multi-speaker Text-to-Speech (TTS) model. The demo is related to research on TTS for low-resource languages, and the effect of augmenting the training data with areally close languages.

Disclaimers:

For convenience, the demo uses pretrained HiFi-GAN vocoder which doesn't work well with male voices. English does not work well due to small dataset and orthographic transcriptions.

Use the demo just for testing, not for frequent or commercial use.
"""

# Voice label shown in the "Voice" dropdown -> speaker id passed to the
# synthesizer as `spkr`.
speakers = {
    "aj(sma)": 2,
    "am(sme)": 3,
    "ms(sme)": 4,
    "ln(sme)": 5,
    "mu(smj)": 7,
    "sa(smj)": 8,
    "bi(smj)": 10,  # label previously lacked its closing parenthesis
    "css(fin)": 11,
    "ti(fin)": 13,
    "ta(fin)": 14,
    "liivika(est)": 15,
    "indek(est)": 16,
    "kylli(est)": 17,
    "andreas(est)": 18,
    "peeter(est)": 19,
    "kersti(est)": 20,
    "M6670(eng)": 21,
    "M6097(eng)": 22,
    "F92(eng)": 23,
    "F9136(eng)": 24,
}

# Per-voice mean pitch values. Not used by the active code: the only
# reference is a commented-out `mean_pitch=` argument in `speak`, and these
# keys do not match the labels in `speakers`.
mean_pitch = {
    "aj0": 130,
    "aj1": 130,
    "am": 120,
    "ms": 120,
    "ln": 120,
    "lo": 120,
    "mu": 120,
    "sa": 120,
    "kd": 120,
    "bi": 120,
    "ti": 130,
    "ta": 115,
    "liivika": 120,
    "indek": 90,
    "kylli": 140,
    "andreas": 100,
    "peeter": 80,
    "kersti": 120,
}

# Language label shown in the "Language" dropdown -> language id passed to the
# synthesizer as `lang`. Selecting "guess" also sets `guess_lang=True`.
languages = {
    "guess": -1,
    "South Sámi": 0,
    "North Sámi": 1,
    "Lule Sámi": 2,
    "Finnish": 3,
    "Estonian": 4,
    "English": 5,
}

# Example sentence placed in the text box when a language is selected.
default_prompts = {
    "guess": "Sáhtta go esso-burgera luohti, Koskenkorva dahje carpool karajoiki gádjut árgabeaivveluođi?",
    "North Sámi": "Riektačállinreaidduid lassin Divvun-joavkkus ovdanit dál maiddái hállanteknologiijareaidduid.",
    "South Sámi": " Buerie aerede gaajhkesh dovnesh jïh buerie båeteme dan bæjhkoehtæmman.",  # Guktie datnine?
    "Lule Sámi": "Sáme hållamsyntiesaj baktu máhttá adnegoahtet sáme gielajt ådå aktijvuodajn.",
    "Finnish": "Joka kuuseen kurkottaa, se katajaan kapsahtaa.",
    "Estonian": "Aprilli lõpp pani aiapidajate kannatuse jälle proovile – pärast mõnepäevast sooja saabub ootamatu külmalaine.",
    "English": "This obscure language is not supported by this model.",
}

# Forwarded to `launch(share=...)`.
public = False

# Sample rate reported to Gradio together with the synthesized audio.
# NOTE(review): presumably the synthesizer's output rate -- confirm against
# the `syn` module.
SAMPLE_RATE = 22050

tempdir = tempfile.gettempdir()
tts = syn.Synthesizer()


def speak(text, language, speaker, l_weight, s_weight, pace, postfilter):
    """Synthesize ``text`` and return it in the form ``gr.Audio`` accepts.

    Args:
        text: Text to synthesize.
        language: A key of ``languages``. ``"guess"`` additionally turns on
            the synthesizer's ``guess_lang`` flag.
        speaker: A key of ``speakers``.
        l_weight: Forwarded unchanged as ``l_weight``.
        s_weight: Forwarded unchanged as ``s_weight``.
        pace: Forwarded unchanged as ``pace``.
        postfilter: Forwarded to the synthesizer as ``clarity``.

    Returns:
        ``(SAMPLE_RATE, audio)`` where ``audio`` is whatever ``tts.speak``
        returns, or ``None`` when ``text`` is empty or whitespace only.

    Raises:
        KeyError: If ``language`` or ``speaker`` is not a known label.
    """
    # Nothing to say: skip the synthesizer and leave the audio output empty.
    if not text or not text.strip():
        return None

    # No text frontend is implemented; the only normalisation is collapsing
    # three dots into a single ellipsis character.
    text = text.replace("...", "…")

    audio = tts.speak(
        text,
        output_file=f'{tempdir}/tmp',
        lang=languages[language],
        spkr=speakers[speaker],
        l_weight=l_weight,
        s_weight=s_weight,
        pace=pace,
        clarity=postfilter,
        guess_lang=language == "guess",
    )  # , mean_pitch=mean_pitch[speaker])

    # Disabled local playback helper, kept for reference:
    # if not public:
    #     try:
    #         os.system("play " + tempdir + "/tmp.wav &")
    #     except:
    #         pass

    return (SAMPLE_RATE, audio)


def update_text_prompt(language):
    """Return a textbox update holding the default prompt for ``language``.

    A language without an entry in ``default_prompts`` yields an empty string.
    """
    return gr.Textbox(value=default_prompts.get(language, ""))


with gr.Blocks() as tts_gui:
    gr.Markdown(description_text)

    with gr.Row():
        with gr.Column(scale=2):
            text_input = gr.Textbox(label="Text", value=default_prompts["North Sámi"])
            language_dd = gr.Dropdown(list(languages), label="Language", value="North Sámi")
            speaker_dd = gr.Dropdown(list(speakers), label="Voice", value="ms(sme)")

            # NOTE(review): the original indentation was lost; confirm that
            # only the two weight sliders belong inside this row.
            with gr.Row():
                l_weight_slider = gr.Slider(minimum=0.5, maximum=1.5, step=0.05, value=1, label="Language Weight")
                s_weight_slider = gr.Slider(minimum=0.5, maximum=1.5, step=0.05, value=1, label="Speaker Weight")

            pace_slider = gr.Slider(minimum=0.5, maximum=1.5, step=0.05, value=1.0, label="Speech Rate")
            postfilter_slider = gr.Slider(minimum=0., maximum=2, step=0.05, value=1.0, label="Post-processing")

        with gr.Column(scale=1):
            # Synthesis runs only when this button is clicked.
            speak_button = gr.Button("Speak", variant="primary")
            audio_output = gr.Audio(label="Output")

    # Changing the language replaces the text box content with that
    # language's default prompt.
    language_dd.change(
        fn=update_text_prompt,
        inputs=[language_dd],
        outputs=[text_input],
    )

    speak_button.click(
        fn=speak,
        inputs=[
            text_input,
            language_dd,
            speaker_dd,
            l_weight_slider,
            s_weight_slider,
            pace_slider,
            postfilter_slider,
        ],
        outputs=[audio_output],
    )

if __name__ == "__main__":
    tts_gui.launch(share=public)