alp committed on
Commit
f9f3b99
verified
1 Parent(s): 9eca7d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -49
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  import tempfile
3
- from TTS.utils.synthesizer import Synthesizer
4
  from TTS.api import TTS
5
  from huggingface_hub import hf_hub_download
6
  import torch
@@ -9,70 +8,64 @@ CUDA = torch.cuda.is_available()
9
 
10
  REPO_ID = "collectivat/catotron-ona"
11
 
 
 
 
 
 
 
12
  my_title = "Catotron Text-to-Speech"
13
  my_description = "This model is based on Fast Speech implemented in 🐸 [Coqui.ai](https://coqui.ai/)."
14
 
15
- # Download Catotron model files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  best_model_path = hf_hub_download(repo_id=REPO_ID, filename="fast-speech_best_model.pth")
17
  config_path = hf_hub_download(repo_id=REPO_ID, filename="fast-speech_config.json")
18
  vocoder_model = hf_hub_download(repo_id=REPO_ID, filename="ljspeech--hifigan_v2_model_file.pth")
19
  vocoder_config = hf_hub_download(repo_id=REPO_ID, filename="ljspeech--hifigan_v2_config.json")
20
 
21
- # Initialize Synthesizer for Catotron
22
- synthesizer = Synthesizer(
23
- tts_checkpoint=best_model_path,
24
- tts_config_path=config_path,
25
- tts_speakers_file=None,
26
- tts_languages_file=None,
27
- vocoder_checkpoint=vocoder_model,
28
- vocoder_config=vocoder_config,
29
- encoder_checkpoint="",
30
- encoder_config="",
31
- use_cuda=CUDA
32
- )
33
 
34
- # Initialize voice conversion model
35
- vc_model = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", gpu=CUDA)
 
36
 
37
- def tts(text, split_sentences, speaker_wav):
 
38
  text = text.replace("\n", ". ")
39
  text = text.replace("(", ",")
40
  text = text.replace(")", ",")
41
  text = text.replace(";", ",")
42
 
43
- # Generate with Catotron
44
- wavs = synthesizer.tts(text, split_sentences=split_sentences)
45
-
46
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
47
- synthesizer.save_wav(wavs, fp)
48
- temp_catotron = fp.name
49
-
50
- # Apply voice conversion if speaker provided
51
- if speaker_wav is not None:
52
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp_out:
53
- vc_model.voice_conversion_to_file(source_wav=temp_catotron, target_wav=speaker_wav, file_path=fp_out.name)
54
- return fp_out.name
55
-
56
- return temp_catotron
57
 
58
- with gr.Blocks() as iface:
59
- gr.Markdown(f"# {my_title}")
60
- gr.Markdown(my_description)
61
-
62
- with gr.Row():
63
- with gr.Column():
64
- text_input = gr.Textbox(lines=5, label="Input Text")
65
- split_check = gr.Checkbox(label="Split Sentences", value=True)
66
- speaker_audio = gr.Audio(label="Speaker audio for voice cloning (optional)", type="filepath")
67
- submit_btn = gr.Button("Generate")
68
-
69
- with gr.Column():
70
- audio_output = gr.Audio(label="Output Audio", type="filepath")
71
-
72
- submit_btn.click(
73
- fn=tts,
74
- inputs=[text_input, split_check, speaker_audio],
75
- outputs=audio_output
76
- )
77
 
 
 
 
 
 
 
 
 
 
78
  iface.launch()
 
1
  import gradio as gr
2
  import tempfile
 
3
  from TTS.api import TTS
4
  from huggingface_hub import hf_hub_download
5
  import torch
 
8
 
9
  REPO_ID = "collectivat/catotron-ona"
10
 
11
+ VOICE_CONVERSION_MODELS = {
12
+ 'freevc24': 'voice_conversion_models/multilingual/vctk/freevc24',
13
+ 'openvoice_v1': 'voice_conversion_models/multilingual/multi-dataset/openvoice_v1',
14
+ 'openvoice_v2': 'voice_conversion_models/multilingual/multi-dataset/openvoice_v2',
15
+ }
16
+
17
  my_title = "Catotron Text-to-Speech"
18
  my_description = "This model is based on Fast Speech implemented in 🐸 [Coqui.ai](https://coqui.ai/)."
19
 
20
+ my_examples = [
21
+ ["Catotron, síntesi de la parla obert i lliure en català.", True, None, 'freevc24'],
22
+ ["Leonor Ferrer Girabau va ser una delineant, mestra i activista barcelonina, nascuda al carrer actual de la Concòrdia del Poble-sec, que es va convertir en la primera dona a obtenir el títol de delineant a Catalunya i a l'estat.", True, None, 'freevc24'],
23
+ ["S'espera un dia anticiclònic amb temperatures suaus i vent fluix.", False, None, 'freevc24']
24
+ ]
25
+
26
+ my_inputs = [
27
+ gr.Textbox(lines=5, label="Input Text"),
28
+ gr.Checkbox(label="Split Sentences", value=False),
29
+ gr.Audio(type="filepath", label="Speaker audio for voice cloning (optional)"),
30
+ gr.Dropdown(label="Voice Conversion Model", choices=list(VOICE_CONVERSION_MODELS.keys())),
31
+ ]
32
+
33
+ my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
34
+
35
  best_model_path = hf_hub_download(repo_id=REPO_ID, filename="fast-speech_best_model.pth")
36
  config_path = hf_hub_download(repo_id=REPO_ID, filename="fast-speech_config.json")
37
  vocoder_model = hf_hub_download(repo_id=REPO_ID, filename="ljspeech--hifigan_v2_model_file.pth")
38
  vocoder_config = hf_hub_download(repo_id=REPO_ID, filename="ljspeech--hifigan_v2_config.json")
39
 
40
+ api = TTS(model_path=best_model_path, config_path=config_path, vocoder_path=vocoder_model, vocoder_config_path=vocoder_config).to("cuda" if CUDA else "cpu")
 
 
 
 
 
 
 
 
 
 
 
41
 
42
+ # pre-download voice conversion models
43
+ for model in VOICE_CONVERSION_MODELS.values():
44
+ api.load_vc_model_by_name(model, gpu=CUDA)
45
 
46
+ def tts(text: str, split_sentences: bool = False, speaker_wav: str = None, voice_cv_model: str = 'freevc24'):
47
+ # replace oov characters
48
  text = text.replace("\n", ". ")
49
  text = text.replace("(", ",")
50
  text = text.replace(")", ",")
51
  text = text.replace(";", ",")
52
 
 
 
 
53
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
54
+ if speaker_wav:
55
+ api.load_vc_model_by_name(VOICE_CONVERSION_MODELS[voice_cv_model], gpu=CUDA)
56
+ api.tts_with_vc_to_file(text, speaker_wav=speaker_wav, file_path=fp.name, split_sentences=split_sentences)
57
+ else:
58
+ api.tts_to_file(text, file_path=fp.name, split_sentences=split_sentences)
 
 
 
 
 
59
 
60
+ return fp.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ iface = gr.Interface(
63
+ fn=tts,
64
+ inputs=my_inputs,
65
+ outputs=my_outputs,
66
+ title=my_title,
67
+ description=my_description,
68
+ examples=my_examples,
69
+ cache_examples=True
70
+ )
71
  iface.launch()