|
import io

from scipy.io.wavfile import write as scipy_wav_write
from TTS.utils.synthesizer import Synthesizer

from src.inference import TextToSpeechEngine
|
|
|
|
|
|
|
def _load_synthesizer(lang_code, use_cuda=True):
    """Build a ``Synthesizer`` from the checkpoint files of one language.

    Args:
        lang_code: Name of the sub-directory of ``checkpoints/`` that holds
            the language's ``fastpitch`` and ``hifigan`` files (e.g. ``"hi"``).
        use_cuda: Forwarded to ``Synthesizer`` unchanged; defaults to ``True``.

    Returns:
        The ``Synthesizer`` instance created for ``lang_code``.
    """
    fastpitch_dir = f'checkpoints/{lang_code}/fastpitch'
    hifigan_dir = f'checkpoints/{lang_code}/hifigan'
    return Synthesizer(
        tts_checkpoint=f'{fastpitch_dir}/best_model.pth',
        tts_config_path=f'{fastpitch_dir}/config.json',
        tts_speakers_file=f'{fastpitch_dir}/speakers.pth',
        # No languages file is passed; each language gets its own Synthesizer.
        tts_languages_file=None,
        vocoder_checkpoint=f'{hifigan_dir}/best_model.pth',
        vocoder_config=f'{hifigan_dir}/config.json',
        encoder_checkpoint="",
        encoder_config="",
        use_cuda=use_cuda,
    )


# Hindi model, loaded from checkpoints/hi/.
lang = "hi"
hi_model = _load_synthesizer(lang)

# Tamil model, loaded from checkpoints/ta/.
lang = "ta"
ta_model = _load_synthesizer(lang)
|
|
|
|
|
|
|
# Map each language code to its loaded synthesizer, then hand the whole
# mapping to the inference engine.
models = {"hi": hi_model, "ta": ta_model}
engine = TextToSpeechEngine(models)
|
|
|
|
|
|
|
# Run text-to-speech inference on the Hindi sample text with the "male" speaker.
hindi_raw_audio = engine.infer_from_text(
    input_text="सलाम दुनिया",
    lang="hi",
    speaker_name="male",
)

# Encode the audio into an in-memory buffer first.
# NOTE(review): DEFAULT_SAMPLING_RATE is not defined or imported in the
# visible part of this file - confirm where it is supposed to come from.
byte_io = io.BytesIO()
scipy_wav_write(byte_io, DEFAULT_SAMPLING_RATE, hindi_raw_audio)

# getvalue() returns the entire buffer regardless of the current stream
# position, so the output does not depend on the writer rewinding byte_io.
with open("hindi_audio.wav", "wb") as f:
    f.write(byte_io.getvalue())
|
|
|
|
|
|
|
# Run text-to-speech inference on the Tamil sample text with the "female" speaker.
tamil_raw_audio = engine.infer_from_text(
    input_text="வணக்கம் உலகம்",
    lang="ta",
    speaker_name="female",
)

# Encode the audio into an in-memory buffer first.
# NOTE(review): DEFAULT_SAMPLING_RATE is not defined or imported in the
# visible part of this file - confirm where it is supposed to come from.
byte_io = io.BytesIO()
scipy_wav_write(byte_io, DEFAULT_SAMPLING_RATE, tamil_raw_audio)

# getvalue() returns the entire buffer regardless of the current stream
# position, so the output does not depend on the writer rewinding byte_io.
with open("tamil_audio.wav", "wb") as f:
    f.write(byte_io.getvalue())
|
|