Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,244 Bytes
f79db70 49abc70 f79db70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
from ..llama_configs import LLAMA_CONFIGS
class T3Config:
    """Hyper-parameter container for the T3 text-to-speech model.

    Every setting is stored as a plain instance attribute. Only the text
    vocabulary size is configurable through the constructor; all other
    values are fixed defaults assigned in ``__init__``.

    Use :meth:`english_only` or :meth:`multilingual` to build one of the
    two preset configurations.
    """

    # Text vocabulary sizes of the two presets. Named once here so that
    # ``is_multilingual`` and the factory methods cannot drift apart.
    ENGLISH_TEXT_TOKENS_DICT_SIZE = 704
    MULTILINGUAL_TEXT_TOKENS_DICT_SIZE = 2352

    def __init__(
        self, text_tokens_dict_size: int = ENGLISH_TEXT_TOKENS_DICT_SIZE
    ) -> None:
        """Initialise the configuration.

        Args:
            text_tokens_dict_size: Size of the text token vocabulary.
                Defaults to 704 (the English-only preset); 2352 selects
                the multilingual preset.
        """
        # NOTE(review): the meaning of the fields below is inferred from
        # their names only; confirm against the tokenizer / model code.

        # Text token settings.
        self.start_text_token = 255
        self.stop_text_token = 0
        self.text_tokens_dict_size = text_tokens_dict_size
        self.max_text_tokens = 2048

        # Speech token settings.
        self.start_speech_token = 6561
        self.stop_speech_token = 6562
        self.speech_tokens_dict_size = 8194
        self.max_speech_tokens = 4096

        # Backbone selection: used as a key into LLAMA_CONFIGS (see n_channels).
        self.llama_config_name = "Llama_520M"
        self.input_pos_emb = "learned"

        # Conditioning settings.
        self.speech_cond_prompt_len = 150
        self.encoder_type = "voice_encoder"
        self.speaker_embed_size = 256
        self.use_perceiver_resampler = True
        self.emotion_adv = True

    @property
    def n_channels(self) -> int:
        """Return the ``"hidden_size"`` entry of the selected backbone config.

        The value is looked up in ``LLAMA_CONFIGS`` under
        ``self.llama_config_name`` on every access, so changing
        ``llama_config_name`` changes the result. The lookup fails if that
        name, or its ``"hidden_size"`` entry, is missing (presumably with
        ``KeyError``, assuming ``LLAMA_CONFIGS`` is a dict of dicts).
        """
        return LLAMA_CONFIGS[self.llama_config_name]["hidden_size"]

    @property
    def is_multilingual(self) -> bool:
        """Return True when the text vocabulary size is the multilingual one.

        The check is an exact match against 2352; any other vocabulary
        size, including custom ones, is reported as not multilingual.
        """
        return self.text_tokens_dict_size == self.MULTILINGUAL_TEXT_TOKENS_DICT_SIZE

    @classmethod
    def english_only(cls) -> "T3Config":
        """Create configuration for English-only TTS model."""
        return cls(text_tokens_dict_size=cls.ENGLISH_TEXT_TOKENS_DICT_SIZE)

    @classmethod
    def multilingual(cls) -> "T3Config":
        """Create configuration for multilingual TTS model."""
        return cls(text_tokens_dict_size=cls.MULTILINGUAL_TEXT_TOKENS_DICT_SIZE)