higgs_audio_v2

Running on Zero

App Files Files Community

zachzzc committed 6 days ago

Commit

b4da283

1 Parent(s): 493db6d

Add more voice clone voices; Update model names; Update playground

Browse files

Files changed (12) hide show

.gitattributes +8 -1
app.py +120 -22
higgs_audio/serve/serve_engine.py +50 -0
voice_examples/{wizard.wav → belinda.wav} +2 -2
voice_examples/broom_salesman.wav +3 -0
voice_examples/chadwick.wav +3 -0
voice_examples/config.json +31 -3
voice_examples/en_man.wav +3 -0
voice_examples/en_woman.wav +3 -0
voice_examples/mabel.wav +3 -0
voice_examples/vex.wav +3 -0
voice_examples/zh_man_sichuan.wav +3 -0

.gitattributes CHANGED Viewed

@@ -33,4 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-voice_examples/wizard.wav filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+voice_examples/en_woman.wav filter=lfs diff=lfs merge=lfs -text
+voice_examples/mabel.wav filter=lfs diff=lfs merge=lfs -text
+voice_examples/vex.wav filter=lfs diff=lfs merge=lfs -text
+voice_examples/zh_man_sichuan.wav filter=lfs diff=lfs merge=lfs -text
+voice_examples/belinda.wav filter=lfs diff=lfs merge=lfs -text
+voice_examples/broom_salesman.wav filter=lfs diff=lfs merge=lfs -text
+voice_examples/chadwick.wav filter=lfs diff=lfs merge=lfs -text
+voice_examples/en_man.wav filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -25,8 +25,8 @@ from higgs_audio.data_types import ChatMLSample, AudioContent, Message
 engine = None
 # Default model configuration
-DEFAULT_MODEL_PATH = "bosonai/higgs-audio-v2-generation-3B-staging"
-DEFAULT_AUDIO_TOKENIZER_PATH = "bosonai/higgs-audio-v2-tokenizer-staging"
 SAMPLE_RATE = 24000
 DEFAULT_SYSTEM_PROMPT = (
@@ -43,46 +43,54 @@ PREDEFINED_EXAMPLES = {
     "voice-clone": {
         "system_prompt": "",
         "input_text": "Hey there! I'm your friendly voice twin in the making. Pick a voice preset below or upload your own audio - let's clone some vocals and bring your voice to life! ",
-        "description": "Voice clone template",
     },
-    "zero-shot": {
         "system_prompt": DEFAULT_SYSTEM_PROMPT,
-        "input_text": "Hey hey! Welcome to Higgs Audio, your voice's new best friend. Drop your text below, and I'll turn it into something that sounds awesome! Let's make some audio magic!",
-        "description": "Zero-shot template",
     },
-    "multispeaker-interleave": {
-        "system_prompt": "Generate audio following instruction.\n\n"
         "<|scene_desc_start|>\n"
-        "SPEAKER0: vocal fry;feminism;slightly fast\n"
-        "SPEAKER1: masculine;moderate;moderate pitch;monotone;mature\n"
-        "In this scene, a group of adventurers is debating whether to investigate a potentially dangerous situation.\n"
         "<|scene_desc_end|>",
-        "input_text": "<|generation_instruction_start|>\nGenerate interleaved transcript and audio that lasts for around 10 seconds.\n<|generation_instruction_end|>",
-        "description": "Multispeaker interleave example",
     },
-    "single-speaker-accent": {
         "system_prompt": "Generate audio following instruction.\n\n"
         "<|scene_desc_start|>\n"
-        "SPEAKER0: British accent;\n"
         "<|scene_desc_end|>",
         "input_text": "Hey, everyone! Welcome back to Tech Talk Tuesdays.\n"
         "It's your host, Alex, and today, we're diving into a topic that's become absolutely crucial in the tech world — deep learning.\n"
         "And let's be honest, if you've been even remotely connected to tech, AI, or machine learning lately, you know that deep learning is everywhere.\n"
         "\n"
         "So here's the big question: Do you want to understand how deep learning works?\n",
-        "description": "Single speaker example",
     },
     "single-speaker-zh": {
         "system_prompt": "Generate audio following instruction.\n\n"
         "<|scene_desc_start|>\n"
-        "\nAudio is recorded from a quiet room.\n"
-        "\nSPEAKER0: feminine\n"
         "<|scene_desc_end|>",
         "input_text": "大家好, 欢迎收听本期的跟李沐学AI. 今天沐哥在忙着洗数据, 所以由我, 希格斯主播代替他讲这期视频.\n"
         "今天我们要聊的是一个你绝对不能忽视的话题: 多模态学习.\n"
         "那么, 问题来了, 你真的了解多模态吗? 你知道如何自己动手构建多模态大模型吗.\n"
         "或者说, 你能察觉到我其实是个机器人吗?",
-        "description": "Single speaker with Chinese text",
     },
 }
@@ -130,6 +138,62 @@ def get_voice_present(voice_preset):
     return voice_path, text
 @spaces.GPU
 def initialize_engine(model_path, audio_tokenizer_path) -> bool:
     """Initialize the HiggsAudioServeEngine."""
@@ -200,6 +264,7 @@ def prepare_chatml_sample(
         messages.append(Message(role="assistant", content=[audio_content]))
     # Add the main user message
     messages.append(Message(role="user", content=text))
     return ChatMLSample(messages=messages)
@@ -217,6 +282,8 @@ def text_to_speech(
     top_k=50,
     system_prompt=DEFAULT_SYSTEM_PROMPT,
     stop_strings=None,
 ):
     """Convert text to speech using HiggsAudioServeEngine."""
     global engine
@@ -237,7 +304,8 @@ def text_to_speech(
         request_id = f"tts-playground-{str(uuid.uuid4())}"
         logger.info(
             f"{request_id}: Generating speech for text: {text[:100]}..., \n"
-            f"with parameters: temperature={temperature}, top_p={top_p}, top_k={top_k}, stop_list={stop_list}"
         )
         start_time = time.time()
@@ -249,6 +317,8 @@ def text_to_speech(
             top_k=top_k if top_k > 0 else None,
             top_p=top_p,
             stop_strings=stop_list,
         )
         generation_time = time.time() - start_time
@@ -312,7 +382,7 @@ def create_ui():
     }
     """
-    default_template = "zero-shot"
     """Create the Gradio UI."""
     with gr.Blocks(theme=my_theme, css=custom_css) as demo:
@@ -329,6 +399,12 @@ def create_ui():
                     info="Select a predefined example for system and input messages. Voice preset will be set to EMPTY when a example is selected.",
                 )
                 system_prompt = gr.TextArea(
                     label="System Prompt",
                     placeholder="Enter system prompt to guide the model...",
@@ -378,6 +454,22 @@ def create_ui():
                     )
                     top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top P")
                     top_k = gr.Slider(minimum=-1, maximum=100, value=50, step=1, label="Top K")
                     # Add stop strings component
                     stop_strings = gr.Dataframe(
                         label="Stop Strings",
@@ -437,10 +529,12 @@ def create_ui():
                 template = PREDEFINED_EXAMPLES[template_name]
                 # Enable voice preset and custom reference only for voice-clone template
                 is_voice_clone = template_name == "voice-clone"
-                voice_preset_value = "wizard" if is_voice_clone else "EMPTY"
                 return (
                     template["system_prompt"],  # system_prompt
                     template["input_text"],  # input_text
                     gr.update(
                         value=voice_preset_value, interactive=is_voice_clone, visible=is_voice_clone
                     ),  # voice_preset (value and interactivity)
@@ -454,6 +548,7 @@ def create_ui():
                     gr.update(),
                     gr.update(),
                     gr.update(),
                 )  # No change if template not found
         # Set up event handlers
@@ -465,6 +560,7 @@ def create_ui():
             outputs=[
                 system_prompt,
                 input_text,
                 voice_preset,
                 custom_reference_accordion,
                 voice_samples_section,
@@ -485,6 +581,8 @@ def create_ui():
                 top_k,
                 system_prompt,
                 stop_strings,
             ],
             outputs=[output_text, output_audio],
             api_name="generate_speech",

 engine = None
 # Default model configuration
+DEFAULT_MODEL_PATH = "bosonai/higgs-audio-v2-generation-3B-base"
+DEFAULT_AUDIO_TOKENIZER_PATH = "bosonai/higgs-audio-v2-tokenizer"
 SAMPLE_RATE = 24000
 DEFAULT_SYSTEM_PROMPT = (
     "voice-clone": {
         "system_prompt": "",
         "input_text": "Hey there! I'm your friendly voice twin in the making. Pick a voice preset below or upload your own audio - let's clone some vocals and bring your voice to life! ",
+        "description": "Voice clone to clone the reference audio. Leave the system prompt empty.",
     },
+    "smart-voice": {
         "system_prompt": DEFAULT_SYSTEM_PROMPT,
+        "input_text": "The sun rises in the east and sets in the west. This simple fact has been observed by humans for thousands of years.",
+        "description": "Smart voice to generate speech based on the context",
     },
+    "multispeaker-voice-description": {
+        "system_prompt": "You are an AI assistant designed to convert text into speech.\n"
+        "If the user's message includes a [SPEAKER*] tag, do not read out the tag and generate speech for the following text, using the specified voice.\n"
+        "If no speaker tag is present, select a suitable voice on your own.\n\n"
         "<|scene_desc_start|>\n"
+        "SPEAKER0: feminine\n"
+        "SPEAKER1: masculine\n"
         "<|scene_desc_end|>",
+        "input_text": "[SPEAKER0] I can't believe you did that without even asking me first!\n"
+        "[SPEAKER1] Oh, come on! It wasn't a big deal, and I knew you would overreact like this.\n"
+        "[SPEAKER0] Overreact? You made a decision that affects both of us without even considering my opinion!\n"
+        "[SPEAKER1] Because I didn't have time to sit around waiting for you to make up your mind! Someone had to act.",
+        "description": "Multispeaker with different voice descriptions in the system prompt",
     },
+    "single-speaker-voice-description": {
         "system_prompt": "Generate audio following instruction.\n\n"
         "<|scene_desc_start|>\n"
+        "SPEAKER0: He speaks with a clear British accent and a conversational, inquisitive tone. His delivery is articulate and at a moderate pace, and very clear audio.\n"
         "<|scene_desc_end|>",
         "input_text": "Hey, everyone! Welcome back to Tech Talk Tuesdays.\n"
         "It's your host, Alex, and today, we're diving into a topic that's become absolutely crucial in the tech world — deep learning.\n"
         "And let's be honest, if you've been even remotely connected to tech, AI, or machine learning lately, you know that deep learning is everywhere.\n"
         "\n"
         "So here's the big question: Do you want to understand how deep learning works?\n",
+        "description": "Single speaker with voice description in the system prompt",
     },
     "single-speaker-zh": {
         "system_prompt": "Generate audio following instruction.\n\n"
         "<|scene_desc_start|>\n"
+        "Audio is recorded from a quiet room.\n"
         "<|scene_desc_end|>",
         "input_text": "大家好, 欢迎收听本期的跟李沐学AI. 今天沐哥在忙着洗数据, 所以由我, 希格斯主播代替他讲这期视频.\n"
         "今天我们要聊的是一个你绝对不能忽视的话题: 多模态学习.\n"
         "那么, 问题来了, 你真的了解多模态吗? 你知道如何自己动手构建多模态大模型吗.\n"
         "或者说, 你能察觉到我其实是个机器人吗?",
+        "description": "Single speaker speaking Chinese",
+    },
+    "single-speaker-bgm": {
+        "system_prompt": DEFAULT_SYSTEM_PROMPT,
+        "input_text": "<SE_s>[Music]</SE_s> I will remember this, thought Ender, when I am defeated. To keep dignity, and give honor where it's due, so that defeat is not disgrace. And I hope I don't have to do it often. <SE_e>[Music]</SE_e>",
+        "description": "Single speaker with BGM using music tag. This is an experimental feature and may need to try multiple times to get the best result.",
     },
 }
     return voice_path, text
# Translation table built once at import time. Every key is a single
# full-width (Chinese) punctuation character; every value is the half-width
# (English) string that replaces it. None of the replacement strings contains
# a key character, so one simultaneous pass gives the same result as applying
# the replacements one after another.
# NOTE(review): the copy of this table in higgs_audio/serve/serve_engine.py
# maps the full-width comma to "," (no trailing space) -- confirm which
# variant is intended.
_ZH_TO_EN_PUNCT_TABLE = str.maketrans(
    {
        "，": ", ",  # comma
        "。": ".",  # period
        "：": ":",  # colon
        "；": ";",  # semicolon
        "？": "?",  # question mark
        "！": "!",  # exclamation mark
        "（": "(",  # left parenthesis
        "）": ")",  # right parenthesis
        "【": "[",  # left square bracket
        "】": "]",  # right square bracket
        "《": "<",  # left angle quote
        "》": ">",  # right angle quote
        "“": '"',  # left double quotation
        "”": '"',  # right double quotation
        "‘": "'",  # left single quotation
        "’": "'",  # right single quotation
        "、": ",",  # enumeration comma
        "—": "-",  # em dash
        "…": "...",  # ellipsis
        "·": ".",  # middle dot
        "「": '"',  # left corner bracket
        "」": '"',  # right corner bracket
        "『": '"',  # left double corner bracket
        "』": '"',  # right double corner bracket
    }
)


def normalize_chinese_punctuation(text: str) -> str:
    """Convert Chinese (full-width) punctuation marks to English (half-width) equivalents.

    Args:
        text: The string to normalize.

    Returns:
        A copy of ``text`` in which every character listed in
        ``_ZH_TO_EN_PUNCT_TABLE`` has been replaced by its mapped string.
        All other characters are left untouched.
    """
    # A single translate() pass replaces the former per-entry replace() loop,
    # which rescanned the whole string once for every mapping.
    return text.translate(_ZH_TO_EN_PUNCT_TABLE)
def normalize_text(transcript: str):
    """Clean up a transcript and make sure it ends with closing punctuation.

    The steps run in this order:

    1. Full-width Chinese punctuation is converted with
       ``normalize_chinese_punctuation``.
    2. Parentheses become spaces and the ``°F`` / ``°C`` symbols are spelled out.
    3. Lines containing only whitespace are dropped, and whitespace runs
       inside each remaining line are collapsed to single spaces.
    4. If the result does not already end with one of the accepted
       terminators, a period is appended. An empty result therefore becomes
       ``"."``.

    Args:
        transcript: The raw text to normalize.

    Returns:
        The normalized transcript.
    """
    cleaned = normalize_chinese_punctuation(transcript)

    # Other normalizations (e.g., parentheses and other symbols. Will be improved in the future)
    symbol_rewrites = (
        ("(", " "),
        (")", " "),
        ("°F", " degrees Fahrenheit"),
        ("°C", " degrees Celsius"),
    )
    for symbol, spoken in symbol_rewrites:
        cleaned = cleaned.replace(symbol, spoken)

    # Rebuild the text line by line, keeping only lines that contain words.
    kept_lines = []
    for raw_line in cleaned.split("\n"):
        words = raw_line.split()
        if words:
            kept_lines.append(" ".join(words))
    cleaned = "\n".join(kept_lines).strip()

    # Endings that already count as a finished utterance.
    accepted_endings = (".", "!", "?", ",", ";", '"', "'", "</SE_e>", "</SE>")
    if cleaned.endswith(accepted_endings):
        return cleaned
    return cleaned + "."
 @spaces.GPU
 def initialize_engine(model_path, audio_tokenizer_path) -> bool:
     """Initialize the HiggsAudioServeEngine."""
         messages.append(Message(role="assistant", content=[audio_content]))
     # Add the main user message
+    text = normalize_text(text)
     messages.append(Message(role="user", content=text))
     return ChatMLSample(messages=messages)
     top_k=50,
     system_prompt=DEFAULT_SYSTEM_PROMPT,
     stop_strings=None,
+    ras_win_len=20,
+    ras_win_max_num_repeat=2,
 ):
     """Convert text to speech using HiggsAudioServeEngine."""
     global engine
         request_id = f"tts-playground-{str(uuid.uuid4())}"
         logger.info(
             f"{request_id}: Generating speech for text: {text[:100]}..., \n"
+            f"with parameters: temperature={temperature}, top_p={top_p}, top_k={top_k}, stop_list={stop_list}, "
+            f"ras_win_len={ras_win_len}, ras_win_max_num_repeat={ras_win_max_num_repeat}"
         )
         start_time = time.time()
             top_k=top_k if top_k > 0 else None,
             top_p=top_p,
             stop_strings=stop_list,
+            ras_win_len=ras_win_len if ras_win_len > 0 else None,
+            ras_win_max_num_repeat=max(ras_win_len, ras_win_max_num_repeat),
         )
         generation_time = time.time() - start_time
     }
     """
+    default_template = "smart-voice"
     """Create the Gradio UI."""
     with gr.Blocks(theme=my_theme, css=custom_css) as demo:
                     info="Select a predefined example for system and input messages. Voice preset will be set to EMPTY when a example is selected.",
                 )
+                # Template description display
+                template_description = gr.HTML(
+                    value=f'<p style="font-size: 0.85em; color: var(--body-text-color-subdued); margin: 0; padding: 0;"> {PREDEFINED_EXAMPLES[default_template]["description"]}</p>',
+                    visible=True,
+                )
                 system_prompt = gr.TextArea(
                     label="System Prompt",
                     placeholder="Enter system prompt to guide the model...",
                     )
                     top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top P")
                     top_k = gr.Slider(minimum=-1, maximum=100, value=50, step=1, label="Top K")
+                    ras_win_len = gr.Slider(
+                        minimum=0,
+                        maximum=10,
+                        value=0,
+                        step=1,
+                        label="RAS Window Length",
+                        info="Window length for repetition avoidance sampling",
+                    )
+                    ras_win_max_num_repeat = gr.Slider(
+                        minimum=1,
+                        maximum=10,
+                        value=2,
+                        step=1,
+                        label="RAS Max Num Repeat",
+                        info="Maximum number of repetitions allowed in the window",
+                    )
                     # Add stop strings component
                     stop_strings = gr.Dataframe(
                         label="Stop Strings",
                 template = PREDEFINED_EXAMPLES[template_name]
                 # Enable voice preset and custom reference only for voice-clone template
                 is_voice_clone = template_name == "voice-clone"
+                voice_preset_value = "belinda" if is_voice_clone else "EMPTY"
+                description_text = f'<p style="font-size: 0.85em; color: var(--body-text-color-subdued); margin: 0; padding: 0;"> {template["description"]}</p>'
                 return (
                     template["system_prompt"],  # system_prompt
                     template["input_text"],  # input_text
+                    description_text,  # template_description
                     gr.update(
                         value=voice_preset_value, interactive=is_voice_clone, visible=is_voice_clone
                     ),  # voice_preset (value and interactivity)
                     gr.update(),
                     gr.update(),
                     gr.update(),
+                    gr.update(),
                 )  # No change if template not found
         # Set up event handlers
             outputs=[
                 system_prompt,
                 input_text,
+                template_description,
                 voice_preset,
                 custom_reference_accordion,
                 voice_samples_section,
                 top_k,
                 system_prompt,
                 stop_strings,
+                ras_win_len,
+                ras_win_max_num_repeat,
             ],
             outputs=[output_text, output_audio],
             api_name="generate_speech",

higgs_audio/serve/serve_engine.py CHANGED Viewed

@@ -27,6 +27,45 @@ from ..data_collator.higgs_audio_collator import HiggsAudioSampleCollator
 from ..audio_processing.higgs_audio_tokenizer import load_higgs_audio_tokenizer
 @dataclass
 class HiggsAudioStreamerDelta:
     """Represents a chunk of generated content, either text or audio tokens."""
@@ -422,3 +461,14 @@ class HiggsAudioServeEngine:
                     "cached_tokens": 0,
                 },
             )

 from ..audio_processing.higgs_audio_tokenizer import load_higgs_audio_tokenizer
# Translation table built once at import time. Every key is a single
# full-width (Chinese) punctuation character; every value is the half-width
# (English) string that replaces it. None of the replacement strings contains
# a key character, so one simultaneous pass gives the same result as applying
# the replacements one after another.
# NOTE(review): the copy of this table in app.py maps the full-width comma to
# ", " (with a trailing space) -- confirm which variant is intended.
_ZH_PUNCT_TRANSLATION = str.maketrans(
    {
        "，": ",",  # comma
        "。": ".",  # period
        "：": ":",  # colon
        "；": ";",  # semicolon
        "？": "?",  # question mark
        "！": "!",  # exclamation mark
        "（": "(",  # left parenthesis
        "）": ")",  # right parenthesis
        "【": "[",  # left square bracket
        "】": "]",  # right square bracket
        "《": "<",  # left angle quote
        "》": ">",  # right angle quote
        "“": '"',  # left double quotation
        "”": '"',  # right double quotation
        "‘": "'",  # left single quotation
        "’": "'",  # right single quotation
        "、": ",",  # enumeration comma
        "—": "-",  # em dash
        "…": "...",  # ellipsis
        "·": ".",  # middle dot
        "「": '"',  # left corner bracket
        "」": '"',  # right corner bracket
        "『": '"',  # left double corner bracket
        "』": '"',  # right double corner bracket
    }
)


def normalize_chinese_punctuation(text: str) -> str:
    """Convert Chinese (full-width) punctuation marks to English (half-width) equivalents.

    Args:
        text: The string to normalize.

    Returns:
        A copy of ``text`` in which every character listed in
        ``_ZH_PUNCT_TRANSLATION`` has been replaced by its mapped string.
        All other characters are left untouched.
    """
    # A single translate() pass replaces the former per-entry replace() loop,
    # which rescanned the whole string once for every mapping.
    return text.translate(_ZH_PUNCT_TRANSLATION)
 @dataclass
 class HiggsAudioStreamerDelta:
     """Represents a chunk of generated content, either text or audio tokens."""
                     "cached_tokens": 0,
                 },
             )
+    def text_normalize(self, text: str) -> str:
+        """
+        Normalize the text.
+        """
+        # Perform some basic normalization
+        text = normalize_chinese_punctuation(text)
+        # Handle parentheses
+        text = text.replace("(", " ")
+        text = text.replace(")", " ")
+        return text

voice_examples/{wizard.wav → belinda.wav} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83bda9cd63be92366ef40dbe15c33e67b78766fb7069609f10dfc05cc626deba
-size 1246508

 version https://git-lfs.github.com/spec/v1
+oid sha256:e663310bfe539efac3350fd6b277214dcddd65d5a46949180f11c719c8b9b769
+size 896776

voice_examples/broom_salesman.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9cb4f37dcac12227045845c07c8aef823519cbf7b62bcbc6223158f9d282e1a
+size 3383338

voice_examples/chadwick.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:166acd9a8d8bf3e205bf8217dfd47f8232437c0ea128c326bd1a9060c099e003
+size 458796

voice_examples/config.json CHANGED Viewed

@@ -1,6 +1,34 @@
 {
-    "wizard": {
-        "transcript": "I would imagine so. A wand with a dragon heartstring core is capable of dazzling magic.",
-        "audio_file": "wizard.wav"
     }
 }

 {
+    "belinda": {
+        "transcript": "Twas the night before my birthday. Hooray! It's almost here! It may not be a holiday, but it's the best day of the year.",
+        "audio_file": "belinda.wav"
+    },
+    "broom_salesman": {
+        "transcript": "I would imagine so. A wand with a dragon heartstring core is capable of dazzling magic. And the bond between you and your wand should only grow stronger. Do not be surprised at your new wand's ability to perceive your intentions - particularly in a moment of need.",
+        "audio_file": "broom_salesman.wav"
+    },
+    "chadwick": {
+        "transcript": "Oh dear, who left all this junk lying around? Whoops, there it goes! Mind your pointed little pink head, starfish man.",
+        "audio_file": "chadwick.wav"
+    },
+    "en_man": {
+        "transcript": "Maintaining your ability to learn translates into increased marketability, improved career options and higher salaries.",
+        "audio_file": "en_man.wav"
+    },
+    "en_woman": {
+        "transcript": "The device would work during the day as well, if you took steps to either block direct sunlight or point it away from the sun.",
+        "audio_file": "en_woman.wav"
+    },
+    "mabel": {
+        "transcript": "You do talk an awful lot about weather, did you know that? Sometimes I wonder if you're actually content to be a wizard or if you're secretly harbouring a desire to become a seer of the clouds.",
+        "audio_file": "mabel.wav"
+    },
+    "vex": {
+        "transcript": "Uhh, this is going to take forever. Why is everything so far?",
+        "audio_file": "vex.wav"
+    },
+    "zh_man_sichuan": {
+        "transcript": "对，这就是我，万人敬仰的太乙真人，虽然有点婴儿肥，但也掩不住我逼人的帅气。",
+        "audio_file": "zh_man_sichuan.wav"
     }
 }

voice_examples/en_man.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ca3df71ad1b6968765e69870220d34c6b2c2550a499cf59560d9d764d10b94e
+size 375566

voice_examples/en_woman.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1d49dc69f3b0731ed7b10ddf51dfc8f73465d4323f45841d93583d8b1e4d3e6
+size 313272

voice_examples/mabel.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e6c5e522c662c5d6b862d8b17e1618546666ce993dcd560f3bdd34a48bacd9f
+size 1054730

voice_examples/vex.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d95c6dcf7265847edd76989ffb2d3f5a92aa3e2bbd3718317010b49842c98954
+size 523086

voice_examples/zh_man_sichuan.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53892ece071342958403bc5643f84169a30b89cc0fc79eb69508bfa11dd85e68
+size 618528