higgs_audio_v2

Running on Zero

zachzzc commited on 4 days ago

Commit

d842d24

verified ·

1 Parent(s): c0add26

Update the sound effect normalization

Files changed (1) hide show

app.py CHANGED Viewed

@@ -89,7 +89,7 @@ PREDEFINED_EXAMPLES = {
     },
     "single-speaker-bgm": {
         "system_prompt": DEFAULT_SYSTEM_PROMPT,
-        "input_text": "<SE_s>[Music]</SE_s> I will remember this, thought Ender, when I am defeated. To keep dignity, and give honor where it's due, so that defeat is not disgrace. And I hope I don't have to do it often. <SE_e>[Music]</SE_e>",
         "description": "Single speaker with BGM using music tag. This is an experimental feature and you may need to try multiple times to get the best result.",
     },
 }
@@ -184,6 +184,22 @@ def normalize_text(transcript: str):
     transcript = transcript.replace(")", " ")
     transcript = transcript.replace("°F", " degrees Fahrenheit")
     transcript = transcript.replace("°C", " degrees Celsius")
     lines = transcript.split("\n")
     transcript = "\n".join([" ".join(line.split()) for line in lines if line.strip()])
     transcript = transcript.strip()

     },
     "single-speaker-bgm": {
         "system_prompt": DEFAULT_SYSTEM_PROMPT,
+        "input_text": "[music start] I will remember this, thought Ender, when I am defeated. To keep dignity, and give honor where it's due, so that defeat is not disgrace. And I hope I don't have to do it often. [music end]",
         "description": "Single speaker with BGM using music tag. This is an experimental feature and you may need to try multiple times to get the best result.",
     },
 }
     transcript = transcript.replace(")", " ")
     transcript = transcript.replace("°F", " degrees Fahrenheit")
     transcript = transcript.replace("°C", " degrees Celsius")
+    for tag, replacement in [
+        ("[laugh]", "<SE>[Laughter]</SE>"),
+        ("[humming start]", "<SE>[Humming]</SE>"),
+        ("[humming end]", "<SE_e>[Humming]</SE_e>"),
+        ("[music start]", "<SE_s>[Music]</SE_s>"),
+        ("[music end]", "<SE_e>[Music]</SE_e>"),
+        ("[music]", "<SE>[Music]</SE>"),
+        ("[sing start]", "<SE_s>[Singing]</SE_s>"),
+        ("[sing end]", "<SE_e>[Singing]</SE_e>"),
+        ("[applause]", "<SE>[Applause]</SE>"),
+        ("[cheering]", "<SE>[Cheering]</SE>"),
+        ("[cough]", "<SE>[Cough]</SE>"),
+    ]:
+        transcript = transcript.replace(tag, replacement)
     lines = transcript.split("\n")
     transcript = "\n".join([" ".join(line.split()) for line in lines if line.strip()])
     transcript = transcript.strip()