Spaces:
Runtime error
Runtime error
Commit
·
b5b5082
1
Parent(s):
7890f41
first attempt
Browse files
- app.py +37 -4
- neutts-air +1 -0
- packages.txt +1 -0
- requirements.txt +19 -0
app.py
CHANGED
@@ -1,7 +1,40 @@
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append("neutts-air")
|
3 |
+
from neuttsair.neutts import NeuTTSAir
|
4 |
import gradio as gr
|
5 |
+
import spaces
|
6 |
|
7 |
+
# Load the model once at import time so every request reuses the same instance.
#
# Two fixes relative to the first attempt:
#   * `backbone` was never defined, so this call raised NameError on import.
#     NOTE(review): "neuphonic/neutts-air" is the backbone repo id used in the
#     upstream NeuTTS-Air README -- confirm it is the checkpoint wanted here.
#   * "gpu" is not a valid torch device string; the CUDA device is "cuda".
#     NOTE(review): assumes NeuTTSAir forwards these values to torch -- confirm.
tts = NeuTTSAir(
    backbone_repo="neuphonic/neutts-air",
    backbone_device="cuda",
    codec_repo="neuphonic/neucodec",
    codec_device="cuda",
)
|
14 |
|
15 |
+
@spaces.GPU()
def infer(ref_text: str, ref_audio_path: str, gen_text: str):
    """Synthesize ``gen_text`` using the uploaded reference recording.

    Args:
        ref_text: Text entered in the "Reference Text" box; passed to
            ``tts.infer`` together with the encoded reference.
        ref_audio_path: Filesystem path of the uploaded reference audio
            (the Audio input is configured with ``type="filepath"``).
        gen_text: The new text to synthesize.

    Returns:
        A ``(sample_rate, waveform)`` tuple for the ``type="numpy"`` Audio
        output. The sample rate is fixed at 24_000; the waveform is whatever
        ``tts.infer`` returns.

    Raises:
        gr.Error: If no reference audio was uploaded or ``gen_text`` is blank.
    """
    # Fail with a readable UI message instead of an opaque traceback from the
    # model when a required input is missing.
    if not ref_audio_path:
        raise gr.Error("Please upload a reference audio sample.")
    if not gen_text or not gen_text.strip():
        raise gr.Error("Please enter the text to generate.")

    gr.Info("Starting inference request!")
    gr.Info("Encoding reference...")
    ref_codes = tts.encode_reference(ref_audio_path)

    # The original body referenced an undefined name `input_text`; the text to
    # synthesize arrives in the `gen_text` parameter.
    gr.Info(f"Generating audio for input text: {gen_text}")
    wav = tts.infer(gen_text, ref_codes, ref_text)

    return (24_000, wav)
|
26 |
+
|
27 |
+
# Build the web UI: three inputs map positionally onto
# infer(ref_text, ref_audio_path, gen_text).
demo = gr.Interface(
    fn=infer,
    inputs=[
        gr.Textbox(label="Reference Text"),
        # Gradio 4+ removed the singular `source=` keyword; the component now
        # takes a list via `sources=`. requirements.txt does not pin gradio,
        # so a current release is installed and `source=` raises TypeError.
        gr.Audio(sources=["upload"], type="filepath", label="Reference Audio"),
        gr.Textbox(label="Text to Generate"),
    ],
    outputs=gr.Audio(type="numpy", label="Generated Speech"),
    title="NeuTTS-Air",
    description="Upload a reference audio sample, provide the reference text, and enter new text to synthesize.",
)

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|
neutts-air
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Subproject commit ededc7d354b05cb6d245c2a8563e04c5f8ac12a2
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
espeak
|
requirements.txt
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
datasets==4.0.0
|
2 |
+
librosa==0.11.0
|
3 |
+
neucodec>=0.0.3
|
4 |
+
numpy==2.2.6
|
5 |
+
pandas==2.3.2
|
6 |
+
phonemizer==3.3.0
|
7 |
+
requests==2.32.5
|
8 |
+
scipy>=1.15
|
9 |
+
soundfile==0.13.1
|
10 |
+
torch==2.8.0
|
11 |
+
torchao==0.13.0
|
12 |
+
torchaudio==2.8.0
|
13 |
+
torchtune==0.6.1
|
14 |
+
tqdm==4.67.1
|
15 |
+
transformers==4.56.1
|
16 |
+
vector-quantize-pytorch==1.17.8
|
17 |
+
resemble-perth==1.0.1
|
18 |
+
accelerate==1.10.1
|
19 |
+
gradio
|