Spaces:
Running
on
Zero
Running
on
Zero
enable settings
Browse files
app.py
CHANGED
|
@@ -114,28 +114,24 @@ with gr.Blocks(title="😻 KaniTTS - Text to Speech", theme=gr.themes.Default())
|
|
| 114 |
)
|
| 115 |
|
| 116 |
with gr.Accordion("Settings", open=False):
|
| 117 |
-
|
| 118 |
minimum=0.1, maximum=1.5, value=0.6, step=0.05,
|
| 119 |
-
label="
|
| 120 |
-
info="Higher values (0.7-1.0) create more expressive but less stable speech"
|
| 121 |
)
|
| 122 |
top_p = gr.Slider(
|
| 123 |
minimum=0.1, maximum=1.0, value=0.95, step=0.05,
|
| 124 |
label="Top P",
|
| 125 |
-
info="Nucleus sampling threshold"
|
| 126 |
)
|
| 127 |
-
|
| 128 |
minimum=1.0, maximum=2.0, value=1.1, step=0.05,
|
| 129 |
label="Repetition Penalty",
|
| 130 |
-
info="Higher values discourage repetitive patterns"
|
| 131 |
)
|
| 132 |
-
|
| 133 |
minimum=100, maximum=2000, value=1200, step=100,
|
| 134 |
-
label="Max
|
| 135 |
-
info="Maximum length of generated audio (in tokens)"
|
| 136 |
)
|
| 137 |
|
| 138 |
-
generate_btn = gr.Button("
|
| 139 |
|
| 140 |
|
| 141 |
with gr.Column(scale=1):
|
|
@@ -154,7 +150,7 @@ with gr.Blocks(title="😻 KaniTTS - Text to Speech", theme=gr.themes.Default())
|
|
| 154 |
# GPU generation event
|
| 155 |
generate_btn.click(
|
| 156 |
fn=generate_speech_gpu,
|
| 157 |
-
inputs=[text_input, model_dropdown],
|
| 158 |
outputs=[audio_output, time_report_output]
|
| 159 |
)
|
| 160 |
|
|
@@ -178,7 +174,7 @@ with gr.Blocks(title="😻 KaniTTS - Text to Speech", theme=gr.themes.Default())
|
|
| 178 |
|
| 179 |
gr.Examples(
|
| 180 |
examples=examples,
|
| 181 |
-
inputs=[text_input, model_dropdown,
|
| 182 |
fn=generate_speech_gpu,
|
| 183 |
outputs=[audio_output, time_report_output],
|
| 184 |
cache_examples=True,
|
|
|
|
| 114 |
)
|
| 115 |
|
| 116 |
with gr.Accordion("Settings", open=False):
|
| 117 |
+
temp = gr.Slider(
|
| 118 |
minimum=0.1, maximum=1.5, value=0.6, step=0.05,
|
| 119 |
+
label="Temp",
|
|
|
|
| 120 |
)
|
| 121 |
top_p = gr.Slider(
|
| 122 |
minimum=0.1, maximum=1.0, value=0.95, step=0.05,
|
| 123 |
label="Top P",
|
|
|
|
| 124 |
)
|
| 125 |
+
rp = gr.Slider(
|
| 126 |
minimum=1.0, maximum=2.0, value=1.1, step=0.05,
|
| 127 |
label="Repetition Penalty",
|
|
|
|
| 128 |
)
|
| 129 |
+
max_tok = gr.Slider(
|
| 130 |
minimum=100, maximum=2000, value=1200, step=100,
|
| 131 |
+
label="Max Tokens",
|
|
|
|
| 132 |
)
|
| 133 |
|
| 134 |
+
generate_btn = gr.Button("Run", variant="primary", size="lg")
|
| 135 |
|
| 136 |
|
| 137 |
with gr.Column(scale=1):
|
|
|
|
| 150 |
# GPU generation event
|
| 151 |
generate_btn.click(
|
| 152 |
fn=generate_speech_gpu,
|
| 153 |
+
inputs=[text_input, model_dropdown, temp, top_p, rp, max_tok],
|
| 154 |
outputs=[audio_output, time_report_output]
|
| 155 |
)
|
| 156 |
|
|
|
|
| 174 |
|
| 175 |
gr.Examples(
|
| 176 |
examples=examples,
|
| 177 |
+
inputs=[text_input, model_dropdown, temp, top_p, rp, max_tok],
|
| 178 |
fn=generate_speech_gpu,
|
| 179 |
outputs=[audio_output, time_report_output],
|
| 180 |
cache_examples=True,
|
util.py
CHANGED
|
@@ -197,7 +197,7 @@ class KaniModel:
|
|
| 197 |
model_request = point_2 - point_1
|
| 198 |
player_time = point_3 - point_2
|
| 199 |
total_time = point_3 - point_1
|
| 200 |
-
report = f"SPEECH TOKENS: {model_request:.2f}\
|
| 201 |
return report
|
| 202 |
|
| 203 |
def run_model(self, text: str):
|
|
|
|
| 197 |
model_request = point_2 - point_1
|
| 198 |
player_time = point_3 - point_2
|
| 199 |
total_time = point_3 - point_1
|
| 200 |
+
report = f"SPEECH TOKENS: {model_request:.2f}\nCODEC: {player_time:.2f}\nTOTAL: {total_time:.2f}"
|
| 201 |
return report
|
| 202 |
|
| 203 |
def run_model(self, text: str):
|