kemuriririn committed
Commit 9e032ec · Parent: c82a118

(wip)debug

Files changed (2):
  1. models.py +7 -0
  2. tts.py +32 -31
models.py CHANGED
@@ -477,6 +477,13 @@ def insert_initial_models():
             is_open=True,
             model_url="https://github.com/FunAudioLLM/CosyVoice",
         ),
+        Model(
+            id="gpt-sovits-v2",
+            name="GPT-SoVITS v2",
+            model_type=ModelType.TTS,
+            is_open=True,
+            model_url="https://huggingface.co/spaces/lj1995/GPT-SoVITS-v2",
+        ),
         # Model(
         #     id="papla-p1",
         #     name="Papla P1",
tts.py CHANGED
@@ -37,10 +37,6 @@ model_mapping = {
     #     "provider": "elevenlabs",
     #     "model": "eleven_flash_v2_5",
     # },
-    # "cartesia-sonic-2": {
-    #     "provider": "cartesia",
-    #     "model": "sonic-2",
-    # },
     "spark-tts": {
         "provider": "spark",
         "model": "spark-tts",
@@ -65,10 +61,6 @@ model_mapping = {
     #     "provider": "hume",
     #     "model": "octave",
     # },
-    # "megatts3": {
-    #     "provider": "megatts3",
-    #     "model": "megatts3",
-    # },
     # "minimax-02-hd": {
     #     "provider": "minimax",
     #     "model": "speech-02-hd",
@@ -85,14 +77,14 @@ model_mapping = {
         "provider": "bilibili",
         "model": "index-tts",
     },
-    "step-audio-tts-3b": {
-        "provider": "swarmeta_ai",
-        "model": "step-audio-tts-3b",
-    },
     "maskgct": {
         "provider": "amphion",
         "model": "maskgct",
     },
+    "gpt-sovits-v2": {
+        "provider": "gpt-sovits",
+        "model": "gpt-sovits-v2",
+    },
 }
 url = "https://tts-agi-tts-router-v2.hf.space/tts"
 headers = {
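
These mapping hunks only change which ids appear in model_mapping next to the router configuration (url/headers); the dispatch changes come in the later hunks. As a rough illustration of what the dict yields for the new id, here is a hypothetical helper (not part of tts.py), assuming nothing beyond the dict shown above:

def resolve_route(model_id: str) -> tuple[str, str]:
    # Hypothetical helper: look up the router entry for a UI model id.
    if model_id not in model_mapping:
        raise ValueError(f"Model {model_id} not found")  # same error predict_tts raises
    entry = model_mapping[model_id]
    return entry["provider"], entry["model"]

# e.g. resolve_route("gpt-sovits-v2") == ("gpt-sovits", "gpt-sovits-v2")
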
@@ -266,25 +258,9 @@ def predict_cosyvoice_tts(text, reference_audio_path=None):
     return result
 
 
-def predict_step_audio_tts_3b(text, reference_audio_path=None):
-    from gradio_client import Client, handle_file, file
-    client = Client("https://swarmeta-ai-step-audio-tts-3b.ms.show/")
-    if not reference_audio_path:
-        raise ValueError("step-audio-tts-3b requires reference_audio_path")
-    prompt_audio = handle_file(reference_audio_path)
-    result = client.predict(
-        text=text,
-        prompt_audio=file(reference_audio_path),
-        prompt_text="",
-        api_name="/generate_clone"
-    )
-    print("step-audio-tts-3b result:", result)
-    return result
-
-
 def predict_maskgct(text, reference_audio_path=None):
     from gradio_client import Client, handle_file
-    client = Client("https://s5k.cn/api/v1/studio/amphion/maskgct/gradio/")
+    client = Client("amphion/maskgct")
     if not reference_audio_path:
         raise ValueError("maskgct requires reference_audio_path")
     prompt_wav = handle_file(reference_audio_path)
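
The surviving change in this hunk points the maskgct client at the Hugging Face Space id instead of the s5k.cn mirror URL; gradio_client accepts either form. A quick way to confirm the id resolves and to inspect its endpoints:

from gradio_client import Client

# A Space id and its full URL address the same app; the commit switches to the id form.
client = Client("amphion/maskgct")
client.view_api()  # prints the Space's callable endpoints and their parameters
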
@@ -299,6 +275,31 @@ def predict_maskgct(text, reference_audio_path=None):
     return result
 
 
+def predict_gpt_sovits_v2(text, reference_audio_path=None):
+    from gradio_client import Client, file
+    client = Client("lj1995/GPT-SoVITS-v2")
+    if not reference_audio_path:
+        raise ValueError("GPT-SoVITS-v2 requires reference_audio_path")
+    result = client.predict(
+        ref_wav_path=file(reference_audio_path),
+        prompt_text="",
+        prompt_language="English",
+        text=text,
+        text_language="English",
+        how_to_cut="Slice once every 4 sentences",
+        top_k=15,
+        top_p=1,
+        temperature=1,
+        ref_free=False,
+        speed=1,
+        if_freeze=False,
+        inp_refs=[],
+        api_name="/get_tts_wav"
+    )
+    print("gpt-sovits-v2 result:", result)
+    return result
+
+
 def predict_tts(text, model, reference_audio_path=None):
     global client
     print(f"Predicting TTS for {model}")
@@ -315,10 +316,10 @@ def predict_tts(text, model, reference_audio_path=None):
         return predict_spark_tts(text, reference_audio_path)
     elif model == "cosyvoice-2.0":
         return predict_cosyvoice_tts(text, reference_audio_path)
-    elif model == "step-audio-tts-3b":
-        return predict_step_audio_tts_3b(text, reference_audio_path)
     elif model == "maskgct":
         return predict_maskgct(text, reference_audio_path)
+    elif model == "gpt-sovits-v2":
+        return predict_gpt_sovits_v2(text, reference_audio_path)
 
     if not model in model_mapping:
         raise ValueError(f"Model {model} not found")
 