Spaces:
Sleeping
Sleeping
File size: 2,642 Bytes
50dd0bc 9d28841 9712d04 50dd0bc bb840e1 50dd0bc 9d28841 780954b 6bf86b0 780954b 50dd0bc 3166c53 50dd0bc 3166c53 50dd0bc 968bcb8 3166c53 50dd0bc 3166c53 50dd0bc 968bcb8 50dd0bc 2c60425 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# Speech-recognition (ASR) model options.
# Each entry has an `id` (an "<org>/<model>" style identifier) and a
# human-readable `name`.
asr_models:
  - id: openai/whisper-large-v3-turbo
    name: Whisper large-v3-turbo

  - id: openai/whisper-large-v3
    name: Whisper large-v3

  - id: openai/whisper-medium
    name: Whisper medium

  - id: openai/whisper-small
    name: Whisper small

  - id: funasr/paraformer-zh
    name: Paraformer-zh
# Language-model (LLM) options.
# Each entry has an `id` and a human-readable `name`.
# NOTE(review): `gemini-2.5-flash` has no "<org>/" prefix unlike the other
# ids - presumably it is resolved differently by the consumer; confirm.
llm_models:
  - id: gemini-2.5-flash
    name: Gemini 2.5 Flash

  - id: google/gemma-2-2b
    name: Gemma 2 2B

  - id: meta-llama/Llama-3.2-3B-Instruct
    name: Llama 3.2 3B Instruct

  - id: meta-llama/Llama-3.1-8B-Instruct
    name: Llama 3.1 8B Instruct

  - id: Qwen/Qwen3-8B
    name: Qwen3 8B

  - id: Qwen/Qwen3-30B-A3B
    name: Qwen3 30B A3B
# Singing-voice-synthesis (SVS) model options.
# Each entry has:
#   id         - "<lang>-<model_path>" identifier
#   name       - human-readable label
#   model_path - model location in "<org>/<model>" form
#   lang       - language the entry is configured for
#   voices     - mapping of voice label -> voice selector
svs_models:
  # Bilingual model, Mandarin configuration.
  # Voice selectors are paths to singer-embedding .npy files.
  - id: mandarin-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    name: Visinger2 (Bilingual)-zh
    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    lang: mandarin
    voices:
      voice1: resources/singer/singer_embedding_ace-2.npy
      voice2: resources/singer/singer_embedding_ace-8.npy
      voice3: resources/singer/singer_embedding_itako.npy
      voice4: resources/singer/singer_embedding_kising_orange.npy
      voice5: resources/singer/singer_embedding_m4singer_Alto-4.npy

  # Same model_path and voices as the entry above, Japanese configuration.
  - id: japanese-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    name: Visinger2 (Bilingual)-jp
    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    lang: japanese
    voices:
      voice1: resources/singer/singer_embedding_ace-2.npy
      voice2: resources/singer/singer_embedding_ace-8.npy
      voice3: resources/singer/singer_embedding_itako.npy
      voice4: resources/singer/singer_embedding_kising_orange.npy
      voice5: resources/singer/singer_embedding_m4singer_Alto-4.npy

  # Chinese-only model.
  # Voice selectors are integers here rather than embedding paths
  # (presumably singer indices into the 40-singer model - TODO confirm).
  - id: mandarin-espnet/aceopencpop_svs_visinger2_40singer_pretrain
    name: Visinger2 (Chinese)
    model_path: espnet/aceopencpop_svs_visinger2_40singer_pretrain
    lang: mandarin
    voices:
      voice1: 5
      voice2: 8
      voice3: 12
      voice4: 15
      voice5: 29
# Melody source options.
# Each entry has an `id`, a human-readable `name`, and a one-sentence `desc`.
melody_sources:
  - id: gen-random-none
    name: Random Generation
    desc: "Melody is generated without any structure or reference."

  - id: sample-note-kising
    name: Sampled Melody (KiSing)
    desc: "Melody is retrieved from KiSing dataset."

  - id: sample-note-touhou
    name: Sampled Melody (Touhou)
    desc: "Melody is retrieved from Touhou dataset."

  - id: sample-lyric-kising
    name: Sampled Melody with Lyrics (Kising)
    desc: "Melody with aligned lyrics are sampled from Kising dataset."

  # The description previously duplicated the Kising entry above, which
  # contradicted this entry's "(Synthetic)" name.
  # NOTE(review): confirm the exact dataset wording with the data owner.
  - id: sample-lyric-genre
    name: Sampled Melody with Lyrics (Synthetic)
    desc: "Melody with aligned lyrics are sampled from a synthetic dataset."
|