File size: 2,642 Bytes
50dd0bc
 
 
 
 
 
 
9d28841
 
9712d04
 
50dd0bc
 
bb840e1
 
50dd0bc
 
9d28841
 
780954b
 
6bf86b0
 
780954b
 
50dd0bc
 
3166c53
50dd0bc
3166c53
50dd0bc
 
968bcb8
 
 
 
 
3166c53
50dd0bc
3166c53
50dd0bc
 
968bcb8
 
 
 
 
50dd0bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c60425
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Speech-recognition model options.
# Each entry has an `id` (looks like a "<namespace>/<model>" hub identifier —
# confirm how the loader resolves it) and a `name` (presumably the label shown
# to users; verify against the consumer).
asr_models:
  - id: openai/whisper-large-v3-turbo
    name: Whisper large-v3-turbo
  - id: openai/whisper-large-v3
    name: Whisper large-v3
  - id: openai/whisper-medium
    name: Whisper medium
  - id: openai/whisper-small
    name: Whisper small
  # NOTE(review): the only entry not under the `openai/` namespace; it may be
  # loaded through a different backend — confirm.
  - id: funasr/paraformer-zh
    name: Paraformer-zh

# Language-model options, using the same `id` / `name` entry shape as the
# other model lists in this file.
llm_models:
  # NOTE(review): unlike the entries below, this id has no "<namespace>/"
  # prefix — presumably it is served through a hosted API rather than loaded
  # from a model hub; confirm with the loader.
  - id: gemini-2.5-flash
    name: Gemini 2.5 Flash
  - id: google/gemma-2-2b
    name: Gemma 2 2B
  - id: meta-llama/Llama-3.2-3B-Instruct
    name: Llama 3.2 3B Instruct
  - id: meta-llama/Llama-3.1-8B-Instruct
    name: Llama 3.1 8B Instruct
  - id: Qwen/Qwen3-8B
    name: Qwen3 8B
  - id: Qwen/Qwen3-30B-A3B
    name: Qwen3 30B A3B

# Singing-voice-synthesis model options.
# Per-entry fields:
#   id         - "<lang>-<model_path>"; keeps entries unique when several
#                share one model_path.
#   name       - label for the entry (presumably user-facing; confirm).
#   model_path - model identifier; the first two entries point at the same one.
#   lang       - language selector for the entry.
#   voices     - mapping of voice label -> voice reference. The value type
#                differs between entries (see notes below).
svs_models:
  - id: mandarin-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    name: Visinger2 (Bilingual)-zh
    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    lang: mandarin
    # Values here are `.npy` file paths — presumably speaker-embedding files
    # resolved relative to the project root; confirm with the loader.
    voices:
      voice1: resources/singer/singer_embedding_ace-2.npy
      voice2: resources/singer/singer_embedding_ace-8.npy
      voice3: resources/singer/singer_embedding_itako.npy
      voice4: resources/singer/singer_embedding_kising_orange.npy
      voice5: resources/singer/singer_embedding_m4singer_Alto-4.npy
  # Same model_path and voices as the entry above; only `id`, `name` and
  # `lang` differ.
  - id: japanese-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    name: Visinger2 (Bilingual)-jp
    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained_avg
    lang: japanese
    voices:
      voice1: resources/singer/singer_embedding_ace-2.npy
      voice2: resources/singer/singer_embedding_ace-8.npy
      voice3: resources/singer/singer_embedding_itako.npy
      voice4: resources/singer/singer_embedding_kising_orange.npy
      voice5: resources/singer/singer_embedding_m4singer_Alto-4.npy
  - id: mandarin-espnet/aceopencpop_svs_visinger2_40singer_pretrain
    name: Visinger2 (Chinese)
    model_path: espnet/aceopencpop_svs_visinger2_40singer_pretrain
    lang: mandarin
    # Values here are unquoted, so they parse as integers rather than strings —
    # presumably speaker indices for this model; confirm before quoting them.
    voices:
      voice1: 5
      voice2: 8
      voice3: 12
      voice4: 15
      voice5: 29

# Melody source options.
# Each entry has an `id`, a `name` label and a `desc` sentence describing
# where the melody comes from.
# NOTE(review): the dataset name is spelled "KiSing" in the `sample-note-kising`
# entry but "Kising" in the `sample-lyric-kising` entry. Those strings are left
# unchanged in case other code matches on them; normalize once confirmed safe.
melody_sources:
  - id: gen-random-none
    name: Random Generation
    desc: "Melody is generated without any structure or reference."
  - id: sample-note-kising
    name: Sampled Melody (KiSing)
    desc: "Melody is retrieved from KiSing dataset."
  - id: sample-note-touhou
    name: Sampled Melody (Touhou)
    desc: "Melody is retrieved from Touhou dataset."
  - id: sample-lyric-kising
    name: Sampled Melody with Lyrics (Kising)
    desc: "Melody with aligned lyrics are sampled from Kising dataset."
  - id: sample-lyric-genre
    name: Sampled Melody with Lyrics (Synthetic)
    # The previous text was a verbatim copy of the entry above ("... from
    # Kising dataset"), which contradicts this entry's "Synthetic" name.
    # NOTE(review): confirm the exact dataset name and adjust if needed.
    desc: "Melody with aligned lyrics are sampled from a synthetic dataset."