lj1995 committed
Commit abe4494 · 1 Parent(s): 48ddb2e

Delete configs

configs/s1.yaml DELETED
@@ -1,31 +0,0 @@
-train:
-  seed: 1234
-  epochs: 300
-  batch_size: 8
-  gradient_accumulation: 4
-  save_every_n_epoch: 1
-  precision: 16
-  gradient_clip: 1.0
-optimizer:
-  lr: 0.01
-  lr_init: 0.00001
-  lr_end: 0.0001
-  warmup_steps: 2000
-  decay_steps: 40000
-data:
-  max_eval_sample: 8
-  max_sec: 54
-  num_workers: 1
-  pad_val: 1024 # same with EOS in model
-model:
-  vocab_size: 1025
-  phoneme_vocab_size: 512
-  embedding_dim: 512
-  hidden_dim: 512
-  head: 16
-  linear_units: 2048
-  n_layer: 12
-  dropout: 0
-  EOS: 1024
-inference:
-  top_k: 5
 
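For context, this file and the other s1*.yaml variants below share one layout: a 1024-entry semantic codebook plus one EOS token gives `vocab_size: 1025`, and the EOS id (1024) doubles as the data `pad_val`. A minimal sketch of reading such a config with PyYAML follows; the filename and checks are illustrative, not the repo's actual loading code.

```python
# Sketch: load an s1-style config and check the EOS/pad_val/vocab_size
# relationship. Assumes PyYAML; "configs/s1.yaml" is the file deleted above.
import yaml

with open("configs/s1.yaml") as f:
    cfg = yaml.safe_load(f)

model = cfg["model"]
# The EOS id doubles as the padding value ("same with EOS in model"),
# and the vocabulary is 1024 semantic codes plus the one EOS entry.
assert cfg["data"]["pad_val"] == model["EOS"] == 1024
assert model["vocab_size"] == model["EOS"] + 1  # 1025

print(model["n_layer"], model["hidden_dim"])  # 12 512 for s1.yaml
```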
configs/s1big.yaml DELETED
@@ -1,31 +0,0 @@
-train:
-  seed: 1234
-  epochs: 300
-  batch_size: 8
-  gradient_accumulation: 4
-  save_every_n_epoch: 1
-  precision: 16-mixed
-  gradient_clip: 1.0
-optimizer:
-  lr: 0.01
-  lr_init: 0.00001
-  lr_end: 0.0001
-  warmup_steps: 2000
-  decay_steps: 40000
-data:
-  max_eval_sample: 8
-  max_sec: 54
-  num_workers: 1
-  pad_val: 1024 # same with EOS in model
-model:
-  vocab_size: 1025
-  phoneme_vocab_size: 512
-  embedding_dim: 1024
-  hidden_dim: 1024
-  head: 16
-  linear_units: 2048
-  n_layer: 16
-  dropout: 0
-  EOS: 1024
-inference:
-  top_k: 5
 
configs/s1big2.yaml DELETED
@@ -1,31 +0,0 @@
-train:
-  seed: 1234
-  epochs: 300
-  batch_size: 12
-  gradient_accumulation: 4
-  save_every_n_epoch: 1
-  precision: 16-mixed
-  gradient_clip: 1.0
-optimizer:
-  lr: 0.01
-  lr_init: 0.00001
-  lr_end: 0.0001
-  warmup_steps: 2000
-  decay_steps: 40000
-data:
-  max_eval_sample: 8
-  max_sec: 54
-  num_workers: 1
-  pad_val: 1024 # same with EOS in model
-model:
-  vocab_size: 1025
-  phoneme_vocab_size: 512
-  embedding_dim: 1024
-  hidden_dim: 1024
-  head: 16
-  linear_units: 2048
-  n_layer: 6
-  dropout: 0
-  EOS: 1024
-inference:
-  top_k: 5
 
configs/s1longer-v2.yaml DELETED
@@ -1,31 +0,0 @@
-train:
-  seed: 1234
-  epochs: 20
-  batch_size: 8
-  save_every_n_epoch: 1
-  precision: 16-mixed
-  gradient_clip: 1.0
-optimizer:
-  lr: 0.01
-  lr_init: 0.00001
-  lr_end: 0.0001
-  warmup_steps: 2000
-  decay_steps: 40000
-data:
-  max_eval_sample: 8
-  max_sec: 54
-  num_workers: 4
-  pad_val: 1024 # same with EOS in model
-model:
-  vocab_size: 1025
-  phoneme_vocab_size: 732
-  embedding_dim: 512
-  hidden_dim: 512
-  head: 16
-  linear_units: 2048
-  n_layer: 24
-  dropout: 0
-  EOS: 1024
-  random_bert: 0
-inference:
-  top_k: 15
 
configs/s1longer.yaml DELETED
@@ -1,31 +0,0 @@
-train:
-  seed: 1234
-  epochs: 20
-  batch_size: 8
-  save_every_n_epoch: 1
-  precision: 16-mixed
-  gradient_clip: 1.0
-optimizer:
-  lr: 0.01
-  lr_init: 0.00001
-  lr_end: 0.0001
-  warmup_steps: 2000
-  decay_steps: 40000
-data:
-  max_eval_sample: 8
-  max_sec: 54
-  num_workers: 4
-  pad_val: 1024 # same with EOS in model
-model:
-  vocab_size: 1025
-  phoneme_vocab_size: 512
-  embedding_dim: 512
-  hidden_dim: 512
-  head: 16
-  linear_units: 2048
-  n_layer: 24
-  dropout: 0
-  EOS: 1024
-  random_bert: 0
-inference:
-  top_k: 5
 
configs/s1mq.yaml DELETED
@@ -1,77 +0,0 @@
-train:
-  seed: 1234
-  epochs: 100
-  batch_size: 6
-  gradient_accumulation: 4
-  save_every_n_epoch: 1
-  precision: 32
-  gradient_clip: 1.0
-optimizer:
-  lr: 0.01
-  lr_init: 0.00001
-  lr_end: 0.0001
-  warmup_steps: 2000
-  decay_steps: 40000
-data:
-  max_eval_sample: 8
-  max_sec: 40
-  num_workers: 1
-  pad_val: 1024 # same with EOS in model
-model:
-  saving_path: "ckpt/"
-  resume_checkpoint: null
-  vocoder_config_path: "quantizer/new_ckpt/config.json"
-  vocoder_ckpt_path: "quantizer/new_ckpt/g_00600000"
-  datadir: "/home/liweiche/GigaSpeech/wavs"
-  metapath: "/home/liweiche/GigaSpeech/train2.json"
-  val_metapath: "/home/liweiche/GigaSpeech/dev2.json"
-  sampledir: "logs/"
-  pretrained_path: null
-  lr: 0.0001
-  batch_size: 200.0
-  train_bucket_size: 8192
-  training_step: 800000
-  optim_flat_percent: 0.0
-  warmup_step: 50
-  adam_beta1: 0.9
-  adam_beta2: 0.98
-  ffd_size: 3072
-  hidden_size: 768
-  enc_nlayers: 6
-  dec_nlayers: 6
-  nheads: 12
-  ar_layer: 4
-  ar_ffd_size: 1024
-  ar_hidden_size: 256
-  ar_nheads: 4
-  aligner_softmax_temp: 1.0
-  layer_norm_eps: 0.00001
-  speaker_embed_dropout: 0.05
-  label_smoothing: 0.0
-  val_check_interval: 5000
-  check_val_every_n_epoch: 1
-  precision: "fp16"
-  nworkers: 16
-  distributed: true
-  accelerator: "ddp"
-  version: null
-  accumulate_grad_batches: 1
-  use_repetition_token: true
-  use_repetition_gating: false
-  repetition_penalty: 1.0
-  sampling_temperature: 1.0
-  top_k: -1
-  min_top_k: 3
-  top_p: 0.8
-  sample_num: 4
-  length_penalty_max_length: 15000
-  length_penalty_max_prob: 0.95
-  max_input_length: 2048
-  max_output_length: 2000
-  sample_rate: 16000
-  n_codes: 1024
-  n_cluster_groups: 1
-  phone_context_window: 4
-  phoneset_size: 1000
-inference:
-  top_k: 5
 
configs/s2.json DELETED
@@ -1,90 +0,0 @@
-{
-  "train": {
-    "log_interval": 100,
-    "eval_interval": 500,
-    "seed": 1234,
-    "epochs": 100,
-    "learning_rate": 0.0001,
-    "betas": [
-      0.8,
-      0.99
-    ],
-    "eps": 1e-09,
-    "batch_size": 32,
-    "fp16_run": true,
-    "lr_decay": 0.999875,
-    "segment_size": 20480,
-    "init_lr_ratio": 1,
-    "warmup_epochs": 0,
-    "c_mel": 45,
-    "c_kl": 1.0,
-    "text_low_lr_rate": 0.4
-  },
-  "data": {
-    "max_wav_value": 32768.0,
-    "sampling_rate": 32000,
-    "filter_length": 2048,
-    "hop_length": 640,
-    "win_length": 2048,
-    "n_mel_channels": 128,
-    "mel_fmin": 0.0,
-    "mel_fmax": null,
-    "add_blank": true,
-    "n_speakers": 300,
-    "cleaned_text": true
-  },
-  "model": {
-    "inter_channels": 192,
-    "hidden_channels": 192,
-    "filter_channels": 768,
-    "n_heads": 2,
-    "n_layers": 6,
-    "kernel_size": 3,
-    "p_dropout": 0.1,
-    "resblock": "1",
-    "resblock_kernel_sizes": [
-      3,
-      7,
-      11
-    ],
-    "resblock_dilation_sizes": [
-      [
-        1,
-        3,
-        5
-      ],
-      [
-        1,
-        3,
-        5
-      ],
-      [
-        1,
-        3,
-        5
-      ]
-    ],
-    "upsample_rates": [
-      10,
-      8,
-      2,
-      2,
-      2
-    ],
-    "upsample_initial_channel": 512,
-    "upsample_kernel_sizes": [
-      16,
-      16,
-      8,
-      2,
-      2
-    ],
-    "n_layers_q": 3,
-    "use_spectral_norm": false,
-    "gin_channels": 512,
-    "semantic_frame_rate": "25hz",
-    "freeze_quantizer": true
-  },
-  "s2_ckpt_dir": "logs/s2/big2k1",
-  "content_module": "cnhubert"
-}
 
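One internal consistency in s2.json is worth noting: the product of the decoder's `upsample_rates` (10·8·2·2·2 = 640) must equal `hop_length`, so each spectrogram frame expands to exactly one hop of audio; at the 32 kHz sampling rate that is 50 frames per second. A minimal check follows, a sketch assuming only the standard library; the invariant is a general property of HiFi-GAN-style vocoders rather than anything specific to this repo's code.

```python
# Sketch: verify that the vocoder's total upsampling factor equals the
# STFT hop length in an s2-style config. Illustrative only.
import json
from math import prod

with open("configs/s2.json") as f:
    cfg = json.load(f)

data, model = cfg["data"], cfg["model"]
# Each spectrogram frame must expand to exactly one hop of audio.
assert prod(model["upsample_rates"]) == data["hop_length"]  # 10*8*2*2*2 == 640

print(data["sampling_rate"] / data["hop_length"])  # 50.0 frames per second
```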
configs/train.yaml DELETED
@@ -1,32 +0,0 @@
-gpu:
-  n_card: 1
-  n_process_per_card: 2
-io:
-  text_path: D:\RVC1006\GPT-SoVITS\GPT_SoVITS
-  save_every_n_epoch: 1
-  precision: 16-mixed
-  gradient_clip: 1.0
-optimizer:
-  lr: 0.01
-  lr_init: 0.00001
-  lr_end: 0.0001
-  warmup_steps: 2000
-  decay_steps: 40000
-data:
-  max_eval_sample: 8
-  max_sec: 54
-  num_workers: 1
-  pad_val: 1024 # same with EOS in model
-model:
-  vocab_size: 1025
-  phoneme_vocab_size: 512
-  embedding_dim: 512
-  hidden_dim: 512
-  head: 16
-  linear_units: 2048
-  n_layer: 24
-  dropout: 0
-  EOS: 1024
-inference:
-  top_k: 5
-  random_bert: 0
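Unlike the other files, train.yaml carries a `gpu` section, which presumably drives the training launcher: read as `n_card` cards times `n_process_per_card` processes per card, it implies the total worker count. A minimal sketch of that reading; how these fields are actually consumed is an assumption, not taken from the repo's code.

```python
# Sketch: derive the implied number of training processes from the
# gpu section of train.yaml. The interpretation of these fields
# (cards x processes per card = world size) is an assumption.
import yaml

with open("configs/train.yaml") as f:
    cfg = yaml.safe_load(f)

gpu = cfg["gpu"]
print(gpu["n_card"] * gpu["n_process_per_card"])  # 1 * 2 = 2 processes
```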