jeremycch committed on
Commit
4281fe4
·
verified ·
1 Parent(s): ed96c68

Upload folder using huggingface_hub

Browse files
FLowHigh_basic_400k.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "random_seed": 104,
3
+ "data": {
4
+ "data_path": "The/path/to/train/directory/path",
5
+ "valid_path": "The/path/to/test/directory/path",
6
+ "valid_prepare": true,
7
+ "samplingrate": 48000,
8
+ "max_wav_value": 32767.0,
9
+ "n_fft": 2048,
10
+ "hop_length": 480,
11
+ "win_length": 2048,
12
+ "n_mel_channels": 256,
13
+ "mel_fmin": 20,
14
+ "mel_fmax": 24000,
15
+ "downsample_min": 4000,
16
+ "downsample_max": 32000,
17
+ "downsampling_method": "scipy"
18
+ },
19
+ "model": {
20
+ "modelname": "FLowHigh",
21
+ "architecture": "transformer",
22
+ "dim": 1024,
23
+ "n_layers": 2,
24
+ "n_heads": 16,
25
+ "dim_head": 64,
26
+ "cfm_path": "basic_cfm",
27
+ "sigma": 1e-4,
28
+ "vocoder": "bigvgan",
29
+ "vocoderpath": "/<Path>/vocoder/BIGVGAN/checkpoint/g_48_00850000",
30
+ "vocoderconfigpath": "/<Path>/vocoder/BIGVGAN/config/bigvgan_48khz_256band_config.json"
31
+ },
32
+ "train": {
33
+ "random_split_seed": 53,
34
+ "batchsize": 128,
35
+ "lr": 3e-4,
36
+ "initial_lr": 1e-5,
37
+ "n_train_steps": 400001,
38
+ "n_warmup_steps": 0,
39
+ "log_every": 10000,
40
+ "save_results_every": 10000,
41
+ "save_model_every": 100000,
42
+ "save_dir": "The/path/to/save/FLowHigh/",
43
+ "weighted_loss": false
44
+ }
45
+ }
FLowHigh_basic_400k.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84688e90d09b6f0788aeabc351ebc5f8d86463adb7ecf7ca3ef9c548e3825b8f
3
+ size 481490826
bigvgan_48khz_256band.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 0,
4
+ "batch_size": 64,
5
+ "learning_rate": 0.0001,
6
+ "adam_b1": 0.8,
7
+ "adam_b2": 0.99,
8
+ "lr_decay": 0.999,
9
+ "seed": 1234,
10
+
11
+ "upsample_rates": [6,5,4,2,2],
12
+ "upsample_kernel_sizes": [12,11,8,4,4],
13
+ "upsample_initial_channel": 512,
14
+ "resblock_kernel_sizes": [3,7,11],
15
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
16
+
17
+ "activation": "snakebeta",
18
+ "snake_logscale": true,
19
+
20
+ "resolutions": [[1024, 120, 600], [2048, 240, 1200], [512, 50, 240]],
21
+ "mpd_reshapes": [2, 3, 5, 7, 11],
22
+ "use_spectral_norm": false,
23
+ "discriminator_channel_mult": 1,
24
+
25
+ "segment_size": 15360,
26
+ "num_mels": 256,
27
+ "num_freq": 2049,
28
+ "n_fft": 2048,
29
+ "hop_size": 480,
30
+ "win_size": 2048,
31
+
32
+ "sampling_rate": 48000,
33
+
34
+ "fmin": 20,
35
+ "fmax": 24000,
36
+ "fmax_for_loss": null,
37
+
38
+ "num_workers": 4,
39
+
40
+ "dist_config": {
41
+ "dist_backend": "nccl",
42
+ "dist_url": "tcp://localhost:54321",
43
+ "world_size": 1
44
+ }
45
+ }
bigvgan_48khz_256band.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40c9fbe33e8d9f4090b988733996984e899c2ba69e475435e49704c8378c14bb
3
+ size 56105238