ResembleAI
/

FlowHigh

Model card Files and versions Community

jeremycch committed about 1 month ago

Commit

4281fe4

·

verified ·

1 Parent(s): ed96c68

Upload folder using huggingface_hub

Files changed (4) hide show

FLowHigh_basic_400k.json +45 -0
FLowHigh_basic_400k.pt +3 -0
bigvgan_48khz_256band.json +45 -0
bigvgan_48khz_256band.pt +3 -0

FLowHigh_basic_400k.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+    "random_seed": 104,
+    "data": {
+      "data_path": "The/path/to/train/directory/path",
+      "valid_path": "The/path/to/test/directory/path",
+      "valid_prepare": true,
+      "samplingrate": 48000,
+      "max_wav_value": 32767.0,
+      "n_fft": 2048,
+      "hop_length": 480,
+      "win_length": 2048,
+      "n_mel_channels": 256,
+      "mel_fmin": 20,
+      "mel_fmax": 24000,
+      "downsample_min": 4000,
+      "downsample_max": 32000,
+      "downsampling_method": "scipy"
+    },
+    "model": {
+      "modelname": "FLowHigh",
+      "architecture": "transformer",
+      "dim": 1024,
+      "n_layers": 2,
+      "n_heads": 16,
+      "dim_head":64,
+      "cfm_path": "basic_cfm",
+      "sigma":1e-4,
+      "vocoder": "bigvgan",
+      "vocoderpath": "/<Path>/vocoder/BIGVGAN/checkpoint/g_48_00850000",
+      "vocoderconfigpath": "/<Path>/vocoder/BIGVGAN/config/bigvgan_48khz_256band_config.json"
+    },
+    "train": {
+      "random_split_seed": 53,
+      "batchsize": 128,
+      "lr": 3e-4,
+      "initial_lr": 1e-5,
+      "n_train_steps": 400001,
+      "n_warmup_steps": 0,
+      "log_every": 10000,
+      "save_results_every": 10000,
+      "save_model_every": 100000,
+      "save_dir": "The/path/to/save/FLowHigh/",
+      "weighted_loss": false
+    }
+  }

FLowHigh_basic_400k.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84688e90d09b6f0788aeabc351ebc5f8d86463adb7ecf7ca3ef9c548e3825b8f
+size 481490826

bigvgan_48khz_256band.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+    "resblock": "1",
+    "num_gpus": 0,
+    "batch_size": 64,
+    "learning_rate": 0.0001,
+    "adam_b1": 0.8,
+    "adam_b2": 0.99,
+    "lr_decay": 0.999,
+    "seed": 1234,
+    "upsample_rates": [6,5,4,2,2],
+    "upsample_kernel_sizes": [12,11,8,4,4],
+    "upsample_initial_channel": 512,
+    "resblock_kernel_sizes": [3,7,11],
+    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+    "activation": "snakebeta",
+    "snake_logscale": true,
+    "resolutions": [[1024, 120, 600], [2048, 240, 1200], [512, 50, 240]],
+    "mpd_reshapes": [2, 3, 5, 7, 11],
+    "use_spectral_norm": false,
+    "discriminator_channel_mult": 1,
+    "segment_size": 15360,
+    "num_mels": 256,
+    "num_freq": 2049,
+    "n_fft": 2048,
+    "hop_size": 480,
+    "win_size": 2048,
+    "sampling_rate": 48000,
+    "fmin": 20,
+    "fmax": 24000,
+    "fmax_for_loss": null,
+    "num_workers": 4,
+    "dist_config": {
+        "dist_backend": "nccl",
+        "dist_url": "tcp://localhost:54321",
+        "world_size": 1
+    }
+}

bigvgan_48khz_256band.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:40c9fbe33e8d9f4090b988733996984e899c2ba69e475435e49704c8378c14bb
+size 56105238