|
import os |
|
import shutil |
|
import sys |
|
|
|
import json |
|
|
|
now_dir = os.getcwd() |
|
sys.path.append(now_dir) |
|
import traceback, pdb |
|
import warnings |
|
|
|
import numpy as np |
|
import torch |
|
|
|
os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" |
|
import logging |
|
import threading |
|
from random import shuffle |
|
from subprocess import Popen |
|
from time import sleep |
|
|
|
import faiss |
|
import ffmpeg |
|
import gradio as gr |
|
import soundfile as sf |
|
from config import Config |
|
from fairseq import checkpoint_utils |
|
from i18n import I18nAuto |
|
from infer_pack.models import ( |
|
SynthesizerTrnMs256NSFsid, |
|
SynthesizerTrnMs256NSFsid_nono, |
|
SynthesizerTrnMs768NSFsid, |
|
SynthesizerTrnMs768NSFsid_nono, |
|
) |
|
from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM |
|
from infer_uvr5 import _audio_pre_, _audio_pre_new |
|
from MDXNet import MDXNetDereverb |
|
from my_utils import load_audio |
|
from train.process_ckpt import change_info, extract_small_model, merge, show_info |
|
from vc_infer_pipeline import VC |
|
from sklearn.cluster import MiniBatchKMeans |
|
|
|
# Only let warnings (and above) from numba reach the log output.
logging.getLogger("numba").setLevel(logging.WARNING)

# Start from a clean scratch directory and remove the package copies under the
# bundled runtime (presumably stale copies from an older install -- TODO
# confirm), then make sure the working directories exist.
tmp = os.path.join(now_dir, "TEMP")
for _removable in (
    tmp,
    "%s/runtime/Lib/site-packages/infer_pack" % (now_dir),
    "%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir),
):
    shutil.rmtree(_removable, ignore_errors=True)
for _required in (
    tmp,
    os.path.join(now_dir, "logs"),
    os.path.join(now_dir, "weights"),
):
    os.makedirs(_required, exist_ok=True)
del _removable, _required

# Expose the scratch directory through the TEMP environment variable.
os.environ["TEMP"] = tmp
warnings.filterwarnings("ignore")
torch.manual_seed(114514)

# Application-wide configuration and translation helper.
config = Config()
i18n = I18nAuto()
i18n.print()
|
|
|
# Substrings looked for in the upper-cased CUDA device name; a device whose
# name contains any of them is treated as usable for training.
_SUPPORTED_GPU_MARKERS = (
    "10",
    "16",
    "20",
    "30",
    "40",
    "A2",
    "A3",
    "A4",
    "P4",
    "A50",
    "500",
    "A60",
    "70",
    "80",
    "90",
    "M4",
    "T4",
    "TITAN",
)

ngpu = torch.cuda.device_count()
gpu_infos = []  # one "<index>\t<name>" entry per accepted device
mem = []  # total memory of each accepted device, in GiB (rounded via +0.4)
if_gpu_ok = False

if torch.cuda.is_available() or ngpu != 0:
    for i in range(ngpu):
        gpu_name = torch.cuda.get_device_name(i)
        if any(marker in gpu_name.upper() for marker in _SUPPORTED_GPU_MARKERS):
            if_gpu_ok = True
            gpu_infos.append("%s\t%s" % (i, gpu_name))
            mem.append(
                int(
                    torch.cuda.get_device_properties(i).total_memory
                    / 1024
                    / 1024
                    / 1024
                    + 0.4
                )
            )
if if_gpu_ok and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    # Never suggest a batch size of 0 for very small cards.
    default_batch_size = max(1, min(mem) // 2)
else:
    gpu_info = i18n("Unfortunately you don't have a working graphics card to support your training.")
    default_batch_size = 1
# Join the device indices with "-". Take everything before the tab rather than
# the first character so that two-digit indices (10, 11, ...) stay intact.
gpus = "-".join(info.split("\t", 1)[0] for info in gpu_infos)
|
|
|
|
|
class ToolButton(gr.Button, gr.components.FormComponent):
    """Compact button labelled with a single emoji, sized to fit inside gradio forms."""

    def __init__(self, **kwargs):
        # Always request the "tool" variant; all other options pass through.
        super().__init__(variant="tool", **kwargs)

    def get_block_name(self):
        # This component reports "button" as its block name.
        return "button"
|
|
|
|
|
# Lazily populated by load_hubert().
hubert_model = None


def load_hubert():
    """Load ``hubert_base.pt`` into the module-level ``hubert_model``.

    The first model of the loaded ensemble is moved to ``config.device``,
    cast to half or single precision according to ``config.is_half`` and
    switched to evaluation mode.
    """
    global hubert_model
    ensemble, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"],
        suffix="",
    )
    hubert_model = ensemble[0]
    hubert_model = hubert_model.to(config.device)
    hubert_model = hubert_model.half() if config.is_half else hubert_model.float()
    hubert_model.eval()
|
|
|
|
|
weight_root = "weights"
weight_uvr5_root = "uvr5_weights"
index_root = "logs"

# Voice model checkpoints: every "*.pth" file directly inside weight_root.
# A missing directory yields an empty list instead of crashing at import time.
names = (
    [name for name in os.listdir(weight_root) if name.endswith(".pth")]
    if os.path.isdir(weight_root)
    else []
)

# Index files anywhere below index_root, skipping files with "trained" in
# their name. os.walk yields nothing when the root does not exist.
index_paths = [
    "%s/%s" % (root, name)
    for root, dirs, files in os.walk(index_root, topdown=False)
    for name in files
    if name.endswith(".index") and "trained" not in name
]

# UVR5 models: "*.pth" files (listed without the extension) and any entry
# whose name contains "onnx".
uvr5_names = (
    [
        name.replace(".pth", "")
        for name in os.listdir(weight_uvr5_root)
        if name.endswith(".pth") or "onnx" in name
    ]
    if os.path.isdir(weight_uvr5_root)
    else []
)
|
|
|
|
|
def vc_single(
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    file_index2,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
    crepe_hop_length,
):
    """Convert a single audio file with the currently loaded voice model.

    The audio is loaded at 16 kHz, scaled down if its peak exceeds 0.95, and
    handed to ``vc.pipeline`` together with the module-level ``hubert_model``
    (loaded on first use), ``net_g``, ``tgt_sr`` and ``version``.

    Args:
        sid: Forwarded unchanged to ``vc.pipeline``.
        input_audio_path: Path of the audio to convert; ``None`` aborts early.
        f0_up_key: Converted to ``int`` and forwarded to ``vc.pipeline``.
        f0_file: Forwarded to ``vc.pipeline`` as the ``f0_file`` keyword.
        f0_method: Forwarded unchanged to ``vc.pipeline``.
        file_index: Index path typed by the user. Surrounding spaces, quotes
            and newlines are stripped and "trained" is replaced by "added".
        file_index2: Index path used when ``file_index`` is the empty string.
        index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length:
            Forwarded unchanged to ``vc.pipeline``.
        resample_sr: Forwarded to ``vc.pipeline``; also reported as the output
            sample rate when it is >= 16000 and differs from ``tgt_sr``.

    Returns:
        ``(message, (sample_rate, audio))`` on success,
        ``("You need to upload an audio", None)`` when no input was given, or
        ``(traceback_text, (None, None))`` when anything fails.
    """
    if input_audio_path is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        audio = load_audio(input_audio_path, 16000)
        audio_max = np.abs(audio).max() / 0.95
        if audio_max > 1:
            audio /= audio_max
        # Filled in by the pipeline; reported below as npy / f0 / infer times.
        times = [0, 0, 0]
        if not hubert_model:
            load_hubert()
        if_f0 = cpt.get("f0", 1)
        if file_index != "":
            file_index = (
                file_index.strip(" ")
                .strip('"')
                .strip("\n")
                .strip('"')
                .strip(" ")
                .replace("trained", "added")
            )
        else:
            file_index = file_index2
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            input_audio_path,
            times,
            f0_up_key,
            f0_method,
            file_index,
            index_rate,
            if_f0,
            filter_radius,
            tgt_sr,
            resample_sr,
            rms_mix_rate,
            version,
            protect,
            crepe_hop_length,
            f0_file=f0_file,
        )
        # Use a local for the reported rate. Writing it back to the global
        # ``tgt_sr`` would corrupt the model's sample rate for every
        # subsequent conversion.
        output_sr = resample_sr if tgt_sr != resample_sr >= 16000 else tgt_sr
        index_info = (
            "Using index:%s." % file_index
            if os.path.exists(file_index)
            else "Index not used."
        )
        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
            index_info,
            times[0],
            times[1],
            times[2],
        ), (output_sr, audio_opt)
    except Exception:
        # Report the failure to the caller instead of raising.
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
|
|
|
|
|
def vc_multi(
    sid,
    dir_path,
    opt_root,
    paths,
    f0_up_key,
    f0_method,
    file_index,
    file_index2,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
    format1,
    crepe_hop_length,
):
    """Convert a batch of audio files, yielding a cumulative status report.

    Inputs are every entry of ``dir_path`` when it is non-empty, otherwise the
    ``.name`` of each object in ``paths``. Each file is converted with
    ``vc_single`` and written below ``opt_root`` as ``<basename>.<format1>``.
    "wav" and "flac" are written directly; any other format is written as wav
    first and then transcoded with ffmpeg.

    Args:
        sid, f0_up_key, f0_method, file_index, file_index2, index_rate,
        filter_radius, resample_sr, rms_mix_rate, protect, crepe_hop_length:
            Forwarded unchanged to ``vc_single``.
        dir_path: Input directory; surrounding spaces/quotes/newlines stripped.
        opt_root: Output directory (created if missing); stripped likewise.
        paths: Uploaded file objects exposing a ``name`` attribute.
        format1: Output format/extension.

    Yields:
        The newline-joined per-file results after each file and once more at
        the end, or a traceback string if the batch setup itself fails.
    """
    import subprocess

    try:
        dir_path = dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        os.makedirs(opt_root, exist_ok=True)
        try:
            if dir_path != "":
                paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
            else:
                paths = [path.name for path in paths]
        except Exception:
            # Listing the directory failed: fall back to the uploaded files.
            traceback.print_exc()
            paths = [path.name for path in paths]
        infos = []
        for path in paths:
            info, opt = vc_single(
                sid,
                path,
                f0_up_key,
                None,
                f0_method,
                file_index,
                file_index2,
                index_rate,
                filter_radius,
                resample_sr,
                rms_mix_rate,
                protect,
                crepe_hop_length,
            )
            if "Success" in info:
                try:
                    tgt_sr, audio_opt = opt
                    if format1 in ["wav", "flac"]:
                        sf.write(
                            "%s/%s.%s" % (opt_root, os.path.basename(path), format1),
                            audio_opt,
                            tgt_sr,
                        )
                    else:
                        wav_path = "%s/%s.wav" % (opt_root, os.path.basename(path))
                        sf.write(wav_path, audio_opt, tgt_sr)
                        if os.path.exists(wav_path):
                            # Argument list instead of a shell string: paths
                            # with spaces or shell metacharacters are passed
                            # to ffmpeg verbatim.
                            subprocess.run(
                                [
                                    "ffmpeg",
                                    "-i",
                                    wav_path,
                                    "-vn",
                                    wav_path[:-4] + ".%s" % format1,
                                    "-q:a",
                                    "2",
                                    "-y",
                                ]
                            )
                except Exception:
                    info += traceback.format_exc()
            infos.append("%s->%s" % (os.path.basename(path), info))
            yield "\n".join(infos)
        yield "\n".join(infos)
    except Exception:
        # ``except Exception`` (not a bare except) so that closing the
        # generator (GeneratorExit) is not answered with another yield.
        yield traceback.format_exc()
|
|
|
|
|
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
    """Run a UVR5 separation model over a set of audio files.

    Inputs are every entry of ``inp_root`` when it is non-empty, otherwise the
    ``.name`` of each object in ``paths``. A file that ffprobe reports as
    2-channel / 44100 Hz is processed directly; anything else (or anything
    that fails on the direct attempt) is first converted by ffmpeg to a
    stereo 44.1 kHz 16-bit wav inside the ``tmp`` directory.

    Args:
        model_name: "onnx_dereverb_By_FoxJoy" selects ``MDXNetDereverb``; names
            containing "DeEcho" select ``_audio_pre_new``; anything else
            selects ``_audio_pre_``.
        inp_root: Input directory; surrounding spaces/quotes/newlines stripped.
        save_root_vocal, save_root_ins: Output directories, stripped likewise
            and forwarded to the model's ``_path_audio_``.
        paths: Uploaded file objects exposing a ``name`` attribute.
        agg: Converted to ``int`` and passed as ``agg`` to the model.
        format0: Forwarded to ``_path_audio_``.

    Yields:
        The newline-joined per-file results after each file, after any
        top-level failure, and once more after cleanup.
    """
    import subprocess

    infos = []
    try:
        inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        save_root_vocal = (
            save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        )
        save_root_ins = (
            save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        )
        if model_name == "onnx_dereverb_By_FoxJoy":
            pre_fun = MDXNetDereverb(15)
        else:
            func = _audio_pre_ if "DeEcho" not in model_name else _audio_pre_new
            pre_fun = func(
                agg=int(agg),
                model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
                device=config.device,
                is_half=config.is_half,
            )
        if inp_root != "":
            paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
        else:
            paths = [path.name for path in paths]
        for path in paths:
            # ``path`` already includes ``inp_root`` (see above). Joining it
            # again would duplicate a relative root ("in/in/a.wav").
            inp_path = path
            need_reformat = 1
            done = 0
            try:
                info = ffmpeg.probe(inp_path, cmd="ffprobe")
                if (
                    info["streams"][0]["channels"] == 2
                    and info["streams"][0]["sample_rate"] == "44100"
                ):
                    need_reformat = 0
                    pre_fun._path_audio_(
                        inp_path, save_root_ins, save_root_vocal, format0
                    )
                    done = 1
            except Exception:
                # Probe or direct processing failed: retry on a re-encoded copy.
                need_reformat = 1
                traceback.print_exc()
            if need_reformat == 1:
                tmp_path = "%s/%s.reformatted.wav" % (tmp, os.path.basename(inp_path))
                # Argument list instead of a shell string so that paths with
                # spaces or shell metacharacters reach ffmpeg verbatim.
                subprocess.run(
                    [
                        "ffmpeg",
                        "-i",
                        inp_path,
                        "-vn",
                        "-acodec",
                        "pcm_s16le",
                        "-ac",
                        "2",
                        "-ar",
                        "44100",
                        tmp_path,
                        "-y",
                    ]
                )
                inp_path = tmp_path
            try:
                if done == 0:
                    pre_fun._path_audio_(
                        inp_path, save_root_ins, save_root_vocal, format0
                    )
            except Exception:
                infos.append(
                    "%s->%s" % (os.path.basename(inp_path), traceback.format_exc())
                )
            else:
                infos.append("%s->Success" % (os.path.basename(inp_path)))
            yield "\n".join(infos)
    except Exception:
        infos.append(traceback.format_exc())
        yield "\n".join(infos)
    finally:
        # Drop the model references and release cached CUDA memory.
        try:
            if model_name == "onnx_dereverb_By_FoxJoy":
                del pre_fun.pred.model
                del pre_fun.pred.model_
            else:
                del pre_fun.model
                del pre_fun
        except Exception:
            traceback.print_exc()
        print("clean_empty_cache")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    yield "\n".join(infos)
|
|
|
|
|
|
|
def _build_synthesizer(cpt):
    """Instantiate the synthesizer class matching a checkpoint dict.

    The class is chosen from ``cpt["version"]`` (default "v1") and
    ``cpt["f0"]`` (default 1) and constructed from ``cpt["config"]``.
    Returns ``None`` when the version is neither "v1" nor "v2".
    """
    if_f0 = cpt.get("f0", 1)
    version = cpt.get("version", "v1")
    if version == "v1":
        if if_f0 == 1:
            return SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
        return SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    if version == "v2":
        if if_f0 == 1:
            return SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
        return SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
    return None


def get_vc(sid, to_return_protect0, to_return_protect1):
    """Load the voice model named ``sid``, or unload the current one.

    Args:
        sid: File name of a checkpoint inside ``weight_root``. ``""`` or ``[]``
            requests unloading.
        to_return_protect0, to_return_protect1: Current values of the two
            "protect" controls; echoed back in the returned updates.

    Returns:
        When unloading: a single update dict ``{"visible": False, ...}``.
        When loading: a 3-tuple of update dicts -- the speaker-id control
        (with ``maximum`` set to the speaker count) and the two protect
        controls, which are hidden with value 0.5 when the checkpoint's
        ``f0`` flag is 0.

    Raises:
        ValueError: If the checkpoint's "version" is neither "v1" nor "v2".
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version, hubert_model
    if sid == "" or sid == []:
        # Only clean up when a hubert model is actually loaded.
        if hubert_model is not None:
            print("clean_empty_cache")
            del net_g, n_spk, vc, hubert_model, tgt_sr
            hubert_model = net_g = n_spk = vc = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Re-create the synthesizer from the old checkpoint and delete it
            # again before the second cache flush. NOTE(review): presumably a
            # workaround for memory not being released otherwise -- confirm.
            version = cpt.get("version", "v1")
            net_g = _build_synthesizer(cpt)
            del net_g, cpt
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            cpt = None
        return {"visible": False, "__type__": "update"}
    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    # NOTE(security): torch.load unpickles the file; only load trusted weights.
    cpt = torch.load(person, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    # Overwrite the speaker-count slot of the config with the actual number
    # of rows in the speaker embedding table.
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
    if_f0 = cpt.get("f0", 1)
    if if_f0 == 0:
        to_return_protect0 = to_return_protect1 = {
            "visible": False,
            "value": 0.5,
            "__type__": "update",
        }
    else:
        to_return_protect0 = {
            "visible": True,
            "value": to_return_protect0,
            "__type__": "update",
        }
        to_return_protect1 = {
            "visible": True,
            "value": to_return_protect1,
            "__type__": "update",
        }
    version = cpt.get("version", "v1")
    model = _build_synthesizer(cpt)
    if model is None:
        # Fail explicitly instead of continuing with a stale or unbound net_g.
        raise ValueError("Unsupported model version: %r" % (version,))
    net_g = model
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    if config.is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
    return (
        {"visible": True, "maximum": n_spk, "__type__": "update"},
        to_return_protect0,
        to_return_protect1,
    )
|
|
|
|
|
def change_choices():
    """Rescan the model and index directories for the UI dropdowns.

    Returns:
        Two update dicts: the sorted "*.pth" file names found directly in
        ``weight_root``, and the sorted paths of every "*.index" file below
        ``index_root`` whose name does not contain "trained".
    """
    model_files = [
        entry for entry in os.listdir(weight_root) if entry.endswith(".pth")
    ]
    found_indexes = []
    for folder, _subdirs, filenames in os.walk(index_root, topdown=False):
        found_indexes.extend(
            "%s/%s" % (folder, filename)
            for filename in filenames
            if filename.endswith(".index") and "trained" not in filename
        )
    model_update = {"choices": sorted(model_files), "__type__": "update"}
    index_update = {"choices": sorted(found_indexes), "__type__": "update"}
    return model_update, index_update
|
|
|
|
|
def clean():
    """Return an update dict that resets a component's value to the empty string."""
    return dict(value="", __type__="update")
|
|
|
|
|
# Maps the sample-rate labels used in the UI ("32k", "40k", "48k") to Hz.
sr_dict = {"%dk" % khz: khz * 1000 for khz in (32, 40, 48)}
|
|
|
|
|
def if_done(done, p):
    """Block until process ``p`` has exited, then set ``done[0]`` to True.

    ``p.poll()`` is checked every 0.5 seconds; ``done`` is a one-element list
    used as a mutable flag shared with the caller.
    """
    while p.poll() is None:
        sleep(0.5)
    done[0] = True
|
|
|
|
|
def if_done_multi(done, ps):
    """Block until every process in ``ps`` has exited, then set ``done[0]``.

    The processes are polled in order; as soon as one is found still running
    the function sleeps 0.5 seconds and starts over from the first process.
    """
    while any(proc.poll() is None for proc in ps):
        sleep(0.5)
    done[0] = True
|
|
|
|
|
def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
    """Run the dataset preprocessing script and stream its log file.

    Args:
        trainset_dir: Passed to the script as its first argument.
        exp_dir: Experiment name; the log lives in ``<now_dir>/logs/<exp_dir>``.
        sr: Sample-rate label, translated to Hz through ``sr_dict``.
        n_p: Passed to the script as its third argument.

    Yields:
        The full content of ``preprocess.log`` roughly once per second while
        the script runs, and once more after it has finished.
    """
    sr = sr_dict[sr]
    log_path = "%s/logs/%s/preprocess.log" % (now_dir, exp_dir)
    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
    # Create / truncate the log so the reads below always find a file.
    open(log_path, "w").close()
    cmd = (
        config.python_cmd
        + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "
        % (trainset_dir, sr, n_p, now_dir, exp_dir)
        + str(config.noparallel)
    )
    print(cmd)
    # NOTE(review): the command is a shell string built from UI input.
    p = Popen(cmd, shell=True)

    # A watcher thread flips finished[0] once the subprocess has exited;
    # until then the log file is re-read once per second.
    finished = [False]
    watcher = threading.Thread(target=if_done, args=(finished, p))
    watcher.start()
    keep_polling = True
    while keep_polling:
        with open(log_path, "r") as log_file:
            yield (log_file.read())
        sleep(1)
        keep_polling = not finished[0]
    with open(log_path, "r") as log_file:
        log = log_file.read()
    print(log)
    yield log
|
|
|
|
|
|
|
def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, echl):
    """Run pitch extraction (optional) and feature extraction, streaming the log.

    Args:
        gpus: "-"-separated GPU ids; one feature-extraction process is started
            per id.
        n_p, f0method, echl: Passed to ``extract_f0_print.py``.
        if_f0: When truthy, pitch extraction runs before feature extraction.
        exp_dir: Experiment name; work happens in ``<now_dir>/logs/<exp_dir>``.
        version19: Passed to ``extract_feature_print.py`` as its last argument.

    Yields:
        The full content of ``extract_f0_feature.log`` roughly once per second
        while each stage runs, and once more when each stage has finished.
    """
    gpus = gpus.split("-")
    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
    log_path = "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir)
    # Create / truncate the log so the reads below always find a file.
    open(log_path, "w").close()

    def _stream_log(finished):
        # Re-read the whole log once per second until the watcher thread
        # sets the flag, then emit (and print) the final content.
        while True:
            with open(log_path, "r") as handle:
                yield (handle.read())
            sleep(1)
            if finished[0]:
                break
        with open(log_path, "r") as handle:
            snapshot = handle.read()
        print(snapshot)
        yield snapshot

    if if_f0:
        cmd = config.python_cmd + " extract_f0_print.py %s/logs/%s %s %s %s" % (
            now_dir,
            exp_dir,
            n_p,
            f0method,
            echl,
        )
        print(cmd)
        p = Popen(cmd, shell=True, cwd=now_dir)
        done = [False]
        threading.Thread(target=if_done, args=(done, p)).start()
        yield from _stream_log(done)

    # One feature-extraction process per GPU id. Each process receives
    # (device, number of parts, part index, gpu id, experiment dir, version).
    leng = len(gpus)
    ps = []
    for idx, n_g in enumerate(gpus):
        cmd = (
            config.python_cmd
            + " extract_feature_print.py %s %s %s %s %s/logs/%s %s"
            % (
                config.device,
                leng,
                idx,
                n_g,
                now_dir,
                exp_dir,
                version19,
            )
        )
        print(cmd)
        ps.append(Popen(cmd, shell=True, cwd=now_dir))

    done = [False]
    threading.Thread(target=if_done_multi, args=(done, ps)).start()
    yield from _stream_log(done)
|
|
|
|
|
def change_sr2(sr2, if_f0_3, version19):
    """Pick the pretrained generator/discriminator paths for a sample rate.

    Args:
        sr2: Sample-rate label used in the file name (e.g. "40k").
        if_f0_3: When truthy the "f0" file variants are used.
        version19: "v1" selects ``pretrained/``; anything else ``pretrained_v2/``.

    Returns:
        ``(generator_path, discriminator_path)``; a path is replaced by ``""``
        (and a notice is printed) when the file does not exist.
    """
    folder_suffix = "" if version19 == "v1" else "_v2"
    variant = "f0" if if_f0_3 else ""
    selected = []
    for template in ("pretrained%s/%sG%s.pth", "pretrained%s/%sD%s.pth"):
        candidate = template % (folder_suffix, variant, sr2)
        if os.access(candidate, os.F_OK):
            selected.append(candidate)
        else:
            print(candidate, "not exist, will not use pretrained model")
            selected.append("")
    return tuple(selected)
|
|
|
|
|
def change_version19(sr2, if_f0_3, version19):
    """React to a model-version change in the training UI.

    "v1" offers only "40k"/"48k", so a selected "32k" is replaced by "40k";
    other versions additionally offer "32k".

    Args:
        sr2: Currently selected sample-rate label.
        if_f0_3: When truthy the "f0" pretrained variants are used.
        version19: "v1" selects ``pretrained/``; anything else ``pretrained_v2/``.

    Returns:
        ``(generator_path, discriminator_path, sample_rate_update)``; a path
        is ``""`` (and a notice is printed) when the file does not exist.
    """
    is_v1 = version19 == "v1"
    folder_suffix = "" if is_v1 else "_v2"
    if is_v1 and sr2 == "32k":
        sr2 = "40k"
    rate_choices = ["40k", "48k"] if is_v1 else ["40k", "48k", "32k"]
    to_return_sr2 = {"choices": rate_choices, "__type__": "update", "value": sr2}
    variant = "f0" if if_f0_3 else ""
    selected = []
    for template in ("pretrained%s/%sG%s.pth", "pretrained%s/%sD%s.pth"):
        candidate = template % (folder_suffix, variant, sr2)
        if os.access(candidate, os.F_OK):
            selected.append(candidate)
        else:
            print(candidate, "not exist, will not use pretrained model")
            selected.append("")
    return selected[0], selected[1], to_return_sr2
|
|
|
|
|
def change_f0(if_f0_3, sr2, version19):
    """React to toggling pitch guidance (f0) in the training UI.

    Args:
        if_f0_3: Whether pitch guidance is enabled; selects the "f0" pretrained
            variants and controls the visibility update.
        sr2: Sample-rate label used in the file name (e.g. "40k").
        version19: "v1" selects ``pretrained/``; anything else ``pretrained_v2/``.

    Returns:
        ``(visibility_update, generator_path, discriminator_path)``; a path is
        ``""`` (and a notice is printed) when the file does not exist.
    """
    path_str = "" if version19 == "v1" else "_v2"
    # Check the files that are actually returned: the "f0" variants only when
    # pitch guidance is on, the plain G/D files otherwise.
    f0_str = "f0" if if_f0_3 else ""
    selected = []
    for kind in ("G", "D"):
        candidate = "pretrained%s/%s%s%s.pth" % (path_str, f0_str, kind, sr2)
        if os.access(candidate, os.F_OK):
            selected.append(candidate)
        else:
            print(candidate, "not exist, will not use pretrained model")
            selected.append("")
    return (
        {"visible": bool(if_f0_3), "__type__": "update"},
        selected[0],
        selected[1],
    )
|
|
|
|
|
|
|
def click_train(
    exp_dir1,
    sr2,
    if_f0_3,
    spk_id5,
    save_epoch10,
    total_epoch11,
    batch_size12,
    if_save_latest13,
    pretrained_G14,
    pretrained_D15,
    gpus16,
    if_cache_gpu17,
    if_save_every_weights18,
    version19,
):
    """Write the training file list and run the training script to completion.

    The file list contains one line per sample that is present in every
    required data directory of the experiment, plus two "mute" entries, in
    random order. The training script is then started and waited for.

    Args:
        exp_dir1: Experiment name (``<now_dir>/logs/<exp_dir1>``); ``-e``.
        sr2: Sample-rate label; ``-sr`` and part of the mute file name.
        if_f0_3: Whether pitch data is used; ``-f0`` and the list line format.
        spk_id5: Speaker id written at the end of every list line.
        save_epoch10, total_epoch11, batch_size12: ``-se``, ``-te``, ``-bs``.
        if_save_latest13, if_cache_gpu17, if_save_every_weights18: Compared
            with ``i18n("yes")`` and passed as 1/0 via ``-l``, ``-c``, ``-sw``.
        pretrained_G14, pretrained_D15: Passed via ``-pg`` / ``-pd`` unless empty.
        gpus16: Passed via ``-g`` unless empty.
        version19: "v1" uses the 256-dim feature directory, anything else the
            768-dim one; also passed via ``-v``.

    Returns:
        A fixed message pointing at the training log.
    """

    def _stems(directory):
        # File names of ``directory`` cut at their first ".".
        return {name.split(".")[0] for name in os.listdir(directory)}

    # Build the file list.
    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    os.makedirs(exp_dir, exist_ok=True)
    gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
    feature_dir = (
        "%s/3_feature256" % (exp_dir)
        if version19 == "v1"
        else "%s/3_feature768" % (exp_dir)
    )
    names = _stems(gt_wavs_dir) & _stems(feature_dir)
    if if_f0_3:
        f0_dir = "%s/2a_f0" % (exp_dir)
        f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
        names = names & _stems(f0_dir) & _stems(f0nsf_dir)
    opt = []
    for name in names:
        if if_f0_3:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    f0_dir.replace("\\", "\\\\"),
                    name,
                    f0nsf_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
        else:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
    fea_dim = 256 if version19 == "v1" else 768
    # Two entries pointing at the bundled "mute" sample are always added.
    for _ in range(2):
        if if_f0_3:
            opt.append(
                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
                % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
            )
        else:
            opt.append(
                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
                % (now_dir, sr2, now_dir, fea_dim, spk_id5)
            )
    shuffle(opt)
    with open("%s/filelist.txt" % exp_dir, "w") as f:
        f.write("\n".join(opt))
    print("write filelist done")

    print("use gpus:", gpus16)
    if pretrained_G14 == "":
        print("no pretrained Generator")
    if pretrained_D15 == "":
        print("no pretrained Discriminator")

    # Assemble the command. Optional arguments are simply left out when
    # empty, so no placeholder character (formerly "\b") ends up on the
    # command line of the no-GPU case.
    cmd_parts = [
        config.python_cmd,
        "train_nsf_sim_cache_sid_load_pretrain.py",
        "-e %s" % exp_dir1,
        "-sr %s" % sr2,
        "-f0 %s" % (1 if if_f0_3 else 0),
        "-bs %s" % batch_size12,
    ]
    if gpus16:
        cmd_parts.append("-g %s" % gpus16)
    cmd_parts.append("-te %s" % total_epoch11)
    cmd_parts.append("-se %s" % save_epoch10)
    if pretrained_G14 != "":
        cmd_parts.append("-pg %s" % pretrained_G14)
    if pretrained_D15 != "":
        cmd_parts.append("-pd %s" % pretrained_D15)
    cmd_parts.append("-l %s" % (1 if if_save_latest13 == i18n("yes") else 0))
    cmd_parts.append("-c %s" % (1 if if_cache_gpu17 == i18n("yes") else 0))
    cmd_parts.append("-sw %s" % (1 if if_save_every_weights18 == i18n("yes") else 0))
    cmd_parts.append("-v %s" % version19)
    cmd = " ".join(cmd_parts)
    print(cmd)
    p = Popen(cmd, shell=True, cwd=now_dir)
    p.wait()
    return "After the training is completed, you can view the console training log or train.log in the experiment folder."
|
|
|
|
|
|
|
def train_index(exp_dir1, version19):
    """Build the faiss retrieval index of an experiment, yielding progress.

    All feature arrays in the experiment's feature directory are concatenated
    and shuffled. More than 2e5 rows are first reduced to 10000 k-means
    centers. The result is saved as ``total_fea.npy`` and used to train and
    fill an ``IVF<n>,Flat`` index, which is written once after training
    ("trained_...") and once after adding the vectors ("added_...").

    Args:
        exp_dir1: Experiment name (``<now_dir>/logs/<exp_dir1>``).
        version19: "v1" uses 256-dim features, anything else 768-dim.

    Yields:
        The newline-joined progress log after each step. When the feature
        directory is missing or empty, a single message asking to run
        feature extraction first.
    """
    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    os.makedirs(exp_dir, exist_ok=True)
    feature_dir = (
        "%s/3_feature256" % (exp_dir)
        if version19 == "v1"
        else "%s/3_feature768" % (exp_dir)
    )
    # This function is a generator: the message must be yielded. A plain
    # ``return "..."`` value would never reach the consumer.
    if not os.path.exists(feature_dir):
        yield "Please perform feature extraction first!"
        return
    listdir_res = list(os.listdir(feature_dir))
    if len(listdir_res) == 0:
        yield "Please perform feature extraction first!"
        return
    infos = []
    npys = [np.load("%s/%s" % (feature_dir, name)) for name in sorted(listdir_res)]
    big_npy = np.concatenate(npys, 0)
    big_npy_idx = np.arange(big_npy.shape[0])
    np.random.shuffle(big_npy_idx)
    big_npy = big_npy[big_npy_idx]
    if big_npy.shape[0] > 2e5:
        infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
        yield "\n".join(infos)
        try:
            big_npy = (
                MiniBatchKMeans(
                    n_clusters=10000,
                    verbose=True,
                    batch_size=256 * config.n_cpu,
                    compute_labels=False,
                    init="random",
                )
                .fit(big_npy)
                .cluster_centers_
            )
        except Exception:
            # Best effort: on failure continue with the unreduced features.
            info = traceback.format_exc()
            print(info)
            infos.append(info)
            yield "\n".join(infos)

    np.save("%s/total_fea.npy" % exp_dir, big_npy)
    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
    infos.append("%s,%s" % (big_npy.shape, n_ivf))
    yield "\n".join(infos)
    index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)

    infos.append("training")
    yield "\n".join(infos)
    index_ivf = faiss.extract_index_ivf(index)
    index_ivf.nprobe = 1
    index.train(big_npy)
    faiss.write_index(
        index,
        "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )

    infos.append("adding")
    yield "\n".join(infos)
    # Add the vectors in fixed-size batches.
    batch_size_add = 8192
    for i in range(0, big_npy.shape[0], batch_size_add):
        index.add(big_npy[i : i + batch_size_add])
    faiss.write_index(
        index,
        "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )
    infos.append(
        "Successfully built index, added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
    )
    yield "\n".join(infos)
|
|
|
|
|
|
|
def train1key(
    exp_dir1,
    sr2,
    if_f0_3,
    trainset_dir4,
    spk_id5,
    np7,
    f0method8,
    save_epoch10,
    total_epoch11,
    batch_size12,
    if_save_latest13,
    pretrained_G14,
    pretrained_D15,
    gpus16,
    if_cache_gpu17,
    if_save_every_weights18,
    version19,
    echl
):
    """Run the complete training workflow for one experiment, yielding progress.

    Steps, in order: dataset preprocessing, optional pitch extraction,
    feature extraction (one process per GPU id), file-list creation, model
    training, and building the faiss index. Each external script is waited
    for before the next step starts.

    Args:
        exp_dir1: Experiment name (``<now_dir>/logs/<exp_dir1>``).
        sr2: Sample-rate label; translated through ``sr_dict`` for
            preprocessing and passed as ``-sr`` to training.
        if_f0_3: Whether pitch data is extracted and used.
        trainset_dir4: Dataset directory passed to the preprocessing script.
        spk_id5: Speaker id written at the end of every file-list line.
        np7: Passed to the preprocessing and pitch-extraction scripts.
        f0method8, echl: Passed to the pitch-extraction script.
        save_epoch10, total_epoch11, batch_size12: ``-se``, ``-te``, ``-bs``.
        if_save_latest13, if_cache_gpu17, if_save_every_weights18: Compared
            with ``i18n("是")`` and passed as 1/0 via ``-l``, ``-c``, ``-sw``.
        pretrained_G14, pretrained_D15: Passed via ``-pg`` / ``-pd`` unless empty.
        gpus16: "-"-separated GPU ids; passed via ``-g`` unless empty.
        version19: "v1" uses 256-dim features, anything else 768-dim.

    Yields:
        The newline-joined progress log after every reported step.
    """
    infos = []

    def get_info_str(strr):
        # Append one line to the progress log and return the whole log.
        infos.append(strr)
        return "\n".join(infos)

    def _stems(directory):
        # File names of ``directory`` cut at their first ".".
        return {name.split(".")[0] for name in os.listdir(directory)}

    model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    preprocess_log_path = "%s/preprocess.log" % model_log_dir
    extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
    gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
    feature_dir = (
        "%s/3_feature256" % model_log_dir
        if version19 == "v1"
        else "%s/3_feature768" % model_log_dir
    )

    os.makedirs(model_log_dir, exist_ok=True)

    # ---- step 1: preprocess the dataset ----
    open(preprocess_log_path, "w").close()
    cmd = (
        config.python_cmd
        + " trainset_preprocess_pipeline_print.py %s %s %s %s "
        % (trainset_dir4, sr_dict[sr2], np7, model_log_dir)
        + str(config.noparallel)
    )
    yield get_info_str(i18n("step1:Processing data"))
    yield get_info_str(cmd)
    p = Popen(cmd, shell=True)
    p.wait()
    with open(preprocess_log_path, "r") as f:
        print(f.read())

    # ---- step 2a: extract pitch ----
    # Create / truncate the log and close the handle right away, so no open
    # write handle is kept while the subprocesses run.
    with open(extract_f0_feature_log_path, "w"):
        pass
    if if_f0_3:
        yield get_info_str("step2a:Extracting pitch")
        cmd = config.python_cmd + " extract_f0_print.py %s %s %s %s" % (
            model_log_dir,
            np7,
            f0method8,
            echl
        )
        yield get_info_str(cmd)
        p = Popen(cmd, shell=True, cwd=now_dir)
        p.wait()
        with open(extract_f0_feature_log_path, "r") as f:
            print(f.read())
    else:
        yield get_info_str(i18n("step2a:No need to extract pitch"))

    # ---- step 2b: extract features, one process per GPU id ----
    yield get_info_str(i18n("step2b:Extracting features"))
    gpus = gpus16.split("-")
    leng = len(gpus)
    ps = []
    for idx, n_g in enumerate(gpus):
        cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % (
            config.device,
            leng,
            idx,
            n_g,
            model_log_dir,
            version19,
        )
        yield get_info_str(cmd)
        ps.append(Popen(cmd, shell=True, cwd=now_dir))
    for p in ps:
        p.wait()
    with open(extract_f0_feature_log_path, "r") as f:
        print(f.read())

    # ---- step 3a: write the file list and train the model ----
    yield get_info_str(i18n("step3a:Training model"))
    names = _stems(gt_wavs_dir) & _stems(feature_dir)
    if if_f0_3:
        f0_dir = "%s/2a_f0" % model_log_dir
        f0nsf_dir = "%s/2b-f0nsf" % model_log_dir
        names = names & _stems(f0_dir) & _stems(f0nsf_dir)
    opt = []
    for name in names:
        if if_f0_3:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    f0_dir.replace("\\", "\\\\"),
                    name,
                    f0nsf_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
        else:
            opt.append(
                "%s/%s.wav|%s/%s.npy|%s"
                % (
                    gt_wavs_dir.replace("\\", "\\\\"),
                    name,
                    feature_dir.replace("\\", "\\\\"),
                    name,
                    spk_id5,
                )
            )
    fea_dim = 256 if version19 == "v1" else 768
    # Two entries pointing at the bundled "mute" sample are always added.
    for _ in range(2):
        if if_f0_3:
            opt.append(
                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
                % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
            )
        else:
            opt.append(
                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
                % (now_dir, sr2, now_dir, fea_dim, spk_id5)
            )
    shuffle(opt)
    with open("%s/filelist.txt" % model_log_dir, "w") as f:
        f.write("\n".join(opt))
    yield get_info_str("write filelist done")

    # Optional arguments are left out entirely when empty.
    cmd_parts = [
        config.python_cmd,
        "train_nsf_sim_cache_sid_load_pretrain.py",
        "-e %s" % exp_dir1,
        "-sr %s" % sr2,
        "-f0 %s" % (1 if if_f0_3 else 0),
        "-bs %s" % batch_size12,
    ]
    if gpus16:
        cmd_parts.append("-g %s" % gpus16)
    cmd_parts.append("-te %s" % total_epoch11)
    cmd_parts.append("-se %s" % save_epoch10)
    if pretrained_G14 != "":
        cmd_parts.append("-pg %s" % pretrained_G14)
    if pretrained_D15 != "":
        cmd_parts.append("-pd %s" % pretrained_D15)
    cmd_parts.append("-l %s" % (1 if if_save_latest13 == i18n("是") else 0))
    cmd_parts.append("-c %s" % (1 if if_cache_gpu17 == i18n("是") else 0))
    cmd_parts.append("-sw %s" % (1 if if_save_every_weights18 == i18n("是") else 0))
    cmd_parts.append("-v %s" % version19)
    cmd = " ".join(cmd_parts)
    yield get_info_str(cmd)
    p = Popen(cmd, shell=True, cwd=now_dir)
    p.wait()
    yield get_info_str(i18n("After the training is completed, you can view the console training log or train.log in the experiment folder."))

    # ---- step 3b: build the retrieval index ----
    listdir_res = list(os.listdir(feature_dir))
    npys = [np.load("%s/%s" % (feature_dir, name)) for name in sorted(listdir_res)]
    big_npy = np.concatenate(npys, 0)

    big_npy_idx = np.arange(big_npy.shape[0])
    np.random.shuffle(big_npy_idx)
    big_npy = big_npy[big_npy_idx]

    if big_npy.shape[0] > 2e5:
        info = "Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0]
        print(info)
        yield get_info_str(info)
        try:
            big_npy = (
                MiniBatchKMeans(
                    n_clusters=10000,
                    verbose=True,
                    batch_size=256 * config.n_cpu,
                    compute_labels=False,
                    init="random",
                )
                .fit(big_npy)
                .cluster_centers_
            )
        except Exception:
            # Best effort: on failure continue with the unreduced features.
            info = traceback.format_exc()
            print(info)
            yield get_info_str(info)

    np.save("%s/total_fea.npy" % model_log_dir, big_npy)

    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
    yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
    index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
    yield get_info_str("training index")
    index_ivf = faiss.extract_index_ivf(index)
    index_ivf.nprobe = 1
    index.train(big_npy)
    faiss.write_index(
        index,
        "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )
    yield get_info_str("adding index")
    # Add the vectors in fixed-size batches.
    batch_size_add = 8192
    for i in range(0, big_npy.shape[0], batch_size_add):
        index.add(big_npy[i : i + batch_size_add])
    faiss.write_index(
        index,
        "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )
    yield get_info_str(
        "Index built successfully, added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
    )
    yield get_info_str(i18n("The whole process is completed!"))
|
|
|
|
|
|
|
def change_info_(ckpt_path):
    """Read sample rate, pitch flag and version from the ``train.log`` beside a checkpoint.

    The log is looked up in the same directory as ``ckpt_path``. Its first
    line is split on tabs and the last field is evaluated as a Python
    expression that must behave like a mapping with ``sample_rate`` and
    ``if_f0`` keys and an optional ``version`` key.

    Args:
        ckpt_path: Path of a checkpoint file; only its directory part is used.

    Returns:
        ``(sample_rate, str(if_f0), version)`` on success, where ``version``
        is ``"v2"`` only when the log says so and ``"v1"`` otherwise.
        When the log is missing or cannot be parsed, three
        ``{"__type__": "update"}`` dicts are returned instead.
    """

    def _no_change():
        # Fresh dict objects on every call, exactly like the literal triple.
        return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}

    # Build the sibling path from the directory component. The previous
    # ``ckpt_path.replace(basename, "train.log")`` rewrote *every* occurrence
    # of the basename, so a parent folder whose name contained it produced a
    # path that does not exist.
    log_path = os.path.join(os.path.dirname(ckpt_path), "train.log")
    if not os.path.exists(log_path):
        return _no_change()
    try:
        with open(log_path, "r") as f:
            first_line = f.read().strip("\n").split("\n")[0]
        # NOTE(review): eval() executes whatever expression the log contains.
        # Only point this at train.log files you produced yourself; consider
        # ast.literal_eval once the logged format is confirmed to be a plain
        # literal.
        info = eval(first_line.split("\t")[-1])
        sr, f0 = info["sample_rate"], info["if_f0"]
        version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
        return sr, str(f0), version
    except Exception:
        # Best effort: report the problem on the console and change nothing.
        traceback.print_exc()
        return _no_change()
|
|
|
|
|
def export_onnx(ModelPath, ExportedPath):
    """Export a saved model checkpoint to an ONNX file.

    The checkpoint is loaded on the CPU, a ``SynthesizerTrnMsNSFsidM`` is
    built from its ``config`` list and ``weight`` state dict, and the model is
    traced with random dummy inputs of 200 frames.

    Args:
        ModelPath: Path of the checkpoint to read with ``torch.load``.
        ExportedPath: Destination handed to ``torch.onnx.export``.

    Returns:
        The string ``"Finished"``.
    """
    checkpoint = torch.load(ModelPath, map_location="cpu")
    weights = checkpoint["weight"]
    model_version = checkpoint.get("version", "v1")

    # The third-from-last config entry is overwritten with the first
    # dimension of the ``emb_g.weight`` tensor stored in the checkpoint.
    checkpoint["config"][-3] = weights["emb_g.weight"].shape[0]

    feature_width = 256 if model_version == "v1" else 768
    target = "cpu"

    # Dummy inputs, drawn in the same order as the input names below so the
    # random number generator is consumed identically on every run.
    dummy_inputs = (
        torch.rand(1, 200, feature_width).to(target),
        torch.tensor([200]).long().to(target),
        torch.randint(size=(1, 200), low=5, high=255).to(target),
        torch.rand(1, 200).to(target),
        torch.LongTensor([0]).to(target),
        torch.rand(1, 192, 200).to(target),
    )

    model = SynthesizerTrnMsNSFsidM(
        *checkpoint["config"], is_half=False, version=model_version
    )
    # strict=False: keys missing from or extra to the model are tolerated.
    model.load_state_dict(weights, strict=False)

    torch.onnx.export(
        model,
        dummy_inputs,
        ExportedPath,
        # These axes are declared variable-length in the exported graph.
        dynamic_axes={
            "phone": [1],
            "pitch": [1],
            "pitchf": [1],
            "rnd": [2],
        },
        do_constant_folding=False,
        opset_version=13,
        verbose=False,
        input_names=["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"],
        output_names=["audio"],
    )
    return "Finished"
|
|
|
|
|
|
|
import re as regex |
|
import scipy.io.wavfile as wavfile |
|
|
|
# Name of the CLI page currently shown. Reassigned by change_page() and read by
# print_page_details(), execute_command() and cli_navigation_loop().
cli_current_page = "HOME"
|
|
|
def cli_split_command(com):
    """Split a CLI line into arguments, keeping double-quoted groups together.

    A span in double quotes that begins at the start of the line or right
    after whitespace, and ends right before whitespace or the end of the
    line, becomes one argument with the quotes removed. Everything else is
    split into runs of non-whitespace characters.

    Args:
        com: The raw command line.

    Returns:
        A list of argument strings (empty for a blank line).
    """
    pattern = r'(?:(?<=\s)|^)"(.*?)"(?=\s|$)|(\S+)'
    # findall yields (quoted, bare) pairs; exactly one of them is non-empty
    # unless the quotes were empty, in which case both are "".
    return [quoted or bare for quoted, bare in regex.findall(pattern, com)]
|
|
|
def execute_generator_function(genObject):
    """Run an iterable to exhaustion and discard everything it yields.

    Used to drive generator-style workers purely for their side effects.

    Args:
        genObject: Any iterable, typically a generator.
    """
    iterator = iter(genObject)
    exhausted = object()
    # next() hands back the private marker once the iterator is finished.
    while next(iterator, exhausted) is not exhausted:
        pass
|
|
|
def cli_infer(com):
    """Run one voice conversion from an INFER-page argument line.

    The line is tokenised with ``cli_split_command`` and must supply 13
    positional values: model name, source audio path, output file name,
    feature index path, speaker id, transposition, f0 method, crepe hop
    length, harvest median filter radius, resample rate, volume-envelope mix,
    feature index ratio and protection amount.

    On success the audio is written to ``audio-outputs/<output file name>``.
    On failure the message returned by ``vc_single`` is printed.

    Args:
        com: Raw command line typed by the user.

    Raises:
        IndexError: If fewer than 13 arguments are supplied.
        ValueError: If a numeric argument cannot be converted.
    """
    com = cli_split_command(com)
    model_name = com[0]
    source_audio_path = com[1]
    output_file_name = com[2]
    feature_index_path = com[3]
    f0_file = None  # The CLI offers no way to supply an F0 curve file.

    speaker_id = int(com[4])
    transposition = float(com[5])
    f0_method = com[6]
    crepe_hop_length = int(com[7])
    harvest_median_filter = int(com[8])
    resample = int(com[9])
    mix = float(com[10])
    feature_ratio = float(com[11])
    protection_amnt = float(com[12])

    output_dir = 'audio-outputs'

    print("Mangio-RVC-Fork Infer-CLI: Starting the inference...")
    vc_data = get_vc(model_name)
    print(vc_data)
    print("Mangio-RVC-Fork Infer-CLI: Performing inference...")
    conversion_data = vc_single(
        speaker_id,
        source_audio_path,
        transposition,
        f0_file,
        f0_method,
        # The same path is passed for both index-path parameters.
        feature_index_path,
        feature_index_path,
        feature_ratio,
        harvest_median_filter,
        resample,
        mix,
        protection_amnt,
        crepe_hop_length,
    )
    if "Success." in conversion_data[0]:
        print("Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..." % (output_dir, output_file_name))
        # Make sure the destination folder exists so a finished conversion is
        # not lost to a FileNotFoundError at the very last step.
        os.makedirs(output_dir, exist_ok=True)
        wavfile.write('%s/%s' % (output_dir, output_file_name), conversion_data[1][0], conversion_data[1][1])
        print("Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s" % (output_dir, output_file_name))
    else:
        print("Mangio-RVC-Fork Infer-CLI: Inference failed. Here's the traceback: ")
        print(conversion_data[0])
|
|
|
def cli_pre_process(com):
    """Run dataset pre-processing from a PRE-PROCESS-page argument line.

    Expected arguments, in order: model folder name, training-set directory,
    sample rate label, number of processes.

    Args:
        com: Raw command line typed by the user.
    """
    tokens = cli_split_command(com)
    model_name, trainset_directory, sample_rate = tokens[0], tokens[1], tokens[2]
    num_processes = int(tokens[3])

    print("Mangio-RVC-Fork Pre-process: Starting...")
    # preprocess_dataset takes the directory first and the model name second.
    execute_generator_function(
        preprocess_dataset(trainset_directory, model_name, sample_rate, num_processes)
    )
    print("Mangio-RVC-Fork Pre-process: Finished")
|
|
|
def cli_extract_feature(com):
    """Run pitch/feature extraction from an EXTRACT-FEATURE-page argument line.

    Expected arguments, in order: model folder name, GPU card string, number
    of processes, pitch-guidance flag (``1`` enables it), f0 method, crepe hop
    length, version.

    Args:
        com: Raw command line typed by the user.
    """
    tokens = cli_split_command(com)
    model_name = tokens[0]
    gpus = tokens[1]
    num_processes = int(tokens[2])
    has_pitch_guidance = int(tokens[3]) == 1
    f0_method = tokens[4]
    crepe_hop_length = int(tokens[5])
    version = tokens[6]

    print("Mangio-RVC-CLI: Extract Feature Has Pitch: " + str(has_pitch_guidance))
    print("Mangio-RVC-CLI: Extract Feature Version: " + str(version))
    print("Mangio-RVC-Fork Feature Extraction: Starting...")
    # Note the argument order differs from the command-line order.
    extraction = extract_f0_feature(
        gpus,
        num_processes,
        f0_method,
        has_pitch_guidance,
        model_name,
        version,
        crepe_hop_length,
    )
    execute_generator_function(extraction)
    print("Mangio-RVC-Fork Feature Extraction: Finished")
|
|
|
def cli_train(com):
    """Start or continue training from a TRAIN-page argument line.

    Expected arguments, in order: model folder name, sample rate label,
    pitch-guidance flag, speaker id, save-every-N-epochs, total epochs, batch
    size, GPU card string, save-only-latest flag, cache-to-GPU flag,
    save-every-weight flag, version. Flags are ``1`` for yes and anything
    else that parses as an integer for no.

    The pretrained generator/discriminator paths are derived from the version
    (``pretrained/`` for ``v1``, ``pretrained_v2/`` otherwise), the pitch
    flag and the sample rate label.

    Args:
        com: Raw command line typed by the user.

    Raises:
        IndexError: If fewer than 12 arguments are supplied.
        ValueError: If a numeric argument cannot be converted.
    """

    def _yes_no(token):
        # click_train receives the localized yes/no labels, not booleans.
        return i18n("yes") if int(token) == 1 else i18n("no")

    com = cli_split_command(com)
    model_name = com[0]
    sample_rate = com[1]
    has_pitch_guidance = int(com[2]) == 1
    speaker_id = int(com[3])
    save_epoch_iteration = int(com[4])
    total_epoch = int(com[5])
    batch_size = int(com[6])
    gpu_card_slot_numbers = com[7]
    if_save_latest = _yes_no(com[8])
    if_cache_gpu = _yes_no(com[9])
    if_save_every_weight = _yes_no(com[10])
    version = com[11]

    pretrained_base = "pretrained/" if version == "v1" else "pretrained_v2/"

    # Pick the checkpoints that match the pitch-guidance setting. Previously
    # the "f0" generator/discriminator were used unconditionally, even for a
    # model trained without pitch guidance.
    # NOTE(review): assumes the non-pitch checkpoints are named G<sr>.pth and
    # D<sr>.pth next to the f0 ones - confirm against the pretrained folders.
    f0_prefix = "f0" if has_pitch_guidance else ""
    g_pretrained_path = "%s%sG%s.pth" % (pretrained_base, f0_prefix, sample_rate)
    d_pretrained_path = "%s%sD%s.pth" % (pretrained_base, f0_prefix, sample_rate)

    print("Mangio-RVC-Fork Train-CLI: Training...")
    click_train(
        model_name,
        sample_rate,
        has_pitch_guidance,
        speaker_id,
        save_epoch_iteration,
        total_epoch,
        batch_size,
        if_save_latest,
        g_pretrained_path,
        d_pretrained_path,
        gpu_card_slot_numbers,
        if_cache_gpu,
        if_save_every_weight,
        version,
    )
|
|
|
def cli_train_feature(com):
    """Build the feature index from a TRAIN-FEATURE-page argument line.

    Expected arguments, in order: model folder name, version.

    Args:
        com: Raw command line typed by the user.
    """
    tokens = cli_split_command(com)
    model_name, version = tokens[0], tokens[1]

    print("Mangio-RVC-Fork Train Feature Index-CLI: Training... Please wait")
    # train_index is a generator; drain it so the work actually runs.
    execute_generator_function(train_index(model_name, version))
    print("Mangio-RVC-Fork Train Feature Index-CLI: Done!")
|
|
|
def cli_extract_model(com):
    """Extract a small model from an EXTRACT-MODEL-page argument line.

    Expected arguments, in order: checkpoint path, save name, sample rate
    label, pitch-guidance flag, model information text, version. All six are
    forwarded to ``extract_small_model`` as the raw string tokens.

    Args:
        com: Raw command line typed by the user.
    """
    tokens = cli_split_command(com)
    model_path = tokens[0]
    save_name = tokens[1]
    sample_rate = tokens[2]
    has_pitch_guidance = tokens[3]
    info = tokens[4]
    version = tokens[5]

    outcome = extract_small_model(
        model_path, save_name, sample_rate, has_pitch_guidance, info, version
    )

    if outcome != "Success.":
        # Anything other than the exact success marker is shown to the user.
        print(str(outcome))
        print("Mangio-RVC-Fork Extract Small Model: Failed!")
        return
    print("Mangio-RVC-Fork Extract Small Model: Success!")
|
|
|
# Help text for every CLI page, one tuple entry per printed line.
_CLI_PAGE_DETAILS = {
    "HOME": (
        " go home : Takes you back to home with a navigation list.",
        " go infer : Takes you to inference command execution.\n",
        " go pre-process : Takes you to training step.1) pre-process command execution.",
        " go extract-feature : Takes you to training step.2) extract-feature command execution.",
        " go train : Takes you to training step.3) being or continue training command execution.",
        " go train-feature : Takes you to the train feature index command execution.\n",
        " go extract-model : Takes you to the extract small model command execution.",
    ),
    "INFER": (
        " arg 1) model name with .pth in ./weights: mi-test.pth",
        " arg 2) source audio path: myFolder\\MySource.wav",
        " arg 3) output file name to be placed in './audio-outputs': MyTest.wav",
        " arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index",
        " arg 5) speaker id: 0",
        " arg 6) transposition: 0",
        " arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny)",
        " arg 8) crepe hop length: 160",
        " arg 9) harvest median filter radius: 3 (0-7)",
        " arg 10) post resample rate: 0",
        " arg 11) mix volume envelope: 1",
        " arg 12) feature index ratio: 0.78 (0-1)",
        " arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.) \n",
        "Example: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33",
    ),
    "PRE-PROCESS": (
        " arg 1) Model folder name in ./logs: mi-test",
        " arg 2) Trainset directory: mydataset (or) E:\\my-data-set",
        " arg 3) Sample rate: 40k (32k, 40k, 48k)",
        " arg 4) Number of CPU threads to use: 8 \n",
        "Example: mi-test mydataset 40k 24",
    ),
    "EXTRACT-FEATURE": (
        " arg 1) Model folder name in ./logs: mi-test",
        " arg 2) Gpu card slot: 0 (0-1-2 if using 3 GPUs)",
        " arg 3) Number of CPU threads to use: 8",
        " arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)",
        " arg 5) f0 Method: harvest (pm, harvest, dio, crepe)",
        " arg 6) Crepe hop length: 128",
        " arg 7) Version for pre-trained models: v2 (use either v1 or v2)\n",
        "Example: mi-test 0 24 1 harvest 128 v2",
    ),
    "TRAIN": (
        " arg 1) Model folder name in ./logs: mi-test",
        " arg 2) Sample rate: 40k (32k, 40k, 48k)",
        " arg 3) Has Pitch Guidance?: 1 (0 for no, 1 for yes)",
        " arg 4) speaker id: 0",
        " arg 5) Save epoch iteration: 50",
        " arg 6) Total epochs: 10000",
        " arg 7) Batch size: 8",
        " arg 8) Gpu card slot: 0 (0-1-2 if using 3 GPUs)",
        " arg 9) Save only the latest checkpoint: 0 (0 for no, 1 for yes)",
        " arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)",
        " arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)",
        " arg 12) Model architecture version: v2 (use either v1 or v2)\n",
        "Example: mi-test 40k 1 0 50 10000 8 0 0 0 0 v2",
    ),
    "TRAIN-FEATURE": (
        " arg 1) Model folder name in ./logs: mi-test",
        " arg 2) Model architecture version: v2 (use either v1 or v2)\n",
        "Example: mi-test v2",
    ),
    "EXTRACT-MODEL": (
        " arg 1) Model Path: logs/mi-test/G_168000.pth",
        " arg 2) Model save name: MyModel",
        " arg 3) Sample rate: 40k (32k, 40k, 48k)",
        " arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)",
        ' arg 5) Model information: "My Model"',
        " arg 6) Model architecture version: v2 (use either v1 or v2)\n",
        'Example: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2',
    ),
}


def print_page_details():
    """Print the help text of the current CLI page followed by an empty line.

    The page is taken from the module-level ``cli_current_page``. A page
    without an entry in the table prints only the trailing empty line.
    """
    for line in _CLI_PAGE_DETAILS.get(cli_current_page, ()):
        print(line)
    print("")
|
|
|
def change_page(page):
    """Make ``page`` the current CLI page.

    Args:
        page: Page name to store in the module-level ``cli_current_page``.

    Returns:
        Always ``0``.
    """
    globals().update(cli_current_page=page)
    return 0
|
|
|
def execute_command(com):
    """Interpret one line typed at the CLI prompt.

    A known ``go <page>`` command switches page. An unknown ``go ...``
    command prints an error. Any other line is handed to the handler of the
    current page; on a page without a handler it is ignored.

    Args:
        com: The line typed by the user.

    Returns:
        ``0`` for every ``go`` command (known or not), ``None`` otherwise.
    """
    navigation = (
        ("go home", "HOME"),
        ("go infer", "INFER"),
        ("go pre-process", "PRE-PROCESS"),
        ("go extract-feature", "EXTRACT-FEATURE"),
        ("go train", "TRAIN"),
        ("go train-feature", "TRAIN-FEATURE"),
        ("go extract-model", "EXTRACT-MODEL"),
    )
    for phrase, destination in navigation:
        if com == phrase:
            return change_page(destination)

    if com[:3] == "go ":
        print("page '%s' does not exist!" % com[3:])
        return 0

    # Not a navigation command: treat the line as arguments for the page.
    page_handlers = {
        "INFER": cli_infer,
        "PRE-PROCESS": cli_pre_process,
        "EXTRACT-FEATURE": cli_extract_feature,
        "TRAIN": cli_train,
        "TRAIN-FEATURE": cli_train_feature,
        "EXTRACT-MODEL": cli_extract_model,
    }
    handler = page_handlers.get(cli_current_page)
    if handler is not None:
        handler(com)
|
|
|
def cli_navigation_loop():
    """Run the interactive CLI prompt forever.

    Each round prints the current page name and its help text, reads one
    line from standard input and executes it. Anything raised while
    executing the line is printed as a traceback and the loop continues.
    Errors raised by ``input`` itself are not caught here.
    """
    while True:
        page = cli_current_page
        print("You are currently in '%s':" % page)
        print_page_details()
        typed_line = input("%s: " % page)
        try:
            execute_command(typed_line)
        except BaseException:
            # Deliberately broad: a failing command must not end the session.
            print(traceback.format_exc())
|
|
|
# In CLI mode, show the banner and hand control to the prompt loop. That loop
# only ends by raising, so nothing below runs during a normal CLI session.
if config.is_cli:
    for banner_line in (
        "\n\nMangio-RVC-Fork v2 CLI App!\n",
        "Welcome to the CLI version of RVC. Please read the documentation on https://github.com/Mangio621/Mangio-RVC-Fork (README.MD) to understand how to use this app.\n",
    ):
        print(banner_line)
    cli_navigation_loop()
|
|
|
|
|
|
|
|
|
|
|
def get_presets():
    """Return the names of all presets in ``../inference-presets.json``.

    The path is relative to the current working directory. The file must hold
    a JSON object whose ``presets`` entry is a list of objects with a
    ``name`` key.

    Returns:
        The preset names, in file order.
    """
    with open('../inference-presets.json', 'r') as preset_file:
        payload = json.load(preset_file)
    return [entry['name'] for entry in payload['presets']]
|
|
|
with gr.Blocks(theme=gr.themes.Soft()) as app: |
|
gr.HTML("<h1> The Mangio-RVC-Fork 💻 </h1>") |
|
gr.Markdown( |
|
value=i18n( |
|
"This software is open source under the MIT agreement. The author does not have any control over the software. Those who use the software and spread the sounds derived from the software are fully responsible. <br>If you do not agree with this clause, you cannot use or reference any code and files in the software package. For details, see the root directory <b>Agreement to be followed-LICENSE.txt</b>." |
|
) |
|
) |
|
with gr.Tabs(): |
|
with gr.TabItem(i18n("Model reasoning")): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with gr.Row(): |
|
sid0 = gr.Dropdown(label=i18n("Mystery Tone"), choices=sorted(names)) |
|
refresh_button = gr.Button(i18n("Refresh the patch list and index path"), variant="primary") |
|
clean_button = gr.Button(i18n("Uninstalling sounds saves video memory"), variant="primary") |
|
spk_item = gr.Slider( |
|
minimum=0, |
|
maximum=2333, |
|
step=1, |
|
label=i18n("Please select the speaker id"), |
|
value=0, |
|
visible=False, |
|
interactive=True, |
|
) |
|
clean_button.click(fn=clean, inputs=[], outputs=[sid0]) |
|
with gr.Group(): |
|
gr.Markdown( |
|
value=i18n("For male to female, +12key is recommended, for female to male, -12key is recommended. If the sound range explodes and causes timbre distortion, you can adjust it to the appropriate range yourself.") |
|
) |
|
with gr.Row(): |
|
with gr.Column(): |
|
vc_transform0 = gr.Number( |
|
label=i18n("Transpose(integer, number of semitones, octave up 12 octave down -12)"), value=0 |
|
) |
|
input_audio0 = gr.Textbox( |
|
label=i18n("Enter the path of the audio file to be processed (the default is an example of the correct format)"), |
|
value="E:\\codes\\py39\\test-20230416b\\todo-songs\\winter flowers clip1.wav", |
|
) |
|
f0method0 = gr.Radio( |
|
label=i18n( |
|
"Select the pitch extraction algorithm. You can use pm to speed up the input singing voice. Harvest has good bass but is extremely slow. Crepe has good effect but consumes GPU." |
|
), |
|
choices=["pm", "harvest", "dio", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny"], |
|
value="pm", |
|
interactive=True, |
|
) |
|
crepe_hop_length = gr.Slider( |
|
minimum=1, |
|
maximum=512, |
|
step=1, |
|
label=i18n("crepe_hop_length"), |
|
value=160, |
|
interactive=True |
|
) |
|
filter_radius0 = gr.Slider( |
|
minimum=0, |
|
maximum=7, |
|
label=i18n(">=3, use median filtering on the result of harvest pitch recognition, the value is the filter radius, which can reduce mute"), |
|
value=3, |
|
step=1, |
|
interactive=True, |
|
) |
|
with gr.Column(): |
|
file_index1 = gr.Textbox( |
|
label=i18n("Feature retrieval library file path, if empty, use the drop-down selection result"), |
|
value="", |
|
interactive=True, |
|
) |
|
file_index2 = gr.Dropdown( |
|
label=i18n("Automatically detect index path, drop-down selection"), |
|
choices=sorted(index_paths), |
|
interactive=True, |
|
) |
|
refresh_button.click( |
|
fn=change_choices, inputs=[], outputs=[sid0, file_index2] |
|
) |
|
|
|
|
|
|
|
|
|
|
|
index_rate1 = gr.Slider( |
|
minimum=0, |
|
maximum=1, |
|
label=i18n("Search feature proportion"), |
|
value=0.88, |
|
interactive=True, |
|
) |
|
with gr.Column(): |
|
resample_sr0 = gr.Slider( |
|
minimum=0, |
|
maximum=48000, |
|
label=i18n("Post-processing resampling to the final sampling rate, 0 means no resampling"), |
|
value=0, |
|
step=1, |
|
interactive=True, |
|
) |
|
rms_mix_rate0 = gr.Slider( |
|
minimum=0, |
|
maximum=1, |
|
label=i18n("The input source volume envelope replaces the output volume envelope blending ratio. The closer it is to 1, the more the output envelope is used."), |
|
value=1, |
|
interactive=True, |
|
) |
|
protect0 = gr.Slider( |
|
minimum=0, |
|
maximum=0.5, |
|
label=i18n( |
|
"Protects clear consonants and breathing sounds, and prevents electronic music tearing and other artifacts. It is not enabled when it is set to 0.5. It is more effective when it is lowered, but the indexing effect may be reduced." |
|
), |
|
value=0.33, |
|
step=0.01, |
|
interactive=True, |
|
) |
|
f0_file = gr.File(label=i18n("F0 curve file, optional, one line per pitch, replaces the default F0 and sharp and flat tones")) |
|
but0 = gr.Button(i18n("Convert"), variant="primary") |
|
with gr.Row(): |
|
vc_output1 = gr.Textbox(label=i18n("Output information")) |
|
vc_output2 = gr.Audio(label=i18n("Output audio (three dots in the lower right corner, click to download)")) |
|
but0.click( |
|
vc_single, |
|
[ |
|
spk_item, |
|
input_audio0, |
|
vc_transform0, |
|
f0_file, |
|
f0method0, |
|
file_index1, |
|
file_index2, |
|
|
|
index_rate1, |
|
filter_radius0, |
|
resample_sr0, |
|
rms_mix_rate0, |
|
protect0, |
|
crepe_hop_length |
|
], |
|
[vc_output1, vc_output2], |
|
) |
|
with gr.Group(): |
|
gr.Markdown( |
|
value=i18n("Batch conversion, input the audio folder to be converted, or upload multiple audio files, and output the converted audio in the specified folder (default opt).") |
|
) |
|
with gr.Row(): |
|
with gr.Column(): |
|
vc_transform1 = gr.Number( |
|
label=i18n("Transpose(integer, number of semitones, octave up 12 octave down -12)"), value=0 |
|
) |
|
opt_input = gr.Textbox(label=i18n("Specify output folder"), value="opt") |
|
f0method1 = gr.Radio( |
|
label=i18n( |
|
"Select the pitch extraction algorithm. You can use pm to speed up the input singing voice. Harvest has good bass but is extremely slow. Crepe has good effect but consumes GPU." |
|
), |
|
choices=["pm", "harvest", "crepe"], |
|
value="pm", |
|
interactive=True, |
|
) |
|
filter_radius1 = gr.Slider( |
|
minimum=0, |
|
maximum=7, |
|
label=i18n(">=3, use median filtering on the result of harvest pitch recognition, the value is the filter radius, which can reduce mute"), |
|
value=3, |
|
step=1, |
|
interactive=True, |
|
) |
|
with gr.Column(): |
|
file_index3 = gr.Textbox( |
|
label=i18n("Feature retrieval library file path, if empty, use the drop-down selection result"), |
|
value="", |
|
interactive=True, |
|
) |
|
file_index4 = gr.Dropdown( |
|
label=i18n("Automatically detect index path, drop-down selection"), |
|
choices=sorted(index_paths), |
|
interactive=True, |
|
) |
|
refresh_button.click( |
|
fn=lambda: change_choices()[1], |
|
inputs=[], |
|
outputs=file_index4, |
|
) |
|
|
|
|
|
|
|
|
|
|
|
index_rate2 = gr.Slider( |
|
minimum=0, |
|
maximum=1, |
|
label=i18n("Search feature proportion"), |
|
value=1, |
|
interactive=True, |
|
) |
|
with gr.Column(): |
|
resample_sr1 = gr.Slider( |
|
minimum=0, |
|
maximum=48000, |
|
label=i18n("Post-processing resampling to the final sampling rate, 0 means no resampling"), |
|
value=0, |
|
step=1, |
|
interactive=True, |
|
) |
|
rms_mix_rate1 = gr.Slider( |
|
minimum=0, |
|
maximum=1, |
|
label=i18n("The input source volume envelope replaces the output volume envelope blending ratio. The closer it is to 1, the more the output envelope is used."), |
|
value=1, |
|
interactive=True, |
|
) |
|
protect1 = gr.Slider( |
|
minimum=0, |
|
maximum=0.5, |
|
label=i18n( |
|
"Protects clear consonants and breathing sounds, and prevents electronic music tearing and other artifacts. It is not enabled when it is set to 0.5. It is more effective when it is lowered, but the indexing effect may be reduced." |
|
), |
|
value=0.33, |
|
step=0.01, |
|
interactive=True, |
|
) |
|
with gr.Column(): |
|
dir_input = gr.Textbox( |
|
label=i18n("Enter the path of the audio folder to be processed (just copy it from the address bar of the file manager)"), |
|
value="E:\codes\py39\\test-20230416b\\todo-songs", |
|
) |
|
inputs = gr.File( |
|
file_count="multiple", label=i18n("You can also batch import audio files, choose one of the two, and read the folder first") |
|
) |
|
with gr.Row(): |
|
format1 = gr.Radio( |
|
label=i18n("Export file format"), |
|
choices=["wav", "flac", "mp3", "m4a"], |
|
value="flac", |
|
interactive=True, |
|
) |
|
but1 = gr.Button(i18n("Convert"), variant="primary") |
|
vc_output3 = gr.Textbox(label=i18n("Output information")) |
|
but1.click( |
|
vc_multi, |
|
[ |
|
spk_item, |
|
dir_input, |
|
opt_input, |
|
inputs, |
|
vc_transform1, |
|
f0method1, |
|
file_index3, |
|
file_index4, |
|
|
|
index_rate2, |
|
filter_radius1, |
|
resample_sr1, |
|
rms_mix_rate1, |
|
protect1, |
|
format1, |
|
crepe_hop_length, |
|
], |
|
[vc_output3], |
|
) |
|
sid0.change( |
|
fn=get_vc, |
|
inputs=[sid0, protect0, protect1], |
|
outputs=[spk_item, protect0, protect1], |
|
) |
|
with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")): |
|
with gr.Group(): |
|
gr.Markdown( |
|
value=i18n( |
|
"Batch processing of vocal accompaniment separation, using the UVR5 model.<br>" |
|
"An example of a qualified folder path format: E:\\codes\\py39\\vits_vc_gpu\\White Deer Frost Flower Test Sample (just copy it from the address bar of the file manager). <br>" |
|
"Models are divided into three categories: <br>" |
|
"1. Preserve vocals: Choose this for audio without harmony, it preserves the main vocals better than HP5. There are two models built-in, HP2 and HP3. HP3 may slightly miss the accompaniment but preserves the main vocals a little better than HP2; <br>" |
|
"2. Keep only the main voice: Select this for audio with harmony, which may weaken the main voice. Built-in HP5 model; <br>" |
|
"3. De-reverberation and de-delay model (by FoxJoy):" |
|
" (1) MDX-Net (onnx_dereverb): is the best choice for dual-channel reverberation, and cannot remove single-channel reverberation;" |
|
" (234)DeEcho: Removes delay effects. Aggressive removes delay effects more thoroughly than Normal. DeReverb additionally removes reverberation and can remove mono reverberation, but it cannot completely remove high-frequency plate reverberation. <br>" |
|
"De-reverb/de-delay, attached:<br>" |
|
"1. The time consumption of DeEcho-DeReverb model is nearly twice that of the other two DeEcho models;<br>" |
|
"2. The MDX-Net-Dereverb model is quite slow;<br>" |
|
"3. The cleanest configuration I personally recommend is to use MDX-Net first and then DeEcho-Aggressive." |
|
) |
|
) |
|
with gr.Row(): |
|
with gr.Column(): |
|
dir_wav_input = gr.Textbox( |
|
label=i18n("Enter the path of the audio folder to be processed"), |
|
value="E:\\codes\\py39\\test-20230416b\\todo-songs\\todo-songs", |
|
) |
|
wav_inputs = gr.File( |
|
file_count="multiple", label=i18n("You can also batch import audio files, choose one of the two, and read the folder first") |
|
) |
|
with gr.Column(): |
|
model_choose = gr.Dropdown(label=i18n("Model"), choices=uvr5_names) |
|
agg = gr.Slider( |
|
minimum=0, |
|
maximum=20, |
|
step=1, |
|
label="Vocal extraction aggressiveness", |
|
value=10, |
|
interactive=True, |
|
visible=False, |
|
) |
|
opt_vocal_root = gr.Textbox( |
|
label=i18n("Specify the output folder for the lead vocals"), value="opt" |
|
) |
|
opt_ins_root = gr.Textbox( |
|
label=i18n("Specify the folder for outputting non-lead vocals"), value="opt" |
|
) |
|
format0 = gr.Radio( |
|
label=i18n("Export file format"), |
|
choices=["wav", "flac", "mp3", "m4a"], |
|
value="flac", |
|
interactive=True, |
|
) |
|
but2 = gr.Button(i18n("Convert"), variant="primary") |
|
vc_output4 = gr.Textbox(label=i18n("Output information")) |
|
but2.click( |
|
uvr, |
|
[ |
|
model_choose, |
|
dir_wav_input, |
|
opt_vocal_root, |
|
wav_inputs, |
|
opt_ins_root, |
|
agg, |
|
format0, |
|
], |
|
[vc_output4], |
|
) |
|
with gr.TabItem(i18n("train")): |
|
gr.Markdown( |
|
value=i18n( |
|
"Step 1: Fill in the experimental configuration. The experimental data is placed under logs, one folder for each experiment. You need to manually enter the experiment name path, which contains the experimental configuration, logs, and trained model files." |
|
) |
|
) |
|
with gr.Row(): |
|
exp_dir1 = gr.Textbox(label=i18n("Enter experiment name"), value="mi-test") |
|
sr2 = gr.Radio( |
|
label=i18n("target sampling rate"), |
|
choices=["40k", "48k"], |
|
value="40k", |
|
interactive=True, |
|
) |
|
if_f0_3 = gr.Radio( |
|
label=i18n("Does the model have pitch guidance (must be provided for singing, but not for voice)"), |
|
choices=[True, False], |
|
value=True, |
|
interactive=True, |
|
) |
|
version19 = gr.Radio( |
|
label=i18n("Version"), |
|
choices=["v1", "v2"], |
|
value="v1", |
|
interactive=True, |
|
visible=True, |
|
) |
|
np7 = gr.Slider( |
|
minimum=0, |
|
maximum=config.n_cpu, |
|
step=1, |
|
label=i18n("The number of CPU processes used to extract pitch and process data"), |
|
value=int(np.ceil(config.n_cpu / 1.5)), |
|
interactive=True, |
|
) |
|
with gr.Group(): |
|
gr.Markdown( |
|
value=i18n( |
|
"step2a: Automatically traverse all files that can be decoded into audio in the training folder and perform slice normalization, generating 2 wav folders in the experimental directory; currently only supports single-player training." |
|
) |
|
) |
|
with gr.Row(): |
|
trainset_dir4 = gr.Textbox( |
|
label=i18n("Enter the training folder path"), value="E:\\Voice Audio+Annotation\\Kenshi Yonezu\\src" |
|
) |
|
spk_id5 = gr.Slider( |
|
minimum=0, |
|
maximum=4, |
|
step=1, |
|
label=i18n("Please specify the speaker id"), |
|
value=0, |
|
interactive=True, |
|
) |
|
but1 = gr.Button(i18n("Process data"), variant="primary") |
|
info1 = gr.Textbox(label=i18n("Output information"), value="") |
|
but1.click( |
|
preprocess_dataset, [trainset_dir4, exp_dir1, sr2, np7], [info1] |
|
) |
|
with gr.Group(): |
|
gr.Markdown(value=i18n("step2b: Use CPU to extract pitch (if the model has pitch), use GPU to extract features (select card number)")) |
|
with gr.Row(): |
|
with gr.Column(): |
|
gpus6 = gr.Textbox( |
|
label=i18n("Enter the card numbers to be used separated by -, for example 0-1-2 uses card 0, card 1 and card 2"), |
|
value=gpus, |
|
interactive=True, |
|
) |
|
gpu_info9 = gr.Textbox(label=i18n("显卡信息"), value=gpu_info) |
|
with gr.Column(): |
|
f0method8 = gr.Radio( |
|
label=i18n( |
|
"Select the pitch extraction algorithm: input singing voice can be accelerated by pm, high-quality voice but poor CPU can be accelerated by dio, harvest has better quality but is slow" |
|
), |
|
choices=["pm", "harvest", "dio", "crepe", "mangio-crepe"], |
|
value="harvest", |
|
interactive=True, |
|
) |
|
extraction_crepe_hop_length = gr.Slider( |
|
minimum=1, |
|
maximum=512, |
|
step=1, |
|
label=i18n("crepe_hop_length"), |
|
value=64, |
|
interactive=True |
|
) |
|
but2 = gr.Button(i18n("Feature extraction"), variant="primary") |
|
info2 = gr.Textbox(label=i18n("Output information"), value="", max_lines=8) |
|
but2.click( |
|
extract_f0_feature, |
|
[gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, extraction_crepe_hop_length], |
|
[info2], |
|
) |
|
with gr.Group(): |
|
gr.Markdown(value=i18n("step3: Fill in the training settings and start training the model and indexing")) |
|
with gr.Row(): |
|
save_epoch10 = gr.Slider( |
|
minimum=0, |
|
maximum=50, |
|
step=1, |
|
label=i18n("Save frequency save_every_epoch"), |
|
value=5, |
|
interactive=True, |
|
) |
|
total_epoch11 = gr.Slider( |
|
minimum=0, |
|
maximum=10000, |
|
step=1, |
|
label=i18n("Total number of training rounds total_epoch"), |
|
value=20, |
|
interactive=True, |
|
) |
|
batch_size12 = gr.Slider( |
|
minimum=1, |
|
maximum=40, |
|
step=1, |
|
label=i18n("batch_size for each graphics card"), |
|
value=default_batch_size, |
|
interactive=True, |
|
) |
|
if_save_latest13 = gr.Radio( |
|
label=i18n("Whether to save only the latest ckpt file to save hard disk space"), |
|
choices=[i18n("yes"), i18n("yes")], |
|
value=i18n("yes"), |
|
interactive=True, |
|
) |
|
if_cache_gpu17 = gr.Radio( |
|
label=i18n( |
|
"Whether to cache all training sets to the video memory. Small data under 10 minutes can be cached to speed up training. Large data cache will explode the video memory and will not increase the speed much." |
|
), |
|
choices=[i18n("yes"), i18n("no")], |
|
value=i18n("no"), |
|
interactive=True, |
|
) |
|
if_save_every_weights18 = gr.Radio( |
|
label=i18n("Whether to save the final small model to the weights folder at each save time point"), |
|
choices=[i18n("yes"), i18n("no")], |
|
value=i18n("no"), |
|
interactive=True, |
|
) |
|
with gr.Row(): |
|
pretrained_G14 = gr.Textbox( |
|
label=i18n("Load the pre-trained bottom model G path"), |
|
value="pretrained/f0G40k.pth", |
|
interactive=True, |
|
) |
|
pretrained_D15 = gr.Textbox( |
|
label=i18n("Load the pre-trained bottom model D path"), |
|
value="pretrained/f0D40k.pth", |
|
interactive=True, |
|
) |
|
sr2.change( |
|
change_sr2, |
|
[sr2, if_f0_3, version19], |
|
[pretrained_G14, pretrained_D15], |
|
) |
|
version19.change( |
|
change_version19, |
|
[sr2, if_f0_3, version19], |
|
[pretrained_G14, pretrained_D15, sr2], |
|
) |
|
if_f0_3.change( |
|
change_f0, |
|
[if_f0_3, sr2, version19], |
|
[f0method8, pretrained_G14, pretrained_D15], |
|
) |
|
gpus16 = gr.Textbox( |
|
label=i18n("Enter the card numbers to be used separated by -, for example 0-1-2 uses card 0, card 1 and card 2"), |
|
value=gpus, |
|
interactive=True, |
|
) |
|
but3 = gr.Button(i18n("Training model"), variant="primary") |
|
but4 = gr.Button(i18n("Training feature index"), variant="primary") |
|
but5 = gr.Button(i18n("One click training"), variant="primary") |
|
info3 = gr.Textbox(label=i18n("Output information"), value="", max_lines=10) |
|
but3.click( |
|
click_train, |
|
[ |
|
exp_dir1, |
|
sr2, |
|
if_f0_3, |
|
spk_id5, |
|
save_epoch10, |
|
total_epoch11, |
|
batch_size12, |
|
if_save_latest13, |
|
pretrained_G14, |
|
pretrained_D15, |
|
gpus16, |
|
if_cache_gpu17, |
|
if_save_every_weights18, |
|
version19, |
|
], |
|
info3, |
|
) |
|
but4.click(train_index, [exp_dir1, version19], info3) |
|
but5.click( |
|
train1key, |
|
[ |
|
exp_dir1, |
|
sr2, |
|
if_f0_3, |
|
trainset_dir4, |
|
spk_id5, |
|
np7, |
|
f0method8, |
|
save_epoch10, |
|
total_epoch11, |
|
batch_size12, |
|
if_save_latest13, |
|
pretrained_G14, |
|
pretrained_D15, |
|
gpus16, |
|
if_cache_gpu17, |
|
if_save_every_weights18, |
|
version19, |
|
extraction_crepe_hop_length |
|
], |
|
info3, |
|
) |
|
|
|
# "ckpt processing" tab: four sections wired to merge, change_info, show_info
# and extract_small_model respectively.
# NOTE(review): this copy of the file has lost its indentation, so which
# components sit inside which gr.Group / gr.Row cannot be read from the text
# below; confirm the nesting against the original source before re-indenting.
with gr.TabItem(i18n("ckpt processing")): |
|
# Section 1: model fusion (two model paths, a 0..1 weight for model A, and
# the settings passed along to merge).
with gr.Group(): |
|
gr.Markdown(value=i18n("Model fusion, can be used to test timbre fusion")) |
|
with gr.Row(): |
|
ckpt_a = gr.Textbox(label=i18n("A model path"), value="", interactive=True) |
|
ckpt_b = gr.Textbox(label=i18n("B model path"), value="", interactive=True) |
|
alpha_a = gr.Slider( |
|
minimum=0, |
|
maximum=1, |
|
label=i18n("A model weight"), |
|
value=0.5, |
|
interactive=True, |
|
) |
|
with gr.Row(): |
|
sr_ = gr.Radio( |
|
label=i18n("target sampling rate"), |
|
choices=["40k", "48k"], |
|
value="40k", |
|
interactive=True, |
|
) |
|
# The choices here are the translated "yes"/"no" strings, unlike if_f0__
# further down, which uses "1"/"0".
if_f0_ = gr.Radio( |
|
label=i18n("Does the model have pitch guidance?"), |
|
choices=[i18n("yes"), i18n("no")], |
|
value=i18n("yes"), |
|
interactive=True, |
|
) |
|
info__ = gr.Textbox( |
|
label=i18n("Model information to be placed"), value="", max_lines=8, interactive=True |
|
) |
|
name_to_save0 = gr.Textbox( |
|
label=i18n("The saved model name has no suffix"), |
|
value="", |
|
max_lines=1, |
|
interactive=True, |
|
) |
|
version_2 = gr.Radio( |
|
label=i18n("Model version model"), |
|
choices=["v1", "v2"], |
|
value="v1", |
|
interactive=True, |
|
) |
|
with gr.Row(): |
|
but6 = gr.Button(i18n("Fusion"), variant="primary") |
|
info4 = gr.Textbox(label=i18n("Output information"), value="", max_lines=8) |
|
# but6 calls merge with the eight inputs below, in this order, and shows
# its return value in info4.
but6.click( |
|
merge, |
|
[ |
|
ckpt_a, |
|
ckpt_b, |
|
alpha_a, |
|
sr_, |
|
if_f0_, |
|
info__, |
|
name_to_save0, |
|
version_2, |
|
], |
|
info4, |
|
) |
|
# Section 2: rewrite the information stored in a model file.
with gr.Group(): |
|
gr.Markdown(value=i18n("Modify model information (only supports small model files extracted from the weights folder)")) |
|
with gr.Row(): |
|
ckpt_path0 = gr.Textbox( |
|
label=i18n("model path"), value="", interactive=True |
|
) |
|
info_ = gr.Textbox( |
|
label=i18n("Model information to be changed"), value="", max_lines=8, interactive=True |
|
) |
|
name_to_save1 = gr.Textbox( |
|
label=i18n("The saved file name, the default is empty and the same as the source file name"), |
|
value="", |
|
max_lines=8, |
|
interactive=True, |
|
) |
|
with gr.Row(): |
|
but7 = gr.Button(i18n("Revise"), variant="primary") |
|
info5 = gr.Textbox(label=i18n("Output information"), value="", max_lines=8) |
|
# but7 calls change_info(ckpt_path0, info_, name_to_save1); result goes to info5.
but7.click(change_info, [ckpt_path0, info_, name_to_save1], info5) |
|
# Section 3: display the information stored in a model file.
with gr.Group(): |
|
gr.Markdown(value=i18n("View model information (only supports small model files extracted from the weights folder)")) |
|
with gr.Row(): |
|
ckpt_path1 = gr.Textbox( |
|
label=i18n("model path"), value="", interactive=True |
|
) |
|
but8 = gr.Button(i18n("Check"), variant="primary") |
|
info6 = gr.Textbox(label=i18n("Output information"), value="", max_lines=8) |
|
# but8 calls show_info(ckpt_path1); result goes to info6.
but8.click(show_info, [ckpt_path1], info6) |
|
# Section 4: extract a small model from a checkpoint under the logs folder.
with gr.Group(): |
|
gr.Markdown( |
|
value=i18n( |
|
"Model extraction (enter the large file model path in the logs folder), suitable for the case where you don’t want to train the model halfway through and there is no automatic extraction to save the small file model, or you want to test the intermediate model" |
|
) |
|
) |
|
with gr.Row(): |
|
# The initial value is a hard-coded Windows path.
ckpt_path2 = gr.Textbox( |
|
label=i18n("model path"), |
|
value="E:\\codes\\py39\\logs\\mi-test_f0_48k\\G_23333.pth", |
|
interactive=True, |
|
) |
|
save_name = gr.Textbox( |
|
label=i18n("save name"), value="", interactive=True |
|
) |
|
sr__ = gr.Radio( |
|
label=i18n("target sampling rate"), |
|
choices=["32k", "40k", "48k"], |
|
value="40k", |
|
interactive=True, |
|
) |
|
if_f0__ = gr.Radio( |
|
label=i18n("Whether the model has pitch guidance, 1 for yes, 0 for no"), |
|
choices=["1", "0"], |
|
value="1", |
|
interactive=True, |
|
) |
|
version_1 = gr.Radio( |
|
label=i18n("Model version model"), |
|
choices=["v1", "v2"], |
|
value="v2", |
|
interactive=True, |
|
) |
|
info___ = gr.Textbox( |
|
label=i18n("Model information to be placed"), value="", max_lines=8, interactive=True |
|
) |
|
but9 = gr.Button(i18n("extract"), variant="primary") |
|
info7 = gr.Textbox(label=i18n("Output information"), value="", max_lines=8) |
|
# Editing the checkpoint path runs change_info_, whose outputs update the
# sampling-rate, pitch-guidance and version radios.
ckpt_path2.change( |
|
change_info_, [ckpt_path2], [sr__, if_f0__, version_1] |
|
) |
|
# but9 calls extract_small_model with the six inputs below; result goes to info7.
but9.click( |
|
extract_small_model, |
|
[ckpt_path2, save_name, sr__, if_f0__, info___, version_1], |
|
info7, |
|
) |
|
|
|
# "Onnx export" tab: a model path, an output path, a status label and an
# export button.
# NOTE(review): indentation is missing in this copy of the file, so the
# nesting of the gr.Row contexts below cannot be read from the text; confirm
# against the original source before re-indenting.
with gr.TabItem(i18n("Onnx export")): |
|
with gr.Row(): |
|
ckpt_dir = gr.Textbox(label=i18n("RVC model path"), value="", interactive=True) |
|
with gr.Row(): |
|
onnx_dir = gr.Textbox( |
|
label=i18n("Onnx output path"), value="", interactive=True |
|
) |
|
with gr.Row(): |
|
# The label text "info" is a plain literal, not passed through i18n.
infoOnnx = gr.Label(label="info") |
|
with gr.Row(): |
|
butOnnx = gr.Button(i18n("Exporting Onnx Models"), variant="primary") |
|
# butOnnx calls export_onnx(ckpt_dir, onnx_dir) and shows the result in infoOnnx.
butOnnx.click(export_onnx, [ckpt_dir, onnx_dir], infoOnnx) |
|
|
|
# FAQ tab: render one of the bundled FAQ markdown documents.
tab_faq = i18n("FAQ")
with gr.TabItem(tab_faq):
    # docs/faq.md is used when the translated tab title is still "FAQ";
    # any other title selects docs/faq_en.md.
    faq_path = "docs/faq.md" if tab_faq == "FAQ" else "docs/faq_en.md"
    try:
        with open(faq_path, "r", encoding="utf8") as f:
            info = f.read()
        gr.Markdown(value=info)
    except Exception:
        # Best effort: a missing or unreadable FAQ file must not abort UI
        # construction, so the traceback is shown in the tab instead.
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        gr.Markdown(traceback.format_exc())
|
|
|
|
|
|
|
def save_preset(
    preset_name,
    sid0,
    vc_transform,
    input_audio,
    f0method,
    crepe_hop_length,
    filter_radius,
    file_index1,
    file_index2,
    index_rate,
    resample_sr,
    rms_mix_rate,
    protect,
    f0_file,
):
    """Append one inference preset to ``../inference-presets.json``.

    The arguments are stored verbatim in a single JSON object using these
    keys: ``name`` (preset_name), ``model`` (sid0), ``transpose``
    (vc_transform), ``audio_file`` (input_audio), ``f0_method`` (f0method),
    ``crepe_hop_length``, ``median_filtering`` (filter_radius),
    ``feature_path`` (file_index1), ``auto_feature_path`` (file_index2),
    ``search_feature_ratio`` (index_rate), ``resample`` (resample_sr),
    ``volume_envelope`` (rms_mix_rate), ``protect_voiceless`` (protect) and
    ``f0_file_path`` (f0_file).

    The presets file is resolved relative to the current working directory.
    If it does not exist yet, or has no ``presets`` list, one is created.

    Returns:
        None. A confirmation line is printed to stdout.

    Raises:
        json.JSONDecodeError: the existing presets file is not valid JSON.
        TypeError: a value cannot be serialized to JSON; the existing file
            is left untouched in that case.
    """
    presets_path = '../inference-presets.json'

    try:
        with open(presets_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        # First save: start from an empty document instead of failing.
        data = {}

    preset_json = {
        'name': preset_name,
        'model': sid0,
        'transpose': vc_transform,
        'audio_file': input_audio,
        'f0_method': f0method,
        'crepe_hop_length': crepe_hop_length,
        'median_filtering': filter_radius,
        'feature_path': file_index1,
        'auto_feature_path': file_index2,
        'search_feature_ratio': index_rate,
        'resample': resample_sr,
        'volume_envelope': rms_mix_rate,
        'protect_voiceless': protect,
        'f0_file_path': f0_file,
    }
    data.setdefault('presets', []).append(preset_json)

    # Serialize BEFORE opening the file for writing: mode 'w' truncates
    # immediately, so a serialization error raised afterwards would wipe
    # every previously stored preset.
    serialized = json.dumps(data)
    with open(presets_path, 'w', encoding='utf-8') as file:
        file.write(serialized)

    print("Saved Preset %s into inference-presets.json!" % preset_name)
|
|
|
|
|
def on_preset_changed(preset_name):
    """Look up ``preset_name`` in ``../inference-presets.json`` and log the search.

    The presets file is resolved relative to the current working directory.
    Progress is printed to stdout; "Found a preset" is printed once for every
    stored preset whose ``name`` equals ``preset_name``.

    Returns:
        An empty tuple. The matching preset is located but none of its
        values are returned.

    Raises:
        FileNotFoundError: the presets file does not exist.
        json.JSONDecodeError: the presets file is not valid JSON.
    """
    print("Changed Preset to %s!" % preset_name)
    with open('../inference-presets.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    # %-formatting rather than "+" so a non-string selection (e.g. None)
    # cannot raise TypeError here.
    print("Searching for %s" % preset_name)
    returning_preset = None
    # Tolerate a document without a 'presets' list and entries without a name.
    for preset in data.get('presets', []):
        if preset.get('name') == preset_name:
            print("Found a preset")
            # No break: when names are duplicated the last match is kept.
            returning_preset = preset

    # NOTE(review): returning_preset is never used and the returned tuple is
    # empty - presumably the per-field outputs were disabled; confirm before
    # binding this callback to any output components.
    return ()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Start the queued app. When config.iscolab or config.paperspace is set the
# app is launched with share=True; otherwise it binds to 0.0.0.0 on
# config.listen_port and opens a browser unless config.noautoopen is set.
if config.iscolab or config.paperspace:
    launch_kwargs = {"share": True}
else:
    launch_kwargs = {
        "server_name": "0.0.0.0",
        "inbrowser": not config.noautoopen,
        "server_port": config.listen_port,
        "quiet": True,
    }
app.queue(concurrency_count=511, max_size=1022).launch(**launch_kwargs)
|
|
|
|
|
|