diff --git a/main/app/app.py b/main/app/app.py
deleted file mode 100644
index 78c8852c27b00d52ab5a43e4e6c112aba79595b0..0000000000000000000000000000000000000000
--- a/main/app/app.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import os
-import io
-import ssl
-import sys
-import time
-import codecs
-import logging
-import warnings
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-start_time = time.time()
-
-from main.app.tabs.extra.extra import extra_tab
-from main.app.tabs.editing.editing import editing_tab
-from main.app.tabs.training.training import training_tab
-from main.app.tabs.downloads.downloads import download_tab
-from main.app.tabs.inference.inference import inference_tab
-from main.app.variables import logger, config, translations, theme, font, configs, language, allow_disk
-
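-# Disable TLS certificate verification globally (presumably so downloads from hosts with broken certificates still work).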
-ssl._create_default_https_context = ssl._create_unverified_context
-
-warnings.filterwarnings("ignore")
-for l in ["httpx", "gradio", "uvicorn", "httpcore", "urllib3"]:
- logging.getLogger(l).setLevel(logging.ERROR)
-
-with gr.Blocks(title="📱 Vietnamese-RVC GUI BY ANH", theme=theme, css="".format(fonts=font or "https://fonts.googleapis.com/css2?family=Courgette&display=swap")) as app:
- gr.HTML("
🎵VIETNAMESE RVC BY ANH🎵
")
- gr.HTML(f"{translations['title']}
")
-
- with gr.Tabs():
- inference_tab()
- editing_tab()
- training_tab()
- download_tab()
- extra_tab(app)
-
- with gr.Row():
- gr.Markdown(translations["rick_roll"].format(rickroll=codecs.decode('uggcf://jjj.lbhghor.pbz/jngpu?i=qDj4j9JtKpD', 'rot13')))
-
- with gr.Row():
- gr.Markdown(translations["terms_of_use"])
-
- with gr.Row():
- gr.Markdown(translations["exemption"])
-
- logger.info(config.device)
- logger.info(translations["start_app"])
- logger.info(translations["set_lang"].format(lang=language))
-
- port = configs.get("app_port", 7860)
- server_name = configs.get("server_name", "0.0.0.0")
- share = "--share" in sys.argv
-
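-    # Temporarily silence stdout while Gradio launches; the original stream is restored after the retry loop below.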
- original_stdout = sys.stdout
- sys.stdout = io.StringIO()
-
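-    # Retry the launch a few times, stepping the port down by one whenever the current one is already in use (OSError).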
- for i in range(configs.get("num_of_restart", 5)):
- try:
- _, _, share_url = app.queue().launch(
- favicon_path=configs["ico_path"],
- server_name=server_name,
- server_port=port,
- show_error=configs.get("app_show_error", False),
- inbrowser="--open" in sys.argv,
- share=share,
- allowed_paths=allow_disk,
- prevent_thread_lock=True,
- quiet=True
- )
- break
- except OSError:
- logger.debug(translations["port"].format(port=port))
- port -= 1
- except Exception as e:
- logger.error(translations["error_occurred"].format(e=e))
- sys.exit(1)
-
- sys.stdout = original_stdout
- logger.info(f"{translations['running_local_url']}: {server_name}:{port}")
-
- if share: logger.info(f"{translations['running_share_url']}: {share_url}")
- logger.info(f"{translations['gradio_start']}: {(time.time() - start_time):.2f}s")
-
- while 1:
- time.sleep(5)
\ No newline at end of file
diff --git a/main/app/core/downloads.py b/main/app/core/downloads.py
deleted file mode 100644
index 62680d6b4eeff90a82c63883233badd161651417..0000000000000000000000000000000000000000
--- a/main/app/core/downloads.py
+++ /dev/null
@@ -1,187 +0,0 @@
-import os
-import re
-import sys
-import json
-import codecs
-import shutil
-import yt_dlp
-import warnings
-import requests
-
-from bs4 import BeautifulSoup
-
-sys.path.append(os.getcwd())
-
-from main.tools import huggingface, gdown, meganz, mediafire, pixeldrain
-from main.app.core.ui import gr_info, gr_warning, gr_error, process_output
-from main.app.variables import logger, translations, model_options, configs
-from main.app.core.process import move_files_from_directory, fetch_pretrained_data, extract_name_model
-
-def download_url(url):
- if not url: return gr_warning(translations["provide_url"])
- if not os.path.exists(configs["audios_path"]): os.makedirs(configs["audios_path"], exist_ok=True)
-
- with warnings.catch_warnings():
- warnings.filterwarnings("ignore")
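-        # yt-dlp options: download the best available audio stream and convert it to WAV with FFmpeg.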
- ydl_opts = {
- "format": "bestaudio/best",
- "postprocessors": [{
- "key": "FFmpegExtractAudio",
- "preferredcodec": "wav",
- "preferredquality": "192"
- }],
- "quiet": True,
- "no_warnings": True,
- "noplaylist": True,
- "verbose": False
- }
-
- gr_info(translations["start"].format(start=translations["download_music"]))
-
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
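-        # Build the output name from the video title: strip everything except word characters, whitespace and CJK/Hangul/Cyrillic ranges, then replace whitespace runs with dashes.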
- audio_output = os.path.join(configs["audios_path"], re.sub(r'\s+', '-', re.sub(r'[^\w\s\u4e00-\u9fff\uac00-\ud7af\u0400-\u04FF\u1100-\u11FF]', '', ydl.extract_info(url, download=False).get('title', 'video')).strip()))
- if os.path.exists(audio_output): shutil.rmtree(audio_output, ignore_errors=True)
-
- ydl_opts['outtmpl'] = audio_output
-
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
- audio_output = process_output(audio_output + ".wav")
-
- ydl.download([url])
-
- gr_info(translations["success"])
- return [audio_output, audio_output, translations["success"]]
-
-def move_file(file, download_dir, model):
- weights_dir = configs["weights_path"]
- logs_dir = configs["logs_path"]
-
- if not os.path.exists(weights_dir): os.makedirs(weights_dir, exist_ok=True)
- if not os.path.exists(logs_dir): os.makedirs(logs_dir, exist_ok=True)
-
- if file.endswith(".zip"): shutil.unpack_archive(file, download_dir)
- move_files_from_directory(download_dir, weights_dir, logs_dir, model)
-
-def download_model(url=None, model=None):
- if not url: return gr_warning(translations["provide_url"])
-
- url = url.replace("/blob/", "/resolve/").replace("?download=true", "").strip()
- download_dir = "download_model"
-
- os.makedirs(download_dir, exist_ok=True)
-
- try:
- gr_info(translations["start"].format(start=translations["download"]))
-
- if "huggingface.co" in url: file = huggingface.HF_download_file(url, download_dir)
- elif "google.com" in url: file = gdown.gdown_download(url, download_dir)
- elif "mediafire.com" in url: file = mediafire.Mediafire_Download(url, download_dir)
- elif "pixeldrain.com" in url: file = pixeldrain.pixeldrain(url, download_dir)
- elif "mega.nz" in url: file = meganz.mega_download_url(url, download_dir)
- else:
- gr_warning(translations["not_support_url"])
- return translations["not_support_url"]
-
- if not model:
- modelname = os.path.basename(file)
- model = extract_name_model(modelname) if modelname.endswith(".index") else os.path.splitext(modelname)[0]
- if model is None: model = os.path.splitext(modelname)[0]
-
- model = model.replace(".onnx", "").replace(".pth", "").replace(".index", "").replace(".zip", "").replace(" ", "_").replace("(", "").replace(")", "").replace("[", "").replace("]", "").replace("{", "").replace("}", "").replace(",", "").replace('"', "").replace("'", "").replace("|", "").strip()
-
- move_file(file, download_dir, model)
- gr_info(translations["success"])
-
- return translations["success"]
- except Exception as e:
- gr_error(message=translations["error_occurred"].format(e=e))
- return translations["error_occurred"].format(e=e)
- finally:
- shutil.rmtree(download_dir, ignore_errors=True)
-
-def download_pretrained_model(choices, model, sample_rate):
- pretraineds_custom_path = configs["pretrained_custom_path"]
-
- if choices == translations["list_model"]:
- paths = fetch_pretrained_data()[model][sample_rate]
-
- if not os.path.exists(pretraineds_custom_path): os.makedirs(pretraineds_custom_path, exist_ok=True)
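-        # The pretrained base URL is stored ROT13-obfuscated; decode it and append the file name.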
- url = codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/cergenvarq_phfgbz/", "rot13") + paths
-
- gr_info(translations["download_pretrain"])
- file = huggingface.HF_download_file(url.replace("/blob/", "/resolve/").replace("?download=true", "").strip(), os.path.join(pretraineds_custom_path, paths))
-
- if file.endswith(".zip"):
- shutil.unpack_archive(file, pretraineds_custom_path)
- os.remove(file)
-
- gr_info(translations["success"])
- return translations["success"], None
- elif choices == translations["download_url"]:
- if not model: return gr_warning(translations["provide_pretrain"].format(dg="D"))
- if not sample_rate: return gr_warning(translations["provide_pretrain"].format(dg="G"))
-
- gr_info(translations["download_pretrain"])
-
- for url in [model, sample_rate]:
- url = url.replace("/blob/", "/resolve/").replace("?download=true", "").strip()
-
- if "huggingface.co" in url: huggingface.HF_download_file(url, pretraineds_custom_path)
- elif "google.com" in url: gdown.gdown_download(url, pretraineds_custom_path)
- elif "mediafire.com" in url: mediafire.Mediafire_Download(url, pretraineds_custom_path)
- elif "pixeldrain.com" in url: pixeldrain.pixeldrain(url, pretraineds_custom_path)
- elif "mega.nz" in url: meganz.mega_download_url(url, pretraineds_custom_path)
- else:
- gr_warning(translations["not_support_url"])
- return translations["not_support_url"], translations["not_support_url"]
-
- gr_info(translations["success"])
- return translations["success"], translations["success"]
-
-def fetch_models_data(search):
- all_table_data = []
- page = 1
-
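-    # Page through the (ROT13-obfuscated) model listing endpoint until it returns an empty table.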
- while 1:
- try:
- response = requests.post(url=codecs.decode("uggcf://ibvpr-zbqryf.pbz/srgpu_qngn.cuc", "rot13"), data={"page": page, "search": search})
-
- if response.status_code == 200:
- table_data = response.json().get("table", "")
- if not table_data.strip(): break
-
- all_table_data.append(table_data)
- page += 1
- else:
- logger.debug(f"{translations['code_error']} {response.status_code}")
- break
- except json.JSONDecodeError:
- logger.debug(translations["json_error"])
- break
- except requests.RequestException as e:
- logger.debug(translations["requests_error"].format(e=e))
- break
-
- return all_table_data
-
-def search_models(name):
- if not name: return gr_warning(translations["provide_name"])
- gr_info(translations["start"].format(start=translations["search"]))
-
- tables = fetch_models_data(name)
-
- if len(tables) == 0:
- gr_info(translations["not_found"].format(name=name))
- return [None]*2
- else:
- model_options.clear()
-
- for table in tables:
- for row in BeautifulSoup(table, "html.parser").select("tr"):
-                name_tag, url_tag = row.find("a", {"class": "fs-5"}), row.find("a", {"class": "btn btn-sm fw-bold btn-light ms-0 p-1 ps-2 pe-2"})
-                if not name_tag or not url_tag: continue
-                url = url_tag["href"].replace("https://easyaivoice.com/run?url=", "")
-                if "huggingface" in url: model_options[name_tag.text.replace(".onnx", "").replace(".pth", "").replace(".index", "").replace(".zip", "").replace(" ", "_").replace("(", "").replace(")", "").replace("[", "").replace("]", "").replace(",", "").replace('"', "").replace("'", "").replace("|", "_").replace("-_-", "_").replace("_-_", "_").replace("-", "_").replace("---", "_").replace("___", "_").strip()] = url
-
- gr_info(translations["found"].format(results=len(model_options)))
- return [{"value": "", "choices": model_options, "interactive": True, "visible": True, "__type__": "update"}, {"value": translations["downloads"], "visible": True, "__type__": "update"}]
\ No newline at end of file
diff --git a/main/app/core/editing.py b/main/app/core/editing.py
deleted file mode 100644
index 403fde62463efee1754d8791355ee796d836c23f..0000000000000000000000000000000000000000
--- a/main/app/core/editing.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import os
-import sys
-import random
-import librosa
-import subprocess
-
-import numpy as np
-import soundfile as sf
-
-sys.path.append(os.getcwd())
-
-from main.app.core.ui import gr_info, gr_warning, process_output
-from main.app.variables import python, translations, configs, config
-
-def audio_effects(input_path, output_path, resample, resample_sr, chorus_depth, chorus_rate, chorus_mix, chorus_delay, chorus_feedback, distortion_drive, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift, delay_seconds, delay_feedback, delay_mix, compressor_threshold, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold, limiter_release, gain_db, bitcrush_bit_depth, clipping_threshold, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost_db, bass_boost_frequency, treble_boost_db, treble_boost_frequency, fade_in_duration, fade_out_duration, export_format, chorus, distortion, reverb, delay, compressor, limiter, gain, bitcrush, clipping, phaser, treble_bass_boost, fade_in_out, audio_combination, audio_combination_input, main_vol, combine_vol):
- if not input_path or not os.path.exists(input_path) or os.path.isdir(input_path):
- gr_warning(translations["input_not_valid"])
- return None
-
- if not output_path:
- gr_warning(translations["output_not_valid"])
- return None
-
- if os.path.isdir(output_path): output_path = os.path.join(output_path, f"audio_effects.{export_format}")
- output_dir = os.path.dirname(output_path) or output_path
-
- if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
- output_path = process_output(output_path)
-
- gr_info(translations["start"].format(start=translations["apply_effect"]))
-
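-    # In debug mode the effects run in a separate Python process via the CLI script; otherwise process_audio is called in-process.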
- if config.debug_mode: subprocess.run([python, configs["audio_effects_path"], "--input_path", input_path, "--output_path", output_path, "--resample", str(resample), "--resample_sr", str(resample_sr), "--chorus_depth", str(chorus_depth), "--chorus_rate", str(chorus_rate), "--chorus_mix", str(chorus_mix), "--chorus_delay", str(chorus_delay), "--chorus_feedback", str(chorus_feedback), "--drive_db", str(distortion_drive), "--reverb_room_size", str(reverb_room_size), "--reverb_damping", str(reverb_damping), "--reverb_wet_level", str(reverb_wet_level), "--reverb_dry_level", str(reverb_dry_level), "--reverb_width", str(reverb_width), "--reverb_freeze_mode", str(reverb_freeze_mode), "--pitch_shift", str(pitch_shift), "--delay_seconds", str(delay_seconds), "--delay_feedback", str(delay_feedback), "--delay_mix", str(delay_mix), "--compressor_threshold", str(compressor_threshold), "--compressor_ratio", str(compressor_ratio), "--compressor_attack_ms", str(compressor_attack_ms), "--compressor_release_ms", str(compressor_release_ms), "--limiter_threshold", str(limiter_threshold), "--limiter_release", str(limiter_release), "--gain_db", str(gain_db), "--bitcrush_bit_depth", str(bitcrush_bit_depth), "--clipping_threshold", str(clipping_threshold), "--phaser_rate_hz", str(phaser_rate_hz), "--phaser_depth", str(phaser_depth), "--phaser_centre_frequency_hz", str(phaser_centre_frequency_hz), "--phaser_feedback", str(phaser_feedback), "--phaser_mix", str(phaser_mix), "--bass_boost_db", str(bass_boost_db), "--bass_boost_frequency", str(bass_boost_frequency), "--treble_boost_db", str(treble_boost_db), "--treble_boost_frequency", str(treble_boost_frequency), "--fade_in_duration", str(fade_in_duration), "--fade_out_duration", str(fade_out_duration), "--export_format", export_format, "--chorus", str(chorus), "--distortion", str(distortion), "--reverb", str(reverb), "--pitchshift", str(pitch_shift != 0), "--delay", str(delay), "--compressor", str(compressor), "--limiter", str(limiter), "--gain", str(gain), "--bitcrush", str(bitcrush), "--clipping", str(clipping), "--phaser", str(phaser), "--treble_bass_boost", str(treble_bass_boost), "--fade_in_out", str(fade_in_out), "--audio_combination", str(audio_combination), "--audio_combination_input", audio_combination_input, "--main_volume", str(main_vol), "--combination_volume", str(combine_vol)])
- else:
- from main.inference.audio_effects import process_audio
-
- process_audio(input_path, output_path, resample, resample_sr, chorus_depth, chorus_rate, chorus_mix, chorus_delay, chorus_feedback, distortion_drive, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift, delay_seconds, delay_feedback, delay_mix, compressor_threshold, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold, limiter_release, gain_db, bitcrush_bit_depth, clipping_threshold, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost_db, bass_boost_frequency, treble_boost_db, treble_boost_frequency, fade_in_duration, fade_out_duration, export_format, chorus, distortion, reverb, pitch_shift != 0, delay, compressor, limiter, gain, bitcrush, clipping, phaser, treble_bass_boost, fade_in_out, audio_combination, audio_combination_input, main_vol, combine_vol)
-
- gr_info(translations["success"])
- return output_path.replace("wav", export_format)
-
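-# Simple vibrato: re-sample the signal at indices offset by a sinusoid (freq in Hz, depth in seconds of sway).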
-def vibrato(y, sr, freq=5, depth=0.003):
- return y[np.clip((np.arange(len(y)) + (depth * np.sin(2 * np.pi * freq * (np.arange(len(y)) / sr))) * sr).astype(int), 0, len(y) - 1)]
-
-def apply_voice_quirk(audio_path, mode, output_path, export_format):
- if not audio_path or not os.path.exists(audio_path) or os.path.isdir(audio_path):
- gr_warning(translations["input_not_valid"])
- return None
-
- if not output_path:
- gr_warning(translations["output_not_valid"])
- return None
-
- if os.path.isdir(output_path): output_path = os.path.join(output_path, f"audio_quirk.{export_format}")
- output_dir = os.path.dirname(output_path) or output_path
-
- if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
- output_path = process_output(output_path)
-
- gr_info(translations["start"].format(start=translations["apply_effect"]))
-
- y, sr = librosa.load(audio_path, sr=None)
- output_path = output_path.replace("wav", export_format)
-
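-    # Map the UI label to a numeric mode; mode 0 picks one of the 16 effects at random.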
- mode = translations["quirk_choice"][mode]
- if mode == 0: mode = random.randint(1, 16)
-
- if mode == 1: y *= np.random.uniform(0.5, 0.8, size=len(y))
- elif mode == 2: y = librosa.effects.pitch_shift(y=y + np.random.normal(0, 0.01, y.shape), sr=sr, n_steps=np.random.uniform(-1.5, -3.5))
- elif mode == 3: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=3), rate=1.2)
- elif mode == 4: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=8), rate=1.3)
- elif mode == 5: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=-3), rate=0.75)
- elif mode == 6: y *= np.sin(np.linspace(0, np.pi * 20, len(y))) * 0.5 + 0.5
- elif mode == 7: y = librosa.effects.time_stretch(vibrato(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=-4), sr, freq=3, depth=0.004), rate=0.85)
-    elif mode == 8: y = y * 0.6 + np.pad(y, (sr // 2, 0), mode='constant')[:len(y)] * 0.4
- elif mode == 9: y = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=2) + np.sin(np.linspace(0, np.pi * 20, len(y))) * 0.02
- elif mode == 10: y = vibrato(y, sr, freq=8, depth=0.005)
- elif mode == 11: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=4), rate=1.25)
- elif mode == 12: y = np.hstack([np.pad(f, (0, int(len(f)*0.3)), mode='edge') for f in librosa.util.frame(y, frame_length=2048, hop_length=512).T])
- elif mode == 13: y = np.concatenate([y, np.sin(2 * np.pi * np.linspace(0, 1, int(0.05 * sr))) * 0.02])
- elif mode == 14: y += np.random.normal(0, 0.005, len(y))
- elif mode == 15:
- frame = int(sr * 0.2)
- chunks = [y[i:i + frame] for i in range(0, len(y), frame)]
-
- np.random.shuffle(chunks)
- y = np.concatenate(chunks)
- elif mode == 16:
- frame = int(sr * 0.3)
-
- for i in range(0, len(y), frame * 2):
- y[i:i+frame] = y[i:i+frame][::-1]
-
- sf.write(output_path, y, sr, format=export_format)
- gr_info(translations["success"])
-
- return output_path
\ No newline at end of file
diff --git a/main/app/core/f0_extract.py b/main/app/core/f0_extract.py
deleted file mode 100644
index a85c96e33b5b63a7a3aa23707d8e25c6c3ffebd9..0000000000000000000000000000000000000000
--- a/main/app/core/f0_extract.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import os
-import sys
-import librosa
-
-import numpy as np
-import matplotlib.pyplot as plt
-
-sys.path.append(os.getcwd())
-
-from main.library.utils import check_assets
-from main.app.core.ui import gr_info, gr_warning
-from main.library.predictors.Generator import Generator
-from main.app.variables import config, translations, configs
-
-def f0_extract(audio, f0_method, f0_onnx):
- if not audio or not os.path.exists(audio) or os.path.isdir(audio):
- gr_warning(translations["input_not_valid"])
- return [None]*2
-
- check_assets(f0_method, None, f0_onnx, None)
-
- f0_path = os.path.join(configs["f0_path"], os.path.splitext(os.path.basename(audio))[0])
- image_path = os.path.join(f0_path, "f0.png")
- txt_path = os.path.join(f0_path, "f0.txt")
-
- gr_info(translations["start_extract"])
-
- if not os.path.exists(f0_path): os.makedirs(f0_path, exist_ok=True)
-
- y, sr = librosa.load(audio, sr=None)
-
- f0_generator = Generator(sr, 160, 50, 1600, is_half=config.is_half, device=config.device, f0_onnx_mode=f0_onnx, del_onnx_model=f0_onnx)
- _, pitchf = f0_generator.calculator(config.x_pad, f0_method, y, 0, None, 3, False, 0, None, False)
-
- F_temp = np.array(pitchf, dtype=np.float32)
- F_temp[F_temp == 0] = np.nan
-
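-    # Convert Hz to cents relative to MIDI note 0 (about 8.18 Hz) for plotting; unvoiced frames stay NaN.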
- f0 = 1200 * np.log2(F_temp / librosa.midi_to_hz(0))
-
- plt.figure(figsize=(10, 4))
- plt.plot(f0)
- plt.title(f0_method)
- plt.xlabel(translations["time_frames"])
- plt.ylabel(translations["Frequency"])
- plt.savefig(image_path)
- plt.close()
-
- with open(txt_path, "w") as f:
- for i, f0_value in enumerate(f0):
- f.write(f"{i * sr / 160},{f0_value}\n")
-
- gr_info(translations["extract_done"])
-
- return [txt_path, image_path]
\ No newline at end of file
diff --git a/main/app/core/inference.py b/main/app/core/inference.py
deleted file mode 100644
index b6fc86e45a107ef67ee9f2dc63af55b31ea27661..0000000000000000000000000000000000000000
--- a/main/app/core/inference.py
+++ /dev/null
@@ -1,387 +0,0 @@
-import os
-import re
-import sys
-import shutil
-import librosa
-import datetime
-import subprocess
-
-import numpy as np
-
-sys.path.append(os.getcwd())
-
-from main.app.core.ui import gr_info, gr_warning, gr_error, process_output
-from main.app.variables import logger, config, configs, translations, python
-
-def convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0_method, input_path, output_path, pth_path, index_path, f0_autotune, clean_audio, clean_strength, export_format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, f0_onnx, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold):
- if config.debug_mode: subprocess.run([python, configs["convert_path"], "--pitch", str(pitch), "--filter_radius", str(filter_radius), "--index_rate", str(index_rate), "--rms_mix_rate", str(rms_mix_rate), "--protect", str(protect), "--hop_length", str(hop_length), "--f0_method", f0_method, "--input_path", input_path, "--output_path", output_path, "--pth_path", pth_path, "--index_path", index_path, "--f0_autotune", str(f0_autotune), "--clean_audio", str(clean_audio), "--clean_strength", str(clean_strength), "--export_format", export_format, "--embedder_model", embedder_model, "--resample_sr", str(resample_sr), "--split_audio", str(split_audio), "--f0_autotune_strength", str(f0_autotune_strength), "--checkpointing", str(checkpointing), "--f0_onnx", str(f0_onnx), "--embedders_mode", embedders_mode, "--formant_shifting", str(formant_shifting), "--formant_qfrency", str(formant_qfrency), "--formant_timbre", str(formant_timbre), "--f0_file", f0_file, "--proposal_pitch", str(proposal_pitch), "--proposal_pitch_threshold", str(proposal_pitch_threshold)])
- else:
- from main.inference.conversion.convert import run_convert_script
-
- run_convert_script(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0_method, input_path, output_path, pth_path, index_path, f0_autotune, f0_autotune_strength, clean_audio, clean_strength, export_format, embedder_model, resample_sr, split_audio, checkpointing, f0_file, f0_onnx, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, proposal_pitch, proposal_pitch_threshold)
-
-def convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, input_audio_name, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold):
- model_path = os.path.join(configs["weights_path"], model) if not os.path.exists(model) else model
-
- return_none = [None]*6
- return_none[5] = {"visible": True, "__type__": "update"}
-
- if not use_audio:
- if merge_instrument or not_merge_backing or convert_backing or use_original:
- gr_warning(translations["turn_on_use_audio"])
- return return_none
-
- if use_original:
- if convert_backing:
- gr_warning(translations["turn_off_convert_backup"])
- return return_none
- elif not_merge_backing:
- gr_warning(translations["turn_off_merge_backup"])
- return return_none
-
- if not model or not os.path.exists(model_path) or os.path.isdir(model_path) or not model.endswith((".pth", ".onnx")):
- gr_warning(translations["provide_file"].format(filename=translations["model"]))
- return return_none
-
- f0method, embedder_model = (method if method != "hybrid" else hybrid_method), (embedders if embedders != "custom" else custom_embedders)
-
- if use_audio:
- output_audio = os.path.join(configs["audios_path"], input_audio_name)
-
- from main.library.utils import pydub_load
-
- def get_audio_file(label):
- matching_files = [f for f in os.listdir(output_audio) if label in f]
-
- if not matching_files: return translations["notfound"]
- return os.path.join(output_audio, matching_files[0])
-
- output_path = os.path.join(output_audio, f"Convert_Vocals.{format}")
- output_backing = os.path.join(output_audio, f"Convert_Backing.{format}")
- output_merge_backup = os.path.join(output_audio, f"Vocals+Backing.{format}")
- output_merge_instrument = os.path.join(output_audio, f"Vocals+Instruments.{format}")
-
-        if not os.path.exists(output_audio): os.makedirs(output_audio, exist_ok=True)
- output_path = process_output(output_path)
-
- if use_original:
- original_vocal = get_audio_file('Original_Vocals_No_Reverb.')
-
- if original_vocal == translations["notfound"]: original_vocal = get_audio_file('Original_Vocals.')
-
- if original_vocal == translations["notfound"]:
- gr_warning(translations["not_found_original_vocal"])
- return return_none
-
- input_path = original_vocal
- else:
- main_vocal = get_audio_file('Main_Vocals_No_Reverb.')
- backing_vocal = get_audio_file('Backing_Vocals_No_Reverb.')
-
- if main_vocal == translations["notfound"]: main_vocal = get_audio_file('Main_Vocals.')
- if not not_merge_backing and backing_vocal == translations["notfound"]: backing_vocal = get_audio_file('Backing_Vocals.')
-
- if main_vocal == translations["notfound"]:
- gr_warning(translations["not_found_main_vocal"])
- return return_none
-
- if not not_merge_backing and backing_vocal == translations["notfound"]:
- gr_warning(translations["not_found_backing_vocal"])
- return return_none
-
- input_path = main_vocal
- backing_path = backing_vocal
-
- gr_info(translations["convert_vocal"])
-
- convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input_path, output_path, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold)
-
- gr_info(translations["convert_success"])
-
- if convert_backing:
- output_backing = process_output(output_backing)
-
- gr_info(translations["convert_backup"])
-
- convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, backing_path, output_backing, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold)
-
- gr_info(translations["convert_backup_success"])
-
- try:
- if not not_merge_backing and not use_original:
- backing_source = output_backing if convert_backing else backing_vocal
-
- output_merge_backup = process_output(output_merge_backup)
-
- gr_info(translations["merge_backup"])
-
- pydub_load(output_path, volume=-4).overlay(pydub_load(backing_source, volume=-6)).export(output_merge_backup, format=format)
-
- gr_info(translations["merge_success"])
-
- if merge_instrument:
- vocals = output_merge_backup if not not_merge_backing and not use_original else output_path
-
- output_merge_instrument = process_output(output_merge_instrument)
-
- gr_info(translations["merge_instruments_process"])
-
- instruments = get_audio_file('Instruments.')
-
- if instruments == translations["notfound"]:
- gr_warning(translations["not_found_instruments"])
- output_merge_instrument = None
- else: pydub_load(instruments, volume=-7).overlay(pydub_load(vocals, volume=-4 if use_original else None)).export(output_merge_instrument, format=format)
-
- gr_info(translations["merge_success"])
- except:
- return return_none
-
- return [(None if use_original else output_path), output_backing, (None if not_merge_backing and use_original else output_merge_backup), (output_path if use_original else None), (output_merge_instrument if merge_instrument else None), {"visible": True, "__type__": "update"}]
- else:
-        if not input or not os.path.exists(input):
- gr_warning(translations["input_not_valid"])
- return return_none
-
- if not output:
- gr_warning(translations["output_not_valid"])
- return return_none
-
- output = output.replace("wav", format)
-
- if os.path.isdir(input):
- gr_info(translations["is_folder"])
-
- if not [f for f in os.listdir(input) if f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"))]:
- gr_warning(translations["not_found_in_folder"])
- return return_none
-
- gr_info(translations["batch_convert"])
-
- output_dir = os.path.dirname(output) or output
- convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output_dir, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold)
-
- gr_info(translations["batch_convert_success"])
-
- return return_none
- else:
- output_dir = os.path.dirname(output) or output
-
- if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
- output = process_output(output)
-
- gr_info(translations["convert_vocal"])
-
- convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold)
-
- gr_info(translations["convert_success"])
-
- return_none[0] = output
- return return_none
-
-def convert_selection(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold):
- if use_audio:
- gr_info(translations["search_separate"])
- choice = [f for f in os.listdir(configs["audios_path"]) if os.path.isdir(os.path.join(configs["audios_path"], f))] if config.debug_mode else [f for f in os.listdir(configs["audios_path"]) if os.path.isdir(os.path.join(configs["audios_path"], f)) and any(file.lower().endswith((".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")) for file in os.listdir(os.path.join(configs["audios_path"], f)))]
-
- gr_info(translations["found_choice"].format(choice=len(choice)))
-
- if len(choice) == 0:
- gr_warning(translations["separator==0"])
-
- return [{"choices": [], "value": "", "interactive": False, "visible": False, "__type__": "update"}, None, None, None, None, None, {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}]
- elif len(choice) == 1:
- convert_output = convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, None, None, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, choice[0], checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold)
-
- return [{"choices": [], "value": "", "interactive": False, "visible": False, "__type__": "update"}, convert_output[0], convert_output[1], convert_output[2], convert_output[3], convert_output[4], {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}]
- else: return [{"choices": choice, "value": choice[0], "interactive": True, "visible": True, "__type__": "update"}, None, None, None, None, None, {"visible": False, "__type__": "update"}, {"visible": True, "__type__": "update"}]
- else:
- main_convert = convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, None, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold)
-
- return [{"choices": [], "value": "", "interactive": False, "visible": False, "__type__": "update"}, main_convert[0], None, None, None, None, {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}]
-
-def convert_with_whisper(num_spk, model_size, cleaner, clean_strength, autotune, f0_autotune_strength, checkpointing, model_1, model_2, model_index_1, model_index_2, pitch_1, pitch_2, index_strength_1, index_strength_2, export_format, input_audio, output_audio, onnx_f0_mode, method, hybrid_method, hop_length, embed_mode, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, formant_shifting, formant_qfrency_1, formant_timbre_1, formant_qfrency_2, formant_timbre_2, proposal_pitch, proposal_pitch_threshold):
- from pydub import AudioSegment
- from sklearn.cluster import AgglomerativeClustering
-
- from main.library.speaker_diarization.audio import Audio
- from main.library.speaker_diarization.segment import Segment
- from main.library.speaker_diarization.whisper import load_model
- from main.library.utils import check_spk_diarization, pydub_load
- from main.library.speaker_diarization.embedding import SpeechBrainPretrainedSpeakerEmbedding
-
- check_spk_diarization(model_size)
- model_pth_1, model_pth_2 = os.path.join(configs["weights_path"], model_1) if not os.path.exists(model_1) else model_1, os.path.join(configs["weights_path"], model_2) if not os.path.exists(model_2) else model_2
-
- if (not model_1 or not os.path.exists(model_pth_1) or os.path.isdir(model_pth_1) or not model_pth_1.endswith((".pth", ".onnx"))) and (not model_2 or not os.path.exists(model_pth_2) or os.path.isdir(model_pth_2) or not model_pth_2.endswith((".pth", ".onnx"))):
- gr_warning(translations["provide_file"].format(filename=translations["model"]))
- return None
-
- if not model_1: model_pth_1 = model_pth_2
- if not model_2: model_pth_2 = model_pth_1
-
- if not input_audio or not os.path.exists(input_audio) or os.path.isdir(input_audio):
- gr_warning(translations["input_not_valid"])
- return None
-
- if not output_audio:
- gr_warning(translations["output_not_valid"])
- return None
-
- output_audio = process_output(output_audio)
- gr_info(translations["start_whisper"])
-
- try:
- audio = Audio()
-
- embedding_model = SpeechBrainPretrainedSpeakerEmbedding(embedding=os.path.join(configs["speaker_diarization_path"], "models", "speechbrain"), device=config.device)
- segments = load_model(model_size, device=config.device).transcribe(input_audio, fp16=configs.get("fp16", False), word_timestamps=True)["segments"]
-
- y, sr = librosa.load(input_audio, sr=None)
- duration = len(y) / sr
-
- def segment_embedding(segment):
- waveform, _ = audio.crop(input_audio, Segment(segment["start"], min(duration, segment["end"])))
- return embedding_model(waveform.mean(dim=0, keepdim=True)[None] if waveform.shape[0] == 2 else waveform[None])
-
- def time(secs):
- return datetime.timedelta(seconds=round(secs))
-
- def merge_audio(files_list, time_stamps, original_file_path, output_path, format):
- def extract_number(filename):
- match = re.search(r'_(\d+)', filename)
- return int(match.group(1)) if match else 0
-
- total_duration = len(pydub_load(original_file_path))
- combined = AudioSegment.empty()
- current_position = 0
-
- for file, (start_i, end_i) in zip(sorted(files_list, key=extract_number), time_stamps):
- if start_i > current_position: combined += AudioSegment.silent(duration=start_i - current_position)
-
- combined += pydub_load(file)
- current_position = end_i
-
- if current_position < total_duration: combined += AudioSegment.silent(duration=total_duration - current_position)
- combined.export(output_path, format=format)
-
- return output_path
-
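-        # Diarization: embed every Whisper segment into a 192-dimensional speaker vector, then cluster the vectors into num_spk speakers.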
- embeddings = np.zeros(shape=(len(segments), 192))
- for i, segment in enumerate(segments):
- embeddings[i] = segment_embedding(segment)
-
- labels = AgglomerativeClustering(num_spk).fit(np.nan_to_num(embeddings)).labels_
- for i in range(len(segments)):
- segments[i]["speaker"] = 'SPEAKER ' + str(labels[i] + 1)
-
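-        # Merge consecutive segments from the same speaker into single turns.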
- merged_segments, current_text = [], []
- current_speaker, current_start = None, None
-
- for i, segment in enumerate(segments):
- speaker = segment["speaker"]
- start_time = segment["start"]
- text = segment["text"][1:]
-
- if speaker == current_speaker:
- current_text.append(text)
- end_time = segment["end"]
- else:
- if current_speaker is not None: merged_segments.append({"speaker": current_speaker, "start": current_start, "end": end_time, "text": " ".join(current_text)})
-
- current_speaker = speaker
- current_start = start_time
- current_text = [text]
- end_time = segment["end"]
-
- if current_speaker is not None: merged_segments.append({"speaker": current_speaker, "start": current_start, "end": end_time, "text": " ".join(current_text)})
-
- gr_info(translations["whisper_done"])
-
- x = ""
- for segment in merged_segments:
- x += f"\n{segment['speaker']} {str(time(segment['start']))} - {str(time(segment['end']))}\n"
- x += segment["text"] + "\n"
-
- logger.info(x)
-
- gr_info(translations["process_audio"])
-
- audio = pydub_load(input_audio)
- output_folder = "audios_temp"
-
- if os.path.exists(output_folder): shutil.rmtree(output_folder, ignore_errors=True)
- for f in [output_folder, os.path.join(output_folder, "1"), os.path.join(output_folder, "2")]:
- os.makedirs(f, exist_ok=True)
-
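-        # Alternate the speaker turns between folders "1" and "2" so each of the two voice models converts every other turn.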
- time_stamps, processed_segments = [], []
- for i, segment in enumerate(merged_segments):
- start_ms = int(segment["start"] * 1000)
- end_ms = int(segment["end"] * 1000)
-
- index = i + 1
-
- segment_filename = os.path.join(output_folder, "1" if i % 2 == 1 else "2", f"segment_{index}.wav")
- audio[start_ms:end_ms].export(segment_filename, format="wav")
-
- processed_segments.append(os.path.join(output_folder, "1" if i % 2 == 1 else "2", f"segment_{index}_output.wav"))
- time_stamps.append((start_ms, end_ms))
-
- f0method, embedder_model = (method if method != "hybrid" else hybrid_method), (embedders if embedders != "custom" else custom_embedders)
-
- gr_info(translations["process_done_start_convert"])
-
- convert(pitch_1, filter_radius, index_strength_1, rms_mix_rate, protect, hop_length, f0method, os.path.join(output_folder, "1"), output_folder, model_pth_1, model_index_1, autotune, cleaner, clean_strength, "wav", embedder_model, resample_sr, False, f0_autotune_strength, checkpointing, onnx_f0_mode, embed_mode, formant_shifting, formant_qfrency_1, formant_timbre_1, "", proposal_pitch, proposal_pitch_threshold)
- convert(pitch_2, filter_radius, index_strength_2, rms_mix_rate, protect, hop_length, f0method, os.path.join(output_folder, "2"), output_folder, model_pth_2, model_index_2, autotune, cleaner, clean_strength, "wav", embedder_model, resample_sr, False, f0_autotune_strength, checkpointing, onnx_f0_mode, embed_mode, formant_shifting, formant_qfrency_2, formant_timbre_2, "", proposal_pitch, proposal_pitch_threshold)
-
- gr_info(translations["convert_success"])
- return merge_audio(processed_segments, time_stamps, input_audio, output_audio.replace("wav", export_format), export_format)
- except Exception as e:
- gr_error(translations["error_occurred"].format(e=e))
- import traceback
- logger.debug(traceback.format_exc())
- return None
- finally:
- if os.path.exists("audios_temp"): shutil.rmtree("audios_temp", ignore_errors=True)
-
-def convert_tts(clean, autotune, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold):
- model_path = os.path.join(configs["weights_path"], model) if not os.path.exists(model) else model
-
- if not model_path or not os.path.exists(model_path) or os.path.isdir(model_path) or not model.endswith((".pth", ".onnx")):
- gr_warning(translations["provide_file"].format(filename=translations["model"]))
- return None
-
- if not input or not os.path.exists(input):
- gr_warning(translations["input_not_valid"])
- return None
-
- if os.path.isdir(input):
- input_audio = [f for f in os.listdir(input) if "tts" in f and f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"))]
-
- if not input_audio:
- gr_warning(translations["not_found_in_folder"])
- return None
-
- input = os.path.join(input, input_audio[0])
-
- if not output:
- gr_warning(translations["output_not_valid"])
- return None
-
- output = output.replace("wav", format)
- if os.path.isdir(output): output = os.path.join(output, f"tts.{format}")
-
- output_dir = os.path.dirname(output)
- if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
-
- output = process_output(output)
-
- f0method = method if method != "hybrid" else hybrid_method
- embedder_model = embedders if embedders != "custom" else custom_embedders
-
- gr_info(translations["convert_vocal"])
-
- convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold)
-
- gr_info(translations["convert_success"])
- return output
\ No newline at end of file
diff --git a/main/app/core/model_utils.py b/main/app/core/model_utils.py
deleted file mode 100644
index f7d99f89a85b76551434931aa472bbba7cc14818..0000000000000000000000000000000000000000
--- a/main/app/core/model_utils.py
+++ /dev/null
@@ -1,162 +0,0 @@
-import os
-import sys
-import json
-import onnx
-import torch
-import datetime
-
-from collections import OrderedDict
-
-sys.path.append(os.getcwd())
-
-from main.app.core.ui import gr_info, gr_warning, gr_error
-from main.library.algorithm.onnx_export import onnx_exporter
-from main.app.variables import config, logger, translations, configs
-
-def fushion_model_pth(name, pth_1, pth_2, ratio):
- if not name.endswith(".pth"): name = name + ".pth"
-
- if not pth_1 or not os.path.exists(pth_1) or not pth_1.endswith(".pth"):
- gr_warning(translations["provide_file"].format(filename=translations["model"] + " 1"))
- return [translations["provide_file"].format(filename=translations["model"] + " 1"), None]
-
- if not pth_2 or not os.path.exists(pth_2) or not pth_2.endswith(".pth"):
- gr_warning(translations["provide_file"].format(filename=translations["model"] + " 2"))
- return [translations["provide_file"].format(filename=translations["model"] + " 2"), None]
-
- def extract(ckpt):
- a = ckpt["model"]
- opt = OrderedDict()
- opt["weight"] = {}
-
- for key in a.keys():
- if "enc_q" in key: continue
-
- opt["weight"][key] = a[key]
-
- return opt
-
- try:
- ckpt1 = torch.load(pth_1, map_location="cpu", weights_only=True)
- ckpt2 = torch.load(pth_2, map_location="cpu", weights_only=True)
-
- if ckpt1["sr"] != ckpt2["sr"]:
- gr_warning(translations["sr_not_same"])
- return [translations["sr_not_same"], None]
-
- cfg = ckpt1["config"]
- cfg_f0 = ckpt1["f0"]
- cfg_version = ckpt1["version"]
- cfg_sr = ckpt1["sr"]
-
- vocoder = ckpt1.get("vocoder", "Default")
- rms_extract = ckpt1.get("energy", False)
-
- ckpt1 = extract(ckpt1) if "model" in ckpt1 else ckpt1["weight"]
- ckpt2 = extract(ckpt2) if "model" in ckpt2 else ckpt2["weight"]
-
- if sorted(list(ckpt1.keys())) != sorted(list(ckpt2.keys())):
- gr_warning(translations["architectures_not_same"])
- return [translations["architectures_not_same"], None]
-
- gr_info(translations["start"].format(start=translations["fushion_model"]))
-
- opt = OrderedDict()
- opt["weight"] = {}
-
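-        # Blend the two checkpoints: every weight becomes ratio * model1 + (1 - ratio) * model2, truncating emb_g.weight to the smaller shape when the speaker counts differ.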
- for key in ckpt1.keys():
- if key == "emb_g.weight" and ckpt1[key].shape != ckpt2[key].shape:
- min_shape0 = min(ckpt1[key].shape[0], ckpt2[key].shape[0])
- opt["weight"][key] = (ratio * (ckpt1[key][:min_shape0].float()) + (1 - ratio) * (ckpt2[key][:min_shape0].float())).half()
- else: opt["weight"][key] = (ratio * (ckpt1[key].float()) + (1 - ratio) * (ckpt2[key].float())).half()
-
- opt["config"] = cfg
- opt["sr"] = cfg_sr
- opt["f0"] = cfg_f0
- opt["version"] = cfg_version
- opt["infos"] = translations["model_fushion_info"].format(name=name, pth_1=pth_1, pth_2=pth_2, ratio=ratio)
- opt["vocoder"] = vocoder
- opt["energy"] = rms_extract
-
- output_model = configs["weights_path"]
- if not os.path.exists(output_model): os.makedirs(output_model, exist_ok=True)
-
- torch.save(opt, os.path.join(output_model, name))
-
- gr_info(translations["success"])
- return [translations["success"], os.path.join(output_model, name)]
- except Exception as e:
- gr_error(message=translations["error_occurred"].format(e=e))
- return [e, None]
-
-def fushion_model(name, path_1, path_2, ratio):
- if not name:
- gr_warning(translations["provide_name_is_save"])
- return [translations["provide_name_is_save"], None]
-
- if path_1.endswith(".pth") and path_2.endswith(".pth"): return fushion_model_pth(name.replace(".onnx", ".pth"), path_1, path_2, ratio)
- else:
- gr_warning(translations["format_not_valid"])
- return [None, None]
-
-def onnx_export(model_path):
- if not model_path.endswith(".pth"): model_path + ".pth"
- if not model_path or not os.path.exists(model_path) or not model_path.endswith(".pth"): return gr_warning(translations["provide_file"].format(filename=translations["model"]))
-
- try:
- gr_info(translations["start_onnx_export"])
- output = onnx_exporter(model_path, model_path.replace(".pth", ".onnx"), is_half=config.is_half, device=config.device)
-
- gr_info(translations["success"])
- return output
- except Exception as e:
- return gr_error(e)
-
-def model_info(path):
- if not path or not os.path.exists(path) or os.path.isdir(path) or not path.endswith((".pth", ".onnx")): return gr_warning(translations["provide_file"].format(filename=translations["model"]))
-
- def prettify_date(date_str):
- if date_str == translations["not_found_create_time"]: return None
-
- try:
- return datetime.datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%f").strftime("%Y-%m-%d %H:%M:%S")
- except ValueError as e:
- logger.debug(e)
- return translations["format_not_valid"]
-
- if path.endswith(".pth"): model_data = torch.load(path, map_location=torch.device("cpu"))
- else:
- model = onnx.load(path)
- model_data = None
-
- for prop in model.metadata_props:
- if prop.key == "model_info":
- model_data = json.loads(prop.value)
- break
-
- gr_info(translations["read_info"])
-
- epochs = model_data.get("epoch", None)
- if epochs is None:
- epochs = model_data.get("info", None)
- try:
-            if not epochs.replace("epoch", "").replace("e", "").isdigit(): epochs = translations["not_found"].format(name=translations["epoch"])
- except:
- pass
-
- steps = model_data.get("step", translations["not_found"].format(name=translations["step"]))
- sr = model_data.get("sr", translations["not_found"].format(name=translations["sr"]))
- f0 = model_data.get("f0", translations["not_found"].format(name=translations["f0"]))
- version = model_data.get("version", translations["not_found"].format(name=translations["version"]))
- creation_date = model_data.get("creation_date", translations["not_found_create_time"])
- model_hash = model_data.get("model_hash", translations["not_found"].format(name="model_hash"))
- pitch_guidance = translations["trained_f0"] if f0 else translations["not_f0"]
- creation_date_str = prettify_date(creation_date) if creation_date else translations["not_found_create_time"]
- model_name = model_data.get("model_name", translations["unregistered"])
- model_author = model_data.get("author", translations["not_author"])
- vocoder = model_data.get("vocoder", "Default")
- rms_extract = model_data.get("energy", False)
-
- gr_info(translations["success"])
- return translations["model_info"].format(model_name=model_name, model_author=model_author, epochs=epochs, steps=steps, version=version, sr=sr, pitch_guidance=pitch_guidance, model_hash=model_hash, creation_date_str=creation_date_str, vocoder=vocoder, rms_extract=rms_extract)
\ No newline at end of file
diff --git a/main/app/core/presets.py b/main/app/core/presets.py
deleted file mode 100644
index a33f19510d4696a1b5f20203da8923979abff5de..0000000000000000000000000000000000000000
--- a/main/app/core/presets.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import os
-import sys
-import json
-
-sys.path.append(os.getcwd())
-
-from main.app.variables import translations, configs
-from main.app.core.ui import gr_info, gr_warning, change_preset_choices, change_effect_preset_choices
-
-def load_presets(presets, cleaner, autotune, pitch, clean_strength, index_strength, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, formant_shifting, formant_qfrency, formant_timbre):
- if not presets: gr_warning(translations["provide_file_settings"])
-
- file = {}
- if presets:
- with open(os.path.join(configs["presets_path"], presets)) as f:
- file = json.load(f)
-
- gr_info(translations["load_presets"].format(presets=presets))
- return [file.get("cleaner", cleaner), file.get("autotune", autotune), file.get("pitch", pitch), file.get("clean_strength", clean_strength), file.get("index_strength", index_strength), file.get("resample_sr", resample_sr), file.get("filter_radius", filter_radius), file.get("rms_mix_rate", rms_mix_rate), file.get("protect", protect), file.get("split_audio", split_audio), file.get("f0_autotune_strength", f0_autotune_strength), file.get("formant_shifting", formant_shifting), file.get("formant_qfrency", formant_qfrency), file.get("formant_timbre", formant_timbre)]
-
-def save_presets(name, cleaner, autotune, pitch, clean_strength, index_strength, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, cleaner_chbox, autotune_chbox, pitch_chbox, index_strength_chbox, resample_sr_chbox, filter_radius_chbox, rms_mix_rate_chbox, protect_chbox, split_audio_chbox, formant_shifting_chbox, formant_shifting, formant_qfrency, formant_timbre):
- if not name: return gr_warning(translations["provide_filename_settings"])
- if not any([cleaner_chbox, autotune_chbox, pitch_chbox, index_strength_chbox, resample_sr_chbox, filter_radius_chbox, rms_mix_rate_chbox, protect_chbox, split_audio_chbox, formant_shifting_chbox]): return gr_warning(translations["choose1"])
-
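-    # Only the parameter groups whose checkbox is ticked end up in the saved preset file.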
- settings = {}
-
- for checkbox, data in [(cleaner_chbox, {"cleaner": cleaner, "clean_strength": clean_strength}), (autotune_chbox, {"autotune": autotune, "f0_autotune_strength": f0_autotune_strength}), (pitch_chbox, {"pitch": pitch}), (index_strength_chbox, {"index_strength": index_strength}), (resample_sr_chbox, {"resample_sr": resample_sr}), (filter_radius_chbox, {"filter_radius": filter_radius}), (rms_mix_rate_chbox, {"rms_mix_rate": rms_mix_rate}), (protect_chbox, {"protect": protect}), (split_audio_chbox, {"split_audio": split_audio}), (formant_shifting_chbox, {"formant_shifting": formant_shifting, "formant_qfrency": formant_qfrency, "formant_timbre": formant_timbre})]:
- if checkbox: settings.update(data)
-
- with open(os.path.join(configs["presets_path"], name + ".conversion.json"), "w") as f:
- json.dump(settings, f, indent=4)
-
- gr_info(translations["export_settings"].format(name=name))
- return change_preset_choices()
-
-def audio_effect_load_presets(presets, resample_checkbox, audio_effect_resample_sr, chorus_depth, chorus_rate_hz, chorus_mix, chorus_centre_delay_ms, chorus_feedback, distortion_drive_db, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift_semitones, delay_second, delay_feedback, delay_mix, compressor_threshold_db, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold_db, limiter_release_ms, gain_db, bitcrush_bit_depth, clipping_threshold_db, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost, bass_frequency, treble_boost, treble_frequency, fade_in, fade_out, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade):
- if not presets: gr_warning(translations["provide_file_settings"])
-
- file = {}
- if presets:
- with open(os.path.join(configs["presets_path"], presets)) as f:
- file = json.load(f)
-
- gr_info(translations["load_presets"].format(presets=presets))
- return [
- file.get("resample_checkbox", resample_checkbox), file.get("audio_effect_resample_sr", audio_effect_resample_sr),
- file.get("chorus_depth", chorus_depth), file.get("chorus_rate_hz", chorus_rate_hz),
- file.get("chorus_mix", chorus_mix), file.get("chorus_centre_delay_ms", chorus_centre_delay_ms),
- file.get("chorus_feedback", chorus_feedback), file.get("distortion_drive_db", distortion_drive_db),
- file.get("reverb_room_size", reverb_room_size), file.get("reverb_damping", reverb_damping),
- file.get("reverb_wet_level", reverb_wet_level), file.get("reverb_dry_level", reverb_dry_level),
- file.get("reverb_width", reverb_width), file.get("reverb_freeze_mode", reverb_freeze_mode),
- file.get("pitch_shift_semitones", pitch_shift_semitones), file.get("delay_second", delay_second),
- file.get("delay_feedback", delay_feedback), file.get("delay_mix", delay_mix),
- file.get("compressor_threshold_db", compressor_threshold_db), file.get("compressor_ratio", compressor_ratio),
- file.get("compressor_attack_ms", compressor_attack_ms), file.get("compressor_release_ms", compressor_release_ms),
- file.get("limiter_threshold_db", limiter_threshold_db), file.get("limiter_release_ms", limiter_release_ms),
- file.get("gain_db", gain_db), file.get("bitcrush_bit_depth", bitcrush_bit_depth),
- file.get("clipping_threshold_db", clipping_threshold_db), file.get("phaser_rate_hz", phaser_rate_hz),
- file.get("phaser_depth", phaser_depth), file.get("phaser_centre_frequency_hz", phaser_centre_frequency_hz),
- file.get("phaser_feedback", phaser_feedback), file.get("phaser_mix", phaser_mix),
- file.get("bass_boost", bass_boost), file.get("bass_frequency", bass_frequency),
- file.get("treble_boost", treble_boost), file.get("treble_frequency", treble_frequency),
- file.get("fade_in", fade_in), file.get("fade_out", fade_out),
- file.get("chorus_check_box", chorus_check_box), file.get("distortion_checkbox", distortion_checkbox),
- file.get("reverb_check_box", reverb_check_box), file.get("delay_check_box", delay_check_box),
- file.get("compressor_check_box", compressor_check_box), file.get("limiter", limiter),
- file.get("gain_checkbox", gain_checkbox), file.get("bitcrush_checkbox", bitcrush_checkbox),
- file.get("clipping_checkbox", clipping_checkbox), file.get("phaser_check_box", phaser_check_box),
- file.get("bass_or_treble", bass_or_treble), file.get("fade", fade)
- ]
-
-def audio_effect_save_presets(name, resample_checkbox, audio_effect_resample_sr, chorus_depth, chorus_rate_hz, chorus_mix, chorus_centre_delay_ms, chorus_feedback, distortion_drive_db, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift_semitones, delay_second, delay_feedback, delay_mix, compressor_threshold_db, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold_db, limiter_release_ms, gain_db, bitcrush_bit_depth, clipping_threshold_db, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost, bass_frequency, treble_boost, treble_frequency, fade_in, fade_out, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade):
- if not name: return gr_warning(translations["provide_filename_settings"])
- if not any([resample_checkbox, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade, pitch_shift_semitones != 0]): return gr_warning(translations["choose1"])
-
- settings = {}
-
- for checkbox, data in [
- (resample_checkbox, {
- "resample_checkbox": resample_checkbox,
- "audio_effect_resample_sr": audio_effect_resample_sr
- }),
- (chorus_check_box, {
- "chorus_check_box": chorus_check_box,
- "chorus_depth": chorus_depth,
- "chorus_rate_hz": chorus_rate_hz,
- "chorus_mix": chorus_mix,
- "chorus_centre_delay_ms": chorus_centre_delay_ms,
- "chorus_feedback": chorus_feedback
- }),
- (distortion_checkbox, {
- "distortion_checkbox": distortion_checkbox,
- "distortion_drive_db": distortion_drive_db
- }),
- (reverb_check_box, {
- "reverb_check_box": reverb_check_box,
- "reverb_room_size": reverb_room_size,
- "reverb_damping": reverb_damping,
- "reverb_wet_level": reverb_wet_level,
- "reverb_dry_level": reverb_dry_level,
- "reverb_width": reverb_width,
- "reverb_freeze_mode": reverb_freeze_mode
- }),
- (pitch_shift_semitones != 0, {
- "pitch_shift_semitones": pitch_shift_semitones
- }),
- (delay_check_box, {
- "delay_check_box": delay_check_box,
- "delay_second": delay_second,
- "delay_feedback": delay_feedback,
- "delay_mix": delay_mix
- }),
- (compressor_check_box, {
- "compressor_check_box": compressor_check_box,
- "compressor_threshold_db": compressor_threshold_db,
- "compressor_ratio": compressor_ratio,
- "compressor_attack_ms": compressor_attack_ms,
- "compressor_release_ms": compressor_release_ms
- }),
- (limiter, {
- "limiter": limiter,
- "limiter_threshold_db": limiter_threshold_db,
- "limiter_release_ms": limiter_release_ms
- }),
- (gain_checkbox, {
- "gain_checkbox": gain_checkbox,
- "gain_db": gain_db
- }),
- (bitcrush_checkbox, {
- "bitcrush_checkbox": bitcrush_checkbox,
- "bitcrush_bit_depth": bitcrush_bit_depth
- }),
- (clipping_checkbox, {
- "clipping_checkbox": clipping_checkbox,
- "clipping_threshold_db": clipping_threshold_db
- }),
- (phaser_check_box, {
- "phaser_check_box": phaser_check_box,
- "phaser_rate_hz": phaser_rate_hz,
- "phaser_depth": phaser_depth,
- "phaser_centre_frequency_hz": phaser_centre_frequency_hz,
- "phaser_feedback": phaser_feedback,
- "phaser_mix": phaser_mix
- }),
- (bass_or_treble, {
- "bass_or_treble": bass_or_treble,
- "bass_boost": bass_boost,
- "bass_frequency": bass_frequency,
- "treble_boost": treble_boost,
- "treble_frequency": treble_frequency
- }),
- (fade, {
- "fade": fade,
- "fade_in": fade_in,
- "fade_out": fade_out
- })
- ]:
- if checkbox: settings.update(data)
-
- with open(os.path.join(configs["presets_path"], name + ".effect.json"), "w") as f:
- json.dump(settings, f, indent=4)
-
- gr_info(translations["export_settings"].format(name=name))
- return change_effect_preset_choices()
\ No newline at end of file
diff --git a/main/app/core/process.py b/main/app/core/process.py
deleted file mode 100644
index 5b9f83b296de680c6da45be9c020e2aa68ddc070..0000000000000000000000000000000000000000
--- a/main/app/core/process.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import os
-import re
-import sys
-import shutil
-import codecs
-import zipfile
-import requests
-import xml.etree.ElementTree
-
-sys.path.append(os.getcwd())
-
-from main.app.variables import logger, translations, configs
-from main.app.core.ui import gr_info, gr_warning, gr_error, process_output
-
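-# Pull plain text out of a .docx by opening it as a zip, reading word/document.xml and joining the <w:t> text runs of each paragraph.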
-def read_docx_text(path):
- with zipfile.ZipFile(path) as docx:
- with docx.open("word/document.xml") as document_xml:
- xml_content = document_xml.read()
-
- WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
-
- paragraphs = []
- for paragraph in xml.etree.ElementTree.XML(xml_content).iter(WORD_NAMESPACE + 'p'):
- texts = [node.text for node in paragraph.iter(WORD_NAMESPACE + 't') if node.text]
- if texts: paragraphs.append(''.join(texts))
-
- return '\n'.join(paragraphs)
-
-def process_input(file_path):
- if file_path.endswith(".srt"): file_contents = ""
- elif file_path.endswith(".docx"): file_contents = read_docx_text(file_path)
- else:
- try:
- with open(file_path, "r", encoding="utf-8") as file:
- file_contents = file.read()
- except Exception as e:
- gr_warning(translations["read_error"])
- logger.debug(e)
- file_contents = ""
-
- gr_info(translations["upload_success"].format(name=translations["text"]))
- return file_contents
-
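-# Sort unpacked model artifacts into place: .index files go to logs/<model_name>/ (with unsafe characters stripped from the name),
-# while .pth/.onnx weights (excluding G_/D_ training checkpoints) are renamed to <model_name> and moved to the weights folder.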
-def move_files_from_directory(src_dir, dest_weights, dest_logs, model_name):
- for root, _, files in os.walk(src_dir):
- for file in files:
- file_path = os.path.join(root, file)
- if file.endswith(".index"):
- model_log_dir = os.path.join(dest_logs, model_name)
- os.makedirs(model_log_dir, exist_ok=True)
-
- filepath = process_output(os.path.join(model_log_dir, file.replace(' ', '_').replace('(', '').replace(')', '').replace('[', '').replace(']', '').replace(",", "").replace('"', "").replace("'", "").replace("|", "").replace("{", "").replace("}", "").strip()))
-
- shutil.move(file_path, filepath)
- elif file.endswith(".pth") and not file.startswith("D_") and not file.startswith("G_"):
- pth_path = process_output(os.path.join(dest_weights, model_name + ".pth"))
-
- shutil.move(file_path, pth_path)
- elif file.endswith(".onnx") and not file.startswith("D_") and not file.startswith("G_"):
- pth_path = process_output(os.path.join(dest_weights, model_name + ".onnx"))
-
- shutil.move(file_path, pth_path)
-
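-# Try to recover the model name embedded in an index filename; e.g. a file named like "..._MyModel_v2.index" would yield "MyModel" (hypothetical example).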
-def extract_name_model(filename):
- match = re.search(r"_([A-Za-z0-9]+)(?=_v\d*)", filename.replace('-', '').replace('(', '').replace(')', '').replace('[', '').replace(']', '').replace(",", "").replace('"', "").replace("'", "").replace("|", "").replace("{", "").replace("}", "").strip())
- return match.group(1) if match else None
-
-def save_drop_model(dropbox):
- weight_folder = configs["weights_path"]
- logs_folder = configs["logs_path"]
- save_model_temp = "save_model_temp"
-
- if not os.path.exists(weight_folder): os.makedirs(weight_folder, exist_ok=True)
- if not os.path.exists(logs_folder): os.makedirs(logs_folder, exist_ok=True)
- if not os.path.exists(save_model_temp): os.makedirs(save_model_temp, exist_ok=True)
-
- shutil.move(dropbox, save_model_temp)
-
- try:
- file_name = os.path.basename(dropbox)
-
- if file_name.endswith(".zip"):
- shutil.unpack_archive(os.path.join(save_model_temp, file_name), save_model_temp)
- move_files_from_directory(save_model_temp, weight_folder, logs_folder, file_name.replace(".zip", ""))
- elif file_name.endswith((".pth", ".onnx")):
- output_file = process_output(os.path.join(weight_folder, file_name))
-
- shutil.move(os.path.join(save_model_temp, file_name), output_file)
- elif file_name.endswith(".index"):
- modelname = extract_name_model(file_name)
- if modelname is None: modelname = os.path.splitext(os.path.basename(file_name))[0]
-
- model_logs = os.path.join(logs_folder, modelname)
- if not os.path.exists(model_logs): os.makedirs(model_logs, exist_ok=True)
-
- shutil.move(os.path.join(save_model_temp, file_name), model_logs)
- else:
- gr_warning(translations["unable_analyze_model"])
- return None
-
- gr_info(translations["upload_success"].format(name=translations["model"]))
- return None
- except Exception as e:
- gr_error(message=translations["error_occurred"].format(e=e))
- return None
- finally:
- shutil.rmtree(save_model_temp, ignore_errors=True)
-
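-# Bundle the selected .pth/.onnx weights (and optionally an .index file) into logs/<name>/<name>.zip and return an update that reveals the archive for download.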
-def zip_file(name, pth, index):
- pth_path = os.path.join(configs["weights_path"], pth)
- if not pth or not os.path.exists(pth_path) or not pth.endswith((".pth", ".onnx")): return gr_warning(translations["provide_file"].format(filename=translations["model"]))
-
- zip_file_path = os.path.join(configs["logs_path"], name, name + ".zip")
- gr_info(translations["start"].format(start=translations["zip"]))
-
- with zipfile.ZipFile(zip_file_path, 'w') as zipf:
- zipf.write(pth_path, os.path.basename(pth_path))
- if index: zipf.write(index, os.path.basename(index))
-
- gr_info(translations["success"])
- return {"visible": True, "value": zip_file_path, "__type__": "update"}
-
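-# Fetch the project's pretrained-model listing (custom_pretrained.json) from Hugging Face; the URL is stored rot13-encoded and an empty dict is returned on any error.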
-def fetch_pretrained_data():
- try:
- response = requests.get(codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/wfba/phfgbz_cergenvarq.wfba", "rot13"))
- response.raise_for_status()
-
- return response.json()
- except:
- return {}
-
-def update_sample_rate_dropdown(model):
- data = fetch_pretrained_data()
- if model != translations["success"] and model in data: return {"choices": list(data[model].keys()), "value": list(data[model].keys())[0], "__type__": "update"}
\ No newline at end of file
diff --git a/main/app/core/restart.py b/main/app/core/restart.py
deleted file mode 100644
index 0f054ff3d1e64b755934836a2141732d4af67230..0000000000000000000000000000000000000000
--- a/main/app/core/restart.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import os
-import sys
-import json
-import platform
-import subprocess
-
-sys.path.append(os.getcwd())
-
-from main.app.core.ui import gr_info
-from main.app.variables import python, translations, configs_json
-
-def restart_app(app):
- gr_info(translations["30s"])
- os.system("cls" if platform.system() == "Windows" else "clear")
-
- app.close()
- subprocess.run([python, os.path.join("main", "app", "app.py")] + sys.argv[1:])
-
-def change_language(lang, app):
- configs = json.load(open(configs_json, "r"))
-
- if lang != configs["language"]:
- configs["language"] = lang
-
- with open(configs_json, "w") as f:
- json.dump(configs, f, indent=4)
-
- restart_app(app)
-
-def change_theme(theme, app):
- configs = json.load(open(configs_json, "r"))
-
- if theme != configs["theme"]:
- configs["theme"] = theme
- with open(configs_json, "w") as f:
- json.dump(configs, f, indent=4)
-
- restart_app(app)
-
-def change_font(font, app):
- configs = json.load(open(configs_json, "r"))
-
- if font != configs["font"]:
- configs["font"] = font
- with open(configs_json, "w") as f:
- json.dump(configs, f, indent=4)
-
- restart_app(app)
\ No newline at end of file
diff --git a/main/app/core/separate.py b/main/app/core/separate.py
deleted file mode 100644
index 88e443c2d37dfffb8a1aa50feb855e5d21786fb2..0000000000000000000000000000000000000000
--- a/main/app/core/separate.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import os
-import sys
-import subprocess
-
-sys.path.append(os.getcwd())
-
-from main.app.core.ui import gr_info, gr_warning
-from main.app.variables import python, translations, configs, config
-
-def separator_music(input, output_audio, format, shifts, segments_size, overlap, clean_audio, clean_strength, denoise, separator_model, kara_model, backing, reverb, backing_reverb, hop_length, batch_size, sample_rate):
- output = os.path.dirname(output_audio) or output_audio
-
- if not input or not os.path.exists(input) or os.path.isdir(input):
- gr_warning(translations["input_not_valid"])
- return [None]*4
-
- if not os.path.exists(output):
- gr_warning(translations["output_not_valid"])
- return [None]*4
-
- gr_info(translations["start"].format(start=translations["separator_music"]))
-
- if config.debug_mode: subprocess.run([python, configs["separate_path"], "--input_path", input, "--output_path", output, "--format", format, "--shifts", str(shifts), "--segments_size", str(segments_size), "--overlap", str(overlap), "--mdx_hop_length", str(hop_length), "--mdx_batch_size", str(batch_size), "--clean_audio", str(clean_audio), "--clean_strength", str(clean_strength), "--kara_model", kara_model, "--backing", str(backing), "--mdx_denoise", str(denoise), "--reverb", str(reverb), "--backing_reverb", str(backing_reverb), "--model_name", separator_model, "--sample_rate", str(sample_rate)])
- else:
- from main.inference.separator_music import separate
-
- separate(input, output, format, shifts, segments_size, overlap, hop_length, batch_size, clean_audio, clean_strength, separator_model, kara_model, backing, denoise, reverb, backing_reverb, sample_rate)
-
- gr_info(translations["success"])
-
- filename, _ = os.path.splitext(os.path.basename(input))
- output = os.path.join(output, filename)
-
- main_vocals = os.path.join(output, f"Main_Vocals_No_Reverb.{format}") if reverb else os.path.join(output, f"Main_Vocals.{format}") if backing else None
- backing_vocals = os.path.join(output, f"Backing_Vocals_No_Reverb.{format}") if backing_reverb else os.path.join(output, f"Backing_Vocals.{format}") if backing else None
- return [os.path.join(output, f"Original_Vocals_No_Reverb.{format}") if reverb else os.path.join(output, f"Original_Vocals.{format}"), os.path.join(output, f"Instruments.{format}"), main_vocals, backing_vocals] if os.path.isfile(input) else [None]*4
\ No newline at end of file
diff --git a/main/app/core/training.py b/main/app/core/training.py
deleted file mode 100644
index 91c33dcd2d958cf289bad33d76a0305f12550b85..0000000000000000000000000000000000000000
--- a/main/app/core/training.py
+++ /dev/null
@@ -1,219 +0,0 @@
-import os
-import sys
-import time
-import shutil
-import codecs
-import threading
-import subprocess
-
-sys.path.append(os.getcwd())
-
-from main.tools import huggingface
-from main.app.core.ui import gr_info, gr_warning
-from main.app.variables import python, translations, configs
-
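-# Background watcher: blocks until the subprocess exits, then flips the shared "done" flag so log_read stops streaming.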
-def if_done(done, p):
- p.wait()
- done[0] = True
-
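-# Tail logs/app.log while a job runs: truncate it first, then repeatedly yield the non-DEBUG lines that mention the job name until the done flag is set, finishing with one final full read.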
-def log_read(done, name):
- log_file = os.path.join(configs["logs_path"], "app.log")
-
- f = open(log_file, "w", encoding="utf-8")
- f.close()
-
- while 1:
- with open(log_file, "r", encoding="utf-8") as f:
- yield "".join(line for line in f.readlines() if "DEBUG" not in line and name in line and line.strip() != "")
-
- time.sleep(1)
- if done[0]: break
-
- with open(log_file, "r", encoding="utf-8") as f:
- log = "".join(line for line in f.readlines() if "DEBUG" not in line and line.strip() != "")
-
- yield log
-
-def create_dataset(input_audio, output_dataset, clean_dataset, clean_strength, separator_reverb, kim_vocals_version, overlap, segments_size, denoise_mdx, skip, skip_start, skip_end, hop_length, batch_size, sample_rate):
- version = 1 if kim_vocals_version == "Version-1" else 2
- gr_info(translations["start"].format(start=translations["create"]))
-
- p = subprocess.Popen(f'{python} {configs["create_dataset_path"]} --input_audio "{input_audio}" --output_dataset "{output_dataset}" --clean_dataset {clean_dataset} --clean_strength {clean_strength} --separator_reverb {separator_reverb} --kim_vocal_version {version} --overlap {overlap} --segments_size {segments_size} --mdx_hop_length {hop_length} --mdx_batch_size {batch_size} --denoise_mdx {denoise_mdx} --skip {skip} --skip_start_audios "{skip_start}" --skip_end_audios "{skip_end}" --sample_rate {sample_rate}', shell=True)
- done = [False]
-
- threading.Thread(target=if_done, args=(done, p)).start()
-
- for log in log_read(done, "create_dataset"):
- yield log
-
-def preprocess(model_name, sample_rate, cpu_core, cut_preprocess, process_effects, dataset, clean_dataset, clean_strength):
- sr = int(float(sample_rate.rstrip("k")) * 1000)
-
- if not model_name: return gr_warning(translations["provide_name"])
- if not os.path.exists(dataset) or not any(f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3")) for f in os.listdir(dataset) if os.path.isfile(os.path.join(dataset, f))): return gr_warning(translations["not_found_data"])
-
- model_dir = os.path.join(configs["logs_path"], model_name)
- if os.path.exists(model_dir): shutil.rmtree(model_dir, ignore_errors=True)
-
- p = subprocess.Popen(f'{python} {configs["preprocess_path"]} --model_name "{model_name}" --dataset_path "{dataset}" --sample_rate {sr} --cpu_cores {cpu_core} --cut_preprocess {cut_preprocess} --process_effects {process_effects} --clean_dataset {clean_dataset} --clean_strength {clean_strength}', shell=True)
- done = [False]
-
- threading.Thread(target=if_done, args=(done, p)).start()
- os.makedirs(model_dir, exist_ok=True)
-
- for log in log_read(done, "preprocess"):
- yield log
-
-def extract(model_name, version, method, pitch_guidance, hop_length, cpu_cores, gpu, sample_rate, embedders, custom_embedders, onnx_f0_mode, embedders_mode, f0_autotune, f0_autotune_strength, hybrid_method, rms_extract):
- f0method, embedder_model = (method if method != "hybrid" else hybrid_method), (embedders if embedders != "custom" else custom_embedders)
- sr = int(float(sample_rate.rstrip("k")) * 1000)
-
- if not model_name: return gr_warning(translations["provide_name"])
- model_dir = os.path.join(configs["logs_path"], model_name)
-
- try:
- if not any(os.path.isfile(os.path.join(model_dir, "sliced_audios", f)) for f in os.listdir(os.path.join(model_dir, "sliced_audios"))) or not any(os.path.isfile(os.path.join(model_dir, "sliced_audios_16k", f)) for f in os.listdir(os.path.join(model_dir, "sliced_audios_16k"))): return gr_warning(translations["not_found_data_preprocess"])
- except:
- return gr_warning(translations["not_found_data_preprocess"])
-
- p = subprocess.Popen(f'{python} {configs["extract_path"]} --model_name "{model_name}" --rvc_version {version} --f0_method {f0method} --pitch_guidance {pitch_guidance} --hop_length {hop_length} --cpu_cores {cpu_cores} --gpu {gpu} --sample_rate {sr} --embedder_model {embedder_model} --f0_onnx {onnx_f0_mode} --embedders_mode {embedders_mode} --f0_autotune {f0_autotune} --f0_autotune_strength {f0_autotune_strength} --rms_extract {rms_extract}', shell=True)
- done = [False]
-
- threading.Thread(target=if_done, args=(done, p)).start()
- os.makedirs(model_dir, exist_ok=True)
-
- for log in log_read(done, "extract"):
- yield log
-
-def create_index(model_name, rvc_version, index_algorithm):
- if not model_name: return gr_warning(translations["provide_name"])
- model_dir = os.path.join(configs["logs_path"], model_name)
-
- try:
- if not any(os.path.isfile(os.path.join(model_dir, f"{rvc_version}_extracted", f)) for f in os.listdir(os.path.join(model_dir, f"{rvc_version}_extracted"))): return gr_warning(translations["not_found_data_extract"])
- except:
- return gr_warning(translations["not_found_data_extract"])
-
- p = subprocess.Popen(f'{python} {configs["create_index_path"]} --model_name "{model_name}" --rvc_version {rvc_version} --index_algorithm {index_algorithm}', shell=True)
- done = [False]
-
- threading.Thread(target=if_done, args=(done, p)).start()
- os.makedirs(model_dir, exist_ok=True)
-
- for log in log_read(done, "create_index"):
- yield log
-
-def training(model_name, rvc_version, save_every_epoch, save_only_latest, save_every_weights, total_epoch, sample_rate, batch_size, gpu, pitch_guidance, not_pretrain, custom_pretrained, pretrain_g, pretrain_d, detector, threshold, clean_up, cache, model_author, vocoder, checkpointing, deterministic, benchmark, optimizer, energy_use):
- sr = int(float(sample_rate.rstrip("k")) * 1000)
- if not model_name: return gr_warning(translations["provide_name"])
-
- model_dir = os.path.join(configs["logs_path"], model_name)
- if os.path.exists(os.path.join(model_dir, "train_pid.txt")): os.remove(os.path.join(model_dir, "train_pid.txt"))
-
- try:
- if not any(os.path.isfile(os.path.join(model_dir, f"{rvc_version}_extracted", f)) for f in os.listdir(os.path.join(model_dir, f"{rvc_version}_extracted"))): return gr_warning(translations["not_found_data_extract"])
- except:
- return gr_warning(translations["not_found_data_extract"])
-
- if not not_pretrain:
- if not custom_pretrained:
- pretrain_dir = configs["pretrained_v2_path"] if rvc_version == 'v2' else configs["pretrained_v1_path"]
- download_version = codecs.decode(f"uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/cergenvarq_i{'2' if rvc_version == 'v2' else '1'}/", "rot13")
-
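- # Map (pitch guidance, sample rate) -> (generator, discriminator) pretrained filenames; the f0* variants are used when pitch guidance is enabled.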
- pretrained_selector = {
- True: {
- 32000: ("f0G32k.pth", "f0D32k.pth"),
- 40000: ("f0G40k.pth", "f0D40k.pth"),
- 48000: ("f0G48k.pth", "f0D48k.pth")
- },
- False: {
- 32000: ("G32k.pth", "D32k.pth"),
- 40000: ("G40k.pth", "D40k.pth"),
- 48000: ("G48k.pth", "D48k.pth")
- }
- }
-
- pg2, pd2 = "", ""
- pg, pd = pretrained_selector[pitch_guidance][sr]
-
- if energy_use: pg2, pd2 = pg2 + "ENERGY_", pd2 + "ENERGY_"
- if vocoder != 'Default': pg2, pd2 = pg2 + vocoder + "_", pd2 + vocoder + "_"
-
- pg2, pd2 = pg2 + pg, pd2 + pd
- pretrained_G, pretrained_D = os.path.join(pretrain_dir, pg2), os.path.join(pretrain_dir, pd2)
-
- try:
- if not os.path.exists(pretrained_G):
- gr_info(translations["download_pretrained"].format(dg="G", rvc_version=rvc_version))
- huggingface.HF_download_file(download_version + pg2, pretrained_G)
-
- if not os.path.exists(pretrained_D):
- gr_info(translations["download_pretrained"].format(dg="D", rvc_version=rvc_version))
- huggingface.HF_download_file(download_version + pd2, pretrained_D)
- except:
- gr_warning(translations["not_use_pretrain_error_download"])
- pretrained_G = pretrained_D = None
- else:
- if not pretrain_g: return gr_warning(translations["provide_pretrained"].format(dg="G"))
- if not pretrain_d: return gr_warning(translations["provide_pretrained"].format(dg="D"))
-
- pg2, pd2 = pretrain_g, pretrain_d
- pretrained_G, pretrained_D = (
- (os.path.join(configs["pretrained_custom_path"], pg2) if not os.path.exists(pg2) else pg2),
- (os.path.join(configs["pretrained_custom_path"], pd2) if not os.path.exists(pd2) else pd2)
- )
-
- if not os.path.exists(pretrained_G): return gr_warning(translations["not_found_pretrain"].format(dg="G"))
- if not os.path.exists(pretrained_D): return gr_warning(translations["not_found_pretrain"].format(dg="D"))
- else:
- pretrained_G = pretrained_D = None
- gr_warning(translations["not_use_pretrain"])
-
- gr_info(translations["start"].format(start=translations["training"]))
-
- p = subprocess.Popen(f'{python} {configs["train_path"]} --model_name "{model_name}" --rvc_version {rvc_version} --save_every_epoch {save_every_epoch} --save_only_latest {save_only_latest} --save_every_weights {save_every_weights} --total_epoch {total_epoch} --sample_rate {sr} --batch_size {batch_size} --gpu {gpu} --pitch_guidance {pitch_guidance} --overtraining_detector {detector} --overtraining_threshold {threshold} --cleanup {clean_up} --cache_data_in_gpu {cache} --g_pretrained_path "{pretrained_G}" --d_pretrained_path "{pretrained_D}" --model_author "{model_author}" --vocoder "{vocoder}" --checkpointing {checkpointing} --deterministic {deterministic} --benchmark {benchmark} --optimizer {optimizer} --energy_use {energy_use}', shell=True)
- done = [False]
-
- with open(os.path.join(model_dir, "train_pid.txt"), "w") as pid_file:
- pid_file.write(str(p.pid))
-
- threading.Thread(target=if_done, args=(done, p)).start()
-
- for log in log_read(done, "train"):
- lines = log.splitlines()
- if len(lines) > 100: log = "\n".join(lines[-100:])
- yield log
\ No newline at end of file
diff --git a/main/app/core/tts.py b/main/app/core/tts.py
deleted file mode 100644
index ee920223abd452b078c4513f55524fbc65f9bc58..0000000000000000000000000000000000000000
--- a/main/app/core/tts.py
+++ /dev/null
@@ -1,99 +0,0 @@
-import os
-import sys
-import pysrt
-import codecs
-import librosa
-import asyncio
-import requests
-import tempfile
-
-import numpy as np
-import soundfile as sf
-
-from edge_tts import Communicate
-
-sys.path.append(os.getcwd())
-
-from main.app.variables import translations
-from main.app.core.ui import gr_info, gr_warning, gr_error
-
-def synthesize_tts(prompt, voice, speed, output, pitch, google):
- if not google: asyncio.run(Communicate(text=prompt, voice=voice, rate=f"+{speed}%" if speed >= 0 else f"{speed}%", pitch=f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz").save(output))
- else:
- response = requests.get(codecs.decode("uggcf://genafyngr.tbbtyr.pbz/genafyngr_ggf", "rot13"), params={"ie": "UTF-8", "q": prompt, "tl": voice, "ttsspeed": speed, "client": "tw-ob"}, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"})
-
- if response.status_code == 200:
- with open(output, "wb") as f:
- f.write(response.content)
-
- if pitch != 0 or speed != 0:
- y, sr = librosa.load(output, sr=None)
-
- if pitch != 0: y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch)
- if speed != 0: y = librosa.effects.time_stretch(y, rate=speed)
-
- sf.write(file=output, data=y, samplerate=sr, format=os.path.splitext(os.path.basename(output))[-1].lower().replace('.', ''))
- else: gr_error(f"{response.status_code}, {response.text}")
-
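-# Fit audio to an exact duration: time-stretch by the length ratio, then pad with silence or trim so the result lasts exactly target_duration seconds.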
-def time_stretch(y, sr, target_duration):
- rate = (len(y) / sr) / target_duration
- if rate != 1.0: y = librosa.effects.time_stretch(y=y.astype(np.float32), rate=rate)
-
- n_target = int(round(target_duration * sr))
- return np.pad(y, (0, n_target - len(y))) if len(y) < n_target else y[:n_target]
-
-def pysrttime_to_seconds(t):
- return (t.hours * 60 + t.minutes) * 60 + t.seconds + t.milliseconds / 1000
-
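-# Subtitle-driven TTS: synthesize each .srt cue separately, resample it to the target rate, stretch it to the cue's duration and mix it into a silent buffer at the cue's start time.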
-def srt_tts(srt_file, out_file, voice, rate = 0, sr = 24000, google = False):
- subs = pysrt.open(srt_file)
- if not subs: raise ValueError(translations["srt"])
-
- final_audio = np.zeros(int(round(pysrttime_to_seconds(subs[-1].end) * sr)), dtype=np.float32)
-
- with tempfile.TemporaryDirectory() as tempdir:
- for idx, seg in enumerate(subs):
- wav_path = os.path.join(tempdir, f"seg_{idx}.wav")
- synthesize_tts(" ".join(seg.text.splitlines()), voice, 0, wav_path, rate, google)
-
- audio, file_sr = sf.read(wav_path, dtype=np.float32)
- if file_sr != sr: audio = np.interp(np.linspace(0, len(audio) - 1, int(len(audio) * sr / file_sr)), np.arange(len(audio)), audio)
- adjusted = time_stretch(audio, sr, pysrttime_to_seconds(seg.duration))
-
- start_sample = int(round(pysrttime_to_seconds(seg.start) * sr))
- end_sample = start_sample + adjusted.shape[0]
-
- if end_sample > final_audio.shape[0]:
- adjusted = adjusted[: final_audio.shape[0] - start_sample]
- end_sample = final_audio.shape[0]
-
- final_audio[start_sample:end_sample] += adjusted
-
- sf.write(out_file, final_audio, sr)
-
-def TTS(prompt, voice, speed, output, pitch, google, srt_input):
- if not srt_input: srt_input = ""
-
- if not prompt and not srt_input.endswith(".srt"):
- gr_warning(translations["enter_the_text"])
- return None
-
- if not voice:
- gr_warning(translations["choose_voice"])
- return None
-
- if not output:
- gr_warning(translations["output_not_valid"])
- return None
-
- if os.path.isdir(output): output = os.path.join(output, "tts.wav")
- gr_info(translations["convert"].format(name=translations["text"]))
-
- output_dir = os.path.dirname(output) or output
- if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
-
- if srt_input.endswith(".srt"): srt_tts(srt_input, output, voice, 0, 24000, google)
- else: synthesize_tts(prompt, voice, speed, output, pitch, google)
-
- gr_info(translations["success"])
- return output
\ No newline at end of file
diff --git a/main/app/core/ui.py b/main/app/core/ui.py
deleted file mode 100644
index 787d66f74f7ebf7fa0a1f2d3b6a4bcf6557f22a4..0000000000000000000000000000000000000000
--- a/main/app/core/ui.py
+++ /dev/null
@@ -1,179 +0,0 @@
-import os
-import sys
-import json
-import torch
-import shutil
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.library import opencl
-from main.app.variables import config, configs, configs_json, logger, translations, edgetts, google_tts_voice, method_f0, method_f0_full
-
-def gr_info(message):
- gr.Info(message, duration=2)
- logger.info(message)
-
-def gr_warning(message):
- gr.Warning(message, duration=2)
- logger.warning(message)
-
-def gr_error(message):
- gr.Error(message=message, duration=6)
- logger.error(message)
-
-def get_gpu_info():
- ngpu = torch.cuda.device_count()
- gpu_infos = [f"{i}: {torch.cuda.get_device_name(i)} ({int(torch.cuda.get_device_properties(i).total_memory / 1024 / 1024 / 1024 + 0.4)} GB)" for i in range(ngpu) if torch.cuda.is_available() or ngpu != 0]
-
- if len(gpu_infos) == 0:
- ngpu = opencl.device_count()
- gpu_infos = [f"{i}: {opencl.device_name(i)}" for i in range(ngpu) if opencl.is_available() or ngpu != 0]
-
- return "\n".join(gpu_infos) if len(gpu_infos) > 0 else translations["no_support_gpu"]
-
-def gpu_number_str():
- ngpu = torch.cuda.device_count()
- if ngpu == 0: ngpu = opencl.device_count()
-
- return str("-".join(map(str, range(ngpu))) if torch.cuda.is_available() or opencl.is_available() else "-")
-
-def change_f0_choices():
- f0_file = sorted([os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["f0_path"]) for f in files if f.endswith(".txt")])
- return {"value": f0_file[0] if len(f0_file) >= 1 else "", "choices": f0_file, "__type__": "update"}
-
-def change_audios_choices(input_audio):
- audios = sorted([os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["audios_path"]) for f in files if os.path.splitext(f)[1].lower() in (".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")])
- return {"value": input_audio if input_audio != "" else (audios[0] if len(audios) >= 1 else ""), "choices": audios, "__type__": "update"}
-
-def change_models_choices():
- model, index = sorted(list(model for model in os.listdir(configs["weights_path"]) if model.endswith((".pth", ".onnx")) and not model.startswith("G_") and not model.startswith("D_"))), sorted([os.path.join(root, name) for root, _, files in os.walk(configs["logs_path"], topdown=False) for name in files if name.endswith(".index") and "trained" not in name])
- return [{"value": model[0] if len(model) >= 1 else "", "choices": model, "__type__": "update"}, {"value": index[0] if len(index) >= 1 else "", "choices": index, "__type__": "update"}]
-
-def change_pretrained_choices():
- pretrainD = sorted([model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "D" in model])
- pretrainG = sorted([model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "G" in model])
-
- return [{"choices": pretrainD, "value": pretrainD[0] if len(pretrainD) >= 1 else "", "__type__": "update"}, {"choices": pretrainG, "value": pretrainG[0] if len(pretrainG) >= 1 else "", "__type__": "update"}]
-
-def change_choices_del():
- return [{"choices": sorted(list(model for model in os.listdir(configs["weights_path"]) if model.endswith(".pth") and not model.startswith("G_") and not model.startswith("D_"))), "__type__": "update"}, {"choices": sorted([os.path.join(configs["logs_path"], f) for f in os.listdir(configs["logs_path"]) if "mute" not in f and os.path.isdir(os.path.join(configs["logs_path"], f))]), "__type__": "update"}]
-
-def change_preset_choices():
- return {"value": "", "choices": sorted(list(f for f in os.listdir(configs["presets_path"]) if f.endswith(".conversion.json"))), "__type__": "update"}
-
-def change_effect_preset_choices():
- return {"value": "", "choices": sorted(list(f for f in os.listdir(configs["presets_path"]) if f.endswith(".effect.json"))), "__type__": "update"}
-
-def change_tts_voice_choices(google):
- return {"choices": google_tts_voice if google else edgetts, "value": google_tts_voice[0] if google else edgetts[0], "__type__": "update"}
-
-def change_backing_choices(backing, merge):
- if backing or merge: return {"value": False, "interactive": False, "__type__": "update"}
- elif not backing or not merge: return {"interactive": True, "__type__": "update"}
- else: gr_warning(translations["option_not_valid"])
-
-def change_download_choices(select):
- selects = [False]*10
-
- if select == translations["download_url"]: selects[0] = selects[1] = selects[2] = True
- elif select == translations["download_from_csv"]: selects[3] = selects[4] = True
- elif select == translations["search_models"]: selects[5] = selects[6] = True
- elif select == translations["upload"]: selects[9] = True
- else: gr_warning(translations["option_not_valid"])
-
- return [{"visible": selects[i], "__type__": "update"} for i in range(len(selects))]
-
-def change_download_pretrained_choices(select):
- selects = [False]*8
-
- if select == translations["download_url"]: selects[0] = selects[1] = selects[2] = True
- elif select == translations["list_model"]: selects[3] = selects[4] = selects[5] = True
- elif select == translations["upload"]: selects[6] = selects[7] = True
- else: gr_warning(translations["option_not_valid"])
-
- return [{"visible": selects[i], "__type__": "update"} for i in range(len(selects))]
-
-def get_index(model):
- model = os.path.basename(model).split("_")[0]
- return {"value": next((f for f in [os.path.join(root, name) for root, _, files in os.walk(configs["logs_path"], topdown=False) for name in files if name.endswith(".index") and "trained" not in name] if model.split(".")[0] in f), ""), "__type__": "update"} if model else None
-
-def index_strength_show(index):
- return {"visible": index != "" and os.path.exists(index), "value": 0.5, "__type__": "update"}
-
-def hoplength_show(method, hybrid_method=None):
- visible = any(m in method or (hybrid_method and m in hybrid_method) for m in ["mangio-crepe", "fcpe", "yin", "piptrack", "fcn"])
- return {"visible": visible, "__type__": "update"}
-
-def visible(value):
- return {"visible": value, "__type__": "update"}
-
-def valueFalse_interactive(value):
- return {"value": False, "interactive": value, "__type__": "update"}
-
-def valueEmpty_visible1(value):
- return {"value": "", "visible": value, "__type__": "update"}
-
-def pitch_guidance_lock(vocoders):
- return {"value": True, "interactive": vocoders == "Default", "__type__": "update"}
-
-def vocoders_lock(pitch, vocoders):
- return {"value": vocoders if pitch else "Default", "interactive": pitch, "__type__": "update"}
-
-def unlock_f0(value):
- return {"choices": method_f0_full if value else method_f0, "value": "rmvpe", "__type__": "update"}
-
-def unlock_vocoder(value, vocoder):
- return {"value": vocoder if value == "v2" else "Default", "interactive": value == "v2", "__type__": "update"}
-
-def unlock_ver(value, vocoder):
- return {"value": "v2" if vocoder == "Default" else value, "interactive": vocoder == "Default", "__type__": "update"}
-
-def visible_embedders(value):
- return {"visible": value != "spin", "__type__": "update"}
-
-def change_fp(fp):
- fp16 = fp == "fp16"
-
- if fp16 and config.device in ["cpu", "mps", "ocl:0"]:
- gr_warning(translations["fp16_not_support"])
- return "fp32"
- else:
- gr_info(translations["start_update_precision"])
-
- configs = json.load(open(configs_json, "r"))
- configs["fp16"] = config.is_half = fp16
-
- with open(configs_json, "w") as f:
- json.dump(configs, f, indent=4)
-
- gr_info(translations["success"])
- return "fp16" if fp16 else "fp32"
-
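-# Resolve output-file collisions: either delete the existing file (default behaviour) or keep appending _1, _2, ... to the name until an unused path is found.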
-def process_output(file_path):
- if config.configs.get("delete_exists_file", True):
- if os.path.exists(file_path): os.remove(file_path)
- return file_path
- else:
- if not os.path.exists(file_path): return file_path
- file = os.path.splitext(os.path.basename(file_path))
-
- index = 1
- while 1:
- file_path = os.path.join(os.path.dirname(file_path), f"{file[0]}_{index}{file[1]}")
- if not os.path.exists(file_path): return file_path
- index += 1
-
-def shutil_move(input_path, output_path):
- output_path = os.path.join(output_path, os.path.basename(input_path)) if os.path.isdir(output_path) else output_path
-
- return shutil.move(input_path, process_output(output_path)) if os.path.exists(output_path) else shutil.move(input_path, output_path)
\ No newline at end of file
diff --git a/main/app/core/utils.py b/main/app/core/utils.py
deleted file mode 100644
index f55c9eddcd799fc4152386e5f96f430caa812235..0000000000000000000000000000000000000000
--- a/main/app/core/utils.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import os
-import sys
-import json
-import codecs
-import requests
-import platform
-import datetime
-
-sys.path.append(os.getcwd())
-
-from main.app.core.ui import gr_info, gr_warning, gr_error
-from main.app.variables import logger, translations, configs
-
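-# Kill the PIDs recorded in a pid file (global in assets/ or per-model under the logs folder); for training runs, also kill any process_pids stored in the model's config.json.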
-def stop_pid(pid_file, model_name=None, train=False):
- try:
- pid_file_path = os.path.join("assets", f"{pid_file}.txt") if model_name is None else os.path.join(configs["logs_path"], model_name, f"{pid_file}.txt")
-
- if not os.path.exists(pid_file_path): return gr_warning(translations["not_found_pid"])
- else:
- with open(pid_file_path, "r") as pid_file:
- pids = [int(pid) for pid in pid_file.readlines()]
-
- for pid in pids:
- os.kill(pid, 9)
-
- if os.path.exists(pid_file_path): os.remove(pid_file_path)
-
- pid_file_path = os.path.join(configs["logs_path"], model_name, "config.json")
-
- if train and os.path.exists(pid_file_path):
- with open(pid_file_path, "r") as pid_file:
- pid_data = json.load(pid_file)
- pids = pid_data.get("process_pids", [])
-
- with open(pid_file_path, "w") as pid_file:
- pid_data.pop("process_pids", None)
-
- json.dump(pid_data, pid_file, indent=4)
-
- for pid in pids:
- os.kill(pid, 9)
-
- gr_info(translations["end_pid"])
- except:
- pass
-
-def report_bug(error_info, provide):
- report_path = os.path.join(configs["logs_path"], "report_bugs.log")
- if os.path.exists(report_path): os.remove(report_path)
-
- report_url = codecs.decode(requests.get(codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/jroubbx.gkg", "rot13")).text, "rot13")
- if not error_info: error_info = "None provided"
-
- gr_info(translations["thank"])
-
- if provide:
- try:
- for log in [os.path.join(root, name) for root, _, files in os.walk(os.path.join(configs["logs_path"]), topdown=False) for name in files if name.endswith(".log")]:
- with open(log, "r", encoding="utf-8") as r:
- with open(report_path, "a", encoding="utf-8") as w:
- w.write(str(r.read()))
- w.write("\n")
- except Exception as e:
- gr_error(translations["error_read_log"])
- logger.debug(e)
-
- try:
- with open(report_path, "r", encoding="utf-8") as f:
- content = f.read()
-
- requests.post(report_url, json={"embeds": [{"title": "Báo Cáo Lỗi", "description": f"Mô tả lỗi: {error_info}", "color": 15158332, "author": {"name": "Vietnamese_RVC", "icon_url": codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/vpb.cat", "rot13"), "url": codecs.decode("uggcf://tvguho.pbz/CunzUhlauNau16/Ivrganzrfr-EIP/gerr/znva","rot13")}, "thumbnail": {"url": codecs.decode("uggcf://p.grabe.pbz/7dADJbv-36fNNNNq/grabe.tvs", "rot13")}, "fields": [{"name": "Số Lượng Gỡ Lỗi", "value": content.count("DEBUG")}, {"name": "Số Lượng Thông Tin", "value": content.count("INFO")}, {"name": "Số Lượng Cảnh Báo", "value": content.count("WARNING")}, {"name": "Số Lượng Lỗi", "value": content.count("ERROR")}], "footer": {"text": f"Tên Máy: {platform.uname().node} - Hệ Điều Hành: {platform.system()}-{platform.version()}\nThời Gian Báo Cáo Lỗi: {datetime.datetime.now()}."}}]})
-
- with open(report_path, "rb") as f:
- requests.post(report_url, files={"file": f})
- except Exception as e:
- gr_error(translations["error_send"])
- finally:
- if os.path.exists(report_path): os.remove(report_path)
- else: requests.post(report_url, json={"embeds": [{"title": "Bug Report", "description": error_info}]})
-
-def google_translate(text, source='auto', target='vi'):
- if text == "": return gr_warning(translations["prompt_warning"])
-
- try:
- import textwrap
-
- def translate_chunk(chunk):
- response = requests.get(codecs.decode("uggcf://genafyngr.tbbtyrncvf.pbz/genafyngr_n/fvatyr", "rot13"), params={'client': 'gtx', 'sl': source, 'tl': target, 'dt': 't', 'q': chunk})
- return ''.join([i[0] for i in response.json()[0]]) if response.status_code == 200 else chunk
-
- translated_text = ''
- for chunk in textwrap.wrap(text, 5000, break_long_words=False, break_on_hyphens=False):
- translated_text += translate_chunk(chunk)
-
- return translated_text
- except:
- return text
\ No newline at end of file
diff --git a/main/app/parser.py b/main/app/parser.py
deleted file mode 100644
index 560bebb6773c4a968b9e1d3885acf28766c80b2d..0000000000000000000000000000000000000000
--- a/main/app/parser.py
+++ /dev/null
@@ -1,319 +0,0 @@
-import os
-import sys
-
-sys.path.append(os.getcwd())
-
-try:
- argv = sys.argv[1]
-except IndexError:
- argv = None
-
-argv_is_allows = ["--audio_effects", "--convert", "--create_dataset", "--create_index", "--extract", "--preprocess", "--separator_music", "--train", "--help_audio_effects", "--help_convert", "--help_create_dataset", "--help_create_index", "--help_extract", "--help_preprocess", "--help_separator_music", "--help_train", "--help"]
-
-if argv not in argv_is_allows:
- print("Cú pháp không hợp lệ! Sử dụng --help để biết thêm")
- quit()
-
-if argv == argv_is_allows[0]: from main.inference.audio_effects import main
-elif argv == argv_is_allows[1]: from main.inference.conversion.convert import main
-elif argv == argv_is_allows[2]: from main.inference.create_dataset import main
-elif argv == argv_is_allows[3]: from main.inference.create_index import main
-elif argv == argv_is_allows[4]: from main.inference.extracting.extract import main
-elif argv == argv_is_allows[5]: from main.inference.preprocess.preprocess import main
-elif argv == argv_is_allows[6]: from main.inference.separator_music import main
-elif argv == argv_is_allows[7]: from main.inference.training.train import main
-elif argv == argv_is_allows[8]:
- print("""Các tham số của `--audio_effects`:
- 1. Đường dẫn tệp:
- - `--input_path` (bắt buộc): Đường dẫn đến tệp âm thanh đầu vào.
- - `--output_path` (mặc định: `./audios/apply_effects.wav`): Đường dẫn lưu tệp đầu ra.
- - `--export_format` (mặc định: `wav`): Định dạng xuất tệp (`wav`, `mp3`, ...).
-
- 2. Lấy mẫu lại:
- - `--resample` (mặc định: `False`): Có lấy mẫu lại hay không.
- - `--resample_sr` (mặc định: `0`): Tần số lấy mẫu mới (Hz).
-
- 3. Hiệu ứng chorus:
- - `--chorus`: Bật/tắt chorus.
- - `--chorus_depth`, `--chorus_rate`, `--chorus_mix`, `--chorus_delay`, `--chorus_feedback`: Các thông số điều chỉnh chorus.
-
- 4. Hiệu ứng distortion:
- - `--distortion`: Bật/tắt distortion.
- - `--drive_db`: Mức độ méo âm thanh.
-
- 5. Hiệu ứng reverb:
- - `--reverb`: Bật/tắt hồi âm.
- - `--reverb_room_size`, `--reverb_damping`, `--reverb_wet_level`, `--reverb_dry_level`, `--reverb_width`, `--reverb_freeze_mode`: Điều chỉnh hồi âm.
-
- 6. Hiệu ứng pitch shift:
- - `--pitchshift`: Bật/tắt thay đổi cao độ.
- - `--pitch_shift`: Giá trị dịch cao độ.
-
- 7. Hiệu ứng delay:
- - `--delay`: Bật/tắt delay.
- - `--delay_seconds`, `--delay_feedback`, `--delay_mix`: Điều chỉnh thời gian trễ, phản hồi và hòa trộn.
-
- 8. Compressor:
- - `--compressor`: Bật/tắt compressor.
- - `--compressor_threshold`, `--compressor_ratio`, `--compressor_attack_ms`, `--compressor_release_ms`: Các thông số nén.
-
- 9. Limiter:
- - `--limiter`: Bật/tắt giới hạn mức âm thanh.
- - `--limiter_threshold`, `--limiter_release`: Ngưỡng giới hạn và thời gian nhả.
-
- 10. Gain (Khuếch đại):
- - `--gain`: Bật/tắt gain.
- - `--gain_db`: Mức gain (dB).
-
- 11. Bitcrush:
- - `--bitcrush`: Bật/tắt hiệu ứng giảm độ phân giải.
- - `--bitcrush_bit_depth`: Số bit của bitcrush.
-
- 12. Clipping:
- - `--clipping`: Bật/tắt cắt âm thanh.
- - `--clipping_threshold`: Ngưỡng clipping.
-
- 13. Phaser:
- - `--phaser`: Bật/tắt hiệu ứng phaser.
- - `--phaser_rate_hz`, `--phaser_depth`, `--phaser_centre_frequency_hz`, `--phaser_feedback`, `--phaser_mix`: Điều chỉnh hiệu ứng phaser.
-
- 14. Boost bass & treble:
- - `--treble_bass_boost`: Bật/tắt tăng cường âm bass và treble.
- - `--bass_boost_db`, `--bass_boost_frequency`, `--treble_boost_db`, `--treble_boost_frequency`: Các thông số tăng bass và treble.
-
- 15. Fade in & fade out:
- - `--fade_in_out`: Bật/tắt hiệu ứng fade.
- - `--fade_in_duration`, `--fade_out_duration`: Thời gian fade vào/ra.
-
- 16. Kết hợp âm thanh:
- - `--audio_combination`: Bật/tắt ghép nhiều tệp âm thanh.
- - `--audio_combination_input`: Đường dẫn tệp âm thanh bổ sung.
- - `--main_volume`: Âm lượng của âm thanh chính.
- - `--combination_volume`:: Âm lượng của âm thanh cần kết hợp.
- """)
- quit()
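- # Hypothetical example invocation from the project root (flags are documented above; paths and values are placeholders):
- #   python main/app/parser.py --audio_effects --input_path ./audios/input.wav --output_path ./audios/apply_effects.wav --reverb True --reverb_room_size 0.15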
-elif argv == argv_is_allows[9]:
- print("""Parameters for --convert:
- 1. Voice processing configuration:
- - `--pitch` (default: `0`): Pitch adjustment.
- - `--filter_radius` (default: `3`): Smoothness of the F0 curve.
- - `--index_rate` (default: `0.5`): How strongly the voice index is applied.
- - `--rms_mix_rate` (default: `1`): Volume envelope mixing factor.
- - `--protect` (default: `0.33`): Consonant protection.
-
- 2. Frame hop configuration:
- - `--hop_length` (default: `64`): Hop length used while processing the audio.
-
- 3. F0 configuration:
- - `--f0_method` (default: `rmvpe`): F0 estimation method (`pm`, `dio`, `mangio-crepe-tiny`, `mangio-crepe-small`, `mangio-crepe-medium`, `mangio-crepe-large`, `mangio-crepe-full`, `crepe-tiny`, `crepe-small`, `crepe-medium`, `crepe-large`, `crepe-full`, `fcpe`, `fcpe-legacy`, `rmvpe`, `rmvpe-legacy`, `harvest`, `yin`, `pyin`, `swipe`).
- - `--f0_autotune` (default: `False`): Whether to autotune F0.
- - `--f0_autotune_strength` (default: `1`): Strength of the F0 autotune correction.
- - `--f0_file` (default: ``): Path to an existing F0 file.
- - `--f0_onnx` (default: `False`): Whether to use the ONNX version of the F0 model.
- - `--proposal_pitch` (default: `False`): Propose a pitch instead of adjusting it manually.
- - `--proposal_pitch_threshold` (default: `255.0`): Frequency used for the pitch estimate.
-
- 4. Embedding model:
- - `--embedder_model` (default: `contentvec_base`): Embedding model to use.
- - `--embedders_mode` (default: `fairseq`): Embedding mode (`fairseq`, `transformers`, `onnx`).
-
- 5. File paths:
- - `--input_path` (required): Path to the input audio file.
- - `--output_path` (default: `./audios/output.wav`): Path for the output file.
- - `--export_format` (default: `wav`): Export format.
- - `--pth_path` (required): Path to the `.pth` model file.
- - `--index_path` (default: `None`): Path to the index file (if any).
-
- 6. Audio cleaning:
- - `--clean_audio` (default: `False`): Whether to apply audio cleaning.
- - `--clean_strength` (default: `0.7`): Cleaning strength.
-
- 7. Resampling & audio splitting:
- - `--resample_sr` (default: `0`): New sample rate (0 means keep the original).
- - `--split_audio` (default: `False`): Whether to split the audio before processing.
-
- 8. Checking & optimization:
- - `--checkpointing` (default: `False`): Enable/disable checkpointing to save RAM.
-
- 9. Formant shifting:
- - `--formant_shifting` (default: `False`): Whether to enable formant shifting.
- - `--formant_qfrency` (default: `0.8`): Formant frequency shift factor.
- - `--formant_timbre` (default: `0.8`): Voice timbre change factor.
- """)
- quit()
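- # Hypothetical example invocation from the project root (flags are documented above; paths and model names are placeholders):
- #   python main/app/parser.py --convert --pth_path ./weights/MyModel.pth --input_path ./audios/input.wav --output_path ./audios/output.wav --f0_method rmvpe --pitch 0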
-elif argv == argv_is_allows[10]:
- print("""Parameters for --create_dataset:
- 1. Paths & dataset configuration:
- - `--input_audio` (required): Audio link (YouTube link; use `,` to pass several links).
- - `--output_dataset` (default: `./dataset`): Output folder for the dataset.
- - `--sample_rate` (default: `44100`): Sample rate of the audio.
-
- 2. Data cleaning:
- - `--clean_dataset` (default: `False`): Whether to clean the dataset.
- - `--clean_strength` (default: `0.7`): Cleaning strength.
-
- 3. Vocal separation & effects:
- - `--separator_reverb` (default: `False`): Whether to remove vocal reverb.
- - `--kim_vocal_version` (default: `2`): Kim Vocal model version used for separation (`1`, `2`).
-
- 4. Audio segmentation configuration:
- - `--overlap` (default: `0.25`): Overlap between segments during separation.
- - `--segments_size` (default: `256`): Size of each segment.
-
- 5. MDX (Music Demixing) configuration:
- - `--mdx_hop_length` (default: `1024`): MDX hop length.
- - `--mdx_batch_size` (default: `1`): MDX batch size.
- - `--denoise_mdx` (default: `False`): Whether to denoise during MDX separation.
-
- 6. Skipping audio sections:
- - `--skip` (default: `False`): Whether to skip any seconds of audio.
- - `--skip_start_audios` (default: `0`): Seconds to skip at the start of the audio.
- - `--skip_end_audios` (default: `0`): Seconds to skip at the end of the audio.
- """)
- quit()
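- # Hypothetical example invocation from the project root (a YouTube link is expected as input; values are placeholders):
- #   python main/app/parser.py --create_dataset --input_audio "https://www.youtube.com/watch?v=..." --output_dataset ./dataset --sample_rate 44100 --clean_dataset True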
-elif argv == argv_is_allows[11]:
- print("""Parameters for --create_index:
- 1. Model information:
- - `--model_name` (required): Model name.
- - `--rvc_version` (default: `v2`): Version (`v1`, `v2`).
- - `--index_algorithm` (default: `Auto`): Index algorithm to use (`Auto`, `Faiss`, `KMeans`).
- """)
- quit()
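- # Hypothetical example invocation from the project root (the model name is a placeholder):
- #   python main/app/parser.py --create_index --model_name MyModel --rvc_version v2 --index_algorithm Auto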
-elif argv == argv_is_allows[12]:
- print("""Parameters for --extract:
- 1. Model information:
- - `--model_name` (required): Model name.
- - `--rvc_version` (default: `v2`): RVC version (`v1`, `v2`).
-
- 2. F0 configuration:
- - `--f0_method` (default: `rmvpe`): F0 estimation method (`pm`, `dio`, `mangio-crepe-tiny`, `mangio-crepe-small`, `mangio-crepe-medium`, `mangio-crepe-large`, `mangio-crepe-full`, `crepe-tiny`, `crepe-small`, `crepe-medium`, `crepe-large`, `crepe-full`, `fcpe`, `fcpe-legacy`, `rmvpe`, `rmvpe-legacy`, `harvest`, `yin`, `pyin`, `swipe`).
- - `--pitch_guidance` (default: `True`): Whether to use pitch guidance.
- - `--f0_autotune` (default: `False`): Whether to autotune F0.
- - `--f0_autotune_strength` (default: `1`): Strength of the F0 autotune correction.
-
- 3. Processing configuration:
- - `--hop_length` (default: `128`): Hop length used during processing.
- - `--cpu_cores` (default: `2`): Number of CPU threads to use.
- - `--gpu` (default: `-`): GPU to use (e.g. `0` for the first GPU, `-` to disable the GPU).
- - `--sample_rate` (required): Sample rate of the input audio.
-
- 4. Embedding configuration:
- - `--embedder_model` (default: `contentvec_base`): Embedding model name.
- - `--f0_onnx` (default: `False`): Whether to use the ONNX version of the F0 model.
- - `--embedders_mode` (default: `fairseq`): Embedding mode (`fairseq`, `transformers`, `onnx`).
-
- 5. RMS:
- - `--rms_extract` (default: `False`): Also extract RMS energy.
- """)
- quit()
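- # Hypothetical example invocation from the project root (values are placeholders):
- #   python main/app/parser.py --extract --model_name MyModel --rvc_version v2 --f0_method rmvpe --sample_rate 48000 --cpu_cores 2 --gpu 0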
-elif argv == argv_is_allows[13]:
- print("""Parameters for --preprocess:
- 1. Model information:
- - `--model_name` (required): Model name.
-
- 2. Data configuration:
- - `--dataset_path` (default: `./dataset`): Path to the folder containing the dataset.
- - `--sample_rate` (required): Sample rate of the audio data.
-
- 3. Processing configuration:
- - `--cpu_cores` (default: `2`): Number of CPU threads to use.
- - `--cut_preprocess` (default: `True`): Whether to cut the data files.
- - `--process_effects` (default: `False`): Whether to apply preprocessing effects.
- - `--clean_dataset` (default: `False`): Whether to clean the dataset files.
- - `--clean_strength` (default: `0.7`): Strength of the dataset cleaning.
- """)
- quit()
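- # Hypothetical example invocation from the project root (values are placeholders):
- #   python main/app/parser.py --preprocess --model_name MyModel --dataset_path ./dataset --sample_rate 48000 --cpu_cores 2 --cut_preprocess True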
-elif argv == argv_is_allows[14]:
- print("""Parameters for --separator_music:
- 1. Data paths:
- - `--input_path` (required): Path to the input audio file.
- - `--output_path` (default: `./audios`): Folder for the output files.
- - `--format` (default: `wav`): Export format (`wav`, `mp3`, ...).
-
- 2. Audio processing configuration:
- - `--shifts` (default: `2`): Number of prediction shifts.
- - `--segments_size` (default: `256`): Audio segment size.
- - `--overlap` (default: `0.25`): Overlap between segments.
- - `--mdx_hop_length` (default: `1024`): MDX hop length.
- - `--mdx_batch_size` (default: `1`): Batch size.
-
- 3. Cleaning:
- - `--clean_audio` (default: `False`): Whether to clean the audio.
- - `--clean_strength` (default: `0.7`): Strength of the cleaning filter.
-
- 4. Model configuration:
- - `--model_name` (default: `HT-Normal`): Separation model (`Main_340`, `Main_390`, `Main_406`, `Main_427`, `Main_438`, `Inst_full_292`, `Inst_HQ_1`, `Inst_HQ_2`, `Inst_HQ_3`, `Inst_HQ_4`, `Inst_HQ_5`, `Kim_Vocal_1`, `Kim_Vocal_2`, `Kim_Inst`, `Inst_187_beta`, `Inst_82_beta`, `Inst_90_beta`, `Voc_FT`, `Crowd_HQ`, `Inst_1`, `Inst_2`, `Inst_3`, `MDXNET_1_9703`, `MDXNET_2_9682`, `MDXNET_3_9662`, `Inst_Main`, `MDXNET_Main`, `MDXNET_9482`, `HT-Normal`, `HT-Tuned`, `HD_MMI`, `HT_6S`).
- - `--kara_model` (default: `Version-1`): Backing-vocal separation model version (`Version-1`, `Version-2`).
-
- 5. Effects and post-processing:
- - `--backing` (default: `False`): Whether to separate backing vocals.
- - `--mdx_denoise` (default: `False`): Whether to use MDX denoising.
- - `--reverb` (default: `False`): Whether to separate reverb.
- - `--backing_reverb` (default: `False`): Whether to separate reverb for the backing vocals.
-
- 6. Sample rate:
- - `--sample_rate` (default: `44100`): Sample rate of the output audio.
- """)
- quit()
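- # Hypothetical example invocation from the project root (values are placeholders):
- #   python main/app/parser.py --separator_music --input_path ./audios/song.wav --output_path ./audios --model_name HT-Normal --reverb False --backing False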
-elif argv == argv_is_allows[15]:
- print("""Parameters for --train:
- 1. Model configuration:
- - `--model_name` (required): Model name.
- - `--rvc_version` (default: `v2`): RVC version (`v1`, `v2`).
- - `--model_author` (optional): Author of the model.
-
- 2. Saving configuration:
- - `--save_every_epoch` (required): Number of epochs between saves.
- - `--save_only_latest` (default: `True`): Only keep the latest checkpoint.
- - `--save_every_weights` (default: `True`): Save all model weights.
-
- 3. Training configuration:
- - `--total_epoch` (default: `300`): Total number of training epochs.
- - `--batch_size` (default: `8`): Batch size during training.
- - `--sample_rate` (required): Sample rate of the audio.
-
- 4. Device configuration:
- - `--gpu` (default: `0`): GPU to use (a number, or `-` to disable the GPU).
- - `--cache_data_in_gpu` (default: `False`): Cache data in the GPU to speed up training.
-
- 5. Advanced training configuration:
- - `--pitch_guidance` (default: `True`): Use pitch guidance.
- - `--g_pretrained_path` (default: ``): Path to the pretrained G weights.
- - `--d_pretrained_path` (default: ``): Path to the pretrained D weights.
- - `--vocoder` (default: `Default`): Vocoder to use (`Default`, `MRF-HiFi-GAN`, `RefineGAN`).
- - `--energy_use` (default: `False`): Use RMS energy.
-
- 6. Overtraining detection:
- - `--overtraining_detector` (default: `False`): Enable/disable overtraining detection.
- - `--overtraining_threshold` (default: `50`): Threshold used to detect overtraining.
-
- 7. Data handling:
- - `--cleanup` (default: `False`): Clean up old training files in order to retrain from scratch.
-
- 8. Optimization:
- - `--checkpointing` (default: `False`): Enable/disable checkpointing to save RAM.
- - `--deterministic` (default: `False`): When enabled, uses deterministic algorithms so the same input always produces the same result.
- - `--benchmark` (default: `False`): When enabled, benchmarks and picks the fastest algorithm for the given hardware and sizes.
- - `--optimizer` (default: `AdamW`): Optimizer to use (`AdamW`, `RAdam`).
- """)
- quit()
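- # Hypothetical example invocation from the project root (values are placeholders):
- #   python main/app/parser.py --train --model_name MyModel --rvc_version v2 --sample_rate 48000 --save_every_epoch 50 --total_epoch 300 --batch_size 8 --gpu 0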
-elif argv == argv_is_allows[16]:
- print("""Usage:
- 1. `--help_audio_effects`: Help for applying audio effects.
- 2. `--help_convert`: Help for audio conversion.
- 3. `--help_create_dataset`: Help for creating training datasets.
- 4. `--help_create_index`: Help for creating an index.
- 5. `--help_extract`: Help for extracting training data.
- 6. `--help_preprocess`: Help for data preprocessing.
- 7. `--help_separator_music`: Help for music separation.
- 8. `--help_train`: Help for model training.
- """)
- quit()
-
-if __name__ == "__main__":
- import torch.multiprocessing as mp
-
- if "--train" in argv: mp.set_start_method("spawn")
- if "--preprocess" in argv or "--extract" in argv: mp.set_start_method("spawn", force=True)
-
- main()
\ No newline at end of file
diff --git a/main/app/run_tensorboard.py b/main/app/run_tensorboard.py
deleted file mode 100644
index 58e2b0edbf65ec0e24c74ece0a07a558edb6c575..0000000000000000000000000000000000000000
--- a/main/app/run_tensorboard.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import os
-import sys
-import time
-import logging
-import webbrowser
-
-from tensorboard import program
-
-sys.path.append(os.getcwd())
-
-from main.configs.config import Config
-
-config = Config()
-translations = config.translations
-
-def launch_tensorboard():
- for l in ["root", "tensorboard"]:
- logging.getLogger(l).setLevel(logging.ERROR)
-
- tb = program.TensorBoard()
- tb.configure(argv=[None, "--logdir", config.configs["logs_path"], f"--port={config.configs['tensorboard_port']}"])
- url = tb.launch()
-
- print(f"{translations['tensorboard_url']}: {url}")
- if "--open" in sys.argv: webbrowser.open(url)
-
- return f"{translations['tensorboard_url']}: {url}"
-
-if __name__ == "__main__":
- launch_tensorboard()
-
- while 1:
- time.sleep(5)
\ No newline at end of file
diff --git a/main/app/tabs/downloads/downloads.py b/main/app/tabs/downloads/downloads.py
deleted file mode 100644
index 831241684dfabfce90ab285bdc9d25a7611fbf3b..0000000000000000000000000000000000000000
--- a/main/app/tabs/downloads/downloads.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.variables import translations, configs, models, model_options
-from main.app.core.downloads import download_model, search_models, download_pretrained_model
-from main.app.core.ui import change_download_choices, change_download_pretrained_choices, shutil_move
-from main.app.core.process import fetch_pretrained_data, save_drop_model, update_sample_rate_dropdown
-
-def download_tab():
- with gr.TabItem(translations["downloads"], visible=configs.get("downloads_tab", True)):
- gr.Markdown(translations["download_markdown"])
- with gr.Row():
- gr.Markdown(translations["download_markdown_2"])
- with gr.Row():
- with gr.Accordion(translations["model_download"], open=True):
- with gr.Row():
- downloadmodel = gr.Radio(label=translations["model_download_select"], choices=[translations["download_url"], translations["download_from_csv"], translations["search_models"], translations["upload"]], interactive=True, value=translations["download_url"])
- with gr.Row():
- gr.Markdown("___")
- with gr.Column():
- with gr.Row():
- url_input = gr.Textbox(label=translations["model_url"], value="", placeholder="https://...", scale=6)
- download_model_name = gr.Textbox(label=translations["modelname"], value="", placeholder=translations["modelname"], scale=2)
- url_download = gr.Button(value=translations["downloads"], scale=2)
- with gr.Column():
- model_browser = gr.Dropdown(choices=models.keys(), label=translations["model_warehouse"], scale=8, allow_custom_value=True, visible=False)
- download_from_browser = gr.Button(value=translations["get_model"], scale=2, variant="primary", visible=False)
- with gr.Column():
- search_name = gr.Textbox(label=translations["name_to_search"], placeholder=translations["modelname"], interactive=True, scale=8, visible=False)
- search = gr.Button(translations["search_2"], scale=2, visible=False)
- search_dropdown = gr.Dropdown(label=translations["select_download_model"], value="", choices=[], allow_custom_value=True, interactive=False, visible=False)
- download = gr.Button(translations["downloads"], variant="primary", visible=False)
- with gr.Column():
- model_upload = gr.File(label=translations["drop_model"], file_types=[".pth", ".onnx", ".index", ".zip"], visible=False)
- with gr.Row():
- with gr.Accordion(translations["download_pretrained_2"], open=False):
- with gr.Row():
- pretrain_download_choices = gr.Radio(label=translations["model_download_select"], choices=[translations["download_url"], translations["list_model"], translations["upload"]], value=translations["download_url"], interactive=True)
- with gr.Row():
- gr.Markdown("___")
- with gr.Column():
- with gr.Row():
- pretrainD = gr.Textbox(label=translations["pretrained_url"].format(dg="D"), value="", placeholder="https://...", interactive=True, scale=4)
- pretrainG = gr.Textbox(label=translations["pretrained_url"].format(dg="G"), value="", placeholder="https://...", interactive=True, scale=4)
- download_pretrain_button = gr.Button(translations["downloads"], scale=2)
- with gr.Column():
- with gr.Row():
- pretrain_choices = gr.Dropdown(label=translations["select_pretrain"], info=translations["select_pretrain_info"], choices=list(fetch_pretrained_data().keys()), value="Titan_Medium", allow_custom_value=True, interactive=True, scale=6, visible=False)
- sample_rate_pretrain = gr.Dropdown(label=translations["pretrain_sr"], info=translations["pretrain_sr"], choices=["48k", "40k", "32k"], value="48k", interactive=True, visible=False)
- download_pretrain_choices_button = gr.Button(translations["downloads"], scale=2, variant="primary", visible=False)
- with gr.Row():
- pretrain_upload_g = gr.File(label=translations["drop_pretrain"].format(dg="G"), file_types=[".pth"], visible=False)
- pretrain_upload_d = gr.File(label=translations["drop_pretrain"].format(dg="D"), file_types=[".pth"], visible=False)
- with gr.Row():
- url_download.click(
- fn=download_model,
- inputs=[
- url_input,
- download_model_name
- ],
- outputs=[url_input],
- api_name="download_model"
- )
- download_from_browser.click(
- fn=lambda model: download_model(models[model], model),
- inputs=[model_browser],
- outputs=[model_browser],
- api_name="download_browser"
- )
- with gr.Row():
- downloadmodel.change(fn=change_download_choices, inputs=[downloadmodel], outputs=[url_input, download_model_name, url_download, model_browser, download_from_browser, search_name, search, search_dropdown, download, model_upload])
- search.click(fn=search_models, inputs=[search_name], outputs=[search_dropdown, download])
- model_upload.upload(fn=save_drop_model, inputs=[model_upload], outputs=[model_upload])
- download.click(
- fn=lambda model: download_model(model_options[model], model),
- inputs=[search_dropdown],
- outputs=[search_dropdown],
- api_name="search_models"
- )
- with gr.Row():
- pretrain_download_choices.change(fn=change_download_pretrained_choices, inputs=[pretrain_download_choices], outputs=[pretrainD, pretrainG, download_pretrain_button, pretrain_choices, sample_rate_pretrain, download_pretrain_choices_button, pretrain_upload_d, pretrain_upload_g])
- pretrain_choices.change(fn=update_sample_rate_dropdown, inputs=[pretrain_choices], outputs=[sample_rate_pretrain])
- with gr.Row():
- download_pretrain_button.click(
- fn=download_pretrained_model,
- inputs=[
- pretrain_download_choices,
- pretrainD,
- pretrainG
- ],
- outputs=[pretrainD, pretrainG],
- api_name="download_pretrain_link"
- )
- download_pretrain_choices_button.click(
- fn=download_pretrained_model,
- inputs=[
- pretrain_download_choices,
- pretrain_choices,
- sample_rate_pretrain
- ],
- outputs=[pretrain_choices],
- api_name="download_pretrain_choices"
- )
- pretrain_upload_g.upload(
- fn=lambda pretrain_upload_g: shutil_move(pretrain_upload_g.name, configs["pretrained_custom_path"]),
- inputs=[pretrain_upload_g],
- outputs=[],
- api_name="upload_pretrain_g"
- )
- pretrain_upload_d.upload(
- fn=lambda pretrain_upload_d: shutil_move(pretrain_upload_d.name, configs["pretrained_custom_path"]),
- inputs=[pretrain_upload_d],
- outputs=[],
- api_name="upload_pretrain_d"
- )
\ No newline at end of file
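Most of the wiring in this tab follows one Gradio pattern: a gr.Radio whose change event returns a gr.update(visible=...) per alternative input group (the change_download_choices / change_download_pretrained_choices helpers). A minimal self-contained sketch of that pattern, independent of this repository's helpers and component names:

# Self-contained sketch of the show/hide pattern used above; the component
# names here are illustrative, not the repo's.
import gradio as gr

def toggle(choice):
    # One gr.update per output component, in the same order as `outputs`.
    return gr.update(visible=choice == "URL"), gr.update(visible=choice == "Upload")

with gr.Blocks() as demo:
    mode = gr.Radio(["URL", "Upload"], value="URL", label="Download source")
    url_box = gr.Textbox(label="Model URL", visible=True)
    file_box = gr.File(label="Drop model", visible=False)
    mode.change(fn=toggle, inputs=[mode], outputs=[url_box, file_box])

if __name__ == "__main__":
    demo.launch()
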
diff --git a/main/app/tabs/editing/child/audio_effects.py b/main/app/tabs/editing/child/audio_effects.py
deleted file mode 100644
index e532b7978af089175a4114ad2de55ed156850514..0000000000000000000000000000000000000000
--- a/main/app/tabs/editing/child/audio_effects.py
+++ /dev/null
@@ -1,393 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.editing import audio_effects
-from main.app.core.presets import audio_effect_load_presets, audio_effect_save_presets
-from main.app.core.ui import visible, change_audios_choices, change_effect_preset_choices, shutil_move
-from main.app.variables import translations, paths_for_files, sample_rate_choice, audio_effect_presets_file, configs
-
-def audio_effects_tab():
- with gr.Row():
- gr.Markdown(translations["audio_effects_edit"])
- with gr.Row():
- with gr.Column():
- with gr.Row():
- reverb_check_box = gr.Checkbox(label=translations["reverb"], value=False, interactive=True)
- chorus_check_box = gr.Checkbox(label=translations["chorus"], value=False, interactive=True)
- delay_check_box = gr.Checkbox(label=translations["delay"], value=False, interactive=True)
- phaser_check_box = gr.Checkbox(label=translations["phaser"], value=False, interactive=True)
- compressor_check_box = gr.Checkbox(label=translations["compressor"], value=False, interactive=True)
- more_options = gr.Checkbox(label=translations["more_option"], value=False, interactive=True)
- with gr.Row():
- with gr.Accordion(translations["input_output"], open=False):
- with gr.Row():
- upload_audio = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
- with gr.Row():
- audio_in_path = gr.Dropdown(label=translations["input_audio"], value="", choices=paths_for_files, info=translations["provide_audio"], interactive=True, allow_custom_value=True)
- audio_out_path = gr.Textbox(label=translations["output_audio"], value="audios/audio_effects.wav", placeholder="audios/audio_effects.wav", info=translations["provide_output"], interactive=True)
- with gr.Row():
- with gr.Column():
- audio_combination = gr.Checkbox(label=translations["merge_instruments"], value=False, interactive=True)
- audio_combination_input = gr.Dropdown(label=translations["input_audio"], value="", choices=paths_for_files, info=translations["provide_audio"], interactive=True, allow_custom_value=True, visible=audio_combination.value)
- with gr.Row():
- main_vol = gr.Slider(minimum=-80, maximum=80, label=translations["main_volume"], info=translations["main_volume_info"], value=-4, step=1, interactive=True, visible=audio_combination.value)
- combine_vol = gr.Slider(minimum=-80, maximum=80, label=translations["combination_volume"], info=translations["combination_volume_info"], value=-7, step=1, interactive=True, visible=audio_combination.value)
- with gr.Row():
- audio_effects_refresh = gr.Button(translations["refresh"])
- with gr.Row():
- audio_output_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
- with gr.Row():
- with gr.Accordion(translations["use_presets"], open=False):
- with gr.Row():
- presets_name = gr.Dropdown(label=translations["file_preset"], choices=audio_effect_presets_file, value=audio_effect_presets_file[0] if len(audio_effect_presets_file) > 0 else '', interactive=True, allow_custom_value=True)
- with gr.Row():
- load_click = gr.Button(translations["load_file"], variant="primary")
- refresh_click = gr.Button(translations["refresh"])
- with gr.Accordion(translations["export_file"], open=False):
- with gr.Row():
- with gr.Column():
- name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
- save_file_button = gr.Button(translations["export_file"])
- with gr.Row():
- upload_presets = gr.File(label=translations["upload_presets"], file_types=[".effect.json"])
- with gr.Row():
- apply_effects_button = gr.Button(translations["apply"], variant="primary", scale=2)
- with gr.Row():
- with gr.Column():
- with gr.Row():
- with gr.Accordion(translations["reverb"], open=False, visible=reverb_check_box.value) as reverb_accordion:
- reverb_freeze_mode = gr.Checkbox(label=translations["reverb_freeze"], info=translations["reverb_freeze_info"], value=False, interactive=True)
- reverb_room_size = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.15, label=translations["room_size"], info=translations["room_size_info"], interactive=True)
- reverb_damping = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label=translations["damping"], info=translations["damping_info"], interactive=True)
- reverb_wet_level = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.2, label=translations["wet_level"], info=translations["wet_level_info"], interactive=True)
- reverb_dry_level = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.8, label=translations["dry_level"], info=translations["dry_level_info"], interactive=True)
- reverb_width = gr.Slider(minimum=0, maximum=1, step=0.01, value=1, label=translations["width"], info=translations["width_info"], interactive=True)
- with gr.Row():
- with gr.Accordion(translations["chorus"], open=False, visible=chorus_check_box.value) as chorus_accordion:
- chorus_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_depth"], info=translations["chorus_depth_info"], interactive=True)
- chorus_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1.5, label=translations["chorus_rate_hz"], info=translations["chorus_rate_hz_info"], interactive=True)
- chorus_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_mix"], info=translations["chorus_mix_info"], interactive=True)
- chorus_centre_delay_ms = gr.Slider(minimum=0, maximum=50, step=1, value=10, label=translations["chorus_centre_delay_ms"], info=translations["chorus_centre_delay_ms_info"], interactive=True)
- chorus_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["chorus_feedback"], info=translations["chorus_feedback_info"], interactive=True)
- with gr.Row():
- with gr.Accordion(translations["delay"], open=False, visible=delay_check_box.value) as delay_accordion:
- delay_second = gr.Slider(minimum=0, maximum=5, step=0.01, value=0.5, label=translations["delay_seconds"], info=translations["delay_seconds_info"], interactive=True)
- delay_feedback = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_feedback"], info=translations["delay_feedback_info"], interactive=True)
- delay_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_mix"], info=translations["delay_mix_info"], interactive=True)
- with gr.Column():
- with gr.Row():
- with gr.Accordion(translations["more_option"], open=False, visible=more_options.value) as more_accordion:
- with gr.Row():
- fade = gr.Checkbox(label=translations["fade"], value=False, interactive=True)
- bass_or_treble = gr.Checkbox(label=translations["bass_or_treble"], value=False, interactive=True)
- limiter = gr.Checkbox(label=translations["limiter"], value=False, interactive=True)
- resample_checkbox = gr.Checkbox(label=translations["resample"], value=False, interactive=True)
- with gr.Row():
- distortion_checkbox = gr.Checkbox(label=translations["distortion"], value=False, interactive=True)
- gain_checkbox = gr.Checkbox(label=translations["gain"], value=False, interactive=True)
- bitcrush_checkbox = gr.Checkbox(label=translations["bitcrush"], value=False, interactive=True)
- clipping_checkbox = gr.Checkbox(label=translations["clipping"], value=False, interactive=True)
- with gr.Accordion(translations["fade"], open=True, visible=fade.value) as fade_accordion:
- with gr.Row():
- fade_in = gr.Slider(minimum=0, maximum=10000, step=100, value=0, label=translations["fade_in"], info=translations["fade_in_info"], interactive=True)
- fade_out = gr.Slider(minimum=0, maximum=10000, step=100, value=0, label=translations["fade_out"], info=translations["fade_out_info"], interactive=True)
- with gr.Accordion(translations["bass_or_treble"], open=True, visible=bass_or_treble.value) as bass_treble_accordion:
- with gr.Row():
- bass_boost = gr.Slider(minimum=0, maximum=20, step=1, value=0, label=translations["bass_boost"], info=translations["bass_boost_info"], interactive=True)
- bass_frequency = gr.Slider(minimum=20, maximum=200, step=10, value=100, label=translations["bass_frequency"], info=translations["bass_frequency_info"], interactive=True)
- with gr.Row():
- treble_boost = gr.Slider(minimum=0, maximum=20, step=1, value=0, label=translations["treble_boost"], info=translations["treble_boost_info"], interactive=True)
- treble_frequency = gr.Slider(minimum=1000, maximum=10000, step=500, value=3000, label=translations["treble_frequency"], info=translations["treble_frequency_info"], interactive=True)
- with gr.Accordion(translations["limiter"], open=True, visible=limiter.value) as limiter_accordion:
- with gr.Row():
- limiter_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-1, label=translations["limiter_threshold_db"], info=translations["limiter_threshold_db_info"], interactive=True)
- limiter_release_ms = gr.Slider(minimum=10, maximum=1000, step=1, value=100, label=translations["limiter_release_ms"], info=translations["limiter_release_ms_info"], interactive=True)
- with gr.Column():
- pitch_shift_semitones = gr.Slider(minimum=-20, maximum=20, step=1, value=0, label=translations["pitch"], info=translations["pitch_info"], interactive=True)
- audio_effect_resample_sr = gr.Radio(choices=[0]+sample_rate_choice, value=0, label=translations["resample"], info=translations["resample_info"], interactive=True, visible=resample_checkbox.value)
- distortion_drive_db = gr.Slider(minimum=0, maximum=50, step=1, value=20, label=translations["distortion"], info=translations["distortion_info"], interactive=True, visible=distortion_checkbox.value)
- gain_db = gr.Slider(minimum=-60, maximum=60, step=1, value=0, label=translations["gain"], info=translations["gain_info"], interactive=True, visible=gain_checkbox.value)
- clipping_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-1, label=translations["clipping_threshold_db"], info=translations["clipping_threshold_db_info"], interactive=True, visible=clipping_checkbox.value)
- bitcrush_bit_depth = gr.Slider(minimum=1, maximum=24, step=1, value=16, label=translations["bitcrush_bit_depth"], info=translations["bitcrush_bit_depth_info"], interactive=True, visible=bitcrush_checkbox.value)
- with gr.Row():
- with gr.Accordion(translations["phaser"], open=False, visible=phaser_check_box.value) as phaser_accordion:
- phaser_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_depth"], info=translations["phaser_depth_info"], interactive=True)
- phaser_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1, label=translations["phaser_rate_hz"], info=translations["phaser_rate_hz_info"], interactive=True)
- phaser_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_mix"], info=translations["phaser_mix_info"], interactive=True)
- phaser_centre_frequency_hz = gr.Slider(minimum=50, maximum=5000, step=10, value=1000, label=translations["phaser_centre_frequency_hz"], info=translations["phaser_centre_frequency_hz_info"], interactive=True)
- phaser_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["phaser_feedback"], info=translations["phaser_feedback_info"], interactive=True)
- with gr.Row():
- with gr.Accordion(translations["compressor"], open=False, visible=compressor_check_box.value) as compressor_accordion:
- compressor_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-20, label=translations["compressor_threshold_db"], info=translations["compressor_threshold_db_info"], interactive=True)
- compressor_ratio = gr.Slider(minimum=1, maximum=20, step=0.1, value=1, label=translations["compressor_ratio"], info=translations["compressor_ratio_info"], interactive=True)
- compressor_attack_ms = gr.Slider(minimum=0.1, maximum=100, step=0.1, value=10, label=translations["compressor_attack_ms"], info=translations["compressor_attack_ms_info"], interactive=True)
- compressor_release_ms = gr.Slider(minimum=10, maximum=1000, step=1, value=100, label=translations["compressor_release_ms"], info=translations["compressor_release_ms_info"], interactive=True)
- with gr.Row():
- gr.Markdown(translations["output_audio"])
- with gr.Row():
- audio_play_input = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
- audio_play_output = gr.Audio(show_download_button=True, interactive=False, label=translations["output_audio"])
- with gr.Row():
- reverb_check_box.change(fn=visible, inputs=[reverb_check_box], outputs=[reverb_accordion])
- chorus_check_box.change(fn=visible, inputs=[chorus_check_box], outputs=[chorus_accordion])
- delay_check_box.change(fn=visible, inputs=[delay_check_box], outputs=[delay_accordion])
- with gr.Row():
- compressor_check_box.change(fn=visible, inputs=[compressor_check_box], outputs=[compressor_accordion])
- phaser_check_box.change(fn=visible, inputs=[phaser_check_box], outputs=[phaser_accordion])
- more_options.change(fn=visible, inputs=[more_options], outputs=[more_accordion])
- with gr.Row():
- fade.change(fn=visible, inputs=[fade], outputs=[fade_accordion])
- bass_or_treble.change(fn=visible, inputs=[bass_or_treble], outputs=[bass_treble_accordion])
- limiter.change(fn=visible, inputs=[limiter], outputs=[limiter_accordion])
- resample_checkbox.change(fn=visible, inputs=[resample_checkbox], outputs=[audio_effect_resample_sr])
- with gr.Row():
- distortion_checkbox.change(fn=visible, inputs=[distortion_checkbox], outputs=[distortion_drive_db])
- gain_checkbox.change(fn=visible, inputs=[gain_checkbox], outputs=[gain_db])
- clipping_checkbox.change(fn=visible, inputs=[clipping_checkbox], outputs=[clipping_threshold_db])
- bitcrush_checkbox.change(fn=visible, inputs=[bitcrush_checkbox], outputs=[bitcrush_bit_depth])
- with gr.Row():
- upload_audio.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[upload_audio], outputs=[audio_in_path])
- audio_in_path.change(fn=lambda audio: audio if audio else None, inputs=[audio_in_path], outputs=[audio_play_input])
- audio_effects_refresh.click(fn=lambda a, b: [change_audios_choices(a), change_audios_choices(b)], inputs=[audio_in_path, audio_combination_input], outputs=[audio_in_path, audio_combination_input])
- with gr.Row():
- more_options.change(fn=lambda: [False]*8, inputs=[], outputs=[fade, bass_or_treble, limiter, resample_checkbox, distortion_checkbox, gain_checkbox, clipping_checkbox, bitcrush_checkbox])
- audio_combination.change(fn=visible, inputs=[audio_combination], outputs=[audio_combination_input])
- audio_combination.change(fn=lambda a: [visible(a)]*2, inputs=[audio_combination], outputs=[main_vol, combine_vol])
- with gr.Row():
- upload_presets.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["presets_path"]), inputs=[upload_presets], outputs=[presets_name])
- refresh_click.click(fn=change_effect_preset_choices, inputs=[], outputs=[presets_name])
- with gr.Row():
- load_click.click(
- fn=audio_effect_load_presets,
- inputs=[
- presets_name,
- resample_checkbox,
- audio_effect_resample_sr,
- chorus_depth,
- chorus_rate_hz,
- chorus_mix,
- chorus_centre_delay_ms,
- chorus_feedback,
- distortion_drive_db,
- reverb_room_size,
- reverb_damping,
- reverb_wet_level,
- reverb_dry_level,
- reverb_width,
- reverb_freeze_mode,
- pitch_shift_semitones,
- delay_second,
- delay_feedback,
- delay_mix,
- compressor_threshold_db,
- compressor_ratio,
- compressor_attack_ms,
- compressor_release_ms,
- limiter_threshold_db,
- limiter_release_ms,
- gain_db,
- bitcrush_bit_depth,
- clipping_threshold_db,
- phaser_rate_hz,
- phaser_depth,
- phaser_centre_frequency_hz,
- phaser_feedback,
- phaser_mix,
- bass_boost,
- bass_frequency,
- treble_boost,
- treble_frequency,
- fade_in,
- fade_out,
- chorus_check_box,
- distortion_checkbox,
- reverb_check_box,
- delay_check_box,
- compressor_check_box,
- limiter,
- gain_checkbox,
- bitcrush_checkbox,
- clipping_checkbox,
- phaser_check_box,
- bass_or_treble,
- fade
- ],
- outputs=[
- resample_checkbox,
- audio_effect_resample_sr,
- chorus_depth,
- chorus_rate_hz,
- chorus_mix,
- chorus_centre_delay_ms,
- chorus_feedback,
- distortion_drive_db,
- reverb_room_size,
- reverb_damping,
- reverb_wet_level,
- reverb_dry_level,
- reverb_width,
- reverb_freeze_mode,
- pitch_shift_semitones,
- delay_second,
- delay_feedback,
- delay_mix,
- compressor_threshold_db,
- compressor_ratio,
- compressor_attack_ms,
- compressor_release_ms,
- limiter_threshold_db,
- limiter_release_ms,
- gain_db,
- bitcrush_bit_depth,
- clipping_threshold_db,
- phaser_rate_hz,
- phaser_depth,
- phaser_centre_frequency_hz,
- phaser_feedback,
- phaser_mix,
- bass_boost,
- bass_frequency,
- treble_boost,
- treble_frequency,
- fade_in,
- fade_out,
- chorus_check_box,
- distortion_checkbox,
- reverb_check_box,
- delay_check_box,
- compressor_check_box,
- limiter,
- gain_checkbox,
- bitcrush_checkbox,
- clipping_checkbox,
- phaser_check_box,
- bass_or_treble,
- fade
- ],
- )
- save_file_button.click(
- fn=audio_effect_save_presets,
- inputs=[
- name_to_save_file,
- resample_checkbox,
- audio_effect_resample_sr,
- chorus_depth,
- chorus_rate_hz,
- chorus_mix,
- chorus_centre_delay_ms,
- chorus_feedback,
- distortion_drive_db,
- reverb_room_size,
- reverb_damping,
- reverb_wet_level,
- reverb_dry_level,
- reverb_width,
- reverb_freeze_mode,
- pitch_shift_semitones,
- delay_second,
- delay_feedback,
- delay_mix,
- compressor_threshold_db,
- compressor_ratio,
- compressor_attack_ms,
- compressor_release_ms,
- limiter_threshold_db,
- limiter_release_ms,
- gain_db,
- bitcrush_bit_depth,
- clipping_threshold_db,
- phaser_rate_hz,
- phaser_depth,
- phaser_centre_frequency_hz,
- phaser_feedback,
- phaser_mix,
- bass_boost,
- bass_frequency,
- treble_boost,
- treble_frequency,
- fade_in,
- fade_out,
- chorus_check_box,
- distortion_checkbox,
- reverb_check_box,
- delay_check_box,
- compressor_check_box,
- limiter,
- gain_checkbox,
- bitcrush_checkbox,
- clipping_checkbox,
- phaser_check_box,
- bass_or_treble,
- fade
- ],
- outputs=[presets_name]
- )
- with gr.Row():
- apply_effects_button.click(
- fn=audio_effects,
- inputs=[
- audio_in_path,
- audio_out_path,
- resample_checkbox,
- audio_effect_resample_sr,
- chorus_depth,
- chorus_rate_hz,
- chorus_mix,
- chorus_centre_delay_ms,
- chorus_feedback,
- distortion_drive_db,
- reverb_room_size,
- reverb_damping,
- reverb_wet_level,
- reverb_dry_level,
- reverb_width,
- reverb_freeze_mode,
- pitch_shift_semitones,
- delay_second,
- delay_feedback,
- delay_mix,
- compressor_threshold_db,
- compressor_ratio,
- compressor_attack_ms,
- compressor_release_ms,
- limiter_threshold_db,
- limiter_release_ms,
- gain_db,
- bitcrush_bit_depth,
- clipping_threshold_db,
- phaser_rate_hz,
- phaser_depth,
- phaser_centre_frequency_hz,
- phaser_feedback,
- phaser_mix,
- bass_boost,
- bass_frequency,
- treble_boost,
- treble_frequency,
- fade_in,
- fade_out,
- audio_output_format,
- chorus_check_box,
- distortion_checkbox,
- reverb_check_box,
- delay_check_box,
- compressor_check_box,
- limiter,
- gain_checkbox,
- bitcrush_checkbox,
- clipping_checkbox,
- phaser_check_box,
- bass_or_treble,
- fade,
- audio_combination,
- audio_combination_input,
- main_vol,
- combine_vol
- ],
- outputs=[audio_play_output],
- api_name="audio_effects"
- )
\ No newline at end of file
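The sliders above (room_size, damping, wet_level, centre_delay_ms, drive_db, bit_depth, ...) mirror the parameter names of the Pedalboard library's processors. Assuming that is what main.app.core.editing.audio_effects builds under the hood, an offline chain for a few of these effects looks roughly like the following sketch (not the repo's implementation):

# Hedged sketch of an offline effect chain matching some of the sliders above.
# Requires the pedalboard and soundfile packages; the repo's audio_effects()
# may construct the chain differently.
import soundfile as sf
from pedalboard import Pedalboard, Reverb, Chorus, Delay, Compressor

audio, sr = sf.read("audios/input.wav")

board = Pedalboard([
    Reverb(room_size=0.15, damping=0.7, wet_level=0.2, dry_level=0.8, width=1.0),
    Chorus(rate_hz=1.5, depth=0.5, centre_delay_ms=10, feedback=0.0, mix=0.5),
    Delay(delay_seconds=0.5, feedback=0.5, mix=0.5),
    Compressor(threshold_db=-20, ratio=1.0, attack_ms=10, release_ms=100),
])

sf.write("audios/audio_effects.wav", board(audio, sr), sr)
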
diff --git a/main/app/tabs/editing/child/quirk.py b/main/app/tabs/editing/child/quirk.py
deleted file mode 100644
index 7a7c1feb217f1fd480fb208e1500fd1b47c78fad..0000000000000000000000000000000000000000
--- a/main/app/tabs/editing/child/quirk.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.editing import apply_voice_quirk
-from main.app.core.ui import change_audios_choices, shutil_move
-from main.app.variables import translations, paths_for_files, configs
-
-def quirk_tab():
- with gr.Row():
- gr.Markdown(translations["quirk_markdown"])
- with gr.Row():
- input_audio_play = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
- with gr.Row():
- quirk_choice = gr.Radio(label=translations["quirk_label"], info=translations["quirk_label_info"], choices=list(translations["quirk_choice"].keys()), interactive=True, value=list(translations["quirk_choice"].keys())[0])
- with gr.Row():
- apply_quirk_button = gr.Button(translations["apply"], variant="primary")
- with gr.Row():
- with gr.Accordion(translations["input_output"], open=False):
- with gr.Row():
- quirk_upload_audio = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
- with gr.Column():
- quirk_export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
- quirk_input_path = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
- quirk_output_path = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
- with gr.Column():
- quirk_refresh = gr.Button(translations["refresh"])
- with gr.Row():
- output_audio_play = gr.Audio(show_download_button=True, interactive=False, label=translations["output_audio"])
- with gr.Row():
- quirk_upload_audio.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[quirk_upload_audio], outputs=[quirk_input_path])
- quirk_input_path.change(fn=lambda audio: audio if audio else None, inputs=[quirk_input_path], outputs=[input_audio_play])
- quirk_refresh.click(fn=change_audios_choices, inputs=[quirk_input_path], outputs=[quirk_input_path])
- with gr.Row():
- apply_quirk_button.click(
- fn=apply_voice_quirk,
- inputs=[
- quirk_input_path,
- quirk_choice,
- quirk_output_path,
- quirk_export_format
- ],
- outputs=[output_audio_play],
- api_name="quirk"
- )
\ No newline at end of file
diff --git a/main/app/tabs/editing/editing.py b/main/app/tabs/editing/editing.py
deleted file mode 100644
index 10964204b1e39de7c2d239fdfe959eb6900f6ae9..0000000000000000000000000000000000000000
--- a/main/app/tabs/editing/editing.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.variables import configs, translations
-from main.app.tabs.editing.child.quirk import quirk_tab
-from main.app.tabs.editing.child.audio_effects import audio_effects_tab
-
-def editing_tab():
- with gr.TabItem(translations["editing"], visible=configs.get("editing_tab", True)):
- with gr.TabItem(translations["audio_effects"], visible=configs.get("effects_tab", True)):
- gr.Markdown(translations["apply_audio_effects"])
- audio_effects_tab()
-
- with gr.TabItem(translations["quirk"], visible=configs.get("quirk", True)):
- gr.Markdown(translations["quirk_info"])
- quirk_tab()
\ No newline at end of file
diff --git a/main/app/tabs/extra/child/convert_model.py b/main/app/tabs/extra/child/convert_model.py
deleted file mode 100644
index 410ffd88ab46a829484266b7fd2bf6d6e18743f6..0000000000000000000000000000000000000000
--- a/main/app/tabs/extra/child/convert_model.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.ui import visible, shutil_move
-from main.app.core.model_utils import onnx_export
-from main.app.variables import translations, configs
-
-def convert_model_tab():
- with gr.Row():
- gr.Markdown(translations["pytorch2onnx_markdown"])
- with gr.Row():
- model_pth_upload = gr.File(label=translations["drop_model"], file_types=[".pth"])
- with gr.Row():
- convert_onnx = gr.Button(translations["convert_model"], variant="primary", scale=2)
- with gr.Row():
- model_pth_path = gr.Textbox(label=translations["model_path"], value="", placeholder="assets/weights/Model.pth", info=translations["model_path_info"], interactive=True)
- with gr.Row():
- output_model2 = gr.File(label=translations["output_model_path"], file_types=[".pth", ".onnx"], interactive=False, visible=False)
- with gr.Row():
- model_pth_upload.upload(fn=lambda model_pth_upload: shutil_move(model_pth_upload.name, configs["weights_path"]), inputs=[model_pth_upload], outputs=[model_pth_path])
- convert_onnx.click(
- fn=onnx_export,
- inputs=[model_pth_path],
- outputs=[output_model2],
- api_name="model_onnx_export"
- )
- convert_onnx.click(fn=lambda: visible(True), inputs=[], outputs=[output_model2])
\ No newline at end of file
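onnx_export() is defined in main.app.core.model_utils and is not shown in this diff; the general mechanism it wraps is torch.onnx.export with a dummy input. A generic sketch of that mechanism follows (the RVC synthesizer's real inputs and shapes are more involved):

# Generic PyTorch -> ONNX sketch; the model and input shape are stand-ins,
# not the RVC synthesizer actually exported by onnx_export().
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(256, 256), nn.ReLU()).eval()
dummy = torch.randn(1, 256)

torch.onnx.export(
    model, dummy, "assets/weights/Model.onnx",
    input_names=["input"], output_names=["output"],
    dynamic_axes={"input": {0: "batch"}, "output": {0: "batch"}},
    opset_version=17,
)
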
diff --git a/main/app/tabs/extra/child/f0_extract.py b/main/app/tabs/extra/child/f0_extract.py
deleted file mode 100644
index 997f15ebaa1416246ef8b1e8cbcf3230af688e86..0000000000000000000000000000000000000000
--- a/main/app/tabs/extra/child/f0_extract.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.f0_extract import f0_extract
-from main.app.core.ui import change_audios_choices, unlock_f0, shutil_move
-from main.app.variables import translations, paths_for_files, method_f0, configs
-
-def f0_extract_tab():
- with gr.Row():
- gr.Markdown(translations["f0_extractor_markdown_2"])
- with gr.Row():
- extractor_button = gr.Button(translations["extract_button"].replace("2. ", ""), variant="primary")
- with gr.Row():
- with gr.Column():
- upload_audio_file = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
- audioplay = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
- with gr.Column():
- with gr.Accordion(translations["f0_method"], open=False):
- with gr.Group():
- with gr.Row():
- onnx_f0_mode3 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
- unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
- f0_method_extract = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=[m for m in method_f0 if m != "hybrid"], value="rmvpe", interactive=True)
- with gr.Accordion(translations["audio_path"], open=True):
- input_audio_path = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, allow_custom_value=True, interactive=True)
- refresh_audio_button = gr.Button(translations["refresh"])
- with gr.Row():
- gr.Markdown("___")
- with gr.Row():
- file_output = gr.File(label="", file_types=[".txt"], interactive=False)
- image_output = gr.Image(label="", interactive=False, show_download_button=True)
- with gr.Row():
- upload_audio_file.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[upload_audio_file], outputs=[input_audio_path])
- input_audio_path.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio_path], outputs=[audioplay])
- refresh_audio_button.click(fn=change_audios_choices, inputs=[input_audio_path], outputs=[input_audio_path])
- with gr.Row():
- unlock_full_method.change(fn=lambda method: [m for m in unlock_f0(method) if m != "hybrid"], inputs=[unlock_full_method], outputs=[f0_method_extract])
- extractor_button.click(
- fn=f0_extract,
- inputs=[
- input_audio_path,
- f0_method_extract,
- onnx_f0_mode3
- ],
- outputs=[file_output, image_output],
- api_name="f0_extract"
- )
\ No newline at end of file
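The tab's pitch backends (rmvpe, crepe, fcpe, harvest, ...) live in main.app.core.f0_extract. As a self-contained illustration of what an F0 contour extraction produces, here is a sketch using librosa's pYIN estimator; it stands in for the repo's methods only, and the output layout (one value per frame) may differ from the .txt file the tab writes:

# Standalone F0-extraction sketch with librosa's pYIN, for illustration only.
import librosa
import numpy as np

audio, sr = librosa.load("audios/input.wav", sr=16000, mono=True)
f0, voiced_flag, _ = librosa.pyin(
    audio,
    fmin=librosa.note_to_hz("C2"),  # ~65 Hz
    fmax=librosa.note_to_hz("C7"),  # ~2093 Hz
    sr=sr,
)
f0 = np.where(voiced_flag, f0, 0.0)       # zero out unvoiced frames
np.savetxt("audios/input_f0.txt", f0, fmt="%.2f")
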
diff --git a/main/app/tabs/extra/child/fushion.py b/main/app/tabs/extra/child/fushion.py
deleted file mode 100644
index 0064ef81ec702236ded2833a65d1d394d552e312..0000000000000000000000000000000000000000
--- a/main/app/tabs/extra/child/fushion.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.ui import visible, shutil_move
-from main.app.core.model_utils import fushion_model
-from main.app.variables import translations, configs
-
-def fushion_tab():
- with gr.Row():
- gr.Markdown(translations["fushion_markdown_2"])
- with gr.Row():
- name_to_save = gr.Textbox(label=translations["modelname"], placeholder="Model.pth", value="", max_lines=1, interactive=True)
- with gr.Row():
- fushion_button = gr.Button(translations["fushion"], variant="primary", scale=4)
- with gr.Column():
- with gr.Row():
- model_a = gr.File(label=f"{translations['model_name']} 1", file_types=[".pth", ".onnx"])
- model_b = gr.File(label=f"{translations['model_name']} 2", file_types=[".pth", ".onnx"])
- with gr.Row():
- model_path_a = gr.Textbox(label=f"{translations['model_path']} 1", value="", placeholder="assets/weights/Model_1.pth")
- model_path_b = gr.Textbox(label=f"{translations['model_path']} 2", value="", placeholder="assets/weights/Model_2.pth")
- with gr.Row():
- ratio = gr.Slider(minimum=0, maximum=1, label=translations["model_ratio"], info=translations["model_ratio_info"], value=0.5, interactive=True)
- with gr.Row():
- output_model = gr.File(label=translations["output_model_path"], file_types=[".pth", ".onnx"], interactive=False, visible=False)
- with gr.Row():
- model_a.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model_a], outputs=[model_path_a])
- model_b.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model_b], outputs=[model_path_b])
- with gr.Row():
- fushion_button.click(
- fn=fushion_model,
- inputs=[
- name_to_save,
- model_path_a,
- model_path_b,
- ratio
- ],
- outputs=[name_to_save, output_model],
- api_name="fushion_model"
- )
- fushion_button.click(fn=lambda: visible(True), inputs=[], outputs=[output_model])
\ No newline at end of file
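fushion_model() (from main.app.core.model_utils) blends two checkpoints according to the ratio slider. The core of such a fusion is a per-tensor weighted average of the two state dicts; a sketch of that idea, assuming RVC-style .pth files that keep their tensors under a "weight" key:

# Hedged sketch of checkpoint fusion: a linear blend of matching tensors by `ratio`.
# Assumes RVC-style weight files with the state dict under "weight"; the repo's
# fushion_model() also handles metadata, mismatched shapes, and ONNX models.
import torch

ratio = 0.5
ckpt_a = torch.load("assets/weights/Model_1.pth", map_location="cpu")
ckpt_b = torch.load("assets/weights/Model_2.pth", map_location="cpu")

state_a, state_b = ckpt_a["weight"], ckpt_b["weight"]
ckpt_a["weight"] = {
    key: ratio * state_a[key].float() + (1 - ratio) * state_b[key].float()
    for key in state_a
    if key in state_b and state_a[key].shape == state_b[key].shape
}

torch.save(ckpt_a, "assets/weights/Model.pth")
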
diff --git a/main/app/tabs/extra/child/read_model.py b/main/app/tabs/extra/child/read_model.py
deleted file mode 100644
index 4ca25625fd48dbff9e64bbb388851fc35883a450..0000000000000000000000000000000000000000
--- a/main/app/tabs/extra/child/read_model.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.ui import shutil_move
-from main.app.core.model_utils import model_info
-from main.app.variables import translations, configs
-
-def read_model_tab():
- with gr.Row():
- gr.Markdown(translations["read_model_markdown_2"])
- with gr.Row():
- model = gr.File(label=translations["drop_model"], file_types=[".pth", ".onnx"])
- with gr.Row():
- read_button = gr.Button(translations["readmodel"], variant="primary", scale=2)
- with gr.Column():
- model_path = gr.Textbox(label=translations["model_path"], value="", placeholder="assets/weights/Model.pth", info=translations["model_path_info"], interactive=True)
- output_info = gr.Textbox(label=translations["modelinfo"], value="", interactive=False, scale=6)
- with gr.Row():
- model.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model], outputs=[model_path])
- read_button.click(
- fn=model_info,
- inputs=[model_path],
- outputs=[output_info],
- api_name="read_model"
- )
\ No newline at end of file
diff --git a/main/app/tabs/extra/child/report_bugs.py b/main/app/tabs/extra/child/report_bugs.py
deleted file mode 100644
index c8b2ba8a01941963577cf957b5368c0bd5a1ebcc..0000000000000000000000000000000000000000
--- a/main/app/tabs/extra/child/report_bugs.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import os
-import sys
-import codecs
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.utils import report_bug
-from main.app.variables import translations
-
-def report_bugs_tab():
- with gr.Row():
- gr.Markdown(translations["report_bug_info"])
- with gr.Row():
- with gr.Column():
- with gr.Group():
- agree_log = gr.Checkbox(label=translations["agree_log"], value=True, interactive=True)
- report_text = gr.Textbox(label=translations["error_info"], info=translations["error_info_2"], interactive=True)
- report_button = gr.Button(translations["report_bugs"], variant="primary", scale=2)
- with gr.Row():
- gr.Markdown(translations["report_info"].format(github=codecs.decode("uggcf://tvguho.pbz/CunzUhlauNau16/Ivrganzrfr-EIP/vffhrf", "rot13")))
- with gr.Row():
- report_button.click(fn=report_bug, inputs=[report_text, agree_log], outputs=[])
\ No newline at end of file
diff --git a/main/app/tabs/extra/child/settings.py b/main/app/tabs/extra/child/settings.py
deleted file mode 100644
index 29a03cf1ec94ead13e6b95b3dd1498df64c6fccb..0000000000000000000000000000000000000000
--- a/main/app/tabs/extra/child/settings.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.ui import change_fp
-from main.app.core.utils import stop_pid
-from main.app.core.restart import change_font, change_language, change_theme
-from main.app.variables import translations, theme, font, configs, language, config
-
-def settings_tab(app):
- with gr.Row():
- gr.Markdown(translations["settings_markdown_2"])
- with gr.Row():
- toggle_button = gr.Button(translations["change_light_dark"], variant="secondary", scale=2)
- with gr.Row():
- with gr.Column():
- language_dropdown = gr.Dropdown(label=translations["lang"], interactive=True, info=translations["lang_restart"], choices=configs.get("support_language", "vi-VN"), value=language)
- change_lang = gr.Button(translations["change_lang"], variant="primary", scale=2)
- with gr.Column():
- theme_dropdown = gr.Dropdown(label=translations["theme"], interactive=True, info=translations["theme_restart"], choices=configs.get("themes", theme), value=theme, allow_custom_value=True)
- changetheme = gr.Button(translations["theme_button"], variant="primary", scale=2)
- with gr.Row():
- with gr.Column():
- fp_choice = gr.Radio(choices=["fp16","fp32"], value="fp16" if configs.get("fp16", False) else "fp32", label=translations["precision"], info=translations["precision_info"], interactive=config.device not in ["cpu", "mps", "ocl:0"])
- fp_button = gr.Button(translations["update_precision"], variant="secondary", scale=2)
- with gr.Column():
- font_choice = gr.Textbox(label=translations["font"], info=translations["font_info"], value=font, interactive=True)
- font_button = gr.Button(translations["change_font"])
- with gr.Row():
- with gr.Column():
- with gr.Accordion(translations["stop"], open=False, visible=config.debug_mode):
- separate_stop = gr.Button(translations["stop_separate"])
- convert_stop = gr.Button(translations["stop_convert"])
- create_dataset_stop = gr.Button(translations["stop_create_dataset"])
- with gr.Accordion(translations["stop_training"], open=False):
- model_name_stop = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
- preprocess_stop = gr.Button(translations["stop_preprocess"])
- extract_stop = gr.Button(translations["stop_extract"])
- train_stop = gr.Button(translations["stop_training"])
- with gr.Row():
- toggle_button.click(fn=None, js="() => {document.body.classList.toggle('dark')}")
- fp_button.click(fn=change_fp, inputs=[fp_choice], outputs=[fp_choice])
- with gr.Row():
- change_lang.click(fn=lambda a: change_language(a, app), inputs=[language_dropdown], outputs=[])
- changetheme.click(fn=lambda a: change_theme(a, app) , inputs=[theme_dropdown], outputs=[])
- font_button.click(fn=lambda a: change_font(a, app), inputs=[font_choice], outputs=[])
- with gr.Row():
- change_lang.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
- changetheme.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
- font_button.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
- with gr.Row():
- separate_stop.click(fn=lambda: stop_pid("separate_pid", None, False), inputs=[], outputs=[])
- convert_stop.click(fn=lambda: stop_pid("convert_pid", None, False), inputs=[], outputs=[])
- create_dataset_stop.click(fn=lambda: stop_pid("create_dataset_pid", None, False), inputs=[], outputs=[])
- with gr.Row():
- preprocess_stop.click(fn=lambda model_name_stop: stop_pid("preprocess_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
- extract_stop.click(fn=lambda model_name_stop: stop_pid("extract_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
- train_stop.click(fn=lambda model_name_stop: stop_pid("train_pid", model_name_stop, True), inputs=[model_name_stop], outputs=[])
\ No newline at end of file
diff --git a/main/app/tabs/extra/extra.py b/main/app/tabs/extra/extra.py
deleted file mode 100644
index e3a61c89a82d006ea910b80b2e4a2c78f61fb2f3..0000000000000000000000000000000000000000
--- a/main/app/tabs/extra/extra.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.variables import translations, configs
-from main.app.tabs.extra.child.fushion import fushion_tab
-from main.app.tabs.extra.child.settings import settings_tab
-from main.app.tabs.extra.child.read_model import read_model_tab
-from main.app.tabs.extra.child.f0_extract import f0_extract_tab
-from main.app.tabs.extra.child.report_bugs import report_bugs_tab
-from main.app.tabs.extra.child.convert_model import convert_model_tab
-
-def extra_tab(app):
- with gr.TabItem(translations["extra"], visible=configs.get("extra_tab", True)):
- with gr.TabItem(translations["fushion"], visible=configs.get("fushion_tab", True)):
- gr.Markdown(translations["fushion_markdown"])
- fushion_tab()
-
- with gr.TabItem(translations["read_model"], visible=configs.get("read_tab", True)):
- gr.Markdown(translations["read_model_markdown"])
- read_model_tab()
-
- with gr.TabItem(translations["convert_model"], visible=configs.get("onnx_tab", True)):
- gr.Markdown(translations["pytorch2onnx"])
- convert_model_tab()
-
- with gr.TabItem(translations["f0_extractor_tab"], visible=configs.get("f0_extractor_tab", True)):
- gr.Markdown(translations["f0_extractor_markdown"])
- f0_extract_tab()
-
- with gr.TabItem(translations["settings"], visible=configs.get("settings_tab", True)):
- gr.Markdown(translations["settings_markdown"])
- settings_tab(app)
-
- with gr.TabItem(translations["report_bugs"], visible=configs.get("report_bug_tab", True)):
- gr.Markdown(translations["report_bugs"])
- report_bugs_tab()
\ No newline at end of file
diff --git a/main/app/tabs/inference/child/convert.py b/main/app/tabs/inference/child/convert.py
deleted file mode 100644
index 87930e4d94fd61636957098e62a95db8e7617f0d..0000000000000000000000000000000000000000
--- a/main/app/tabs/inference/child/convert.py
+++ /dev/null
@@ -1,313 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.presets import load_presets, save_presets
-from main.app.core.inference import convert_audio, convert_selection
-from main.app.variables import translations, paths_for_files, sample_rate_choice, model_name, index_path, method_f0, f0_file, embedders_mode, embedders_model, presets_file, configs
-from main.app.core.ui import visible, valueFalse_interactive, change_audios_choices, change_f0_choices, unlock_f0, change_preset_choices, change_backing_choices, hoplength_show, change_models_choices, get_index, index_strength_show, visible_embedders, shutil_move
-
-def convert_tab():
- with gr.Row():
- gr.Markdown(translations["convert_info"])
- with gr.Row():
- with gr.Column():
- with gr.Group():
- with gr.Row():
- cleaner0 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
- autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
- use_audio = gr.Checkbox(label=translations["use_audio"], value=False, interactive=True)
- checkpointing = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
- with gr.Row():
- use_original = gr.Checkbox(label=translations["convert_original"], value=False, interactive=True, visible=use_audio.value)
- convert_backing = gr.Checkbox(label=translations["convert_backing"], value=False, interactive=True, visible=use_audio.value)
- not_merge_backing = gr.Checkbox(label=translations["not_merge_backing"], value=False, interactive=True, visible=use_audio.value)
- merge_instrument = gr.Checkbox(label=translations["merge_instruments"], value=False, interactive=True, visible=use_audio.value)
- with gr.Row():
- pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
- clean_strength0 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner0.value)
- with gr.Row():
- with gr.Column():
- audio_select = gr.Dropdown(label=translations["select_separate"], choices=[], value="", interactive=True, allow_custom_value=True, visible=False)
- convert_button_2 = gr.Button(translations["convert_audio"], visible=False)
- with gr.Row():
- with gr.Column():
- convert_button = gr.Button(translations["convert_audio"], variant="primary")
- with gr.Row():
- with gr.Column():
- input0 = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
- play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
- with gr.Column():
- with gr.Accordion(translations["model_accordion"], open=True):
- with gr.Row():
- model_pth = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
- model_index = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
- with gr.Row():
- refresh = gr.Button(translations["refresh"])
- with gr.Row():
- index_strength = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index.value != "")
- with gr.Accordion(translations["input_output"], open=False):
- with gr.Column():
- export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
- input_audio0 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
- output_audio = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
- with gr.Column():
- refresh0 = gr.Button(translations["refresh"])
- with gr.Accordion(translations["setting"], open=False):
- with gr.Accordion(translations["f0_method"], open=False):
- with gr.Group():
- with gr.Row():
- onnx_f0_mode = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
- unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
- method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
- hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=["hybrid[pm+dio]", "hybrid[pm+crepe-tiny]", "hybrid[pm+crepe]", "hybrid[pm+fcpe]", "hybrid[pm+rmvpe]", "hybrid[pm+harvest]", "hybrid[pm+yin]", "hybrid[dio+crepe-tiny]", "hybrid[dio+crepe]", "hybrid[dio+fcpe]", "hybrid[dio+rmvpe]", "hybrid[dio+harvest]", "hybrid[dio+yin]", "hybrid[crepe-tiny+crepe]", "hybrid[crepe-tiny+fcpe]", "hybrid[crepe-tiny+rmvpe]", "hybrid[crepe-tiny+harvest]", "hybrid[crepe+fcpe]", "hybrid[crepe+rmvpe]", "hybrid[crepe+harvest]", "hybrid[crepe+yin]", "hybrid[fcpe+rmvpe]", "hybrid[fcpe+harvest]", "hybrid[fcpe+yin]", "hybrid[rmvpe+harvest]", "hybrid[rmvpe+yin]", "hybrid[harvest+yin]"], value="hybrid[pm+dio]", interactive=True, allow_custom_value=True, visible=method.value == "hybrid")
- hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
- with gr.Accordion(translations["f0_file"], open=False):
- upload_f0_file = gr.File(label=translations["upload_f0"], file_types=[".txt"])
- f0_file_dropdown = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
- refresh_f0_file = gr.Button(translations["refresh"])
- with gr.Accordion(translations["hubert_model"], open=False):
- embed_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
- embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
- custom_embedders = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
- with gr.Accordion(translations["use_presets"], open=False):
- with gr.Row():
- presets_name = gr.Dropdown(label=translations["file_preset"], choices=presets_file, value=presets_file[0] if len(presets_file) > 0 else '', interactive=True, allow_custom_value=True)
- with gr.Row():
- load_click = gr.Button(translations["load_file"], variant="primary")
- refresh_click = gr.Button(translations["refresh"])
- with gr.Accordion(translations["export_file"], open=False):
- with gr.Row():
- with gr.Column():
- with gr.Group():
- with gr.Row():
- cleaner_chbox = gr.Checkbox(label=translations["save_clean"], value=True, interactive=True)
- autotune_chbox = gr.Checkbox(label=translations["save_autotune"], value=True, interactive=True)
- pitch_chbox = gr.Checkbox(label=translations["save_pitch"], value=True, interactive=True)
- index_strength_chbox = gr.Checkbox(label=translations["save_index_2"], value=True, interactive=True)
- resample_sr_chbox = gr.Checkbox(label=translations["save_resample"], value=True, interactive=True)
- filter_radius_chbox = gr.Checkbox(label=translations["save_filter"], value=True, interactive=True)
- rms_mix_rate_chbox = gr.Checkbox(label=translations["save_envelope"], value=True, interactive=True)
- protect_chbox = gr.Checkbox(label=translations["save_protect"], value=True, interactive=True)
- split_audio_chbox = gr.Checkbox(label=translations["save_split"], value=True, interactive=True)
- formant_shifting_chbox = gr.Checkbox(label=translations["formantshift"], value=True, interactive=True)
- with gr.Row():
- with gr.Column():
- name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
- save_file_button = gr.Button(translations["export_file"])
- with gr.Row():
- upload_presets = gr.File(label=translations["upload_presets"], file_types=[".conversion.json"])
- with gr.Column():
- with gr.Row():
- split_audio = gr.Checkbox(label=translations["split_audio"], value=False, interactive=True)
- formant_shifting = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
- proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
- resample_sr = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
- proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
- f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
- filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
- rms_mix_rate = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
- protect = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
- with gr.Row():
- formant_qfrency = gr.Slider(value=1.0, label=translations["formant_qfrency"], info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
- formant_timbre = gr.Slider(value=1.0, label=translations["formant_timbre"], info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
- with gr.Row():
- gr.Markdown(translations["output_convert"])
- with gr.Row():
- main_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["main_convert"])
- backing_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["convert_backing"], visible=convert_backing.value)
- main_backing = gr.Audio(show_download_button=True, interactive=False, label=translations["main_or_backing"], visible=convert_backing.value)
- with gr.Row():
- original_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["convert_original"], visible=use_original.value)
- vocal_instrument = gr.Audio(show_download_button=True, interactive=False, label=translations["voice_or_instruments"], visible=merge_instrument.value)
- with gr.Row():
- upload_f0_file.upload(fn=lambda inp: shutil_move(inp.name, configs["f0_path"]), inputs=[upload_f0_file], outputs=[f0_file_dropdown])
- refresh_f0_file.click(fn=change_f0_choices, inputs=[], outputs=[f0_file_dropdown])
- unlock_full_method.change(fn=unlock_f0, inputs=[unlock_full_method], outputs=[method])
- with gr.Row():
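- # Apply the values saved in the selected preset file to the matching conversion controls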
- load_click.click(
- fn=load_presets,
- inputs=[
- presets_name,
- cleaner0,
- autotune,
- pitch,
- clean_strength0,
- index_strength,
- resample_sr,
- filter_radius,
- rms_mix_rate,
- protect,
- split_audio,
- f0_autotune_strength,
- formant_qfrency,
- formant_timbre
- ],
- outputs=[
- cleaner0,
- autotune,
- pitch,
- clean_strength0,
- index_strength,
- resample_sr,
- filter_radius,
- rms_mix_rate,
- protect,
- split_audio,
- f0_autotune_strength,
- formant_shifting,
- formant_qfrency,
- formant_timbre
- ]
- )
- refresh_click.click(fn=change_preset_choices, inputs=[], outputs=[presets_name])
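- # Export the current control values, filtered by the save checkboxes above, to a new preset file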
- save_file_button.click(
- fn=save_presets,
- inputs=[
- name_to_save_file,
- cleaner0,
- autotune,
- pitch,
- clean_strength0,
- index_strength,
- resample_sr,
- filter_radius,
- rms_mix_rate,
- protect,
- split_audio,
- f0_autotune_strength,
- cleaner_chbox,
- autotune_chbox,
- pitch_chbox,
- index_strength_chbox,
- resample_sr_chbox,
- filter_radius_chbox,
- rms_mix_rate_chbox,
- protect_chbox,
- split_audio_chbox,
- formant_shifting_chbox,
- formant_shifting,
- formant_qfrency,
- formant_timbre
- ],
- outputs=[presets_name]
- )
- with gr.Row():
- upload_presets.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["presets_path"]), inputs=[upload_presets], outputs=[presets_name])
- autotune.change(fn=visible, inputs=[autotune], outputs=[f0_autotune_strength])
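- # Toggling use_audio shows the separated-audio options and hides the plain file inputs (and vice versa)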
- use_audio.change(fn=lambda a: [visible(a), visible(a), visible(a), visible(a), visible(a), valueFalse_interactive(a), valueFalse_interactive(a), valueFalse_interactive(a), valueFalse_interactive(a), visible(not a), visible(not a), visible(not a), visible(not a)], inputs=[use_audio], outputs=[main_backing, use_original, convert_backing, not_merge_backing, merge_instrument, use_original, convert_backing, not_merge_backing, merge_instrument, input_audio0, output_audio, input0, play_audio])
- with gr.Row():
- convert_backing.change(fn=lambda a,b: [change_backing_choices(a, b), visible(a)], inputs=[convert_backing, not_merge_backing], outputs=[use_original, backing_convert])
- use_original.change(fn=lambda audio, original: [visible(original), visible(not original), visible(audio and not original), valueFalse_interactive(not original), valueFalse_interactive(not original)], inputs=[use_audio, use_original], outputs=[original_convert, main_convert, main_backing, convert_backing, not_merge_backing])
- cleaner0.change(fn=visible, inputs=[cleaner0], outputs=[clean_strength0])
- with gr.Row():
- merge_instrument.change(fn=visible, inputs=[merge_instrument], outputs=[vocal_instrument])
- not_merge_backing.change(fn=lambda audio, merge, cvb: [visible(audio and not merge), change_backing_choices(cvb, merge)], inputs=[use_audio, not_merge_backing, convert_backing], outputs=[main_backing, use_original])
- method.change(fn=lambda method, hybrid: [visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method, hybrid_method], outputs=[hybrid_method, hop_length])
- with gr.Row():
- hybrid_method.change(fn=hoplength_show, inputs=[method, hybrid_method], outputs=[hop_length])
- refresh.click(fn=change_models_choices, inputs=[], outputs=[model_pth, model_index])
- model_pth.change(fn=get_index, inputs=[model_pth], outputs=[model_index])
- with gr.Row():
- input0.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[input0], outputs=[input_audio0])
- input_audio0.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio0], outputs=[play_audio])
- formant_shifting.change(fn=lambda a: [visible(a)]*2, inputs=[formant_shifting], outputs=[formant_qfrency, formant_timbre])
- with gr.Row():
- embedders.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders], outputs=[custom_embedders])
- refresh0.click(fn=change_audios_choices, inputs=[input_audio0], outputs=[input_audio0])
- model_index.change(fn=index_strength_show, inputs=[model_index], outputs=[index_strength])
- with gr.Row():
- convert_button.click(fn=lambda: visible(False), inputs=[], outputs=[convert_button])
- convert_button_2.click(fn=lambda: [visible(False), visible(False)], inputs=[], outputs=[audio_select, convert_button_2])
- with gr.Row():
- embed_mode.change(fn=visible_embedders, inputs=[embed_mode], outputs=[embedders])
- proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
- with gr.Row():
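- # Run the conversion; convert_selection may instead reveal audio_select and the second convert button so the user can pick a specific track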
- convert_button.click(
- fn=convert_selection,
- inputs=[
- cleaner0,
- autotune,
- use_audio,
- use_original,
- convert_backing,
- not_merge_backing,
- merge_instrument,
- pitch,
- clean_strength0,
- model_pth,
- model_index,
- index_strength,
- input_audio0,
- output_audio,
- export_format,
- method,
- hybrid_method,
- hop_length,
- embedders,
- custom_embedders,
- resample_sr,
- filter_radius,
- rms_mix_rate,
- protect,
- split_audio,
- f0_autotune_strength,
- checkpointing,
- onnx_f0_mode,
- formant_shifting,
- formant_qfrency,
- formant_timbre,
- f0_file_dropdown,
- embed_mode,
- proposal_pitch,
- proposal_pitch_threshold
- ],
- outputs=[audio_select, main_convert, backing_convert, main_backing, original_convert, vocal_instrument, convert_button, convert_button_2],
- api_name="convert_selection"
- )
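- # Second pass: convert_audio processes the track chosen in audio_select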
- convert_button_2.click(
- fn=convert_audio,
- inputs=[
- cleaner0,
- autotune,
- use_audio,
- use_original,
- convert_backing,
- not_merge_backing,
- merge_instrument,
- pitch,
- clean_strength0,
- model_pth,
- model_index,
- index_strength,
- input_audio0,
- output_audio,
- export_format,
- method,
- hybrid_method,
- hop_length,
- embedders,
- custom_embedders,
- resample_sr,
- filter_radius,
- rms_mix_rate,
- protect,
- split_audio,
- f0_autotune_strength,
- audio_select,
- checkpointing,
- onnx_f0_mode,
- formant_shifting,
- formant_qfrency,
- formant_timbre,
- f0_file_dropdown,
- embed_mode,
- proposal_pitch,
- proposal_pitch_threshold
- ],
- outputs=[main_convert, backing_convert, main_backing, original_convert, vocal_instrument, convert_button],
- api_name="convert_audio"
- )
\ No newline at end of file
diff --git a/main/app/tabs/inference/child/convert_tts.py b/main/app/tabs/inference/child/convert_tts.py
deleted file mode 100644
index 670f2e8931a835a11f0255d0e3be9479ba5e0b7d..0000000000000000000000000000000000000000
--- a/main/app/tabs/inference/child/convert_tts.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.tts import TTS
-from main.app.core.process import process_input
-from main.app.core.inference import convert_tts
-from main.app.core.utils import google_translate
-from main.app.variables import translations, sample_rate_choice, model_name, index_path, method_f0, f0_file, embedders_mode, embedders_model, edgetts, google_tts_voice, configs
-from main.app.core.ui import visible, change_f0_choices, unlock_f0, hoplength_show, change_models_choices, get_index, index_strength_show, visible_embedders, change_tts_voice_choices, shutil_move
-
-def convert_tts_tab():
- with gr.Row():
- gr.Markdown(translations["convert_text_markdown_2"])
- with gr.Row():
- with gr.Column():
- with gr.Group():
- with gr.Row():
- use_txt = gr.Checkbox(label=translations["input_txt"], value=False, interactive=True)
- google_tts_check_box = gr.Checkbox(label=translations["googletts"], value=False, interactive=True)
- prompt = gr.Textbox(label=translations["text_to_speech"], value="", placeholder="Hello Words", lines=3)
- with gr.Column():
- speed = gr.Slider(label=translations["voice_speed"], info=translations["voice_speed_info"], minimum=-100, maximum=100, value=0, step=1)
- pitch0 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
- with gr.Row():
- tts_button = gr.Button(translations["tts_1"], variant="primary", scale=2)
- convert_button0 = gr.Button(translations["tts_2"], variant="secondary", scale=2)
- with gr.Row():
- with gr.Column():
- txt_input = gr.File(label=translations["drop_text"], file_types=[".txt", ".srt", ".docx"], visible=use_txt.value)
- tts_voice = gr.Dropdown(label=translations["voice"], choices=edgetts, interactive=True, value="vi-VN-NamMinhNeural")
- tts_pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info_2"], label=translations["pitch"], value=0, interactive=True)
- with gr.Accordion(translations["translate"], open=False):
- with gr.Row():
- source_lang = gr.Dropdown(label=translations["source_lang"], choices=["auto"]+google_tts_voice, interactive=True, value="auto")
- target_lang = gr.Dropdown(label=translations["target_lang"], choices=google_tts_voice, interactive=True, value="en")
- translate_button = gr.Button(translations["translate"])
- with gr.Column():
- with gr.Accordion(translations["model_accordion"], open=True):
- with gr.Row():
- model_pth0 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
- model_index0 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
- with gr.Row():
- refresh1 = gr.Button(translations["refresh"])
- with gr.Row():
- index_strength0 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index0.value != "")
- with gr.Accordion(translations["output_path"], open=False):
- export_format0 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
- output_audio0 = gr.Textbox(label=translations["output_tts"], value="audios/tts.wav", placeholder="audios/tts.wav", info=translations["tts_output"], interactive=True)
- output_audio1 = gr.Textbox(label=translations["output_tts_convert"], value="audios/tts-convert.wav", placeholder="audios/tts-convert.wav", info=translations["tts_output"], interactive=True)
- with gr.Accordion(translations["setting"], open=False):
- with gr.Accordion(translations["f0_method"], open=False):
- with gr.Group():
- with gr.Row():
- onnx_f0_mode1 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
- unlock_full_method3 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
- method0 = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
- hybrid_method0 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=["hybrid[pm+dio]", "hybrid[pm+crepe-tiny]", "hybrid[pm+crepe]", "hybrid[pm+fcpe]", "hybrid[pm+rmvpe]", "hybrid[pm+harvest]", "hybrid[pm+yin]", "hybrid[dio+crepe-tiny]", "hybrid[dio+crepe]", "hybrid[dio+fcpe]", "hybrid[dio+rmvpe]", "hybrid[dio+harvest]", "hybrid[dio+yin]", "hybrid[crepe-tiny+crepe]", "hybrid[crepe-tiny+fcpe]", "hybrid[crepe-tiny+rmvpe]", "hybrid[crepe-tiny+harvest]", "hybrid[crepe+fcpe]", "hybrid[crepe+rmvpe]", "hybrid[crepe+harvest]", "hybrid[crepe+yin]", "hybrid[fcpe+rmvpe]", "hybrid[fcpe+harvest]", "hybrid[fcpe+yin]", "hybrid[rmvpe+harvest]", "hybrid[rmvpe+yin]", "hybrid[harvest+yin]"], value="hybrid[pm+dio]", interactive=True, allow_custom_value=True, visible=method0.value == "hybrid")
- hop_length0 = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
- with gr.Accordion(translations["f0_file"], open=False):
- upload_f0_file0 = gr.File(label=translations["upload_f0"], file_types=[".txt"])
- f0_file_dropdown0 = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
- refresh_f0_file0 = gr.Button(translations["refresh"])
- with gr.Accordion(translations["hubert_model"], open=False):
- embed_mode1 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
- embedders0 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
- custom_embedders0 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders0.value == "custom")
- with gr.Group():
- with gr.Row():
- formant_shifting1 = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
- split_audio0 = gr.Checkbox(label=translations["split_audio"], value=False, interactive=True)
- cleaner1 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
- with gr.Row():
- autotune3 = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
- checkpointing0 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
- proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
- with gr.Column():
- resample_sr0 = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
- proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
- f0_autotune_strength0 = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune3.value)
- clean_strength1 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner1.value)
- filter_radius0 = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
- rms_mix_rate0 = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
- protect0 = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
- with gr.Row():
- formant_qfrency1 = gr.Slider(value=1.0, label=translations["formant_qfrency"], info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
- formant_timbre1 = gr.Slider(value=1.0, label=translations["formant_timbre"], info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
- with gr.Row():
- gr.Markdown(translations["output_tts_markdown"])
- with gr.Row():
- tts_voice_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["output_text_to_speech"])
- tts_voice_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["output_file_tts_convert"])
- with gr.Row():
- proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
- translate_button.click(fn=google_translate, inputs=[prompt, source_lang, target_lang], outputs=[prompt], api_name="google_translate")
- with gr.Row():
- unlock_full_method3.change(fn=unlock_f0, inputs=[unlock_full_method3], outputs=[method0])
- upload_f0_file0.upload(fn=lambda inp: shutil_move(inp.name, configs["f0_path"]), inputs=[upload_f0_file0], outputs=[f0_file_dropdown0])
- refresh_f0_file0.click(fn=change_f0_choices, inputs=[], outputs=[f0_file_dropdown0])
- with gr.Row():
- embed_mode1.change(fn=visible_embedders, inputs=[embed_mode1], outputs=[embedders0])
- autotune3.change(fn=visible, inputs=[autotune3], outputs=[f0_autotune_strength0])
- model_pth0.change(fn=get_index, inputs=[model_pth0], outputs=[model_index0])
- with gr.Row():
- cleaner1.change(fn=visible, inputs=[cleaner1], outputs=[clean_strength1])
- method0.change(fn=lambda method, hybrid: [visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method0, hybrid_method0], outputs=[hybrid_method0, hop_length0])
- hybrid_method0.change(fn=hoplength_show, inputs=[method0, hybrid_method0], outputs=[hop_length0])
- with gr.Row():
- refresh1.click(fn=change_models_choices, inputs=[], outputs=[model_pth0, model_index0])
- embedders0.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders0], outputs=[custom_embedders0])
- formant_shifting1.change(fn=lambda a: [visible(a)]*2, inputs=[formant_shifting1], outputs=[formant_qfrency1, formant_timbre1])
- with gr.Row():
- model_index0.change(fn=index_strength_show, inputs=[model_index0], outputs=[index_strength0])
- txt_input.upload(fn=process_input, inputs=[txt_input], outputs=[prompt])
- use_txt.change(fn=visible, inputs=[use_txt], outputs=[txt_input])
- with gr.Row():
- google_tts_check_box.change(fn=change_tts_voice_choices, inputs=[google_tts_check_box], outputs=[tts_voice])
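- # Step 1: synthesize speech from the prompt (Edge TTS or, if enabled, Google TTS) and save it to output_audio0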
- tts_button.click(
- fn=TTS,
- inputs=[
- prompt,
- tts_voice,
- speed,
- output_audio0,
- tts_pitch,
- google_tts_check_box,
- txt_input
- ],
- outputs=[tts_voice_audio],
- api_name="text-to-speech"
- )
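- # Step 2: convert the synthesized speech with the selected voice model and write the result to output_audio1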
- convert_button0.click(
- fn=convert_tts,
- inputs=[
- cleaner1,
- autotune3,
- pitch0,
- clean_strength1,
- model_pth0,
- model_index0,
- index_strength0,
- output_audio0,
- output_audio1,
- export_format0,
- method0,
- hybrid_method0,
- hop_length0,
- embedders0,
- custom_embedders0,
- resample_sr0,
- filter_radius0,
- rms_mix_rate0,
- protect0,
- split_audio0,
- f0_autotune_strength0,
- checkpointing0,
- onnx_f0_mode1,
- formant_shifting1,
- formant_qfrency1,
- formant_timbre1,
- f0_file_dropdown0,
- embed_mode1,
- proposal_pitch,
- proposal_pitch_threshold
- ],
- outputs=[tts_voice_convert],
- api_name="convert_tts"
- )
\ No newline at end of file
diff --git a/main/app/tabs/inference/child/convert_with_whisper.py b/main/app/tabs/inference/child/convert_with_whisper.py
deleted file mode 100644
index db63474ad3f8c6478b7e659214c551ceee3fec39..0000000000000000000000000000000000000000
--- a/main/app/tabs/inference/child/convert_with_whisper.py
+++ /dev/null
@@ -1,160 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.inference import convert_with_whisper
-from main.app.variables import translations, paths_for_files, sample_rate_choice, model_name, index_path, method_f0, embedders_mode, embedders_model, configs
-from main.app.core.ui import visible, change_audios_choices, unlock_f0, hoplength_show, change_models_choices, get_index, index_strength_show, visible_embedders, shutil_move
-
-def convert_with_whisper_tab():
- with gr.Row():
- gr.Markdown(translations["convert_with_whisper_info"])
- with gr.Row():
- with gr.Column():
- with gr.Group():
- with gr.Row():
- cleaner2 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
- autotune2 = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
- checkpointing2 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
- formant_shifting2 = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
- proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
- with gr.Row():
- num_spk = gr.Slider(minimum=2, maximum=8, step=1, info=translations["num_spk_info"], label=translations["num_spk"], value=2, interactive=True)
- with gr.Row():
- with gr.Column():
- convert_button3 = gr.Button(translations["convert_audio"], variant="primary")
- with gr.Row():
- with gr.Column():
- with gr.Accordion(translations["model_accordion"] + " 1", open=True):
- with gr.Row():
- model_pth2 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
- model_index2 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
- with gr.Row():
- refresh2 = gr.Button(translations["refresh"])
- with gr.Row():
- pitch3 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
- index_strength2 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index2.value != "")
- with gr.Accordion(translations["input_output"], open=False):
- with gr.Column():
- export_format2 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
- input_audio1 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
- output_audio2 = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
- with gr.Column():
- refresh4 = gr.Button(translations["refresh"])
- with gr.Row():
- input2 = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
- with gr.Column():
- with gr.Accordion(translations["model_accordion"] + " 2", open=True):
- with gr.Row():
- model_pth3 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
- model_index3 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
- with gr.Row():
- refresh3 = gr.Button(translations["refresh"])
- with gr.Row():
- pitch4 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
- index_strength3 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index3.value != "")
- with gr.Accordion(translations["setting"], open=False):
- with gr.Row():
- model_size = gr.Radio(label=translations["model_size"], info=translations["model_size_info"], choices=["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"], value="medium", interactive=True)
- with gr.Accordion(translations["f0_method"], open=False):
- with gr.Group():
- with gr.Row():
- onnx_f0_mode4 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
- unlock_full_method2 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
- method3 = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
- hybrid_method3 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=["hybrid[pm+dio]", "hybrid[pm+crepe-tiny]", "hybrid[pm+crepe]", "hybrid[pm+fcpe]", "hybrid[pm+rmvpe]", "hybrid[pm+harvest]", "hybrid[pm+yin]", "hybrid[dio+crepe-tiny]", "hybrid[dio+crepe]", "hybrid[dio+fcpe]", "hybrid[dio+rmvpe]", "hybrid[dio+harvest]", "hybrid[dio+yin]", "hybrid[crepe-tiny+crepe]", "hybrid[crepe-tiny+fcpe]", "hybrid[crepe-tiny+rmvpe]", "hybrid[crepe-tiny+harvest]", "hybrid[crepe+fcpe]", "hybrid[crepe+rmvpe]", "hybrid[crepe+harvest]", "hybrid[crepe+yin]", "hybrid[fcpe+rmvpe]", "hybrid[fcpe+harvest]", "hybrid[fcpe+yin]", "hybrid[rmvpe+harvest]", "hybrid[rmvpe+yin]", "hybrid[harvest+yin]"], value="hybrid[pm+dio]", interactive=True, allow_custom_value=True, visible=method3.value == "hybrid")
- hop_length3 = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
- with gr.Accordion(translations["hubert_model"], open=False):
- embed_mode3 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
- embedders3 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
- custom_embedders3 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders3.value == "custom")
- with gr.Column():
- resample_sr3 = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
- proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
- clean_strength3 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner2.value)
- f0_autotune_strength3 = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune2.value)
- filter_radius3 = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
- rms_mix_rate3 = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
- protect3 = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
- with gr.Row():
- formant_qfrency3 = gr.Slider(value=1.0, label=translations["formant_qfrency"] + " 1", info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
- formant_timbre3 = gr.Slider(value=1.0, label=translations["formant_timbre"] + " 1", info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
- with gr.Row():
- formant_qfrency4 = gr.Slider(value=1.0, label=translations["formant_qfrency"] + " 2", info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
- formant_timbre4 = gr.Slider(value=1.0, label=translations["formant_timbre"] + " 2", info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
- with gr.Row():
- gr.Markdown(translations["input_output"])
- with gr.Row():
- play_audio2 = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
- play_audio3 = gr.Audio(show_download_button=True, interactive=False, label=translations["output_file_tts_convert"])
- with gr.Row():
- autotune2.change(fn=visible, inputs=[autotune2], outputs=[f0_autotune_strength3])
- cleaner2.change(fn=visible, inputs=[cleaner2], outputs=[clean_strength3])
- method3.change(fn=lambda method, hybrid: [visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method3, hybrid_method3], outputs=[hybrid_method3, hop_length3])
- with gr.Row():
- hybrid_method3.change(fn=hoplength_show, inputs=[method3, hybrid_method3], outputs=[hop_length3])
- refresh2.click(fn=change_models_choices, inputs=[], outputs=[model_pth2, model_index2])
- model_pth2.change(fn=get_index, inputs=[model_pth2], outputs=[model_index2])
- with gr.Row():
- refresh3.click(fn=change_models_choices, inputs=[], outputs=[model_pth3, model_index3])
- model_pth3.change(fn=get_index, inputs=[model_pth3], outputs=[model_index3])
- input2.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[input2], outputs=[input_audio1])
- with gr.Row():
- input_audio1.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio1], outputs=[play_audio2])
- formant_shifting2.change(fn=lambda a: [visible(a)]*4, inputs=[formant_shifting2], outputs=[formant_qfrency3, formant_timbre3, formant_qfrency4, formant_timbre4])
- embedders3.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders3], outputs=[custom_embedders3])
- with gr.Row():
- refresh4.click(fn=change_audios_choices, inputs=[input_audio1], outputs=[input_audio1])
- model_index2.change(fn=index_strength_show, inputs=[model_index2], outputs=[index_strength2])
- model_index3.change(fn=index_strength_show, inputs=[model_index3], outputs=[index_strength3])
- with gr.Row():
- unlock_full_method2.change(fn=unlock_f0, inputs=[unlock_full_method2], outputs=[method3])
- embed_mode3.change(fn=visible_embedders, inputs=[embed_mode3], outputs=[embedders3])
- proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
- with gr.Row():
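- # Convert a multi-speaker recording: both voice models with their own pitch/index settings, plus the Whisper model size, are passed to convert_with_whisper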
- convert_button3.click(
- fn=convert_with_whisper,
- inputs=[
- num_spk,
- model_size,
- cleaner2,
- clean_strength3,
- autotune2,
- f0_autotune_strength3,
- checkpointing2,
- model_pth2,
- model_pth3,
- model_index2,
- model_index3,
- pitch3,
- pitch4,
- index_strength2,
- index_strength3,
- export_format2,
- input_audio1,
- output_audio2,
- onnx_f0_mode4,
- method3,
- hybrid_method3,
- hop_length3,
- embed_mode3,
- embedders3,
- custom_embedders3,
- resample_sr3,
- filter_radius3,
- rms_mix_rate3,
- protect3,
- formant_shifting2,
- formant_qfrency3,
- formant_timbre3,
- formant_qfrency4,
- formant_timbre4,
- proposal_pitch,
- proposal_pitch_threshold
- ],
- outputs=[play_audio3],
- api_name="convert_with_whisper"
- )
\ No newline at end of file
diff --git a/main/app/tabs/inference/child/separate.py b/main/app/tabs/inference/child/separate.py
deleted file mode 100644
index 87ffdaa4bbcdb2836ff0e84f3795e022ca7cb017..0000000000000000000000000000000000000000
--- a/main/app/tabs/inference/child/separate.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.downloads import download_url
-from main.app.core.separate import separator_music
-from main.app.core.ui import visible, valueFalse_interactive, change_audios_choices, shutil_move
-from main.app.variables import translations, uvr_model, paths_for_files, mdx_model, sample_rate_choice, configs
-
-def separate_tab():
- with gr.Row():
- gr.Markdown(translations["4_part"])
- with gr.Row():
- with gr.Column():
- with gr.Group():
- with gr.Row():
- cleaner = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True, min_width=140)
- backing = gr.Checkbox(label=translations["separator_backing"], value=False, interactive=True, min_width=140)
- reverb = gr.Checkbox(label=translations["dereveb_audio"], value=False, interactive=True, min_width=140)
- backing_reverb = gr.Checkbox(label=translations["dereveb_backing"], value=False, interactive=False, min_width=140)
- denoise = gr.Checkbox(label=translations["denoise_mdx"], value=False, interactive=False, min_width=140)
- with gr.Row():
- separator_model = gr.Dropdown(label=translations["separator_model"], value=uvr_model[0], choices=uvr_model, interactive=True)
- separator_backing_model = gr.Dropdown(label=translations["separator_backing_model"], value="Version-1", choices=["Version-1", "Version-2"], interactive=True, visible=backing.value)
- with gr.Row():
- with gr.Column():
- separator_button = gr.Button(translations["separator_tab"], variant="primary")
- with gr.Row():
- with gr.Column():
- with gr.Group():
- with gr.Row():
- shifts = gr.Slider(label=translations["shift"], info=translations["shift_info"], minimum=1, maximum=20, value=2, step=1, interactive=True)
- segment_size = gr.Slider(label=translations["segments_size"], info=translations["segments_size_info"], minimum=32, maximum=3072, value=256, step=32, interactive=True)
- with gr.Row():
- mdx_batch_size = gr.Slider(label=translations["batch_size"], info=translations["mdx_batch_size_info"], minimum=1, maximum=64, value=1, step=1, interactive=True, visible=backing.value or reverb.value or separator_model.value in mdx_model)
- with gr.Column():
- with gr.Group():
- with gr.Row():
- overlap = gr.Radio(label=translations["overlap"], info=translations["overlap_info"], choices=["0.25", "0.5", "0.75", "0.99"], value="0.25", interactive=True)
- with gr.Row():
- mdx_hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=8192, value=1024, step=1, interactive=True, visible=backing.value or reverb.value or separator_model.value in mdx_model)
- with gr.Row():
- with gr.Column():
- input = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
- with gr.Accordion(translations["use_url"], open=False):
- url = gr.Textbox(label=translations["url_audio"], value="", placeholder="https://www.youtube.com/...", scale=6)
- download_button = gr.Button(translations["downloads"])
- with gr.Column():
- with gr.Row():
- clean_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner.value)
- sample_rate1 = gr.Radio(choices=sample_rate_choice, value=44100, label=translations["sr"], info=translations["sr_info"], interactive=True)
- with gr.Accordion(translations["input_output"], open=False):
- format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
- input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, allow_custom_value=True, interactive=True)
- refresh_separator = gr.Button(translations["refresh"])
- output_separator = gr.Textbox(label=translations["output_folder"], value="audios", placeholder="audios", info=translations["output_folder_info"], interactive=True)
- audio_input = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
- with gr.Row():
- gr.Markdown(translations["output_separator"])
- with gr.Row():
- instruments_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["instruments"])
- original_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["original_vocal"])
- main_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["main_vocal"], visible=backing.value)
- backing_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["backing_vocal"], visible=backing.value)
- with gr.Row():
- separator_model.change(fn=lambda a, b, c: [visible(a or b or c in mdx_model), visible(a or b or c in mdx_model), valueFalse_interactive(a or b or c in mdx_model), visible(c not in mdx_model)], inputs=[backing, reverb, separator_model], outputs=[mdx_batch_size, mdx_hop_length, denoise, shifts])
- backing.change(fn=lambda a, b, c: [visible(a or b or c in mdx_model), visible(a or b or c in mdx_model), valueFalse_interactive(a or b or c in mdx_model), visible(a), visible(a), visible(a), valueFalse_interactive(a and b)], inputs=[backing, reverb, separator_model], outputs=[mdx_batch_size, mdx_hop_length, denoise, separator_backing_model, main_vocals, backing_vocals, backing_reverb])
- reverb.change(fn=lambda a, b, c: [visible(a or b or c in mdx_model), visible(a or b or c in mdx_model), valueFalse_interactive(a or b or c in mdx_model), valueFalse_interactive(a and b)], inputs=[backing, reverb, separator_model], outputs=[mdx_batch_size, mdx_hop_length, denoise, backing_reverb])
- with gr.Row():
- input_audio.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio], outputs=[audio_input])
- cleaner.change(fn=visible, inputs=[cleaner], outputs=[clean_strength])
- with gr.Row():
- input.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[input], outputs=[input_audio])
- refresh_separator.click(fn=change_audios_choices, inputs=[input_audio], outputs=[input_audio])
- with gr.Row():
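- # Download audio from the given URL (e.g. YouTube) and use it as the separation input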
- download_button.click(
- fn=download_url,
- inputs=[url],
- outputs=[input_audio, audio_input, url],
- api_name='download_url'
- )
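- # Run source separation; backing-vocal split, de-reverb and denoise are applied when the corresponding checkboxes are enabled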
- separator_button.click(
- fn=separator_music,
- inputs=[
- input_audio,
- output_separator,
- format,
- shifts,
- segment_size,
- overlap,
- cleaner,
- clean_strength,
- denoise,
- separator_model,
- separator_backing_model,
- backing,
- reverb,
- backing_reverb,
- mdx_hop_length,
- mdx_batch_size,
- sample_rate1
- ],
- outputs=[original_vocals, instruments_audio, main_vocals, backing_vocals],
- api_name='separator_music'
- )
\ No newline at end of file
diff --git a/main/app/tabs/inference/inference.py b/main/app/tabs/inference/inference.py
deleted file mode 100644
index 437ba78589fc35337e8bd1fdf9145b83f96301e8..0000000000000000000000000000000000000000
--- a/main/app/tabs/inference/inference.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.variables import translations, configs
-from main.app.tabs.inference.child.convert import convert_tab
-from main.app.tabs.inference.child.separate import separate_tab
-from main.app.tabs.inference.child.convert_tts import convert_tts_tab
-from main.app.tabs.inference.child.convert_with_whisper import convert_with_whisper_tab
-
-def inference_tab():
- with gr.TabItem(translations["inference"], visible=configs.get("inference_tab", True)):
- with gr.TabItem(translations["separator_tab"], visible=configs.get("separator_tab", True)):
- gr.Markdown(f"## {translations['separator_tab']}")
- separate_tab()
-
- with gr.TabItem(translations["convert_audio"], visible=configs.get("convert_tab", True)):
- gr.Markdown(f"## {translations['convert_audio']}")
- convert_tab()
-
- with gr.TabItem(translations["convert_with_whisper"], visible=configs.get("convert_with_whisper", True)):
- gr.Markdown(f"## {translations['convert_with_whisper']}")
- convert_with_whisper_tab()
-
- with gr.TabItem(translations["convert_text"], visible=configs.get("tts_tab", True)):
- gr.Markdown(translations["convert_text_markdown"])
- convert_tts_tab()
diff --git a/main/app/tabs/training/child/create_dataset.py b/main/app/tabs/training/child/create_dataset.py
deleted file mode 100644
index d37a269d43a15714515b3352597fd356734f4880..0000000000000000000000000000000000000000
--- a/main/app/tabs/training/child/create_dataset.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.training import create_dataset
-from main.app.core.ui import visible, valueEmpty_visible1
-from main.app.variables import translations, sample_rate_choice
-
-def create_dataset_tab():
- with gr.Row():
- gr.Markdown(translations["create_dataset_markdown_2"])
- with gr.Row():
- dataset_url = gr.Textbox(label=translations["url_audio"], info=translations["create_dataset_url"], value="", placeholder="https://www.youtube.com/...", interactive=True)
- output_dataset = gr.Textbox(label=translations["output_data"], info=translations["output_data_info"], value="dataset", placeholder="dataset", interactive=True)
- with gr.Row():
- with gr.Column():
- with gr.Group():
- with gr.Row():
- separator_reverb = gr.Checkbox(label=translations["dereveb_audio"], value=False, interactive=True)
- denoise_mdx = gr.Checkbox(label=translations["denoise"], value=False, interactive=True)
- with gr.Row():
- kim_vocal_version = gr.Radio(label=translations["model_ver"], info=translations["model_ver_info"], choices=["Version-1", "Version-2"], value="Version-2", interactive=True)
- kim_vocal_overlap = gr.Radio(label=translations["overlap"], info=translations["overlap_info"], choices=["0.25", "0.5", "0.75", "0.99"], value="0.25", interactive=True)
- with gr.Row():
- kim_vocal_hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=8192, value=1024, step=1, interactive=True)
- kim_vocal_batch_size = gr.Slider(label=translations["batch_size"], info=translations["mdx_batch_size_info"], minimum=1, maximum=64, value=1, step=1, interactive=True)
- with gr.Row():
- kim_vocal_segments_size = gr.Slider(label=translations["segments_size"], info=translations["segments_size_info"], minimum=32, maximum=3072, value=256, step=32, interactive=True)
- with gr.Row():
- sample_rate0 = gr.Radio(choices=sample_rate_choice, value=44100, label=translations["sr"], info=translations["sr_info"], interactive=True)
- with gr.Column():
- create_button = gr.Button(translations["createdataset"], variant="primary", scale=2, min_width=4000)
- with gr.Group():
- with gr.Row():
- clean_audio = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
- skip = gr.Checkbox(label=translations["skip"], value=False, interactive=True)
- with gr.Row():
- dataset_clean_strength = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.5, label=translations["clean_strength"], info=translations["clean_strength_info"], interactive=True, visible=clean_audio.value)
- with gr.Row():
- skip_start = gr.Textbox(label=translations["skip_start"], info=translations["skip_start_info"], value="", placeholder="0,...", interactive=True, visible=skip.value)
- skip_end = gr.Textbox(label=translations["skip_end"], info=translations["skip_end_info"], value="", placeholder="0,...", interactive=True, visible=skip.value)
- create_dataset_info = gr.Textbox(label=translations["create_dataset_info"], value="", interactive=False)
- with gr.Row():
- clean_audio.change(fn=visible, inputs=[clean_audio], outputs=[dataset_clean_strength])
- skip.change(fn=lambda a: [valueEmpty_visible1(a)]*2, inputs=[skip], outputs=[skip_start, skip_end])
- with gr.Row():
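- # Build a training dataset from the URL: separate the vocals, optionally de-reverb, denoise and clean, then export at the chosen sample rate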
- create_button.click(
- fn=create_dataset,
- inputs=[
- dataset_url,
- output_dataset,
- clean_audio,
- dataset_clean_strength,
- separator_reverb,
- kim_vocal_version,
- kim_vocal_overlap,
- kim_vocal_segments_size,
- denoise_mdx,
- skip,
- skip_start,
- skip_end,
- kim_vocal_hop_length,
- kim_vocal_batch_size,
- sample_rate0
- ],
- outputs=[create_dataset_info],
- api_name="create_dataset"
- )
\ No newline at end of file
diff --git a/main/app/tabs/training/child/training.py b/main/app/tabs/training/child/training.py
deleted file mode 100644
index 2f6356cde72edaad35da242eb5f97615ddc1e537..0000000000000000000000000000000000000000
--- a/main/app/tabs/training/child/training.py
+++ /dev/null
@@ -1,237 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.core.process import zip_file
-from main.app.core.training import preprocess, extract, create_index, training
-from main.app.variables import translations, model_name, index_path, method_f0, embedders_mode, embedders_model, pretrainedD, pretrainedG, config
-from main.app.core.ui import gr_warning, visible, unlock_f0, hoplength_show, change_models_choices, get_gpu_info, visible_embedders, pitch_guidance_lock, vocoders_lock, unlock_ver, unlock_vocoder, change_pretrained_choices, gpu_number_str, shutil_move
-
-def training_model_tab():
- with gr.Row():
- gr.Markdown(translations["training_markdown"])
- with gr.Row():
- with gr.Column():
- with gr.Row():
- with gr.Column():
- training_name = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
- training_sr = gr.Radio(label=translations["sample_rate"], info=translations["sample_rate_info"], choices=["32k", "40k", "48k"], value="48k", interactive=True)
- training_ver = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
- with gr.Row():
- clean_dataset = gr.Checkbox(label=translations["clear_dataset"], value=False, interactive=True)
- preprocess_cut = gr.Checkbox(label=translations["split_audio"], value=True, interactive=True)
- process_effects = gr.Checkbox(label=translations["preprocess_effect"], value=False, interactive=True)
- checkpointing1 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
- training_f0 = gr.Checkbox(label=translations["training_pitch"], value=True, interactive=True)
- upload = gr.Checkbox(label=translations["upload_dataset"], value=False, interactive=True)
- with gr.Row():
- clean_dataset_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.7, step=0.1, interactive=True, visible=clean_dataset.value)
- with gr.Column():
- preprocess_button = gr.Button(translations["preprocess_button"], scale=2)
- upload_dataset = gr.Files(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"], visible=upload.value)
- preprocess_info = gr.Textbox(label=translations["preprocess_info"], value="", interactive=False)
- with gr.Column():
- with gr.Row():
- with gr.Column():
- with gr.Accordion(label=translations["f0_method"], open=False):
- with gr.Group():
- with gr.Row():
- onnx_f0_mode2 = gr.Checkbox(label=translations["f0_onnx_mode"], value=False, interactive=True)
- unlock_full_method4 = gr.Checkbox(label=translations["f0_unlock"], value=False, interactive=True)
- autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
- extract_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
- extract_hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=["hybrid[pm+dio]", "hybrid[pm+crepe-tiny]", "hybrid[pm+crepe]", "hybrid[pm+fcpe]", "hybrid[pm+rmvpe]", "hybrid[pm+harvest]", "hybrid[pm+yin]", "hybrid[dio+crepe-tiny]", "hybrid[dio+crepe]", "hybrid[dio+fcpe]", "hybrid[dio+rmvpe]", "hybrid[dio+harvest]", "hybrid[dio+yin]", "hybrid[crepe-tiny+crepe]", "hybrid[crepe-tiny+fcpe]", "hybrid[crepe-tiny+rmvpe]", "hybrid[crepe-tiny+harvest]", "hybrid[crepe+fcpe]", "hybrid[crepe+rmvpe]", "hybrid[crepe+harvest]", "hybrid[crepe+yin]", "hybrid[fcpe+rmvpe]", "hybrid[fcpe+harvest]", "hybrid[fcpe+yin]", "hybrid[rmvpe+harvest]", "hybrid[rmvpe+yin]", "hybrid[harvest+yin]"], value="hybrid[pm+dio]", interactive=True, allow_custom_value=True, visible=extract_method.value == "hybrid")
- extract_hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
- f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
- with gr.Accordion(label=translations["hubert_model"], open=False):
- with gr.Group():
- embed_mode2 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
- extract_embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
- with gr.Row():
- extract_embedders_custom = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=extract_embedders.value == "custom")
- with gr.Column():
- extract_button = gr.Button(translations["extract_button"], scale=2)
- extract_info = gr.Textbox(label=translations["extract_info"], value="", interactive=False)
- with gr.Column():
- with gr.Row():
- with gr.Column():
- total_epochs = gr.Slider(label=translations["total_epoch"], info=translations["total_epoch_info"], minimum=1, maximum=10000, value=300, step=1, interactive=True)
- save_epochs = gr.Slider(label=translations["save_epoch"], info=translations["save_epoch_info"], minimum=1, maximum=10000, value=50, step=1, interactive=True)
- with gr.Column():
- index_button = gr.Button(f"3. {translations['create_index']}", variant="primary", scale=2)
- training_button = gr.Button(f"4. {translations['training_model']}", variant="primary", scale=2)
- with gr.Row():
- with gr.Accordion(label=translations["setting"], open=False):
- with gr.Row():
- index_algorithm = gr.Radio(label=translations["index_algorithm"], info=translations["index_algorithm_info"], choices=["Auto", "Faiss", "KMeans"], value="Auto", interactive=True)
- with gr.Row():
- cache_in_gpu = gr.Checkbox(label=translations["cache_in_gpu"], info=translations["cache_in_gpu_info"], value=True, interactive=True)
- rms_extract = gr.Checkbox(label=translations["train&energy"], info=translations["train&energy_info"], value=False, interactive=True)
- overtraining_detector = gr.Checkbox(label=translations["overtraining_detector"], info=translations["overtraining_detector_info"], value=False, interactive=True)
- with gr.Row():
- custom_dataset = gr.Checkbox(label=translations["custom_dataset"], info=translations["custom_dataset_info"], value=False, interactive=True)
- save_only_latest = gr.Checkbox(label=translations["save_only_latest"], info=translations["save_only_latest_info"], value=True, interactive=True)
- save_every_weights = gr.Checkbox(label=translations["save_every_weights"], info=translations["save_every_weights_info"], value=True, interactive=True)
- with gr.Row():
- clean_up = gr.Checkbox(label=translations["cleanup_training"], info=translations["cleanup_training_info"], value=False, interactive=True)
- not_use_pretrain = gr.Checkbox(label=translations["not_use_pretrain_2"], info=translations["not_use_pretrain_info"], value=False, interactive=True)
- custom_pretrain = gr.Checkbox(label=translations["custom_pretrain"], info=translations["custom_pretrain_info"], value=False, interactive=True)
- with gr.Column():
- dataset_path = gr.Textbox(label=translations["dataset_folder"], value="dataset", interactive=True, visible=custom_dataset.value)
- with gr.Column():
- threshold = gr.Slider(minimum=1, maximum=100, value=50, step=1, label=translations["threshold"], interactive=True, visible=overtraining_detector.value)
- with gr.Accordion(translations["setting_cpu_gpu"], open=False):
- with gr.Column():
- gpu_number = gr.Textbox(label=translations["gpu_number"], value=gpu_number_str(), info=translations["gpu_number_info"], interactive=True)
- gpu_info = gr.Textbox(label=translations["gpu_info"], value=get_gpu_info(), info=translations["gpu_info_2"], interactive=False)
- cpu_core = gr.Slider(label=translations["cpu_core"], info=translations["cpu_core_info"], minimum=1, maximum=os.cpu_count(), value=os.cpu_count(), step=1, interactive=True)
- train_batch_size = gr.Slider(label=translations["batch_size"], info=translations["batch_size_info"], minimum=1, maximum=64, value=8, step=1, interactive=True)
- with gr.Row():
- vocoders = gr.Radio(label=translations["vocoder"], info=translations["vocoder_info"], choices=["Default", "MRF-HiFi-GAN", "RefineGAN"], value="Default", interactive=True)
- with gr.Row():
- deterministic = gr.Checkbox(label=translations["deterministic"], info=translations["deterministic_info"], value=False, interactive=config.device.startswith("cuda"))
- benchmark = gr.Checkbox(label=translations["benchmark"], info=translations["benchmark_info"], value=False, interactive=config.device.startswith("cuda"))
- with gr.Row():
- optimizer = gr.Radio(label=translations["optimizer"], info=translations["optimizer_info"], value="AdamW", choices=["AdamW", "RAdam"], interactive=True)
- with gr.Row():
- model_author = gr.Textbox(label=translations["training_author"], info=translations["training_author_info"], value="", placeholder=translations["training_author"], interactive=True)
- with gr.Row():
- with gr.Column():
- with gr.Accordion(translations["custom_pretrain_info"], open=False, visible=custom_pretrain.value and not not_use_pretrain.value) as pretrain_setting:
- pretrained_D = gr.Dropdown(label=translations["pretrain_file"].format(dg="D"), choices=pretrainedD, value=pretrainedD[0] if len(pretrainedD) > 0 else '', interactive=True, allow_custom_value=True)
- pretrained_G = gr.Dropdown(label=translations["pretrain_file"].format(dg="G"), choices=pretrainedG, value=pretrainedG[0] if len(pretrainedG) > 0 else '', interactive=True, allow_custom_value=True)
- refresh_pretrain = gr.Button(translations["refresh"], scale=2)
- with gr.Row():
- training_info = gr.Textbox(label=translations["train_info"], value="", interactive=False)
- with gr.Row():
- with gr.Column():
- with gr.Accordion(translations["export_model"], open=False):
- with gr.Row():
- model_file = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
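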
- index_file = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
- with gr.Row():
- refresh_file = gr.Button(f"1. {translations['refresh']}", scale=2)
- zip_model = gr.Button(translations["zip_model"], variant="primary", scale=2)
- with gr.Row():
- zip_output = gr.File(label=translations["output_zip"], file_types=[".zip"], interactive=False, visible=False)
- with gr.Row():
- vocoders.change(fn=pitch_guidance_lock, inputs=[vocoders], outputs=[training_f0])
- training_f0.change(fn=vocoders_lock, inputs=[training_f0, vocoders], outputs=[vocoders])
- unlock_full_method4.change(fn=unlock_f0, inputs=[unlock_full_method4], outputs=[extract_method])
- with gr.Row():
- refresh_file.click(fn=change_models_choices, inputs=[], outputs=[model_file, index_file])
- zip_model.click(fn=zip_file, inputs=[training_name, model_file, index_file], outputs=[zip_output])
- dataset_path.change(fn=lambda folder: os.makedirs(folder, exist_ok=True), inputs=[dataset_path], outputs=[])
- with gr.Row():
- upload.change(fn=visible, inputs=[upload], outputs=[upload_dataset])
- overtraining_detector.change(fn=visible, inputs=[overtraining_detector], outputs=[threshold])
- clean_dataset.change(fn=visible, inputs=[clean_dataset], outputs=[clean_dataset_strength])
- with gr.Row():
- custom_dataset.change(fn=lambda custom_dataset: [visible(custom_dataset), "dataset"],inputs=[custom_dataset], outputs=[dataset_path, dataset_path])
- training_ver.change(fn=unlock_vocoder, inputs=[training_ver, vocoders], outputs=[vocoders])
- vocoders.change(fn=unlock_ver, inputs=[training_ver, vocoders], outputs=[training_ver])
- with gr.Row():
- extract_method.change(fn=lambda method, hybrid: [visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[extract_method, extract_hybrid_method], outputs=[extract_hybrid_method, extract_hop_length])
- extract_hybrid_method.change(fn=hoplength_show, inputs=[extract_method, extract_hybrid_method], outputs=[extract_hop_length])
- with gr.Row():
- autotune.change(fn=visible, inputs=[autotune], outputs=[f0_autotune_strength])
- upload_dataset.upload(
- fn=lambda files, folder: [shutil_move(f.name, os.path.join(folder, os.path.split(f.name)[1])) for f in files] if folder != "" else gr_warning(translations["dataset_folder1"]),
- inputs=[upload_dataset, dataset_path],
- outputs=[],
- api_name="upload_dataset"
- )
- with gr.Row():
- not_use_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
- custom_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
- refresh_pretrain.click(fn=change_pretrained_choices, inputs=[], outputs=[pretrained_D, pretrained_G])
- with gr.Row():
- preprocess_button.click(
- fn=preprocess,
- inputs=[
- training_name,
- training_sr,
- cpu_core,
- preprocess_cut,
- process_effects,
- dataset_path,
- clean_dataset,
- clean_dataset_strength
- ],
- outputs=[preprocess_info],
- api_name="preprocess"
- )
- with gr.Row():
- embed_mode2.change(fn=visible_embedders, inputs=[embed_mode2], outputs=[extract_embedders])
- extract_embedders.change(fn=lambda extract_embedders: visible(extract_embedders == "custom"), inputs=[extract_embedders], outputs=[extract_embedders_custom])
- with gr.Row():
- extract_button.click(
- fn=extract,
- inputs=[
- training_name,
- training_ver,
- extract_method,
- training_f0,
- extract_hop_length,
- cpu_core,
- gpu_number,
- training_sr,
- extract_embedders,
- extract_embedders_custom,
- onnx_f0_mode2,
- embed_mode2,
- autotune,
- f0_autotune_strength,
- extract_hybrid_method,
- rms_extract
- ],
- outputs=[extract_info],
- api_name="extract"
- )
- with gr.Row():
- index_button.click(
- fn=create_index,
- inputs=[
- training_name,
- training_ver,
- index_algorithm
- ],
- outputs=[training_info],
- api_name="create_index"
- )
- with gr.Row():
- training_button.click(
- fn=training,
- inputs=[
- training_name,
- training_ver,
- save_epochs,
- save_only_latest,
- save_every_weights,
- total_epochs,
- training_sr,
- train_batch_size,
- gpu_number,
- training_f0,
- not_use_pretrain,
- custom_pretrain,
- pretrained_G,
- pretrained_D,
- overtraining_detector,
- threshold,
- clean_up,
- cache_in_gpu,
- model_author,
- vocoders,
- checkpointing1,
- deterministic,
- benchmark,
- optimizer,
- rms_extract
- ],
- outputs=[training_info],
- api_name="training_model"
- )
\ No newline at end of file
diff --git a/main/app/tabs/training/training.py b/main/app/tabs/training/training.py
deleted file mode 100644
index f12944fa475f13ed3bd52e3500c43c105027c292..0000000000000000000000000000000000000000
--- a/main/app/tabs/training/training.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import os
-import sys
-
-import gradio as gr
-
-sys.path.append(os.getcwd())
-
-from main.app.variables import translations, configs
-from main.app.tabs.training.child.training import training_model_tab
-from main.app.tabs.training.child.create_dataset import create_dataset_tab
-
-def training_tab():
- with gr.TabItem(translations["training_model"], visible=configs.get("create_and_training_tab", True)):
- with gr.TabItem(translations["createdataset"], visible=configs.get("create_dataset_tab", True)):
- gr.Markdown(translations["create_dataset_markdown"])
- create_dataset_tab()
-
- with gr.TabItem(translations["training_model"], visible=configs.get("training_tab", True)):
- gr.Markdown(f"## {translations['training_model']}")
- training_model_tab()
\ No newline at end of file
diff --git a/main/app/variables.py b/main/app/variables.py
deleted file mode 100644
index 77677c52ce51f9f1d110edbe7a2639d19a77a26f..0000000000000000000000000000000000000000
--- a/main/app/variables.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import os
-import sys
-import csv
-import json
-import codecs
-import logging
-import urllib.request
-import logging.handlers
-
-sys.path.append(os.getcwd())
-
-from main.configs.config import Config
-
-logger = logging.getLogger(__name__)
-logger.propagate = False
-
-config = Config()
-python = sys.executable
-translations = config.translations
-configs_json = os.path.join("main", "configs", "config.json")
-configs = json.load(open(configs_json, "r"))
-
-if not logger.hasHandlers():
- console_handler = logging.StreamHandler()
- console_formatter = logging.Formatter(fmt="\n%(asctime)s.%(msecs)03d | %(levelname)s | %(module)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
- console_handler.setFormatter(console_formatter)
- console_handler.setLevel(logging.DEBUG if config.debug_mode else logging.INFO)
- file_handler = logging.handlers.RotatingFileHandler(os.path.join(configs["logs_path"], "app.log"), maxBytes=5*1024*1024, backupCount=3, encoding='utf-8')
- file_formatter = logging.Formatter(fmt="\n%(asctime)s.%(msecs)03d | %(levelname)s | %(module)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
- file_handler.setFormatter(file_formatter)
- file_handler.setLevel(logging.DEBUG)
- logger.addHandler(console_handler)
- logger.addHandler(file_handler)
- logger.setLevel(logging.DEBUG)
-
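-# fp16 is not supported on CPU, MPS, or OpenCL devices; disable it and write the change back to config.json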
-if config.device in ["cpu", "mps", "ocl:0"] and configs.get("fp16", False):
- logger.warning(translations["fp16_not_support"])
- configs["fp16"] = config.is_half = False
-
- with open(configs_json, "w") as f:
- json.dump(configs, f, indent=4)
-
-models = {}
-model_options = {}
-
-method_f0 = ["mangio-crepe-full", "crepe-full", "fcpe", "rmvpe", "harvest", "pyin", "hybrid"]
-method_f0_full = ["pm-ac", "pm-cc", "pm-shs", "dio", "mangio-crepe-tiny", "mangio-crepe-small", "mangio-crepe-medium", "mangio-crepe-large", "mangio-crepe-full", "crepe-tiny", "crepe-small", "crepe-medium", "crepe-large", "crepe-full", "fcpe", "fcpe-legacy", "rmvpe", "rmvpe-legacy", "harvest", "yin", "pyin", "swipe", "piptrack", "fcn", "hybrid"]
-
-embedders_mode = ["fairseq", "onnx", "transformers", "spin"]
-embedders_model = ["contentvec_base", "hubert_base", "vietnamese_hubert_base", "japanese_hubert_base", "korean_hubert_base", "chinese_hubert_base", "portuguese_hubert_base", "custom"]
-
-paths_for_files = sorted([os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["audios_path"]) for f in files if os.path.splitext(f)[1].lower() in (".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")])
-
-model_name = sorted(list(model for model in os.listdir(configs["weights_path"]) if model.endswith((".pth", ".onnx")) and not model.startswith("G_") and not model.startswith("D_")))
-index_path = sorted([os.path.join(root, name) for root, _, files in os.walk(configs["logs_path"], topdown=False) for name in files if name.endswith(".index") and "trained" not in name])
-
-pretrainedD = [model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "D" in model]
-pretrainedG = [model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "G" in model]
-
-presets_file = sorted(list(f for f in os.listdir(configs["presets_path"]) if f.endswith(".conversion.json")))
-audio_effect_presets_file = sorted(list(f for f in os.listdir(configs["presets_path"]) if f.endswith(".effect.json")))
-f0_file = sorted([os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["f0_path"]) for f in files if f.endswith(".txt")])
-
-language = configs.get("language", "vi-VN")
-theme = configs.get("theme", "NoCrypt/miku")
-
-edgetts = configs.get("edge_tts", ["vi-VN-HoaiMyNeural", "vi-VN-NamMinhNeural"])
-google_tts_voice = configs.get("google_tts_voice", ["vi", "en"])
-
-mdx_model = configs.get("mdx_model", "MDXNET_Main")
-uvr_model = configs.get("demucs_model", "HD_MMI") + mdx_model
-
-font = configs.get("font", "https://fonts.googleapis.com/css2?family=Courgette&display=swap")
-sample_rate_choice = [8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 96000]
-csv_path = configs["csv_path"]
-
-if "--allow_all_disk" in sys.argv and sys.platform == "win32":
- try:
- import win32api
- except ImportError:
- os.system(f"{python} -m pip install pywin32")
- import win32api
-
- allow_disk = win32api.GetLogicalDriveStrings().split('\x00')[:-1]
-else: allow_disk = []
-
-try:
- if os.path.exists(csv_path): reader = list(csv.DictReader(open(csv_path, newline='', encoding='utf-8')))
- else:
- reader = list(csv.DictReader([line.decode('utf-8') for line in urllib.request.urlopen(codecs.decode("uggcf://qbpf.tbbtyr.pbz/fcernqfurrgf/q/1gNHnDeRULtEfz1Yieaw14USUQjWJy0Oq9k0DrCrjApb/rkcbeg?sbezng=pfi&tvq=1977693859", "rot13")).readlines()]))
- writer = csv.DictWriter(open(csv_path, mode='w', newline='', encoding='utf-8'), fieldnames=reader[0].keys())
- writer.writeheader()
- writer.writerows(reader)
-
- for row in reader:
- filename = row['Filename']
- url = None
-
- for value in row.values():
- if isinstance(value, str) and "huggingface" in value:
- url = value
- break
-
- if url: models[filename] = url
-except Exception:
- pass
\ No newline at end of file
diff --git a/main/configs/config.json b/main/configs/config.json
deleted file mode 100644
index 38ea58467210e5eed584f4e5ea84ec5069b1b3d5..0000000000000000000000000000000000000000
--- a/main/configs/config.json
+++ /dev/null
@@ -1,584 +0,0 @@
-{
- "language": "vi-VN",
- "support_language": [
- "en-US",
- "vi-VN"
- ],
- "theme": "NoCrypt/miku",
- "themes": [
- "NoCrypt/miku",
- "gstaff/xkcd",
- "JohnSmith9982/small_and_pretty",
- "ParityError/Interstellar",
- "earneleh/paris",
- "shivi/calm_seafoam",
- "Hev832/Applio",
- "YTheme/Minecraft",
- "gstaff/sketch",
- "SebastianBravo/simci_css",
- "allenai/gradio-theme",
- "Nymbo/Nymbo_Theme_5",
- "lone17/kotaemon",
- "Zarkel/IBM_Carbon_Theme",
- "SherlockRamos/Feliz",
- "freddyaboulton/dracula_revamped",
- "freddyaboulton/bad-theme-space",
- "gradio/dracula_revamped",
- "abidlabs/dracula_revamped",
- "gradio/dracula_test",
- "gradio/seafoam",
- "gradio/glass",
- "gradio/monochrome",
- "gradio/soft",
- "gradio/default",
- "gradio/base",
- "abidlabs/pakistan",
- "dawood/microsoft_windows",
- "ysharma/steampunk",
- "ysharma/huggingface",
- "abidlabs/Lime",
- "freddyaboulton/this-theme-does-not-exist-2",
- "aliabid94/new-theme",
- "aliabid94/test2",
- "aliabid94/test3",
- "aliabid94/test4",
- "abidlabs/banana",
- "freddyaboulton/test-blue",
- "gstaff/whiteboard",
- "ysharma/llamas",
- "abidlabs/font-test",
- "YenLai/Superhuman",
- "bethecloud/storj_theme",
- "sudeepshouche/minimalist",
- "knotdgaf/gradiotest",
- "ParityError/Anime",
- "Ajaxon6255/Emerald_Isle",
- "ParityError/LimeFace",
- "finlaymacklon/smooth_slate",
- "finlaymacklon/boxy_violet",
- "derekzen/stardust",
- "EveryPizza/Cartoony-Gradio-Theme",
- "Ifeanyi/Cyanister",
- "Tshackelton/IBMPlex-DenseReadable",
- "snehilsanyal/scikit-learn",
- "Himhimhim/xkcd",
- "nota-ai/theme",
- "rawrsor1/Everforest",
- "rottenlittlecreature/Moon_Goblin",
- "abidlabs/test-yellow",
- "abidlabs/test-yellow3",
- "idspicQstitho/dracula_revamped",
- "kfahn/AnimalPose",
- "HaleyCH/HaleyCH_Theme",
- "simulKitke/dracula_test",
- "braintacles/CrimsonNight",
- "wentaohe/whiteboardv2",
- "reilnuud/polite",
- "remilia/Ghostly",
- "Franklisi/darkmode",
- "coding-alt/soft",
- "xiaobaiyuan/theme_land",
- "step-3-profit/Midnight-Deep",
- "xiaobaiyuan/theme_demo",
- "Taithrah/Minimal",
- "Insuz/SimpleIndigo",
- "zkunn/Alipay_Gradio_theme",
- "Insuz/Mocha",
- "xiaobaiyuan/theme_brief",
- "Ama434/434-base-Barlow",
- "Ama434/def_barlow",
- "Ama434/neutral-barlow",
- "dawood/dracula_test",
- "nuttea/Softblue",
- "BlueDancer/Alien_Diffusion",
- "naughtondale/monochrome",
- "Dagfinn1962/standard",
- "default"
- ],
- "mdx_model": [
- "Main_340",
- "Main_390",
- "Main_406",
- "Main_427",
- "Main_438",
- "Inst_full_292",
- "Inst_HQ_1",
- "Inst_HQ_2",
- "Inst_HQ_3",
- "Inst_HQ_4",
- "Inst_HQ_5",
- "Kim_Vocal_1",
- "Kim_Vocal_2",
- "Kim_Inst",
- "Inst_187_beta",
- "Inst_82_beta",
- "Inst_90_beta",
- "Voc_FT",
- "Crowd_HQ",
- "Inst_1",
- "Inst_2",
- "Inst_3",
- "MDXNET_1_9703",
- "MDXNET_2_9682",
- "MDXNET_3_9662",
- "Inst_Main",
- "MDXNET_Main",
- "MDXNET_9482"
- ],
- "demucs_model": [
- "HT-Normal",
- "HT-Tuned",
- "HD_MMI",
- "HT_6S"
- ],
- "edge_tts": [
- "af-ZA-AdriNeural",
- "af-ZA-WillemNeural",
- "sq-AL-AnilaNeural",
- "sq-AL-IlirNeural",
- "am-ET-AmehaNeural",
- "am-ET-MekdesNeural",
- "ar-DZ-AminaNeural",
- "ar-DZ-IsmaelNeural",
- "ar-BH-AliNeural",
- "ar-BH-LailaNeural",
- "ar-EG-SalmaNeural",
- "ar-EG-ShakirNeural",
- "ar-IQ-BasselNeural",
- "ar-IQ-RanaNeural",
- "ar-JO-SanaNeural",
- "ar-JO-TaimNeural",
- "ar-KW-FahedNeural",
- "ar-KW-NouraNeural",
- "ar-LB-LaylaNeural",
- "ar-LB-RamiNeural",
- "ar-LY-ImanNeural",
- "ar-LY-OmarNeural",
- "ar-MA-JamalNeural",
- "ar-MA-MounaNeural",
- "ar-OM-AbdullahNeural",
- "ar-OM-AyshaNeural",
- "ar-QA-AmalNeural",
- "ar-QA-MoazNeural",
- "ar-SA-HamedNeural",
- "ar-SA-ZariyahNeural",
- "ar-SY-AmanyNeural",
- "ar-SY-LaithNeural",
- "ar-TN-HediNeural",
- "ar-TN-ReemNeural",
- "ar-AE-FatimaNeural",
- "ar-AE-HamdanNeural",
- "ar-YE-MaryamNeural",
- "ar-YE-SalehNeural",
- "az-AZ-BabekNeural",
- "az-AZ-BanuNeural",
- "bn-BD-NabanitaNeural",
- "bn-BD-PradeepNeural",
- "bn-IN-BashkarNeural",
- "bn-IN-TanishaaNeural",
- "bs-BA-GoranNeural",
- "bs-BA-VesnaNeural",
- "bg-BG-BorislavNeural",
- "bg-BG-KalinaNeural",
- "my-MM-NilarNeural",
- "my-MM-ThihaNeural",
- "ca-ES-EnricNeural",
- "ca-ES-JoanaNeural",
- "zh-HK-HiuGaaiNeural",
- "zh-HK-HiuMaanNeural",
- "zh-HK-WanLungNeural",
- "zh-CN-XiaoxiaoNeural",
- "zh-CN-XiaoyiNeural",
- "zh-CN-YunjianNeural",
- "zh-CN-YunxiNeural",
- "zh-CN-YunxiaNeural",
- "zh-CN-YunyangNeural",
- "zh-CN-liaoning-XiaobeiNeural",
- "zh-TW-HsiaoChenNeural",
- "zh-TW-YunJheNeural",
- "zh-TW-HsiaoYuNeural",
- "zh-CN-shaanxi-XiaoniNeural",
- "hr-HR-GabrijelaNeural",
- "hr-HR-SreckoNeural",
- "cs-CZ-AntoninNeural",
- "cs-CZ-VlastaNeural",
- "da-DK-ChristelNeural",
- "da-DK-JeppeNeural",
- "nl-BE-ArnaudNeural",
- "nl-BE-DenaNeural",
- "nl-NL-ColetteNeural",
- "nl-NL-FennaNeural",
- "nl-NL-MaartenNeural",
- "en-AU-NatashaNeural",
- "en-AU-WilliamNeural",
- "en-CA-ClaraNeural",
- "en-CA-LiamNeural",
- "en-HK-SamNeural",
- "en-HK-YanNeural",
- "en-IN-NeerjaExpressiveNeural",
- "en-IN-NeerjaNeural",
- "en-IN-PrabhatNeural",
- "en-IE-ConnorNeural",
- "en-IE-EmilyNeural",
- "en-KE-AsiliaNeural",
- "en-KE-ChilembaNeural",
- "en-NZ-MitchellNeural",
- "en-NZ-MollyNeural",
- "en-NG-AbeoNeural",
- "en-NG-EzinneNeural",
- "en-PH-JamesNeural",
- "en-PH-RosaNeural",
- "en-SG-LunaNeural",
- "en-SG-WayneNeural",
- "en-ZA-LeahNeural",
- "en-ZA-LukeNeural",
- "en-TZ-ElimuNeural",
- "en-TZ-ImaniNeural",
- "en-GB-LibbyNeural",
- "en-GB-MaisieNeural",
- "en-GB-RyanNeural",
- "en-GB-SoniaNeural",
- "en-GB-ThomasNeural",
- "en-US-AvaMultilingualNeural",
- "en-US-AndrewMultilingualNeural",
- "en-US-EmmaMultilingualNeural",
- "en-US-BrianMultilingualNeural",
- "en-US-AvaNeural",
- "en-US-AndrewNeural",
- "en-US-EmmaNeural",
- "en-US-BrianNeural",
- "en-US-AnaNeural",
- "en-US-AriaNeural",
- "en-US-ChristopherNeural",
- "en-US-EricNeural",
- "en-US-GuyNeural",
- "en-US-JennyNeural",
- "en-US-MichelleNeural",
- "en-US-RogerNeural",
- "en-US-SteffanNeural",
- "et-EE-AnuNeural",
- "et-EE-KertNeural",
- "fil-PH-AngeloNeural",
- "fil-PH-BlessicaNeural",
- "fi-FI-HarriNeural",
- "fi-FI-NooraNeural",
- "fr-BE-CharlineNeural",
- "fr-BE-GerardNeural",
- "fr-CA-ThierryNeural",
- "fr-CA-AntoineNeural",
- "fr-CA-JeanNeural",
- "fr-CA-SylvieNeural",
- "fr-FR-VivienneMultilingualNeural",
- "fr-FR-RemyMultilingualNeural",
- "fr-FR-DeniseNeural",
- "fr-FR-EloiseNeural",
- "fr-FR-HenriNeural",
- "fr-CH-ArianeNeural",
- "fr-CH-FabriceNeural",
- "gl-ES-RoiNeural",
- "gl-ES-SabelaNeural",
- "ka-GE-EkaNeural",
- "ka-GE-GiorgiNeural",
- "de-AT-IngridNeural",
- "de-AT-JonasNeural",
- "de-DE-SeraphinaMultilingualNeural",
- "de-DE-FlorianMultilingualNeural",
- "de-DE-AmalaNeural",
- "de-DE-ConradNeural",
- "de-DE-KatjaNeural",
- "de-DE-KillianNeural",
- "de-CH-JanNeural",
- "de-CH-LeniNeural",
- "el-GR-AthinaNeural",
- "el-GR-NestorasNeural",
- "gu-IN-DhwaniNeural",
- "gu-IN-NiranjanNeural",
- "he-IL-AvriNeural",
- "he-IL-HilaNeural",
- "hi-IN-MadhurNeural",
- "hi-IN-SwaraNeural",
- "hu-HU-NoemiNeural",
- "hu-HU-TamasNeural",
- "is-IS-GudrunNeural",
- "is-IS-GunnarNeural",
- "id-ID-ArdiNeural",
- "id-ID-GadisNeural",
- "ga-IE-ColmNeural",
- "ga-IE-OrlaNeural",
- "it-IT-GiuseppeNeural",
- "it-IT-DiegoNeural",
- "it-IT-ElsaNeural",
- "it-IT-IsabellaNeural",
- "ja-JP-KeitaNeural",
- "ja-JP-NanamiNeural",
- "jv-ID-DimasNeural",
- "jv-ID-SitiNeural",
- "kn-IN-GaganNeural",
- "kn-IN-SapnaNeural",
- "kk-KZ-AigulNeural",
- "kk-KZ-DauletNeural",
- "km-KH-PisethNeural",
- "km-KH-SreymomNeural",
- "ko-KR-HyunsuNeural",
- "ko-KR-InJoonNeural",
- "ko-KR-SunHiNeural",
- "lo-LA-ChanthavongNeural",
- "lo-LA-KeomanyNeural",
- "lv-LV-EveritaNeural",
- "lv-LV-NilsNeural",
- "lt-LT-LeonasNeural",
- "lt-LT-OnaNeural",
- "mk-MK-AleksandarNeural",
- "mk-MK-MarijaNeural",
- "ms-MY-OsmanNeural",
- "ms-MY-YasminNeural",
- "ml-IN-MidhunNeural",
- "ml-IN-SobhanaNeural",
- "mt-MT-GraceNeural",
- "mt-MT-JosephNeural",
- "mr-IN-AarohiNeural",
- "mr-IN-ManoharNeural",
- "mn-MN-BataaNeural",
- "mn-MN-YesuiNeural",
- "ne-NP-HemkalaNeural",
- "ne-NP-SagarNeural",
- "nb-NO-FinnNeural",
- "nb-NO-PernilleNeural",
- "ps-AF-GulNawazNeural",
- "ps-AF-LatifaNeural",
- "fa-IR-DilaraNeural",
- "fa-IR-FaridNeural",
- "pl-PL-MarekNeural",
- "pl-PL-ZofiaNeural",
- "pt-BR-ThalitaNeural",
- "pt-BR-AntonioNeural",
- "pt-BR-FranciscaNeural",
- "pt-PT-DuarteNeural",
- "pt-PT-RaquelNeural",
- "ro-RO-AlinaNeural",
- "ro-RO-EmilNeural",
- "ru-RU-DmitryNeural",
- "ru-RU-SvetlanaNeural",
- "sr-RS-NicholasNeural",
- "sr-RS-SophieNeural",
- "si-LK-SameeraNeural",
- "si-LK-ThiliniNeural",
- "sk-SK-LukasNeural",
- "sk-SK-ViktoriaNeural",
- "sl-SI-PetraNeural",
- "sl-SI-RokNeural",
- "so-SO-MuuseNeural",
- "so-SO-UbaxNeural",
- "es-AR-ElenaNeural",
- "es-AR-TomasNeural",
- "es-BO-MarceloNeural",
- "es-BO-SofiaNeural",
- "es-CL-CatalinaNeural",
- "es-CL-LorenzoNeural",
- "es-ES-XimenaNeural",
- "es-CO-GonzaloNeural",
- "es-CO-SalomeNeural",
- "es-CR-JuanNeural",
- "es-CR-MariaNeural",
- "es-CU-BelkysNeural",
- "es-CU-ManuelNeural",
- "es-DO-EmilioNeural",
- "es-DO-RamonaNeural",
- "es-EC-AndreaNeural",
- "es-EC-LuisNeural",
- "es-SV-LorenaNeural",
- "es-SV-RodrigoNeural",
- "es-GQ-JavierNeural",
- "es-GQ-TeresaNeural",
- "es-GT-AndresNeural",
- "es-GT-MartaNeural",
- "es-HN-CarlosNeural",
- "es-HN-KarlaNeural",
- "es-MX-DaliaNeural",
- "es-MX-JorgeNeural",
- "es-NI-FedericoNeural",
- "es-NI-YolandaNeural",
- "es-PA-MargaritaNeural",
- "es-PA-RobertoNeural",
- "es-PY-MarioNeural",
- "es-PY-TaniaNeural",
- "es-PE-AlexNeural",
- "es-PE-CamilaNeural",
- "es-PR-KarinaNeural",
- "es-PR-VictorNeural",
- "es-ES-AlvaroNeural",
- "es-ES-ElviraNeural",
- "es-US-AlonsoNeural",
- "es-US-PalomaNeural",
- "es-UY-MateoNeural",
- "es-UY-ValentinaNeural",
- "es-VE-PaolaNeural",
- "es-VE-SebastianNeural",
- "su-ID-JajangNeural",
- "su-ID-TutiNeural",
- "sw-KE-RafikiNeural",
- "sw-KE-ZuriNeural",
- "sw-TZ-DaudiNeural",
- "sw-TZ-RehemaNeural",
- "sv-SE-MattiasNeural",
- "sv-SE-SofieNeural",
- "ta-IN-PallaviNeural",
- "ta-IN-ValluvarNeural",
- "ta-MY-KaniNeural",
- "ta-MY-SuryaNeural",
- "ta-SG-AnbuNeural",
- "ta-SG-VenbaNeural",
- "ta-LK-KumarNeural",
- "ta-LK-SaranyaNeural",
- "te-IN-MohanNeural",
- "te-IN-ShrutiNeural",
- "th-TH-NiwatNeural",
- "th-TH-PremwadeeNeural",
- "tr-TR-AhmetNeural",
- "tr-TR-EmelNeural",
- "uk-UA-OstapNeural",
- "uk-UA-PolinaNeural",
- "ur-IN-GulNeural",
- "ur-IN-SalmanNeural",
- "ur-PK-AsadNeural",
- "ur-PK-UzmaNeural",
- "uz-UZ-MadinaNeural",
- "uz-UZ-SardorNeural",
- "vi-VN-HoaiMyNeural",
- "vi-VN-NamMinhNeural",
- "cy-GB-AledNeural",
- "cy-GB-NiaNeural",
- "zu-ZA-ThandoNeural",
- "zu-ZA-ThembaNeural"
- ],
- "google_tts_voice": [
- "af",
- "am",
- "ar",
- "bg",
- "bn",
- "bs",
- "ca",
- "cs",
- "cy",
- "da",
- "de",
- "el",
- "en",
- "es",
- "et",
- "eu",
- "fi",
- "fr",
- "fr-CA",
- "gl",
- "gu",
- "ha",
- "hi",
- "hr",
- "hu",
- "id",
- "is",
- "it",
- "iw",
- "ja",
- "jw",
- "km",
- "kn",
- "ko",
- "la",
- "lt",
- "lv",
- "ml",
- "mr",
- "ms",
- "my",
- "ne",
- "nl",
- "no",
- "pa",
- "pl",
- "pt",
- "pt-PT",
- "ro",
- "ru",
- "si",
- "sk",
- "sq",
- "sr",
- "su",
- "sv",
- "sw",
- "ta",
- "te",
- "th",
- "tl",
- "tr",
- "uk",
- "ur",
- "vi",
- "yue",
- "zh-CN",
- "zh-TW",
- "zh"
- ],
- "fp16": false,
- "editing_tab": true,
- "inference_tab": true,
- "create_and_training_tab": true,
- "extra_tab": true,
- "separator_tab": true,
- "convert_tab": true,
- "convert_with_whisper": true,
- "tts_tab": true,
- "effects_tab": true,
- "quirk": true,
- "create_dataset_tab": true,
- "training_tab": true,
- "fushion_tab": true,
- "read_tab": true,
- "onnx_tab": true,
- "downloads_tab": true,
- "f0_extractor_tab": true,
- "settings_tab": true,
- "report_bug_tab": false,
- "font": "https://fonts.googleapis.com/css2?family=Roboto&display=swap",
- "app_port": 7860,
- "tensorboard_port": 6870,
- "num_of_restart": 5,
- "server_name": "0.0.0.0",
- "app_show_error": true,
- "delete_exists_file": false,
- "audio_effects_path": "main/inference/audio_effects.py",
- "convert_path": "main/inference/conversion/convert.py",
- "separate_path": "main/inference/separator_music.py",
- "create_dataset_path": "main/inference/create_dataset.py",
- "preprocess_path": "main/inference/preprocess/preprocess.py",
- "extract_path": "main/inference/extracting/extract.py",
- "create_index_path": "main/inference/create_index.py",
- "train_path": "main/inference/training/train.py",
- "ico_path": "assets/ico.png",
- "csv_path": "assets/spreadsheet.csv",
- "weights_path": "assets/weights",
- "logs_path": "assets/logs",
- "binary_path": "assets/binary",
- "f0_path": "assets/f0",
- "language_path": "assets/languages",
- "presets_path": "assets/presets",
- "embedders_path": "assets/models/embedders",
- "predictors_path": "assets/models/predictors",
- "pretrained_custom_path": "assets/models/pretrained_custom",
- "pretrained_v1_path": "assets/models/pretrained_v1",
- "pretrained_v2_path": "assets/models/pretrained_v2",
- "speaker_diarization_path": "assets/models/speaker_diarization",
- "uvr5_path": "assets/models/uvr5",
- "audios_path": "audios",
- "demucs_segments_enable": true,
- "demucs_cpu_mode": false,
- "limit_f0": 8,
- "debug_mode": false,
- "pretrain_verify_shape": true,
- "pretrain_strict": true,
- "cpu_mode": false,
- "brain": false
-}
\ No newline at end of file
diff --git a/main/configs/config.py b/main/configs/config.py
deleted file mode 100644
index 92aea2d1e93cae7ee845bae026c5a7b37b397453..0000000000000000000000000000000000000000
--- a/main/configs/config.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import os
-import sys
-import json
-import torch
-
-sys.path.append(os.getcwd())
-
-from main.library import opencl
-
-version_config_paths = [os.path.join(version, size) for version in ["v1", "v2"] for size in ["32000.json", "40000.json", "48000.json"]]
-
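-# Simple singleton decorator: caches one instance per class so repeated Config() calls return the same object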
-def singleton(cls):
- instances = {}
-
- def get_instance(*args, **kwargs):
- if cls not in instances: instances[cls] = cls(*args, **kwargs)
- return instances[cls]
-
- return get_instance
-
-@singleton
-class Config:
- def __init__(self):
- self.device = "cuda:0" if torch.cuda.is_available() else ("ocl:0" if opencl.is_available() else "cpu")
- self.configs_path = os.path.join("main", "configs", "config.json")
- self.configs = json.load(open(self.configs_path, "r"))
- self.translations = self.multi_language()
- self.json_config = self.load_config_json()
- self.gpu_mem = None
- self.per_preprocess = 3.7
- self.is_half = self.is_fp16()
- self.brain = self.configs.get("brain", False)
- self.cpu_mode = self.configs.get("cpu_mode", False)
- if self.cpu_mode: self.device = "cpu"
- self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
- self.debug_mode = self.configs.get("debug_mode", False)
-
- def multi_language(self):
- try:
- lang = self.configs.get("language", "vi-VN")
- if len([l for l in os.listdir(self.configs["language_path"]) if l.endswith(".json")]) < 1: raise FileNotFoundError("Không tìm thấy bất cứ gói ngôn ngữ nào(No package languages found)")
-
- if not lang: lang = "vi-VN"
- if lang not in self.configs["support_language"]: raise ValueError("Ngôn ngữ không được hỗ trợ(Language not supported)")
-
- lang_path = os.path.join(self.configs["language_path"], f"{lang}.json")
- if not os.path.exists(lang_path): lang_path = os.path.join(self.configs["language_path"], "vi-VN.json")
-
- with open(lang_path, encoding="utf-8") as f:
- translations = json.load(f)
- except json.JSONDecodeError:
- print(f"Empty or invalid language file: {lang_path}")
- translations = {}
-
- return translations
-
- def is_fp16(self):
- fp16 = self.configs.get("fp16", False)
-
- if self.device in ["cpu", "mps"] and fp16:
- self.configs["fp16"] = False
- fp16 = False
-
- with open(self.configs_path, "w") as f:
- json.dump(self.configs, f, indent=4)
-
- if not fp16: self.per_preprocess = 3.0
- return fp16
-
- def load_config_json(self):
- configs = {}
-
- for config_file in version_config_paths:
- try:
- with open(os.path.join("main", "configs", config_file), "r") as f:
- configs[config_file] = json.load(f)
- except json.JSONDecodeError:
- print(self.translations["empty_json"].format(file=config_file))
- pass
-
- return configs
-
- def device_config(self):
- if not self.cpu_mode:
- if self.device.startswith("cuda"): self.set_cuda_config()
- elif opencl.is_available(): self.device = "ocl:0"
- elif self.has_mps(): self.device = "mps"
- else: self.device = "cpu"
-
- if self.gpu_mem is not None and self.gpu_mem <= 4:
- self.per_preprocess = 3.0
- return 1, 5, 30, 32
-
- return (3, 10, 60, 65) if self.is_half else (1, 6, 38, 41)
-
- def set_cuda_config(self):
- i_device = int(self.device.split(":")[-1])
- self.gpu_mem = torch.cuda.get_device_properties(i_device).total_memory // (1024**3)
-
- def has_mps(self):
- return torch.backends.mps.is_available()
\ No newline at end of file
diff --git a/main/configs/v1/32000.json b/main/configs/v1/32000.json
deleted file mode 100644
index 224c3757d9bff4d5dda025b6b33d6c9296b312b9..0000000000000000000000000000000000000000
--- a/main/configs/v1/32000.json
+++ /dev/null
@@ -1,46 +0,0 @@
-{
- "train": {
- "log_interval": 200,
- "seed": 1234,
- "epochs": 20000,
- "learning_rate": 0.0001,
- "betas": [0.8, 0.99],
- "eps": 1e-09,
- "batch_size": 4,
- "lr_decay": 0.999875,
- "segment_size": 12800,
- "init_lr_ratio": 1,
- "warmup_epochs": 0,
- "c_mel": 45,
- "c_kl": 1.0
- },
- "data": {
- "max_wav_value": 32768.0,
- "sample_rate": 32000,
- "filter_length": 1024,
- "hop_length": 320,
- "win_length": 1024,
- "n_mel_channels": 80,
- "mel_fmin": 0.0,
- "mel_fmax": null
- },
- "model": {
- "inter_channels": 192,
- "hidden_channels": 192,
- "filter_channels": 768,
- "text_enc_hidden_dim": 256,
- "n_heads": 2,
- "n_layers": 6,
- "kernel_size": 3,
- "p_dropout": 0,
- "resblock": "1",
- "resblock_kernel_sizes": [3, 7, 11],
- "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
- "upsample_rates": [10, 4, 2, 2, 2],
- "upsample_initial_channel": 512,
- "upsample_kernel_sizes": [16, 16, 4, 4, 4],
- "use_spectral_norm": false,
- "gin_channels": 256,
- "spk_embed_dim": 109
- }
-}
\ No newline at end of file
diff --git a/main/configs/v1/40000.json b/main/configs/v1/40000.json
deleted file mode 100644
index 45ad70b94322c76b248ee9a5bd0885620623b5bb..0000000000000000000000000000000000000000
--- a/main/configs/v1/40000.json
+++ /dev/null
@@ -1,46 +0,0 @@
-{
- "train": {
- "log_interval": 200,
- "seed": 1234,
- "epochs": 20000,
- "learning_rate": 0.0001,
- "betas": [0.8, 0.99],
- "eps": 1e-09,
- "batch_size": 4,
- "lr_decay": 0.999875,
- "segment_size": 12800,
- "init_lr_ratio": 1,
- "warmup_epochs": 0,
- "c_mel": 45,
- "c_kl": 1.0
- },
- "data": {
- "max_wav_value": 32768.0,
- "sample_rate": 40000,
- "filter_length": 2048,
- "hop_length": 400,
- "win_length": 2048,
- "n_mel_channels": 125,
- "mel_fmin": 0.0,
- "mel_fmax": null
- },
- "model": {
- "inter_channels": 192,
- "hidden_channels": 192,
- "filter_channels": 768,
- "text_enc_hidden_dim": 256,
- "n_heads": 2,
- "n_layers": 6,
- "kernel_size": 3,
- "p_dropout": 0,
- "resblock": "1",
- "resblock_kernel_sizes": [3, 7, 11],
- "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
- "upsample_rates": [10, 10, 2, 2],
- "upsample_initial_channel": 512,
- "upsample_kernel_sizes": [16, 16, 4, 4],
- "use_spectral_norm": false,
- "gin_channels": 256,
- "spk_embed_dim": 109
- }
-}
\ No newline at end of file
diff --git a/main/configs/v1/48000.json b/main/configs/v1/48000.json
deleted file mode 100644
index 9c87fa8f9152310b850edf3c291f242dbcb6cddb..0000000000000000000000000000000000000000
--- a/main/configs/v1/48000.json
+++ /dev/null
@@ -1,46 +0,0 @@
-{
- "train": {
- "log_interval": 200,
- "seed": 1234,
- "epochs": 20000,
- "learning_rate": 0.0001,
- "betas": [0.8, 0.99],
- "eps": 1e-09,
- "batch_size": 4,
- "lr_decay": 0.999875,
- "segment_size": 11520,
- "init_lr_ratio": 1,
- "warmup_epochs": 0,
- "c_mel": 45,
- "c_kl": 1.0
- },
- "data": {
- "max_wav_value": 32768.0,
- "sample_rate": 48000,
- "filter_length": 2048,
- "hop_length": 480,
- "win_length": 2048,
- "n_mel_channels": 128,
- "mel_fmin": 0.0,
- "mel_fmax": null
- },
- "model": {
- "inter_channels": 192,
- "hidden_channels": 192,
- "filter_channels": 768,
- "text_enc_hidden_dim": 256,
- "n_heads": 2,
- "n_layers": 6,
- "kernel_size": 3,
- "p_dropout": 0,
- "resblock": "1",
- "resblock_kernel_sizes": [3, 7, 11],
- "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
- "upsample_rates": [10, 6, 2, 2, 2],
- "upsample_initial_channel": 512,
- "upsample_kernel_sizes": [16, 16, 4, 4, 4],
- "use_spectral_norm": false,
- "gin_channels": 256,
- "spk_embed_dim": 109
- }
-}
\ No newline at end of file
diff --git a/main/configs/v2/32000.json b/main/configs/v2/32000.json
deleted file mode 100644
index 567fa71a6ca8465cc6f77df6d258c8497b9c5a41..0000000000000000000000000000000000000000
--- a/main/configs/v2/32000.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
- "train": {
- "log_interval": 200,
- "seed": 1234,
- "learning_rate": 0.0001,
- "betas": [0.8, 0.99],
- "eps": 1e-09,
- "lr_decay": 0.999875,
- "segment_size": 12800,
- "c_mel": 45,
- "c_kl": 1.0
- },
- "data": {
- "max_wav_value": 32768.0,
- "sample_rate": 32000,
- "filter_length": 1024,
- "hop_length": 320,
- "win_length": 1024,
- "n_mel_channels": 80,
- "mel_fmin": 0.0,
- "mel_fmax": null
- },
- "model": {
- "inter_channels": 192,
- "hidden_channels": 192,
- "filter_channels": 768,
- "text_enc_hidden_dim": 768,
- "n_heads": 2,
- "n_layers": 6,
- "kernel_size": 3,
- "p_dropout": 0,
- "resblock": "1",
- "resblock_kernel_sizes": [3, 7, 11],
- "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
- "upsample_rates": [10, 8, 2, 2],
- "upsample_initial_channel": 512,
- "upsample_kernel_sizes": [20, 16, 4, 4],
- "use_spectral_norm": false,
- "gin_channels": 256,
- "spk_embed_dim": 109
- }
-}
\ No newline at end of file
diff --git a/main/configs/v2/40000.json b/main/configs/v2/40000.json
deleted file mode 100644
index 344a1673c03faa45d499845f7a61664fe8176a96..0000000000000000000000000000000000000000
--- a/main/configs/v2/40000.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
- "train": {
- "log_interval": 200,
- "seed": 1234,
- "learning_rate": 0.0001,
- "betas": [0.8, 0.99],
- "eps": 1e-09,
- "lr_decay": 0.999875,
- "segment_size": 12800,
- "c_mel": 45,
- "c_kl": 1.0
- },
- "data": {
- "max_wav_value": 32768.0,
- "sample_rate": 40000,
- "filter_length": 2048,
- "hop_length": 400,
- "win_length": 2048,
- "n_mel_channels": 125,
- "mel_fmin": 0.0,
- "mel_fmax": null
- },
- "model": {
- "inter_channels": 192,
- "hidden_channels": 192,
- "filter_channels": 768,
- "text_enc_hidden_dim": 768,
- "n_heads": 2,
- "n_layers": 6,
- "kernel_size": 3,
- "p_dropout": 0,
- "resblock": "1",
- "resblock_kernel_sizes": [3, 7, 11],
- "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
- "upsample_rates": [10, 10, 2, 2],
- "upsample_initial_channel": 512,
- "upsample_kernel_sizes": [16, 16, 4, 4],
- "use_spectral_norm": false,
- "gin_channels": 256,
- "spk_embed_dim": 109
- }
-}
\ No newline at end of file
diff --git a/main/configs/v2/48000.json b/main/configs/v2/48000.json
deleted file mode 100644
index 2ad00577a300123be7e4fd1254c07b21ab602c34..0000000000000000000000000000000000000000
--- a/main/configs/v2/48000.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
- "train": {
- "log_interval": 200,
- "seed": 1234,
- "learning_rate": 0.0001,
- "betas": [0.8, 0.99],
- "eps": 1e-09,
- "lr_decay": 0.999875,
- "segment_size": 17280,
- "c_mel": 45,
- "c_kl": 1.0
- },
- "data": {
- "max_wav_value": 32768.0,
- "sample_rate": 48000,
- "filter_length": 2048,
- "hop_length": 480,
- "win_length": 2048,
- "n_mel_channels": 128,
- "mel_fmin": 0.0,
- "mel_fmax": null
- },
- "model": {
- "inter_channels": 192,
- "hidden_channels": 192,
- "filter_channels": 768,
- "text_enc_hidden_dim": 768,
- "n_heads": 2,
- "n_layers": 6,
- "kernel_size": 3,
- "p_dropout": 0,
- "resblock": "1",
- "resblock_kernel_sizes": [3, 7, 11],
- "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
- "upsample_rates": [12, 10, 2, 2],
- "upsample_initial_channel": 512,
- "upsample_kernel_sizes": [24, 20, 4, 4],
- "use_spectral_norm": false,
- "gin_channels": 256,
- "spk_embed_dim": 109
- }
-}
\ No newline at end of file
diff --git a/main/inference/audio_effects.py b/main/inference/audio_effects.py
deleted file mode 100644
index 0214d0528408929684ba04281ea1d771e85bd27e..0000000000000000000000000000000000000000
--- a/main/inference/audio_effects.py
+++ /dev/null
@@ -1,185 +0,0 @@
-import os
-import sys
-import librosa
-import argparse
-
-import numpy as np
-import soundfile as sf
-
-from distutils.util import strtobool
-from scipy.signal import butter, filtfilt
-from pedalboard import Pedalboard, Chorus, Distortion, Reverb, PitchShift, Delay, Limiter, Gain, Bitcrush, Clipping, Compressor, Phaser, HighpassFilter
-
-sys.path.append(os.getcwd())
-
-from main.library.utils import pydub_load
-from main.app.variables import translations, logger
-
-def parse_arguments():
- parser = argparse.ArgumentParser()
- parser.add_argument("--audio_effects", action='store_true')
- parser.add_argument("--input_path", type=str, required=True)
- parser.add_argument("--output_path", type=str, default="./audios/apply_effects.wav")
- parser.add_argument("--export_format", type=str, default="wav")
- parser.add_argument("--resample", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--resample_sr", type=int, default=0)
- parser.add_argument("--chorus", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--chorus_depth", type=float, default=0.5)
- parser.add_argument("--chorus_rate", type=float, default=1.5)
- parser.add_argument("--chorus_mix", type=float, default=0.5)
- parser.add_argument("--chorus_delay", type=int, default=10)
- parser.add_argument("--chorus_feedback", type=float, default=0)
- parser.add_argument("--distortion", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--drive_db", type=int, default=20)
- parser.add_argument("--reverb", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--reverb_room_size", type=float, default=0.5)
- parser.add_argument("--reverb_damping", type=float, default=0.5)
- parser.add_argument("--reverb_wet_level", type=float, default=0.33)
- parser.add_argument("--reverb_dry_level", type=float, default=0.67)
- parser.add_argument("--reverb_width", type=float, default=1)
- parser.add_argument("--reverb_freeze_mode", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--pitchshift", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--pitch_shift", type=int, default=0)
- parser.add_argument("--delay", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--delay_seconds", type=float, default=0.5)
- parser.add_argument("--delay_feedback", type=float, default=0.5)
- parser.add_argument("--delay_mix", type=float, default=0.5)
- parser.add_argument("--compressor", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--compressor_threshold", type=int, default=-20)
- parser.add_argument("--compressor_ratio", type=float, default=4)
- parser.add_argument("--compressor_attack_ms", type=float, default=10)
- parser.add_argument("--compressor_release_ms", type=int, default=200)
- parser.add_argument("--limiter", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--limiter_threshold", type=int, default=0)
- parser.add_argument("--limiter_release", type=int, default=100)
- parser.add_argument("--gain", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--gain_db", type=int, default=0)
- parser.add_argument("--bitcrush", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--bitcrush_bit_depth", type=int, default=16)
- parser.add_argument("--clipping", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--clipping_threshold", type=int, default=-10)
- parser.add_argument("--phaser", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--phaser_rate_hz", type=float, default=0.5)
- parser.add_argument("--phaser_depth", type=float, default=0.5)
- parser.add_argument("--phaser_centre_frequency_hz", type=int, default=1000)
- parser.add_argument("--phaser_feedback", type=float, default=0)
- parser.add_argument("--phaser_mix", type=float, default=0.5)
- parser.add_argument("--treble_bass_boost", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--bass_boost_db", type=int, default=0)
- parser.add_argument("--bass_boost_frequency", type=int, default=100)
- parser.add_argument("--treble_boost_db", type=int, default=0)
- parser.add_argument("--treble_boost_frequency", type=int, default=3000)
- parser.add_argument("--fade_in_out", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--fade_in_duration", type=float, default=2000)
- parser.add_argument("--fade_out_duration", type=float, default=2000)
- parser.add_argument("--audio_combination", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--audio_combination_input", type=str)
- parser.add_argument("--main_volume", type=int, default=0)
- parser.add_argument("--combination_volume", type=int, default=-7)
-
- return parser.parse_args()
-
-def process_audio(input_path, output_path, resample, resample_sr, chorus_depth, chorus_rate, chorus_mix, chorus_delay, chorus_feedback, distortion_drive, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift, delay_seconds, delay_feedback, delay_mix, compressor_threshold, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold, limiter_release, gain_db, bitcrush_bit_depth, clipping_threshold, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost_db, bass_boost_frequency, treble_boost_db, treble_boost_frequency, fade_in_duration, fade_out_duration, export_format, chorus, distortion, reverb, pitchshift, delay, compressor, limiter, gain, bitcrush, clipping, phaser, treble_bass_boost, fade_in_out, audio_combination, audio_combination_input, main_volume, combination_volume):
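- # Reflect-pad signals that are shorter than filtfilt's default padlen, filter with padlen=0, then trim back to the original length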
- def _filtfilt(b, a, audio):
- padlen = 3 * max(len(a), len(b))
- original_len = len(audio)
-
- if original_len <= padlen:
- pad_width = padlen - original_len + 1
- audio = np.pad(audio, (pad_width, 0), mode='reflect')
-
- filtered = filtfilt(b, a, audio, padlen=0)
- return filtered[-original_len:]
-
- def bass_boost(audio, gain_db, frequency, sample_rate):
- if gain_db >= 1:
- b, a = butter(4, frequency / (0.5 * sample_rate), btype='low')
- boosted = _filtfilt(b, a, audio)
- return boosted * (10 ** (gain_db / 20))
- return audio
-
- def treble_boost(audio, gain_db, frequency, sample_rate):
- if gain_db >= 1:
- b, a = butter(4, frequency / (0.5 * sample_rate), btype='high')
- boosted = _filtfilt(b, a, audio)
- return boosted * (10 ** (gain_db / 20))
- return audio
-
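- # Fade helpers: duration is interpreted in seconds and the fade length is clamped to the signal length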
- def fade_out_effect(audio, sr, duration=3.0):
- length = int(duration * sr)
- end = audio.shape[0]
- if length > end: length = end
- start = end - length
- audio[start:end] = audio[start:end] * np.linspace(1.0, 0.0, length)
- return audio
-
- def fade_in_effect(audio, sr, duration=3.0):
- length = int(duration * sr)
- start = 0
- if length > audio.shape[0]: length = audio.shape[0]
- end = length
- audio[start:end] = audio[start:end] * np.linspace(0.0, 1.0, length)
- return audio
-
- if not input_path or not os.path.exists(input_path):
- logger.warning(translations["input_not_valid"])
- sys.exit(1)
-
- if not output_path:
- logger.warning(translations["output_not_valid"])
- sys.exit(1)
-
- if os.path.exists(output_path): os.remove(output_path)
-
- try:
- input_path = input_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
- try:
- audio, sample_rate = sf.read(input_path, dtype=np.float32)
- except Exception:
- audio, sample_rate = librosa.load(input_path, sr=None)
- except Exception as e:
- logger.debug(f"{translations['errors_loading_audio']}: {e}")
- raise RuntimeError(f"{translations['errors_loading_audio']}: {e}")
-
- try:
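- # Always start the chain with a HighpassFilter, then append only the optional effects that were enabled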
- board = Pedalboard([HighpassFilter()])
-
- if chorus: board.append(Chorus(depth=chorus_depth, rate_hz=chorus_rate, mix=chorus_mix, centre_delay_ms=chorus_delay, feedback=chorus_feedback))
- if distortion: board.append(Distortion(drive_db=distortion_drive))
- if reverb: board.append(Reverb(room_size=reverb_room_size, damping=reverb_damping, wet_level=reverb_wet_level, dry_level=reverb_dry_level, width=reverb_width, freeze_mode=1 if reverb_freeze_mode else 0))
- if pitchshift: board.append(PitchShift(semitones=pitch_shift))
- if delay: board.append(Delay(delay_seconds=delay_seconds, feedback=delay_feedback, mix=delay_mix))
- if compressor: board.append(Compressor(threshold_db=compressor_threshold, ratio=compressor_ratio, attack_ms=compressor_attack_ms, release_ms=compressor_release_ms))
- if limiter: board.append(Limiter(threshold_db=limiter_threshold, release_ms=limiter_release))
- if gain: board.append(Gain(gain_db=gain_db))
- if bitcrush: board.append(Bitcrush(bit_depth=bitcrush_bit_depth))
- if clipping: board.append(Clipping(threshold_db=clipping_threshold))
- if phaser: board.append(Phaser(rate_hz=phaser_rate_hz, depth=phaser_depth, centre_frequency_hz=phaser_centre_frequency_hz, feedback=phaser_feedback, mix=phaser_mix))
-
- processed_audio = board(audio, sample_rate)
-
- if treble_bass_boost:
- processed_audio = bass_boost(processed_audio, bass_boost_db, bass_boost_frequency, sample_rate)
- processed_audio = treble_boost(processed_audio, treble_boost_db, treble_boost_frequency, sample_rate)
-
- if fade_in_out:
- processed_audio = fade_in_effect(processed_audio, sample_rate, fade_in_duration)
- processed_audio = fade_out_effect(processed_audio, sample_rate, fade_out_duration)
-
- if resample and resample_sr != sample_rate and resample_sr > 0:
- processed_audio = librosa.resample(processed_audio, orig_sr=sample_rate, target_sr=resample_sr, res_type="soxr_vhq")
- sample_rate = resample_sr
-
- sf.write(output_path.replace("wav", export_format), processed_audio, sample_rate, format=export_format)
- if audio_combination: pydub_load(audio_combination_input, combination_volume).overlay(pydub_load(output_path.replace("wav", export_format), main_volume)).export(output_path.replace("wav", export_format), format=export_format)
- except Exception as e:
- import traceback
- logger.debug(traceback.format_exc())
- raise RuntimeError(translations["apply_error"].format(e=e))
- return output_path
-
-def main():
- args = parse_arguments()
- process_audio(input_path=args.input_path, output_path=args.output_path, resample=args.resample, resample_sr=args.resample_sr, chorus_depth=args.chorus_depth, chorus_rate=args.chorus_rate, chorus_mix=args.chorus_mix, chorus_delay=args.chorus_delay, chorus_feedback=args.chorus_feedback, distortion_drive=args.drive_db, reverb_room_size=args.reverb_room_size, reverb_damping=args.reverb_damping, reverb_wet_level=args.reverb_wet_level, reverb_dry_level=args.reverb_dry_level, reverb_width=args.reverb_width, reverb_freeze_mode=args.reverb_freeze_mode, pitch_shift=args.pitch_shift, delay_seconds=args.delay_seconds, delay_feedback=args.delay_feedback, delay_mix=args.delay_mix, compressor_threshold=args.compressor_threshold, compressor_ratio=args.compressor_ratio, compressor_attack_ms=args.compressor_attack_ms, compressor_release_ms=args.compressor_release_ms, limiter_threshold=args.limiter_threshold, limiter_release=args.limiter_release, gain_db=args.gain_db, bitcrush_bit_depth=args.bitcrush_bit_depth, clipping_threshold=args.clipping_threshold, phaser_rate_hz=args.phaser_rate_hz, phaser_depth=args.phaser_depth, phaser_centre_frequency_hz=args.phaser_centre_frequency_hz, phaser_feedback=args.phaser_feedback, phaser_mix=args.phaser_mix, bass_boost_db=args.bass_boost_db, bass_boost_frequency=args.bass_boost_frequency, treble_boost_db=args.treble_boost_db, treble_boost_frequency=args.treble_boost_frequency, fade_in_duration=args.fade_in_duration, fade_out_duration=args.fade_out_duration, export_format=args.export_format, chorus=args.chorus, distortion=args.distortion, reverb=args.reverb, pitchshift=args.pitchshift, delay=args.delay, compressor=args.compressor, limiter=args.limiter, gain=args.gain, bitcrush=args.bitcrush, clipping=args.clipping, phaser=args.phaser, treble_bass_boost=args.treble_bass_boost, fade_in_out=args.fade_in_out, audio_combination=args.audio_combination, audio_combination_input=args.audio_combination_input, main_volume=args.main_volume, combination_volume=args.combination_volume)
-
-if __name__ == "__main__": main()
\ No newline at end of file
diff --git a/main/inference/conversion/convert.py b/main/inference/conversion/convert.py
deleted file mode 100644
index fc958a7aab949b82cee150871e3514350ae79f4f..0000000000000000000000000000000000000000
--- a/main/inference/conversion/convert.py
+++ /dev/null
@@ -1,300 +0,0 @@
-import os
-import sys
-import json
-import onnx
-import time
-import torch
-import librosa
-import logging
-import argparse
-import warnings
-import onnxruntime
-
-import numpy as np
-import soundfile as sf
-
-from tqdm import tqdm
-from distutils.util import strtobool
-
-warnings.filterwarnings("ignore")
-sys.path.append(os.getcwd())
-
-from main.inference.conversion.pipeline import Pipeline
-from main.app.variables import config, logger, translations
-from main.library.algorithm.synthesizers import Synthesizer
-from main.inference.conversion.utils import clear_gpu_cache
-from main.library.utils import check_assets, load_audio, load_embedders_model, cut, restore, get_providers
-
-for l in ["torch", "faiss", "omegaconf", "httpx", "httpcore", "faiss.loader", "numba.core", "urllib3", "transformers", "matplotlib"]:
- logging.getLogger(l).setLevel(logging.ERROR)
-
-def parse_arguments():
- parser = argparse.ArgumentParser()
- parser.add_argument("--convert", action='store_true')
- parser.add_argument("--pitch", type=int, default=0)
- parser.add_argument("--filter_radius", type=int, default=3)
- parser.add_argument("--index_rate", type=float, default=0.5)
- parser.add_argument("--rms_mix_rate", type=float, default=1)
- parser.add_argument("--protect", type=float, default=0.33)
- parser.add_argument("--hop_length", type=int, default=64)
- parser.add_argument("--f0_method", type=str, default="rmvpe")
- parser.add_argument("--embedder_model", type=str, default="contentvec_base")
- parser.add_argument("--input_path", type=str, required=True)
- parser.add_argument("--output_path", type=str, default="./audios/output.wav")
- parser.add_argument("--export_format", type=str, default="wav")
- parser.add_argument("--pth_path", type=str, required=True)
- parser.add_argument("--index_path", type=str, default="")
- parser.add_argument("--f0_autotune", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--f0_autotune_strength", type=float, default=1)
- parser.add_argument("--clean_audio", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--clean_strength", type=float, default=0.7)
- parser.add_argument("--resample_sr", type=int, default=0)
- parser.add_argument("--split_audio", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--checkpointing", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--f0_file", type=str, default="")
- parser.add_argument("--f0_onnx", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--embedders_mode", type=str, default="fairseq")
- parser.add_argument("--formant_shifting", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--formant_qfrency", type=float, default=0.8)
- parser.add_argument("--formant_timbre", type=float, default=0.8)
- parser.add_argument("--proposal_pitch", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--proposal_pitch_threshold", type=float, default=255.0)
-
- return parser.parse_args()
-
-def main():
- args = parse_arguments()
- pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0_method, input_path, output_path, pth_path, index_path, f0_autotune, f0_autotune_strength, clean_audio, clean_strength, export_format, embedder_model, resample_sr, split_audio, checkpointing, f0_file, f0_onnx, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, proposal_pitch, proposal_pitch_threshold = args.pitch, args.filter_radius, args.index_rate, args.rms_mix_rate,args.protect, args.hop_length, args.f0_method, args.input_path, args.output_path, args.pth_path, args.index_path, args.f0_autotune, args.f0_autotune_strength, args.clean_audio, args.clean_strength, args.export_format, args.embedder_model, args.resample_sr, args.split_audio, args.checkpointing, args.f0_file, args.f0_onnx, args.embedders_mode, args.formant_shifting, args.formant_qfrency, args.formant_timbre, args.proposal_pitch, args.proposal_pitch_threshold
-
- run_convert_script(pitch=pitch, filter_radius=filter_radius, index_rate=index_rate, rms_mix_rate=rms_mix_rate, protect=protect, hop_length=hop_length, f0_method=f0_method, input_path=input_path, output_path=output_path, pth_path=pth_path, index_path=index_path, f0_autotune=f0_autotune, f0_autotune_strength=f0_autotune_strength, clean_audio=clean_audio, clean_strength=clean_strength, export_format=export_format, embedder_model=embedder_model, resample_sr=resample_sr, split_audio=split_audio, checkpointing=checkpointing, f0_file=f0_file, f0_onnx=f0_onnx, embedders_mode=embedders_mode, formant_shifting=formant_shifting, formant_qfrency=formant_qfrency, formant_timbre=formant_timbre, proposal_pitch=proposal_pitch, proposal_pitch_threshold=proposal_pitch_threshold)
-
-def run_convert_script(pitch=0, filter_radius=3, index_rate=0.5, rms_mix_rate=1, protect=0.5, hop_length=64, f0_method="rmvpe", input_path=None, output_path="./output.wav", pth_path=None, index_path=None, f0_autotune=False, f0_autotune_strength=1, clean_audio=False, clean_strength=0.7, export_format="wav", embedder_model="contentvec_base", resample_sr=0, split_audio=False, checkpointing=False, f0_file=None, f0_onnx=False, embedders_mode="fairseq", formant_shifting=False, formant_qfrency=0.8, formant_timbre=0.8, proposal_pitch=False, proposal_pitch_threshold=255.0):
- check_assets(f0_method, embedder_model, f0_onnx=f0_onnx, embedders_mode=embedders_mode)
- log_data = {translations['pitch']: pitch, translations['filter_radius']: filter_radius, translations['index_strength']: index_rate, translations['rms_mix_rate']: rms_mix_rate, translations['protect']: protect, "Hop length": hop_length, translations['f0_method']: f0_method, translations['audio_path']: input_path, translations['output_path']: output_path.replace('wav', export_format), translations['model_path']: pth_path, translations['indexpath']: index_path, translations['autotune']: f0_autotune, translations['clear_audio']: clean_audio, translations['export_format']: export_format, translations['hubert_model']: embedder_model, translations['split_audio']: split_audio, translations['memory_efficient_training']: checkpointing, translations["f0_onnx_mode"]: f0_onnx, translations["embed_mode"]: embedders_mode, translations["proposal_pitch"]: proposal_pitch}
-
- if clean_audio: log_data[translations['clean_strength']] = clean_strength
- if resample_sr != 0: log_data[translations['sample_rate']] = resample_sr
- if f0_autotune: log_data[translations['autotune_rate_info']] = f0_autotune_strength
- if os.path.isfile(f0_file): log_data[translations['f0_file']] = f0_file
- if proposal_pitch: log_data[translations["proposal_pitch_threshold"]] = proposal_pitch_threshold
- if formant_shifting:
- log_data[translations['formant_qfrency']] = formant_qfrency
- log_data[translations['formant_timbre']] = formant_timbre
-
- for key, value in log_data.items():
- logger.debug(f"{key}: {value}")
-
- if not pth_path or not os.path.exists(pth_path) or os.path.isdir(pth_path) or not pth_path.endswith((".pth", ".onnx")):
- logger.warning(translations["provide_file"].format(filename=translations["model"]))
- sys.exit(1)
-
- cvt = VoiceConverter(pth_path, 0)
- start_time = time.time()
-
- pid_path = os.path.join("assets", "convert_pid.txt")
- with open(pid_path, "w") as pid_file:
- pid_file.write(str(os.getpid()))
-
- if os.path.isdir(input_path):
- logger.info(translations["convert_batch"])
- audio_files = [f for f in os.listdir(input_path) if f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"))]
-
- if not audio_files:
- logger.warning(translations["not_found_audio"])
- sys.exit(1)
-
- logger.info(translations["found_audio"].format(audio_files=len(audio_files)))
-
- for audio in audio_files:
- audio_path = os.path.join(input_path, audio)
- output_audio = os.path.join(input_path, os.path.splitext(audio)[0] + f"_output.{export_format}")
-
- logger.info(f"{translations['convert_audio']} '{audio_path}'...")
- if os.path.exists(output_audio): os.remove(output_audio)
-
- cvt.convert_audio(pitch=pitch, filter_radius=filter_radius, index_rate=index_rate, rms_mix_rate=rms_mix_rate, protect=protect, hop_length=hop_length, f0_method=f0_method, audio_input_path=audio_path, audio_output_path=output_audio, index_path=index_path, f0_autotune=f0_autotune, f0_autotune_strength=f0_autotune_strength, clean_audio=clean_audio, clean_strength=clean_strength, export_format=export_format, embedder_model=embedder_model, resample_sr=resample_sr, checkpointing=checkpointing, f0_file=f0_file, f0_onnx=f0_onnx, embedders_mode=embedders_mode, formant_shifting=formant_shifting, formant_qfrency=formant_qfrency, formant_timbre=formant_timbre, split_audio=split_audio, proposal_pitch=proposal_pitch, proposal_pitch_threshold=proposal_pitch_threshold)
-
- logger.info(translations["convert_batch_success"].format(elapsed_time=f"{(time.time() - start_time):.2f}", output_path=output_path.replace('wav', export_format)))
- else:
- if not os.path.exists(input_path):
- logger.warning(translations["not_found_audio"])
- sys.exit(1)
-
- logger.info(f"{translations['convert_audio']} '{input_path}'...")
- if os.path.exists(output_path): os.remove(output_path)
-
- cvt.convert_audio(pitch=pitch, filter_radius=filter_radius, index_rate=index_rate, rms_mix_rate=rms_mix_rate, protect=protect, hop_length=hop_length, f0_method=f0_method, audio_input_path=input_path, audio_output_path=output_path, index_path=index_path, f0_autotune=f0_autotune, f0_autotune_strength=f0_autotune_strength, clean_audio=clean_audio, clean_strength=clean_strength, export_format=export_format, embedder_model=embedder_model, resample_sr=resample_sr, checkpointing=checkpointing, f0_file=f0_file, f0_onnx=f0_onnx, embedders_mode=embedders_mode, formant_shifting=formant_shifting, formant_qfrency=formant_qfrency, formant_timbre=formant_timbre, split_audio=split_audio, proposal_pitch=proposal_pitch, proposal_pitch_threshold=proposal_pitch_threshold)
- logger.info(translations["convert_audio_success"].format(input_path=input_path, elapsed_time=f"{(time.time() - start_time):.2f}", output_path=output_path.replace('wav', export_format)))
-
- if os.path.exists(pid_path): os.remove(pid_path)
-
-class VoiceConverter:
- def __init__(self, model_path, sid = 0):
- self.config = config
- self.device = config.device
- self.hubert_model = None
- self.tgt_sr = None
- self.net_g = None
- self.vc = None
- self.cpt = None
- self.version = None
- self.n_spk = None
- self.use_f0 = None
- self.loaded_model = None
- self.vocoder = "Default"
- self.checkpointing = False
- self.sample_rate = 16000
- self.sid = sid
- self.get_vc(model_path, sid)
-
- def convert_audio(self, audio_input_path, audio_output_path, index_path, embedder_model, pitch, f0_method, index_rate, rms_mix_rate, protect, hop_length, f0_autotune, f0_autotune_strength, filter_radius, clean_audio, clean_strength, export_format, resample_sr = 0, checkpointing = False, f0_file = None, f0_onnx = False, embedders_mode = "fairseq", formant_shifting = False, formant_qfrency = 0.8, formant_timbre = 0.8, split_audio = False, proposal_pitch = False, proposal_pitch_threshold = 255.0):
- try:
- with tqdm(total=10, desc=translations["convert_audio"], ncols=100, unit="a", leave=not split_audio) as pbar:
- audio = load_audio(audio_input_path, self.sample_rate, formant_shifting=formant_shifting, formant_qfrency=formant_qfrency, formant_timbre=formant_timbre)
- self.checkpointing = checkpointing
-
- audio_max = np.abs(audio).max() / 0.95
- if audio_max > 1: audio /= audio_max
-
- if not self.hubert_model:
- models, embed_suffix = load_embedders_model(embedder_model, embedders_mode)
- self.hubert_model = (models.to(self.device).half() if self.config.is_half else models.to(self.device).float()).eval() if embed_suffix in [".pt", ".safetensors"] else models
- self.embed_suffix = embed_suffix
-
- pbar.update(1)
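- # split_audio: cut the input at quiet regions (db_thresh=-60, min_interval=500)
- # and convert each chunk separately, restoring the pieces afterwards.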
- if split_audio:
- pbar.close()
- chunks = cut(audio, self.sample_rate, db_thresh=-60, min_interval=500)
-
- logger.info(f"{translations['split_total']}: {len(chunks)}")
- pbar = tqdm(total=len(chunks) * 5 + 4, desc=translations["convert_audio"], ncols=100, unit="a", leave=True)
- else: chunks = [(audio, 0, 0)]
-
- pbar.update(1)
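- # Run every chunk through the conversion pipeline, keeping its start/end offsets
- # so the converted pieces can be stitched back together below.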
- converted_chunks = [(
- start,
- end,
- self.vc.pipeline(
- logger=logger,
- model=self.hubert_model,
- net_g=self.net_g,
- sid=self.sid,
- audio=waveform,
- f0_up_key=pitch,
- f0_method=f0_method,
- file_index=index_path.strip(' \t\n"').replace("trained", "added"),
- index_rate=index_rate,
- pitch_guidance=self.use_f0,
- filter_radius=filter_radius,
- rms_mix_rate=rms_mix_rate,
- version=self.version,
- protect=protect,
- hop_length=hop_length,
- f0_autotune=f0_autotune,
- f0_autotune_strength=f0_autotune_strength,
- suffix=self.suffix,
- embed_suffix=self.embed_suffix,
- f0_file=f0_file,
- f0_onnx=f0_onnx,
- pbar=pbar,
- proposal_pitch=proposal_pitch,
- proposal_pitch_threshold=proposal_pitch_threshold,
- energy_use=self.energy
- )
- ) for waveform, start, end in chunks]
-
- pbar.update(1)
-
- del self.net_g, self.hubert_model
- audio_output = restore(converted_chunks, total_len=len(audio), dtype=converted_chunks[0][2].dtype) if split_audio else converted_chunks[0][2]
-
- if self.tgt_sr != resample_sr and resample_sr > 0:
- audio_output = librosa.resample(audio_output, orig_sr=self.tgt_sr, target_sr=resample_sr, res_type="soxr_vhq")
- self.tgt_sr = resample_sr
-
- pbar.update(1)
- if clean_audio:
- from main.tools.noisereduce import reduce_noise
- audio_output = reduce_noise(y=audio_output, sr=self.tgt_sr, prop_decrease=clean_strength, device=self.device)
-
- if len(audio) / self.sample_rate > len(audio_output) / self.tgt_sr:
- padding = np.zeros(int(np.round(len(audio) / self.sample_rate * self.tgt_sr) - len(audio_output)), dtype=audio_output.dtype)
- audio_output = np.concatenate([audio_output, padding])
-
- try:
- sf.write(audio_output_path, audio_output, self.tgt_sr, format=export_format)
- except Exception:
- sf.write(audio_output_path, librosa.resample(audio_output, orig_sr=self.tgt_sr, target_sr=48000, res_type="soxr_vhq"), 48000, format=export_format)
-
- pbar.update(1)
- except Exception as e:
- logger.error(translations["error_convert"].format(e=e))
- import traceback
- logger.debug(traceback.format_exc())
-
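- # Reload weights only when a different model path is requested.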
- def get_vc(self, weight_root, sid):
- if sid == "" or sid == []:
- self.cleanup()
- clear_gpu_cache()
-
- if not self.loaded_model or self.loaded_model != weight_root:
- self.loaded_model = weight_root
- self.load_model()
- if self.cpt is not None: self.setup()
-
- def cleanup(self):
- if self.hubert_model is not None:
- del self.net_g, self.n_spk, self.vc, self.hubert_model, self.tgt_sr
- self.hubert_model = self.net_g = self.n_spk = self.vc = self.tgt_sr = None
- clear_gpu_cache()
-
- del self.net_g, self.cpt
- clear_gpu_cache()
- self.cpt = None
-
- def load_model(self):
- if os.path.isfile(self.loaded_model):
- if self.loaded_model.endswith(".pth"): self.cpt = torch.load(self.loaded_model, map_location="cpu", weights_only=True)
- else:
- sess_options = onnxruntime.SessionOptions()
- sess_options.log_severity_level = 3
- self.cpt = onnxruntime.InferenceSession(self.loaded_model, sess_options=sess_options, providers=get_providers())
- else: self.cpt = None
-
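- # Build the synthesizer from the loaded checkpoint: .pth files carry config and
- # weights directly, .onnx files store sr/f0/version/energy in "model_info" metadata.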
- def setup(self):
- if self.cpt is not None:
- if self.loaded_model.endswith(".pth"):
- self.tgt_sr = self.cpt["config"][-1]
- self.cpt["config"][-3] = self.cpt["weight"]["emb_g.weight"].shape[0]
-
- self.use_f0 = self.cpt.get("f0", 1)
- self.version = self.cpt.get("version", "v1")
- self.vocoder = self.cpt.get("vocoder", "Default")
- self.energy = self.cpt.get("energy", False)
-
- if self.vocoder != "Default": self.config.is_half = False
- self.net_g = Synthesizer(*self.cpt["config"], use_f0=self.use_f0, text_enc_hidden_dim=768 if self.version == "v2" else 256, vocoder=self.vocoder, checkpointing=self.checkpointing, energy=self.energy)
- del self.net_g.enc_q
-
- self.net_g.load_state_dict(self.cpt["weight"], strict=False)
- self.net_g.eval().to(self.device)
- self.net_g = (self.net_g.half() if self.config.is_half else self.net_g.float())
- self.n_spk = self.cpt["config"][-3]
- self.suffix = ".pth"
- else:
- metadata_dict = None
- for prop in onnx.load(self.loaded_model).metadata_props:
- if prop.key == "model_info":
- metadata_dict = json.loads(prop.value)
- break
-
- self.net_g = self.cpt
- self.tgt_sr = metadata_dict.get("sr", 32000)
- self.use_f0 = metadata_dict.get("f0", 1)
- self.version = metadata_dict.get("version", "v1")
- self.energy = metadata_dict.get("energy", False)
- self.suffix = ".onnx"
-
- self.vc = Pipeline(self.tgt_sr, self.config)
-
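-# Usage sketch (illustrative paths and values only, not part of the CLI contract):
-#   cvt = VoiceConverter("assets/weights/voice.pth", sid=0)
-#   cvt.convert_audio(audio_input_path="in.wav", audio_output_path="out.wav", index_path="",
-#                     embedder_model="hubert_base", pitch=0, f0_method="rmvpe", index_rate=0,
-#                     rms_mix_rate=1, protect=0.33, hop_length=64, f0_autotune=False,
-#                     f0_autotune_strength=1, filter_radius=3, clean_audio=False,
-#                     clean_strength=0.7, export_format="wav")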
-if __name__ == "__main__": main()
\ No newline at end of file
diff --git a/main/inference/conversion/pipeline.py b/main/inference/conversion/pipeline.py
deleted file mode 100644
index 510c672c060f3b24243364df1f81757326710d91..0000000000000000000000000000000000000000
--- a/main/inference/conversion/pipeline.py
+++ /dev/null
@@ -1,251 +0,0 @@
-import os
-import sys
-import torch
-import faiss
-
-import numpy as np
-import torch.nn.functional as F
-
-from scipy import signal
-
-sys.path.append(os.getcwd())
-
-from main.app.variables import translations
-from main.library.utils import extract_features
-from main.library.predictors.Generator import Generator
-from main.inference.extracting.rms import RMSEnergyExtractor
-from main.inference.conversion.utils import change_rms, clear_gpu_cache, get_onnx_argument
-
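-# 5th-order Butterworth high-pass at 48 Hz (16 kHz input), applied in pipeline()
-# to remove DC offset and low-frequency rumble before conversion.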
-bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
-
-class Pipeline:
- def __init__(self, tgt_sr, config):
- self.x_pad = config.x_pad
- self.x_query = config.x_query
- self.x_center = config.x_center
- self.x_max = config.x_max
- self.sample_rate = 16000
- self.window = 160
- self.t_pad = self.sample_rate * self.x_pad
- self.t_pad_tgt = tgt_sr * self.x_pad
- self.t_pad2 = self.t_pad * 2
- self.t_query = self.sample_rate * self.x_query
- self.t_center = self.sample_rate * self.x_center
- self.t_max = self.sample_rate * self.x_max
- self.f0_min = 50
- self.f0_max = 1100
- self.device = config.device
- self.is_half = config.is_half
-
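- # Convert one padded segment: extract embedder features, optionally blend in
- # FAISS-retrieved training features, then synthesize with the loaded model.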
- def voice_conversion(self, model, net_g, sid, audio0, pitch, pitchf, index, big_npy, index_rate, version, protect, energy):
- pitch_guidance = pitch is not None and pitchf is not None
- energy_use = energy is not None
-
- feats = torch.from_numpy(audio0)
- feats = feats.half() if self.is_half else feats.float()
-
- feats = feats.mean(-1) if feats.dim() == 2 else feats
- assert feats.dim() == 1, feats.dim()
- feats = feats.view(1, -1)
-
- with torch.no_grad():
- if self.embed_suffix == ".pt":
- padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
- logits = model.extract_features(**{"source": feats.to(self.device), "padding_mask": padding_mask, "output_layer": 9 if version == "v1" else 12})
- feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
- elif self.embed_suffix == ".onnx": feats = extract_features(model, feats.to(self.device), version).to(self.device)
- elif self.embed_suffix == ".safetensors":
- logits = model(feats.to(self.device))["last_hidden_state"]
- feats = model.final_proj(logits[0]).unsqueeze(0) if version == "v1" else logits
- else: raise ValueError(translations["option_not_valid"])
-
- feats0 = feats.clone() if protect < 0.5 and pitch_guidance else None
-
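- # Index retrieval: fetch the 8 nearest training features and blend them into the
- # extracted features, weighted by inverse squared distance and index_rate.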
- if index is not None and big_npy is not None and index_rate != 0:
- npy = feats[0].cpu().numpy()
- if self.is_half: npy = npy.astype(np.float32)
-
- score, ix = index.search(npy, k=8)
- weight = np.square(1 / score)
-
- npy = np.sum(big_npy[ix] * np.expand_dims(weight / weight.sum(axis=1, keepdims=True), axis=2), axis=1)
- if self.is_half: npy = npy.astype(np.float16)
-
- feats = (torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate + (1 - index_rate) * feats)
-
- feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
- p_len = min(audio0.shape[0] // self.window, feats.shape[1])
-
- if pitch_guidance: pitch, pitchf = pitch[:, :p_len], pitchf[:, :p_len]
- if energy_use: energy = energy[:, :p_len]
-
- if feats0 is not None:
- pitchff = pitchf.clone()
- pitchff[pitchf > 0] = 1
- pitchff[pitchf < 1] = protect
- pitchff = pitchff.unsqueeze(-1)
-
- feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
- feats = (feats * pitchff + feats0 * (1 - pitchff)).to(feats0.dtype)
-
- p_len = torch.tensor([p_len], device=self.device).long()
- feats = feats.half() if self.is_half else feats.float()
-
- if not pitch_guidance: pitch, pitchf = None, None
- else: pitchf = pitchf.half() if self.is_half else pitchf.float()
- if not energy_use: energy = None
- else: energy = energy.half() if self.is_half else energy.float()
-
- audio1 = (
- (
- net_g.infer(
- feats,
- p_len,
- pitch,
- pitchf,
- sid,
- energy
- )[0][0, 0]
- ).data.cpu().float().numpy()
- ) if self.suffix == ".pth" else (
- net_g.run(
- [net_g.get_outputs()[0].name], (
- get_onnx_argument(
- net_g,
- feats,
- p_len,
- sid,
- pitch,
- pitchf,
- energy,
- pitch_guidance,
- energy_use
- )
- )
- )[0][0, 0]
- )
-
- if self.embed_suffix == ".pt": del padding_mask
- del feats, feats0, p_len
-
- clear_gpu_cache()
- return audio1
-
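- # Full conversion pass: optional FAISS index, high-pass filtering, splitting long
- # audio at its quietest points, f0/energy extraction, per-segment conversion and
- # optional RMS re-balancing of the output.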
- def pipeline(self, logger, model, net_g, sid, audio, f0_up_key, f0_method, file_index, index_rate, pitch_guidance, filter_radius, rms_mix_rate, version, protect, hop_length, f0_autotune, f0_autotune_strength, suffix, embed_suffix, f0_file=None, f0_onnx=False, pbar=None, proposal_pitch=False, proposal_pitch_threshold=255.0, energy_use=False):
- self.suffix = suffix
- self.embed_suffix = embed_suffix
-
- if file_index != "" and os.path.exists(file_index) and index_rate != 0:
- try:
- index = faiss.read_index(file_index)
- big_npy = index.reconstruct_n(0, index.ntotal)
- except Exception as e:
- logger.error(translations["read_faiss_index_error"].format(e=e))
- index = big_npy = None
- else: index = big_npy = None
-
- if pbar: pbar.update(1)
- opt_ts, audio_opt = [], []
- audio = signal.filtfilt(bh, ah, audio)
- audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
-
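- # For long inputs, choose split points around every t_center at the sample where
- # the smoothed amplitude is smallest, so cuts fall in the quietest spots.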
- if audio_pad.shape[0] > self.t_max:
- audio_sum = np.zeros_like(audio)
-
- for i in range(self.window):
- audio_sum += audio_pad[i : i - self.window]
-
- for t in range(self.t_center, audio.shape[0], self.t_center):
- opt_ts.append(t - self.t_query + np.where(np.abs(audio_sum[t - self.t_query : t + self.t_query]) == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min())[0][0])
-
- s = 0
- t, inp_f0 = None, None
- audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect")
- sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
- p_len = audio_pad.shape[0] // self.window
-
- if hasattr(f0_file, "name"):
- try:
- with open(f0_file.name, "r") as f:
- raw_lines = f.read()
-
- if len(raw_lines) > 0:
- inp_f0 = []
-
- for line in raw_lines.strip("\n").split("\n"):
- inp_f0.append([float(i) for i in line.split(",")])
-
- inp_f0 = np.array(inp_f0, dtype=np.float32)
- except Exception:
- logger.error(translations["error_readfile"])
- inp_f0 = None
-
- if pbar: pbar.update(1)
- if pitch_guidance:
- if not hasattr(self, "f0_generator"): self.f0_generator = Generator(self.sample_rate, hop_length, self.f0_min, self.f0_max, self.is_half, self.device, f0_onnx, f0_onnx)
- pitch, pitchf = self.f0_generator.calculator(self.x_pad, f0_method, audio_pad, f0_up_key, p_len, filter_radius, f0_autotune, f0_autotune_strength, manual_f0=inp_f0, proposal_pitch=proposal_pitch, proposal_pitch_threshold=proposal_pitch_threshold)
-
- if self.device == "mps": pitchf = pitchf.astype(np.float32)
- pitch, pitchf = torch.tensor(pitch[:p_len], device=self.device).unsqueeze(0).long(), torch.tensor(pitchf[:p_len], device=self.device).unsqueeze(0).float()
-
- if pbar: pbar.update(1)
-
- if energy_use:
- if not hasattr(self, "rms_extract"): self.rms_extract = RMSEnergyExtractor(frame_length=2048, hop_length=self.window, center=True, pad_mode = "reflect").to(self.device).eval()
- energy = self.rms_extract(torch.from_numpy(audio_pad).to(self.device).unsqueeze(0)).cpu().numpy()
-
- if self.device == "mps": energy = energy.astype(np.float32)
- energy = torch.tensor(energy[:p_len], device=self.device).unsqueeze(0).float()
-
- if pbar: pbar.update(1)
-
- for t in opt_ts:
- t = t // self.window * self.window
- audio_opt.append(
- self.voice_conversion(
- model,
- net_g,
- sid,
- audio_pad[s : t + self.t_pad2 + self.window],
- pitch[:, s // self.window : (t + self.t_pad2) // self.window] if pitch_guidance else None,
- pitchf[:, s // self.window : (t + self.t_pad2) // self.window] if pitch_guidance else None,
- index,
- big_npy,
- index_rate,
- version,
- protect,
- energy[:, s // self.window : (t + self.t_pad2) // self.window] if energy_use else None
- )[self.t_pad_tgt : -self.t_pad_tgt]
- )
- s = t
-
- audio_opt.append(
- self.voice_conversion(
- model,
- net_g,
- sid,
- audio_pad[t:],
- (pitch[:, t // self.window :] if t is not None else pitch) if pitch_guidance else None,
- (pitchf[:, t // self.window :] if t is not None else pitchf) if pitch_guidance else None,
- index,
- big_npy,
- index_rate,
- version,
- protect,
- (energy[:, t // self.window :] if t is not None else energy) if energy_use else None
- )[self.t_pad_tgt : -self.t_pad_tgt]
- )
-
- audio_opt = np.concatenate(audio_opt)
- if pbar: pbar.update(1)
-
- if rms_mix_rate != 1: audio_opt = change_rms(audio, self.sample_rate, audio_opt, self.sample_rate, rms_mix_rate)
-
- audio_max = np.abs(audio_opt).max() / 0.99
- if audio_max > 1: audio_opt /= audio_max
-
- if pitch_guidance: del pitch, pitchf
- del sid
-
- clear_gpu_cache()
- return audio_opt
\ No newline at end of file
diff --git a/main/inference/conversion/utils.py b/main/inference/conversion/utils.py
deleted file mode 100644
index f423f3dfc70eed6376f5ab424811194def315fd5..0000000000000000000000000000000000000000
--- a/main/inference/conversion/utils.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import os
-import gc
-import sys
-import torch
-import librosa
-
-import numpy as np
-import torch.nn.functional as F
-
-sys.path.append(os.getcwd())
-
-from main.library import opencl
-
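-# Pull each f0 value toward the nearest note in note_dict; strength 0 leaves the
-# pitch untouched, strength 1 snaps it fully to the note.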
-def autotune_f0(note_dict, f0, f0_autotune_strength):
- autotuned_f0 = np.zeros_like(f0)
-
- for i, freq in enumerate(f0):
- autotuned_f0[i] = freq + (min(note_dict, key=lambda x: abs(x - freq)) - freq) * f0_autotune_strength
-
- return autotuned_f0
-
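-# Match the loudness envelope of the converted audio to the source: both RMS curves
-# are interpolated to the output length and mixed according to rate (rate=1 keeps
-# the converted loudness unchanged).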
-def change_rms(source_audio, source_rate, target_audio, target_rate, rate):
- rms1 = F.interpolate(torch.from_numpy(librosa.feature.rms(y=source_audio, frame_length=source_rate // 2 * 2, hop_length=source_rate // 2)).float().unsqueeze(0), size=target_audio.shape[0], mode="linear").squeeze()
- rms2 = F.interpolate(torch.from_numpy(librosa.feature.rms(y=target_audio, frame_length=target_rate // 2 * 2, hop_length=target_rate // 2)).float().unsqueeze(0), size=target_audio.shape[0], mode="linear").squeeze()
- rms2 = torch.maximum(rms2, torch.zeros_like(rms2) + 1e-6)
- return target_audio * (torch.pow(rms1, 1 - rate) * torch.pow(rms2, rate - 1)).numpy()
-
-def clear_gpu_cache():
- gc.collect()
-
- if torch.cuda.is_available(): torch.cuda.empty_cache()
- elif torch.backends.mps.is_available(): torch.mps.empty_cache()
- elif opencl.is_available(): opencl.pytorch_ocl.empty_cache()
-
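-# Median f0 over voiced frames, with unvoiced (zero) frames filled by interpolation.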
-def extract_median_f0(f0):
- f0 = np.where(f0 == 0, np.nan, f0)
- return float(np.median(np.interp(np.arange(len(f0)), np.where(~np.isnan(f0))[0], f0[~np.isnan(f0)])))
-
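-# Suggest a transpose in semitones (clamped to ±limit) that moves the median f0 of
-# the input toward target_f0.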
-def proposal_f0_up_key(f0, target_f0 = 155.0, limit = 12):
- return max(-limit, min(limit, int(np.round(12 * np.log2(target_f0 / extract_median_f0(f0))))))
-
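-# Build the input dict for an ONNX synthesizer; which inputs it expects depends on
-# whether the model was exported with pitch guidance and/or energy conditioning.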
-def get_onnx_argument(net_g, feats, p_len, sid, pitch, pitchf, energy, pitch_guidance, energy_use):
- inputs = {
- net_g.get_inputs()[0].name: feats.cpu().numpy().astype(np.float32),
- net_g.get_inputs()[1].name: p_len.cpu().numpy(),
- net_g.get_inputs()[2].name: np.array([sid.cpu().item()], dtype=np.int64),
- net_g.get_inputs()[3].name: np.random.randn(1, 192, p_len).astype(np.float32)
- }
-
- if energy_use:
- if pitch_guidance:
- inputs.update({
- net_g.get_inputs()[4].name: pitch.cpu().numpy().astype(np.int64),
- net_g.get_inputs()[5].name: pitchf.cpu().numpy().astype(np.float32),
- net_g.get_inputs()[6].name: energy.cpu().numpy().astype(np.float32)
- })
- else:
- inputs.update({
- net_g.get_inputs()[4].name: energy.cpu().numpy().astype(np.float32)
- })
- else:
- if pitch_guidance:
- inputs.update({
- net_g.get_inputs()[4].name: pitch.cpu().numpy().astype(np.int64),
- net_g.get_inputs()[5].name: pitchf.cpu().numpy().astype(np.float32)
- })
-
- return inputs
\ No newline at end of file
diff --git a/main/inference/create_dataset.py b/main/inference/create_dataset.py
deleted file mode 100644
index 03d6035f7577384076f60aa390893402ccdd06ab..0000000000000000000000000000000000000000
--- a/main/inference/create_dataset.py
+++ /dev/null
@@ -1,212 +0,0 @@
-import os
-import sys
-import time
-import yt_dlp
-import shutil
-import librosa
-import argparse
-import warnings
-
-from soundfile import read, write
-from distutils.util import strtobool
-
-sys.path.append(os.getcwd())
-
-from main.app.variables import config, logger, translations
-from main.library.uvr5_lib.separator import Separator
-
-dataset_temp = "dataset_temp"
-
-def parse_arguments():
- parser = argparse.ArgumentParser()
- parser.add_argument("--create_dataset", action='store_true')
- parser.add_argument("--input_audio", type=str, required=True)
- parser.add_argument("--output_dataset", type=str, default="./dataset")
- parser.add_argument("--sample_rate", type=int, default=44100)
- parser.add_argument("--clean_dataset", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--clean_strength", type=float, default=0.7)
- parser.add_argument("--separator_reverb", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--kim_vocal_version", type=int, default=2)
- parser.add_argument("--overlap", type=float, default=0.25)
- parser.add_argument("--segments_size", type=int, default=256)
- parser.add_argument("--mdx_hop_length", type=int, default=1024)
- parser.add_argument("--mdx_batch_size", type=int, default=1)
- parser.add_argument("--denoise_mdx", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--skip", type=lambda x: bool(strtobool(x)), default=False)
- parser.add_argument("--skip_start_audios", type=str, default="0")
- parser.add_argument("--skip_end_audios", type=str, default="0")
-
- return parser.parse_args()
-
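-# Entry point: download or collect the input audio, separate vocals with the
-# configured MDX model, optionally de-reverb/clean, and write the resulting clips
-# into --output_dataset.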
-def main():
- pid_path = os.path.join("assets", "create_dataset_pid.txt")
- with open(pid_path, "w") as pid_file:
- pid_file.write(str(os.getpid()))
-
- args = parse_arguments()
- input_audio, output_dataset, sample_rate, clean_dataset, clean_strength, separator_reverb, kim_vocal_version, overlap, segments_size, hop_length, batch_size, denoise_mdx, skip, skip_start_audios, skip_end_audios = args.input_audio, args.output_dataset, args.sample_rate, args.clean_dataset, args.clean_strength, args.separator_reverb, args.kim_vocal_version, args.overlap, args.segments_size, args.mdx_hop_length, args.mdx_batch_size, args.denoise_mdx, args.skip, args.skip_start_audios, args.skip_end_audios
- log_data = {translations['audio_path']: input_audio, translations['output_path']: output_dataset, translations['sr']: sample_rate, translations['clear_dataset']: clean_dataset, translations['dereveb_audio']: separator_reverb, translations['segments_size']: segments_size, translations['overlap']: overlap, "Hop length": hop_length, translations['batch_size']: batch_size, translations['denoise_mdx']: denoise_mdx, translations['skip']: skip}
-
- if clean_dataset: log_data[translations['clean_strength']] = clean_strength
- if skip:
- log_data[translations['skip_start']] = skip_start_audios
- log_data[translations['skip_end']] = skip_end_audios
-
- for key, value in log_data.items():
- logger.debug(f"{key}: {value}")
-
- if kim_vocal_version not in [1, 2]: raise ValueError(translations["version_not_valid"])
- start_time = time.time()
-
- try:
- paths = []
-
- if not os.path.exists(dataset_temp): os.makedirs(dataset_temp, exist_ok=True)
- urls = input_audio.replace(", ", ",").split(",")
-
- for url in urls:
- path = downloader(url, urls.index(url))
- paths.append(path)
-
- if skip:
- skip_start_audios, skip_end_audios = skip_start_audios.replace(", ", ",").split(","), skip_end_audios.replace(", ", ",").split(",")
-
- if len(skip_start_audios) < len(paths) or len(skip_end_audios) < len(paths):
- logger.warning(translations["skip