From 7befbd10d9c44e251a434f4d9b6be2d64896d8f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Thu, 28 Nov 2024 18:03:17 +0900 Subject: [PATCH] optimize(train): combine extract f0 together --- .github/workflows/unitest.yml | 2 +- .../modules/train/extract/extract_f0_print.py | 175 ------------ .../modules/train/extract/extract_f0_rmvpe.py | 141 ---------- ...ct_f0_rmvpe_dml.py => extract_f0_print.py} | 265 +++++++++--------- infer/modules/vc/pipeline.py | 101 +------ rvc/f0/__init__.py | 11 +- rvc/f0/gen.py | 127 +++++++++ rvc/f0/pm.py | 2 +- rvc/onnx/infer.py | 44 +-- web.py | 103 +------ 10 files changed, 280 insertions(+), 691 deletions(-) delete mode 100644 infer/modules/train/extract/extract_f0_print.py delete mode 100644 infer/modules/train/extract/extract_f0_rmvpe.py rename infer/modules/train/{extract/extract_f0_rmvpe_dml.py => extract_f0_print.py} (55%) create mode 100644 rvc/f0/gen.py diff --git a/.github/workflows/unitest.yml b/.github/workflows/unitest.yml index b9cfc91..1afd704 100644 --- a/.github/workflows/unitest.yml +++ b/.github/workflows/unitest.yml @@ -32,5 +32,5 @@ jobs: touch logs/mi-test/preprocess.log python infer/modules/train/preprocess.py logs/mute/0_gt_wavs 48000 8 logs/mi-test True 3.7 touch logs/mi-test/extract_f0_feature.log - python infer/modules/train/extract/extract_f0_print.py logs/mi-test $(nproc) pm + python infer/modules/train/extract/extract_f0_print.py logs/mi-test $(nproc) pm cpu False python infer/modules/train/extract_feature_print.py cpu 1 0 0 logs/mi-test v1 True diff --git a/infer/modules/train/extract/extract_f0_print.py b/infer/modules/train/extract/extract_f0_print.py deleted file mode 100644 index 2309692..0000000 --- a/infer/modules/train/extract/extract_f0_print.py +++ /dev/null @@ -1,175 +0,0 @@ -import os -import sys -import traceback - -import parselmouth - -now_dir = os.getcwd() -sys.path.append(now_dir) -import logging - -import numpy as np -import pyworld - -from infer.lib.audio import load_audio - -logging.getLogger("numba").setLevel(logging.WARNING) -from multiprocessing import Process - -exp_dir = sys.argv[1] -f = open("%s/extract_f0_feature.log" % exp_dir, "a+") - - -def printt(strr): - print(strr) - f.write("%s\n" % strr) - f.flush() - - -n_p = int(sys.argv[2]) -f0method = sys.argv[3] - - -class FeatureInput(object): - def __init__(self, samplerate=16000, hop_size=160): - self.fs = samplerate - self.hop = hop_size - - self.f0_bin = 256 - self.f0_max = 1100.0 - self.f0_min = 50.0 - self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) - self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) - - def compute_f0(self, path, f0_method): - x = load_audio(path, self.fs) - p_len = x.shape[0] // self.hop - if f0_method == "pm": - time_step = 160 / 16000 * 1000 - f0_min = 50 - f0_max = 1100 - f0 = ( - parselmouth.Sound(x, self.fs) - .to_pitch_ac( - time_step=time_step / 1000, - voicing_threshold=0.6, - pitch_floor=f0_min, - pitch_ceiling=f0_max, - ) - .selected_array["frequency"] - ) - pad_size = (p_len - len(f0) + 1) // 2 - if pad_size > 0 or p_len - len(f0) - pad_size > 0: - f0 = np.pad( - f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" - ) - elif f0_method == "harvest": - f0, t = pyworld.harvest( - x.astype(np.double), - fs=self.fs, - f0_ceil=self.f0_max, - f0_floor=self.f0_min, - frame_period=1000 * self.hop / self.fs, - ) - f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs) - elif f0_method == "dio": - f0, t = pyworld.dio( - x.astype(np.double), - fs=self.fs, - f0_ceil=self.f0_max, - f0_floor=self.f0_min, - frame_period=1000 * self.hop / self.fs, - ) - f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs) - elif f0_method == "rmvpe": - if hasattr(self, "model_rmvpe") == False: - from rvc.f0.rmvpe import RMVPE - - print("Loading rmvpe model") - self.model_rmvpe = RMVPE( - "assets/rmvpe/rmvpe.pt", is_half=False, device="cpu" - ) - f0 = self.model_rmvpe.compute_f0(x, filter_radius=0.03) - return f0 - - def coarse_f0(self, f0): - f0_mel = 1127 * np.log(1 + f0 / 700) - f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * ( - self.f0_bin - 2 - ) / (self.f0_mel_max - self.f0_mel_min) + 1 - - # use 0 or 1 - f0_mel[f0_mel <= 1] = 1 - f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1 - f0_coarse = np.rint(f0_mel).astype(int) - assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, ( - f0_coarse.max(), - f0_coarse.min(), - ) - return f0_coarse - - def go(self, paths, f0_method): - if len(paths) == 0: - printt("no-f0-todo") - else: - printt("todo-f0-%s" % len(paths)) - n = max(len(paths) // 5, 1) # 每个进程最多打印5条 - for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths): - try: - if idx % n == 0: - printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path)) - if ( - os.path.exists(opt_path1 + ".npy") == True - and os.path.exists(opt_path2 + ".npy") == True - ): - continue - featur_pit = self.compute_f0(inp_path, f0_method) - np.save( - opt_path2, - featur_pit, - allow_pickle=False, - ) # nsf - coarse_pit = self.coarse_f0(featur_pit) - np.save( - opt_path1, - coarse_pit, - allow_pickle=False, - ) # ori - except: - printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())) - - -if __name__ == "__main__": - # exp_dir=r"E:\codes\py39\dataset\mi-test" - # n_p=16 - # f = open("%s/log_extract_f0.log"%exp_dir, "w") - printt(" ".join(sys.argv)) - featureInput = FeatureInput() - paths = [] - inp_root = "%s/1_16k_wavs" % (exp_dir) - opt_root1 = "%s/2a_f0" % (exp_dir) - opt_root2 = "%s/2b-f0nsf" % (exp_dir) - - os.makedirs(opt_root1, exist_ok=True) - os.makedirs(opt_root2, exist_ok=True) - for name in sorted(list(os.listdir(inp_root))): - inp_path = "%s/%s" % (inp_root, name) - if "spec" in inp_path: - continue - opt_path1 = "%s/%s" % (opt_root1, name) - opt_path2 = "%s/%s" % (opt_root2, name) - paths.append([inp_path, opt_path1, opt_path2]) - - ps = [] - for i in range(n_p): - p = Process( - target=featureInput.go, - args=( - paths[i::n_p], - f0method, - ), - ) - ps.append(p) - p.start() - for i in range(n_p): - ps[i].join() diff --git a/infer/modules/train/extract/extract_f0_rmvpe.py b/infer/modules/train/extract/extract_f0_rmvpe.py deleted file mode 100644 index 68850fd..0000000 --- a/infer/modules/train/extract/extract_f0_rmvpe.py +++ /dev/null @@ -1,141 +0,0 @@ -import os -import sys -import traceback - -import parselmouth - -now_dir = os.getcwd() -sys.path.append(now_dir) -import logging - -import numpy as np -import pyworld - -from infer.lib.audio import load_audio - -logging.getLogger("numba").setLevel(logging.WARNING) - -n_part = int(sys.argv[1]) -i_part = int(sys.argv[2]) -i_gpu = sys.argv[3] -os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu) -exp_dir = sys.argv[4] -is_half = sys.argv[5] -f = open("%s/extract_f0_feature.log" % exp_dir, "a+") - - -def printt(strr): - print(strr) - f.write("%s\n" % strr) - f.flush() - - -class FeatureInput(object): - def __init__(self, samplerate=16000, hop_size=160): - self.fs = samplerate - self.hop = hop_size - - self.f0_bin = 256 - self.f0_max = 1100.0 - self.f0_min = 50.0 - self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) - self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) - - def compute_f0(self, path, f0_method): - x = load_audio(path, self.fs) - # p_len = x.shape[0] // self.hop - if f0_method == "rmvpe": - if hasattr(self, "model_rmvpe") == False: - from rvc.f0.rmvpe import RMVPE - - print("Loading rmvpe model") - self.model_rmvpe = RMVPE( - "assets/rmvpe/rmvpe.pt", is_half=is_half, device="cuda" - ) - f0 = self.model_rmvpe.compute_f0(x, filter_radius=0.03) - return f0 - - def coarse_f0(self, f0): - f0_mel = 1127 * np.log(1 + f0 / 700) - f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * ( - self.f0_bin - 2 - ) / (self.f0_mel_max - self.f0_mel_min) + 1 - - # use 0 or 1 - f0_mel[f0_mel <= 1] = 1 - f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1 - f0_coarse = np.rint(f0_mel).astype(int) - assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, ( - f0_coarse.max(), - f0_coarse.min(), - ) - return f0_coarse - - def go(self, paths, f0_method): - if len(paths) == 0: - printt("no-f0-todo") - else: - printt("todo-f0-%s" % len(paths)) - n = max(len(paths) // 5, 1) # 每个进程最多打印5条 - for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths): - try: - if idx % n == 0: - printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path)) - if ( - os.path.exists(opt_path1 + ".npy") == True - and os.path.exists(opt_path2 + ".npy") == True - ): - continue - featur_pit = self.compute_f0(inp_path, f0_method) - np.save( - opt_path2, - featur_pit, - allow_pickle=False, - ) # nsf - coarse_pit = self.coarse_f0(featur_pit) - np.save( - opt_path1, - coarse_pit, - allow_pickle=False, - ) # ori - except: - printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())) - - -if __name__ == "__main__": - # exp_dir=r"E:\codes\py39\dataset\mi-test" - # n_p=16 - # f = open("%s/log_extract_f0.log"%exp_dir, "w") - printt(" ".join(sys.argv)) - featureInput = FeatureInput() - paths = [] - inp_root = "%s/1_16k_wavs" % (exp_dir) - opt_root1 = "%s/2a_f0" % (exp_dir) - opt_root2 = "%s/2b-f0nsf" % (exp_dir) - - os.makedirs(opt_root1, exist_ok=True) - os.makedirs(opt_root2, exist_ok=True) - for name in sorted(list(os.listdir(inp_root))): - inp_path = "%s/%s" % (inp_root, name) - if "spec" in inp_path: - continue - opt_path1 = "%s/%s" % (opt_root1, name) - opt_path2 = "%s/%s" % (opt_root2, name) - paths.append([inp_path, opt_path1, opt_path2]) - try: - featureInput.go(paths[i_part::n_part], "rmvpe") - except: - printt("f0_all_fail-%s" % (traceback.format_exc())) - # ps = [] - # for i in range(n_p): - # p = Process( - # target=featureInput.go, - # args=( - # paths[i::n_p], - # f0method, - # ), - # ) - # ps.append(p) - # p.start() - # for i in range(n_p): - # ps[i].join() diff --git a/infer/modules/train/extract/extract_f0_rmvpe_dml.py b/infer/modules/train/extract_f0_print.py similarity index 55% rename from infer/modules/train/extract/extract_f0_rmvpe_dml.py rename to infer/modules/train/extract_f0_print.py index 404d7ef..803790e 100644 --- a/infer/modules/train/extract/extract_f0_rmvpe_dml.py +++ b/infer/modules/train/extract_f0_print.py @@ -1,139 +1,126 @@ -import os -import sys -import traceback - -import parselmouth - -now_dir = os.getcwd() -sys.path.append(now_dir) -import logging - -import numpy as np -import pyworld - -from infer.lib.audio import load_audio - -logging.getLogger("numba").setLevel(logging.WARNING) - -exp_dir = sys.argv[1] -import torch_directml - -device = torch_directml.device(torch_directml.default_device()) -f = open("%s/extract_f0_feature.log" % exp_dir, "a+") - - -def printt(strr): - print(strr) - f.write("%s\n" % strr) - f.flush() - - -class FeatureInput(object): - def __init__(self, samplerate=16000, hop_size=160): - self.fs = samplerate - self.hop = hop_size - - self.f0_bin = 256 - self.f0_max = 1100.0 - self.f0_min = 50.0 - self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) - self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) - - def compute_f0(self, path, f0_method): - x = load_audio(path, self.fs) - # p_len = x.shape[0] // self.hop - if f0_method == "rmvpe": - if hasattr(self, "model_rmvpe") == False: - from rvc.f0.rmvpe import RMVPE - - print("Loading rmvpe model") - self.model_rmvpe = RMVPE( - "assets/rmvpe/rmvpe.pt", is_half=False, device=device - ) - f0 = self.model_rmvpe.compute_f0(x, filter_radius=0.03) - return f0 - - def coarse_f0(self, f0): - f0_mel = 1127 * np.log(1 + f0 / 700) - f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * ( - self.f0_bin - 2 - ) / (self.f0_mel_max - self.f0_mel_min) + 1 - - # use 0 or 1 - f0_mel[f0_mel <= 1] = 1 - f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1 - f0_coarse = np.rint(f0_mel).astype(int) - assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, ( - f0_coarse.max(), - f0_coarse.min(), - ) - return f0_coarse - - def go(self, paths, f0_method): - if len(paths) == 0: - printt("no-f0-todo") - else: - printt("todo-f0-%s" % len(paths)) - n = max(len(paths) // 5, 1) # 每个进程最多打印5条 - for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths): - try: - if idx % n == 0: - printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path)) - if ( - os.path.exists(opt_path1 + ".npy") == True - and os.path.exists(opt_path2 + ".npy") == True - ): - continue - featur_pit = self.compute_f0(inp_path, f0_method) - np.save( - opt_path2, - featur_pit, - allow_pickle=False, - ) # nsf - coarse_pit = self.coarse_f0(featur_pit) - np.save( - opt_path1, - coarse_pit, - allow_pickle=False, - ) # ori - except: - printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())) - - -if __name__ == "__main__": - # exp_dir=r"E:\codes\py39\dataset\mi-test" - # n_p=16 - # f = open("%s/log_extract_f0.log"%exp_dir, "w") - printt(" ".join(sys.argv)) - featureInput = FeatureInput() - paths = [] - inp_root = "%s/1_16k_wavs" % (exp_dir) - opt_root1 = "%s/2a_f0" % (exp_dir) - opt_root2 = "%s/2b-f0nsf" % (exp_dir) - - os.makedirs(opt_root1, exist_ok=True) - os.makedirs(opt_root2, exist_ok=True) - for name in sorted(list(os.listdir(inp_root))): - inp_path = "%s/%s" % (inp_root, name) - if "spec" in inp_path: - continue - opt_path1 = "%s/%s" % (opt_root1, name) - opt_path2 = "%s/%s" % (opt_root2, name) - paths.append([inp_path, opt_path1, opt_path2]) - try: - featureInput.go(paths, "rmvpe") - except: - printt("f0_all_fail-%s" % (traceback.format_exc())) - # ps = [] - # for i in range(n_p): - # p = Process( - # target=featureInput.go, - # args=( - # paths[i::n_p], - # f0method, - # ), - # ) - # ps.append(p) - # p.start() - # for i in range(n_p): - # ps[i].join() +import os +import sys +import traceback +from pathlib import Path + +from dotenv import load_dotenv + +now_dir = os.getcwd() +sys.path.append(now_dir) +load_dotenv() +load_dotenv("sha256.env") + +now_dir = os.getcwd() +sys.path.append(now_dir) +import logging + +import numpy as np + +from infer.lib.audio import load_audio + +from rvc.f0 import Generator + +logging.getLogger("numba").setLevel(logging.WARNING) +from multiprocessing import Process + +exp_dir = sys.argv[1] +f = open("%s/extract_f0_feature.log" % exp_dir, "a+") + + +def printt(strr): + print(strr) + f.write("%s\n" % strr) + f.flush() + + +n_p = int(sys.argv[2]) +f0method = sys.argv[3] +device = sys.argv[4] +is_half = sys.argv[5] == "True" + + +class FeatureInput(object): + def __init__(self, is_half: bool, device = "cpu", samplerate=16000, hop_size=160): + self.fs = samplerate + self.hop = hop_size + + self.f0_bin = 256 + self.f0_max = 1100.0 + self.f0_min = 50.0 + self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) + self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) + + self.f0_gen = Generator( + Path(os.environ["rmvpe_root"]), + is_half, + 0, + device, + hop_size, + samplerate, + ) + + def go(self, paths, f0_method): + if len(paths) == 0: + printt("no-f0-todo") + else: + printt("todo-f0-%s" % len(paths)) + n = max(len(paths) // 5, 1) # 每个进程最多打印5条 + for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths): + try: + if idx % n == 0: + printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path)) + if ( + os.path.exists(opt_path1 + ".npy") == True + and os.path.exists(opt_path2 + ".npy") == True + ): + continue + x = load_audio(inp_path, self.fs) + coarse_pit, feature_pit = self.f0_gen.calculate(x, x.shape[0] // self.hop, 0, f0_method, None) + np.save( + opt_path2, + feature_pit, + allow_pickle=False, + ) # nsf + np.save( + opt_path1, + coarse_pit, + allow_pickle=False, + ) # ori + except: + printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())) + + +if __name__ == "__main__": + # exp_dir=r"E:\codes\py39\dataset\mi-test" + # n_p=16 + # f = open("%s/log_extract_f0.log"%exp_dir, "w") + printt(" ".join(sys.argv)) + featureInput = FeatureInput(is_half, device) + paths = [] + inp_root = "%s/1_16k_wavs" % (exp_dir) + opt_root1 = "%s/2a_f0" % (exp_dir) + opt_root2 = "%s/2b-f0nsf" % (exp_dir) + + os.makedirs(opt_root1, exist_ok=True) + os.makedirs(opt_root2, exist_ok=True) + for name in sorted(list(os.listdir(inp_root))): + inp_path = "%s/%s" % (inp_root, name) + if "spec" in inp_path: + continue + opt_path1 = "%s/%s" % (opt_root1, name) + opt_path2 = "%s/%s" % (opt_root2, name) + paths.append([inp_path, opt_path1, opt_path2]) + + ps = [] + for i in range(n_p): + p = Process( + target=featureInput.go, + args=( + paths[i::n_p], + f0method, + ), + ) + ps.append(p) + p.start() + for i in range(n_p): + ps[i].join() diff --git a/infer/modules/vc/pipeline.py b/infer/modules/vc/pipeline.py index f8857e8..8e0cd03 100644 --- a/infer/modules/vc/pipeline.py +++ b/infer/modules/vc/pipeline.py @@ -5,6 +5,7 @@ import logging logger = logging.getLogger(__name__) +from pathlib import Path from time import time import faiss @@ -14,7 +15,7 @@ import torch import torch.nn.functional as F from scipy import signal -from rvc.f0 import PM, Harvest, RMVPE, CRePE, Dio, FCPE +from rvc.f0 import Generator now_dir = os.getcwd() sys.path.append(now_dir) @@ -63,95 +64,15 @@ class Pipeline(object): self.t_max = self.sr * self.x_max # 免查询时长阈值 self.device = config.device - def get_f0( - self, - x, - p_len, - f0_up_key, - f0_method, - filter_radius, - inp_f0=None, - ): - f0_min = 50 - f0_max = 1100 - f0_mel_min = 1127 * np.log(1 + f0_min / 700) - f0_mel_max = 1127 * np.log(1 + f0_max / 700) - if f0_method == "pm": - if not hasattr(self, "pm"): - self.pm = PM(self.window, f0_min, f0_max, self.sr) - f0 = self.pm.compute_f0(x, p_len=p_len) - if f0_method == "dio": - if not hasattr(self, "dio"): - self.dio = Dio(self.window, f0_min, f0_max, self.sr) - f0 = self.dio.compute_f0(x, p_len=p_len) - elif f0_method == "harvest": - if not hasattr(self, "harvest"): - self.harvest = Harvest(self.window, f0_min, f0_max, self.sr) - f0 = self.harvest.compute_f0(x, p_len=p_len, filter_radius=filter_radius) - elif f0_method == "crepe": - if not hasattr(self, "crepe"): - self.crepe = CRePE( - self.window, - f0_min, - f0_max, - self.sr, - self.device, - ) - f0 = self.crepe.compute_f0(x, p_len=p_len) - elif f0_method == "rmvpe": - if not hasattr(self, "rmvpe"): - logger.info( - "Loading rmvpe model %s" % "%s/rmvpe.pt" % os.environ["rmvpe_root"] - ) - self.rmvpe = RMVPE( - "%s/rmvpe.pt" % os.environ["rmvpe_root"], - is_half=self.is_half, - device=self.device, - # use_jit=self.config.use_jit, - ) - f0 = self.rmvpe.compute_f0(x, p_len=p_len, filter_radius=0.03) + self.f0_gen = Generator( + Path(os.environ["rmvpe_root"]), + self.is_half, + self.x_pad, + self.device, + self.window, + self.sr, + ) - if "privateuseone" in str(self.device): # clean ortruntime memory - del self.rmvpe.model - del self.rmvpe - logger.info("Cleaning ortruntime memory") - - elif f0_method == "fcpe": - if not hasattr(self, "model_fcpe"): - logger.info("Loading fcpe model") - self.model_fcpe = FCPE( - self.window, - f0_min, - f0_max, - self.sr, - self.device, - ) - f0 = self.model_fcpe.compute_f0(x, p_len=p_len) - - f0 *= pow(2, f0_up_key / 12) - # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) - tf0 = self.sr // self.window # 每秒f0点数 - if inp_f0 is not None: - delta_t = np.round( - (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1 - ).astype("int16") - replace_f0 = np.interp( - list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1] - ) - shape = f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)].shape[0] - f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)] = replace_f0[ - :shape - ] - # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) - f0bak = f0.copy() - f0_mel = 1127 * np.log(1 + f0 / 700) - f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / ( - f0_mel_max - f0_mel_min - ) + 1 - f0_mel[f0_mel <= 1] = 1 - f0_mel[f0_mel > 255] = 255 - f0_coarse = np.rint(f0_mel).astype(np.int32) - return f0_coarse, f0bak # 1-0 def vc( self, @@ -337,7 +258,7 @@ class Pipeline(object): pitch, pitchf = None, None if if_f0: if if_f0 == 1: - pitch, pitchf = self.get_f0( + pitch, pitchf = self.f0_gen.calculate( audio_pad, p_len, f0_up_key, diff --git a/rvc/f0/__init__.py b/rvc/f0/__init__.py index dc91d96..e7b061a 100644 --- a/rvc/f0/__init__.py +++ b/rvc/f0/__init__.py @@ -1,10 +1 @@ -from .f0 import F0Predictor - -from .crepe import CRePE -from .dio import Dio -from .fcpe import FCPE -from .harvest import Harvest -from .pm import PM -from .rmvpe import RMVPE - -__all__ = ["F0Predictor", "CRePE", "Dio", "FCPE", "Harvest", "PM", "RMVPE"] +from .gen import Generator diff --git a/rvc/f0/gen.py b/rvc/f0/gen.py new file mode 100644 index 0000000..2afdd0e --- /dev/null +++ b/rvc/f0/gen.py @@ -0,0 +1,127 @@ +from math import log +from pathlib import Path +from typing import Optional, Union, Literal, Tuple + +from numba import jit +import numpy as np + + +@jit(nopython=True) +def post_process( + sr: int, + window: int, + f0: np.ndarray, + f0_up_key: int, + manual_x_pad: int, + f0_mel_min: float, + f0_mel_max: float, + manual_f0: Optional[Union[np.ndarray, list]]=None, +) -> Tuple[np.ndarray, np.ndarray]: + f0 = np.multiply(f0, pow(2, f0_up_key / 12)) + # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + tf0 = sr // window # 每秒f0点数 + if manual_f0 is not None: + delta_t = np.round( + (manual_f0[:, 0].max() - manual_f0[:, 0].min()) * tf0 + 1 + ).astype("int16") + replace_f0 = np.interp( + list(range(delta_t)), manual_f0[:, 0] * 100, manual_f0[:, 1] + ) + shape = f0[manual_x_pad * tf0 : manual_x_pad * tf0 + len(replace_f0)].shape[0] + f0[manual_x_pad * tf0 : manual_x_pad * tf0 + len(replace_f0)] = replace_f0[:shape] + # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (f0_mel_max - f0_mel_min) + 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > 255] = 255 + f0_coarse = np.rint(f0_mel).astype(np.int32) + return f0_coarse, f0 # 1-0 + + +class Generator(object): + def __init__( + self, + rmvpe_root: Path, + is_half: bool, + x_pad: int, + device = "cpu", + window = 160, + sr = 16000 + ): + self.rmvpe_root = rmvpe_root + self.is_half = is_half + self.x_pad = x_pad + self.device = device + self.window = window + self.sr = sr + + def calculate( + self, + x: np.ndarray, + p_len: int, + f0_up_key: int, + f0_method: Literal['pm', 'dio', 'harvest', 'crepe', 'rmvpe', 'fcpe'], + filter_radius: Optional[Union[int, float]], + manual_f0: Optional[Union[np.ndarray, list]]=None, + ) -> Tuple[np.ndarray, np.ndarray]: + f0_min = 50 + f0_max = 1100 + if f0_method == "pm": + if not hasattr(self, "pm"): + from .pm import PM + self.pm = PM(self.window, f0_min, f0_max, self.sr) + f0 = self.pm.compute_f0(x, p_len=p_len) + elif f0_method == "dio": + if not hasattr(self, "dio"): + from .dio import Dio + self.dio = Dio(self.window, f0_min, f0_max, self.sr) + f0 = self.dio.compute_f0(x, p_len=p_len) + elif f0_method == "harvest": + if not hasattr(self, "harvest"): + from .harvest import Harvest + self.harvest = Harvest(self.window, f0_min, f0_max, self.sr) + f0 = self.harvest.compute_f0(x, p_len=p_len, filter_radius=filter_radius) + elif f0_method == "crepe": + if not hasattr(self, "crepe"): + from .crepe import CRePE + self.crepe = CRePE( + self.window, + f0_min, + f0_max, + self.sr, + self.device, + ) + f0 = self.crepe.compute_f0(x, p_len=p_len) + elif f0_method == "rmvpe": + if not hasattr(self, "rmvpe"): + from .rmvpe import RMVPE + self.rmvpe = RMVPE( + str(self.rmvpe_root/"rmvpe.pt"), + is_half=self.is_half, + device=self.device, + # use_jit=self.config.use_jit, + ) + f0 = self.rmvpe.compute_f0(x, p_len=p_len, filter_radius=0.03) + if "privateuseone" in str(self.device): # clean ortruntime memory + del self.rmvpe.model + del self.rmvpe + elif f0_method == "fcpe": + if not hasattr(self, "fcpe"): + from .fcpe import FCPE + self.fcpe = FCPE( + self.window, + f0_min, + f0_max, + self.sr, + self.device, + ) + f0 = self.fcpe.compute_f0(x, p_len=p_len) + else: + raise ValueError(f"f0 method {f0_method} has not yet been supported") + + return post_process( + self.sr, self.window, f0, f0_up_key, self.x_pad, + 1127 * log(1 + f0_min / 700), + 1127 * log(1 + f0_max / 700), + manual_f0, + ) diff --git a/rvc/f0/pm.py b/rvc/f0/pm.py index cf54b3c..16aa66b 100644 --- a/rvc/f0/pm.py +++ b/rvc/f0/pm.py @@ -1,4 +1,4 @@ -from typing import Any, Optional +from typing import Optional import numpy as np import parselmouth diff --git a/rvc/onnx/infer.py b/rvc/onnx/infer.py index b6895cd..639a113 100644 --- a/rvc/onnx/infer.py +++ b/rvc/onnx/infer.py @@ -5,12 +5,7 @@ import librosa import numpy as np import onnxruntime -from rvc.f0 import ( - PM, - Harvest, - Dio, - F0Predictor, -) +from rvc.f0 import Generator class Model: @@ -51,49 +46,28 @@ class ContentVec(Model): return logits.transpose(0, 2, 1) -predictors: typing.Dict[str, F0Predictor] = { - "pm": PM, - "harvest": Harvest, - "dio": Dio, -} - - -def get_f0_predictor( - f0_method: str, hop_length: int, sampling_rate: int -) -> F0Predictor: - return predictors[f0_method](hop_length=hop_length, sampling_rate=sampling_rate) - - class RVC(Model): def __init__( self, model_path: typing.Union[str, bytes, os.PathLike], hop_len=512, + model_sr=40000, vec_path: typing.Union[str, bytes, os.PathLike] = "vec-768-layer-12.onnx", device: typing.Literal["cpu", "cuda", "dml"] = "cpu", ): super().__init__(model_path, device) self.vec_model = ContentVec(vec_path, device) self.hop_len = hop_len + self.f0_gen = Generator(None, False, 0, window=hop_len, sr=model_sr) def infer( self, wav: np.ndarray[typing.Any, np.dtype], wav_sr: int, - model_sr: int = 40000, sid: int = 0, f0_method="dio", f0_up_key=0, ) -> np.ndarray[typing.Any, np.dtype[np.int16]]: - f0_min = 50 - f0_max = 1100 - f0_mel_min = 1127 * np.log(1 + f0_min / 700) - f0_mel_max = 1127 * np.log(1 + f0_max / 700) - f0_predictor = get_f0_predictor( - f0_method, - self.hop_len, - model_sr, - ) org_length = len(wav) if org_length / wav_sr > 50.0: raise RuntimeError("wav max length exceeded") @@ -102,16 +76,8 @@ class RVC(Model): hubert = np.repeat(hubert, 2, axis=2).transpose(0, 2, 1).astype(np.float32) hubert_length = hubert.shape[1] - pitchf = f0_predictor.compute_f0(wav, hubert_length) - pitchf = pitchf * 2 ** (f0_up_key / 12) - pitch = pitchf.copy() - f0_mel = 1127 * np.log(1 + pitch / 700) - f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / ( - f0_mel_max - f0_mel_min - ) + 1 - f0_mel[f0_mel <= 1] = 1 - f0_mel[f0_mel > 255] = 255 - pitch = np.rint(f0_mel).astype(np.int64) + pitch, pitchf = self.f0_gen.calculate(wav, hubert_length, f0_up_key, f0_method, None) + pitch = pitch.astype(np.int64) pitchf = pitchf.reshape(1, len(pitchf)).astype(np.float32) pitch = pitch.reshape(1, len(pitch)) diff --git a/web.py b/web.py index 0e3c3c7..4cbf1d7 100644 --- a/web.py +++ b/web.py @@ -264,28 +264,28 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p): # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2]) -def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvpe): - gpus = gpus.split("-") +def extract_f0_feature(n_p, f0method, if_f0, exp_dir, version19): os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w") f.close() if if_f0: if f0method != "rmvpe_gpu": cmd = ( - '"%s" infer/modules/train/extract/extract_f0_print.py "%s/logs/%s" %s %s' + '"%s" infer/modules/train/extract_f0_print.py "%s/logs/%s" %s %s "%s" %s' % ( config.python_cmd, now_dir, exp_dir, n_p, f0method, + config.device, + str(config.is_half), ) ) logger.info("Execute: " + cmd) p = Popen( cmd, shell=True, cwd=now_dir ) # , stdin=PIPE, stdout=PIPE,stderr=PIPE - # 煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 done = [False] threading.Thread( target=if_done, @@ -294,53 +294,6 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp p, ), ).start() - else: - if gpus_rmvpe != "-": - gpus_rmvpe = gpus_rmvpe.split("-") - leng = len(gpus_rmvpe) - ps = [] - for idx, n_g in enumerate(gpus_rmvpe): - cmd = ( - '"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s ' - % ( - config.python_cmd, - leng, - idx, - n_g, - now_dir, - exp_dir, - config.is_half, - ) - ) - logger.info("Execute: " + cmd) - p = Popen( - cmd, shell=True, cwd=now_dir - ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir - ps.append(p) - # 煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 - done = [False] - threading.Thread( - target=if_done_multi, # - args=( - done, - ps, - ), - ).start() - else: - cmd = ( - config.python_cmd - + ' infer/modules/train/extract/extract_f0_rmvpe_dml.py "%s/logs/%s" ' - % ( - now_dir, - exp_dir, - ) - ) - logger.info("Execute: " + cmd) - p = Popen( - cmd, shell=True, cwd=now_dir - ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir - p.wait() - done = [True] while 1: with open( "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" @@ -464,7 +417,6 @@ def change_version19(sr2, if_f0_3, version19): def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15 path_str = "" if version19 == "v1" else "_v2" return ( - {"visible": if_f0_3, "__type__": "update"}, {"visible": if_f0_3, "__type__": "update"}, *get_pretrained_models(path_str, "f0" if if_f0_3 == True else "", sr2), ) @@ -719,11 +671,9 @@ def train1key( if_save_latest13, pretrained_G14, pretrained_D15, - gpus16, if_cache_gpu17, if_save_every_weights18, version19, - gpus_rmvpe, author, ): infos = [] @@ -741,7 +691,7 @@ def train1key( [ get_info_str(_) for _ in extract_f0_feature( - gpus16, np7, f0method8, if_f0_3, exp_dir1, version19, gpus_rmvpe + np7, f0method8, if_f0_3, exp_dir1, version19, ) ] @@ -792,17 +742,6 @@ def change_info_(ckpt_path): return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} -F0GPUVisible = config.dml == False - - -def change_f0_method(f0method8): - if f0method8 == "rmvpe_gpu": - visible = F0GPUVisible - else: - visible = False - return {"visible": visible, "__type__": "update"} - - with gr.Blocks(title="RVC WebUI") as app: gr.Markdown("## RVC WebUI") gr.Markdown( @@ -1260,50 +1199,26 @@ with gr.Blocks(title="RVC WebUI") as app: gpu_info9 = gr.Textbox( label=i18n("GPU Information"), value=gpu_info, - visible=F0GPUVisible, - ) - gpus6 = gr.Textbox( - label=i18n( - "Enter the GPU index(es) separated by '-', e.g., 0-1-2 to use GPU 0, 1, and 2" - ), - value=gpus, - interactive=True, - visible=F0GPUVisible, - ) - gpus_rmvpe = gr.Textbox( - label=i18n( - "Enter the GPU index(es) separated by '-', e.g., 0-0-1 to use 2 processes in GPU0 and 1 process in GPU1" - ), - value="%s-%s" % (gpus, gpus), - interactive=True, - visible=F0GPUVisible, ) f0method8 = gr.Radio( label=i18n( "Select the pitch extraction algorithm: when extracting singing, you can use 'pm' to speed up. For high-quality speech with fast performance, but worse CPU usage, you can use 'dio'. 'harvest' results in better quality but is slower. 'rmvpe' has the best results and consumes less CPU/GPU" ), - choices=["pm", "harvest", "dio", "rmvpe", "rmvpe_gpu"], - value="rmvpe_gpu", + choices=["pm", "harvest", "dio", "rmvpe"], + value="rmvpe", interactive=True, ) with gr.Column(): but2 = gr.Button(i18n("Feature extraction"), variant="primary") info2 = gr.Textbox(label=i18n("Output information"), value="") - f0method8.change( - fn=change_f0_method, - inputs=[f0method8], - outputs=[gpus_rmvpe], - ) but2.click( extract_f0_feature, [ - gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, - gpus_rmvpe, ], [info2], api_name="train_extract_f0_feature", @@ -1394,7 +1309,7 @@ with gr.Blocks(title="RVC WebUI") as app: if_f0_3.change( change_f0, [if_f0_3, sr2, version19], - [f0method8, gpus_rmvpe, pretrained_G14, pretrained_D15], + [f0method8, pretrained_G14, pretrained_D15], ) but3 = gr.Button(i18n("Train model"), variant="primary") @@ -1441,11 +1356,9 @@ with gr.Blocks(title="RVC WebUI") as app: if_save_latest13, pretrained_G14, pretrained_D15, - gpus16, if_cache_gpu17, if_save_every_weights18, version19, - gpus_rmvpe, author, ], info3,