1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-05 01:10:22 +08:00

optimize(train): combine extract f0 together

This commit is contained in:
源文雨
2024-11-28 18:03:17 +09:00
parent d3add81469
commit 7befbd10d9
10 changed files with 280 additions and 691 deletions

View File

@@ -32,5 +32,5 @@ jobs:
touch logs/mi-test/preprocess.log touch logs/mi-test/preprocess.log
python infer/modules/train/preprocess.py logs/mute/0_gt_wavs 48000 8 logs/mi-test True 3.7 python infer/modules/train/preprocess.py logs/mute/0_gt_wavs 48000 8 logs/mi-test True 3.7
touch logs/mi-test/extract_f0_feature.log touch logs/mi-test/extract_f0_feature.log
python infer/modules/train/extract/extract_f0_print.py logs/mi-test $(nproc) pm python infer/modules/train/extract/extract_f0_print.py logs/mi-test $(nproc) pm cpu False
python infer/modules/train/extract_feature_print.py cpu 1 0 0 logs/mi-test v1 True python infer/modules/train/extract_feature_print.py cpu 1 0 0 logs/mi-test v1 True

View File

@@ -1,175 +0,0 @@
import os
import sys
import traceback
import parselmouth
now_dir = os.getcwd()
sys.path.append(now_dir)
import logging
import numpy as np
import pyworld
from infer.lib.audio import load_audio
logging.getLogger("numba").setLevel(logging.WARNING)
from multiprocessing import Process
# Positional command-line arguments: <exp_dir> <n_p> <f0method>.
exp_dir = sys.argv[1]

# Progress log shared by every worker; opened in append mode and left open
# for the lifetime of the script.
f = open("%s/extract_f0_feature.log" % exp_dir, "a+")


def printt(strr):
    """Echo ``strr`` to stdout and append it as one line to the progress log."""
    print(strr)
    log_line = "%s\n" % strr
    f.write(log_line)
    # Flush immediately so a reader polling the log file sees progress.
    f.flush()


n_p = int(sys.argv[2])  # number of worker processes to spawn
f0method = sys.argv[3]  # name of the f0 extraction method
class FeatureInput(object):
    """Compute f0 (pitch) curves for audio files and save them as ``.npy``.

    For every input file two arrays are written: the raw f0 curve and a
    coarse version quantised on a mel scale into integer bins ``1..255``.
    """

    # Methods understood by compute_f0.
    SUPPORTED_METHODS = ("pm", "harvest", "dio", "rmvpe")

    def __init__(self, samplerate=16000, hop_size=160):
        """Store the analysis sample rate / hop size and the mel-scale bounds."""
        self.fs = samplerate
        self.hop = hop_size

        self.f0_bin = 256
        self.f0_max = 1100.0
        self.f0_min = 50.0
        self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
        self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)

    def compute_f0(self, path, f0_method):
        """Load ``path`` at ``self.fs`` and return its f0 curve.

        Args:
            path: audio file to analyse.
            f0_method: one of ``SUPPORTED_METHODS``.

        Raises:
            ValueError: if ``f0_method`` is not supported.
        """
        # Validate before the (comparatively expensive) audio load so that an
        # unknown method fails with a clear message rather than an unbound
        # local variable at the return statement.
        if f0_method not in self.SUPPORTED_METHODS:
            raise ValueError("unsupported f0 method: %s" % f0_method)

        x = load_audio(path, self.fs)
        p_len = x.shape[0] // self.hop
        if f0_method == "pm":
            f0 = (
                parselmouth.Sound(x, self.fs)
                .to_pitch_ac(
                    # Hop expressed in seconds (0.01 s for the defaults).
                    time_step=self.hop / self.fs,
                    voicing_threshold=0.6,
                    pitch_floor=self.f0_min,
                    pitch_ceiling=self.f0_max,
                )
                .selected_array["frequency"]
            )
            # Zero-pad both ends when fewer than p_len frames were produced.
            pad_size = (p_len - len(f0) + 1) // 2
            if pad_size > 0 or p_len - len(f0) - pad_size > 0:
                f0 = np.pad(
                    f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
                )
        elif f0_method in ("harvest", "dio"):
            # Both pyworld estimators take the same arguments and are refined
            # by stonemask afterwards.
            estimator = pyworld.harvest if f0_method == "harvest" else pyworld.dio
            samples = x.astype(np.double)
            f0, t = estimator(
                samples,
                fs=self.fs,
                f0_ceil=self.f0_max,
                f0_floor=self.f0_min,
                frame_period=1000 * self.hop / self.fs,
            )
            f0 = pyworld.stonemask(samples, f0, t, self.fs)
        else:  # "rmvpe"
            # The model is loaded lazily on first use and then cached.
            if not hasattr(self, "model_rmvpe"):
                from rvc.f0.rmvpe import RMVPE

                print("Loading rmvpe model")
                self.model_rmvpe = RMVPE(
                    "assets/rmvpe/rmvpe.pt", is_half=False, device="cpu"
                )
            f0 = self.model_rmvpe.compute_f0(x, filter_radius=0.03)
        return f0

    def coarse_f0(self, f0):
        """Quantise an f0 curve into integer mel-scale bins in ``[1, 255]``.

        Frames whose mel value is not positive (f0 == 0) map to bin 1.
        """
        f0_mel = 1127 * np.log(1 + f0 / 700)
        voiced = f0_mel > 0
        f0_mel[voiced] = (f0_mel[voiced] - self.f0_mel_min) * (
            self.f0_bin - 2
        ) / (self.f0_mel_max - self.f0_mel_min) + 1

        # Clamp into the valid bin range before rounding.
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1
        f0_coarse = np.rint(f0_mel).astype(int)
        assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (
            f0_coarse.max(),
            f0_coarse.min(),
        )
        return f0_coarse

    def go(self, paths, f0_method):
        """Process ``paths``, a list of ``(input, coarse_out, f0_out)`` triples.

        Files whose two outputs already exist are skipped. A failure on one
        file is logged through ``printt`` and does not stop the loop.
        """
        if len(paths) == 0:
            printt("no-f0-todo")
            return

        printt("todo-f0-%s" % len(paths))
        # Log roughly five progress lines per process.
        n = max(len(paths) // 5, 1)
        for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths):
            try:
                if idx % n == 0:
                    printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path))
                if os.path.exists(opt_path1 + ".npy") and os.path.exists(
                    opt_path2 + ".npy"
                ):
                    continue
                featur_pit = self.compute_f0(inp_path, f0_method)
                np.save(
                    opt_path2,
                    featur_pit,
                    allow_pickle=False,
                )  # nsf
                coarse_pit = self.coarse_f0(featur_pit)
                np.save(
                    opt_path1,
                    coarse_pit,
                    allow_pickle=False,
                )  # ori
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit still stop the worker.
                printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc()))
def collect_paths(exp_dir):
    """Return sorted ``[input, coarse_out, f0_out]`` triples for ``exp_dir``.

    Inputs are read from ``<exp_dir>/1_16k_wavs``; the two output paths point
    into ``<exp_dir>/2a_f0`` and ``<exp_dir>/2b-f0nsf``. Entries whose file
    name contains ``"spec"`` are skipped. The check is made on the file name
    only: testing the full path would skip every file whenever the experiment
    directory itself happens to contain ``"spec"``.
    """
    inp_root = "%s/1_16k_wavs" % (exp_dir)
    opt_root1 = "%s/2a_f0" % (exp_dir)
    opt_root2 = "%s/2b-f0nsf" % (exp_dir)

    paths = []
    for name in sorted(os.listdir(inp_root)):
        if "spec" in name:
            continue
        paths.append(
            [
                "%s/%s" % (inp_root, name),
                "%s/%s" % (opt_root1, name),
                "%s/%s" % (opt_root2, name),
            ]
        )
    return paths


if __name__ == "__main__":
    printt(" ".join(sys.argv))
    featureInput = FeatureInput()

    os.makedirs("%s/2a_f0" % (exp_dir), exist_ok=True)
    os.makedirs("%s/2b-f0nsf" % (exp_dir), exist_ok=True)
    paths = collect_paths(exp_dir)

    # Worker i handles every n_p-th file starting at index i.
    ps = []
    for i in range(n_p):
        p = Process(
            target=featureInput.go,
            args=(
                paths[i::n_p],
                f0method,
            ),
        )
        ps.append(p)
        p.start()
    for p in ps:
        p.join()

View File

@@ -1,141 +0,0 @@
import os
import sys
import traceback
import parselmouth
now_dir = os.getcwd()
sys.path.append(now_dir)
import logging
import numpy as np
import pyworld
from infer.lib.audio import load_audio
logging.getLogger("numba").setLevel(logging.WARNING)
# Positional command-line arguments:
#   <n_part> <i_part> <i_gpu> <exp_dir> <is_half>
n_part = int(sys.argv[1])  # total number of shards
i_part = int(sys.argv[2])  # index of the shard handled by this process
i_gpu = sys.argv[3]
os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
exp_dir = sys.argv[4]
# argv entries are strings and any non-empty string (including "False") is
# truthy, so the flag has to be compared against its textual form.
is_half = sys.argv[5] == "True"

# Progress log; opened in append mode and left open for the whole run.
f = open("%s/extract_f0_feature.log" % exp_dir, "a+")


def printt(strr):
    """Echo ``strr`` to stdout and append it as one line to the progress log."""
    print(strr)
    f.write("%s\n" % strr)
    # Flush immediately so a reader polling the log file sees progress.
    f.flush()
class FeatureInput(object):
    """Compute f0 (pitch) curves with RMVPE on CUDA and save them as ``.npy``.

    For every input file two arrays are written: the raw f0 curve and a
    coarse version quantised on a mel scale into integer bins ``1..255``.
    """

    def __init__(self, samplerate=16000, hop_size=160):
        """Store the analysis sample rate / hop size and the mel-scale bounds."""
        self.fs = samplerate
        self.hop = hop_size

        self.f0_bin = 256
        self.f0_max = 1100.0
        self.f0_min = 50.0
        self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
        self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)

    def compute_f0(self, path, f0_method):
        """Load ``path`` at ``self.fs`` and return its f0 curve.

        Only ``"rmvpe"`` is implemented here.

        Raises:
            ValueError: if ``f0_method`` is anything other than ``"rmvpe"``.
        """
        # Fail early with a clear message; otherwise the function would reach
        # the return statement with no f0 assigned.
        if f0_method != "rmvpe":
            raise ValueError("unsupported f0 method: %s" % f0_method)

        x = load_audio(path, self.fs)
        # The model is loaded lazily on first use and then cached.
        if not hasattr(self, "model_rmvpe"):
            from rvc.f0.rmvpe import RMVPE

            print("Loading rmvpe model")
            self.model_rmvpe = RMVPE(
                "assets/rmvpe/rmvpe.pt", is_half=is_half, device="cuda"
            )
        return self.model_rmvpe.compute_f0(x, filter_radius=0.03)

    def coarse_f0(self, f0):
        """Quantise an f0 curve into integer mel-scale bins in ``[1, 255]``.

        Frames whose mel value is not positive (f0 == 0) map to bin 1.
        """
        f0_mel = 1127 * np.log(1 + f0 / 700)
        voiced = f0_mel > 0
        f0_mel[voiced] = (f0_mel[voiced] - self.f0_mel_min) * (
            self.f0_bin - 2
        ) / (self.f0_mel_max - self.f0_mel_min) + 1

        # Clamp into the valid bin range before rounding.
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1
        f0_coarse = np.rint(f0_mel).astype(int)
        assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (
            f0_coarse.max(),
            f0_coarse.min(),
        )
        return f0_coarse

    def go(self, paths, f0_method):
        """Process ``paths``, a list of ``(input, coarse_out, f0_out)`` triples.

        Files whose two outputs already exist are skipped. A failure on one
        file is logged through ``printt`` and does not stop the loop.
        """
        if len(paths) == 0:
            printt("no-f0-todo")
            return

        printt("todo-f0-%s" % len(paths))
        # Log roughly five progress lines per process.
        n = max(len(paths) // 5, 1)
        for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths):
            try:
                if idx % n == 0:
                    printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path))
                if os.path.exists(opt_path1 + ".npy") and os.path.exists(
                    opt_path2 + ".npy"
                ):
                    continue
                featur_pit = self.compute_f0(inp_path, f0_method)
                np.save(
                    opt_path2,
                    featur_pit,
                    allow_pickle=False,
                )  # nsf
                coarse_pit = self.coarse_f0(featur_pit)
                np.save(
                    opt_path1,
                    coarse_pit,
                    allow_pickle=False,
                )  # ori
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit still stop the process.
                printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc()))
def collect_paths(exp_dir):
    """Return sorted ``[input, coarse_out, f0_out]`` triples for ``exp_dir``.

    Inputs are read from ``<exp_dir>/1_16k_wavs``; the two output paths point
    into ``<exp_dir>/2a_f0`` and ``<exp_dir>/2b-f0nsf``. Entries whose file
    name contains ``"spec"`` are skipped. The check is made on the file name
    only: testing the full path would skip every file whenever the experiment
    directory itself happens to contain ``"spec"``.
    """
    inp_root = "%s/1_16k_wavs" % (exp_dir)
    opt_root1 = "%s/2a_f0" % (exp_dir)
    opt_root2 = "%s/2b-f0nsf" % (exp_dir)

    paths = []
    for name in sorted(os.listdir(inp_root)):
        if "spec" in name:
            continue
        paths.append(
            [
                "%s/%s" % (inp_root, name),
                "%s/%s" % (opt_root1, name),
                "%s/%s" % (opt_root2, name),
            ]
        )
    return paths


if __name__ == "__main__":
    printt(" ".join(sys.argv))
    featureInput = FeatureInput()

    os.makedirs("%s/2a_f0" % (exp_dir), exist_ok=True)
    os.makedirs("%s/2b-f0nsf" % (exp_dir), exist_ok=True)
    paths = collect_paths(exp_dir)

    # This process handles every n_part-th file starting at index i_part.
    try:
        featureInput.go(paths[i_part::n_part], "rmvpe")
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still terminate.
        printt("f0_all_fail-%s" % (traceback.format_exc()))

View File

@@ -1,139 +1,126 @@
import os import os
import sys import sys
import traceback import traceback
from pathlib import Path
import parselmouth
from dotenv import load_dotenv
now_dir = os.getcwd()
sys.path.append(now_dir) now_dir = os.getcwd()
import logging sys.path.append(now_dir)
load_dotenv()
import numpy as np load_dotenv("sha256.env")
import pyworld
now_dir = os.getcwd()
from infer.lib.audio import load_audio sys.path.append(now_dir)
import logging
logging.getLogger("numba").setLevel(logging.WARNING)
import numpy as np
exp_dir = sys.argv[1]
import torch_directml from infer.lib.audio import load_audio
device = torch_directml.device(torch_directml.default_device()) from rvc.f0 import Generator
f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
logging.getLogger("numba").setLevel(logging.WARNING)
from multiprocessing import Process
def printt(strr):
print(strr) exp_dir = sys.argv[1]
f.write("%s\n" % strr) f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
f.flush()
def printt(strr):
class FeatureInput(object): print(strr)
def __init__(self, samplerate=16000, hop_size=160): f.write("%s\n" % strr)
self.fs = samplerate f.flush()
self.hop = hop_size
self.f0_bin = 256 n_p = int(sys.argv[2])
self.f0_max = 1100.0 f0method = sys.argv[3]
self.f0_min = 50.0 device = sys.argv[4]
self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) is_half = sys.argv[5] == "True"
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
def compute_f0(self, path, f0_method): class FeatureInput(object):
x = load_audio(path, self.fs) def __init__(self, is_half: bool, device = "cpu", samplerate=16000, hop_size=160):
# p_len = x.shape[0] // self.hop self.fs = samplerate
if f0_method == "rmvpe": self.hop = hop_size
if hasattr(self, "model_rmvpe") == False:
from rvc.f0.rmvpe import RMVPE self.f0_bin = 256
self.f0_max = 1100.0
print("Loading rmvpe model") self.f0_min = 50.0
self.model_rmvpe = RMVPE( self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
"assets/rmvpe/rmvpe.pt", is_half=False, device=device self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
)
f0 = self.model_rmvpe.compute_f0(x, filter_radius=0.03) self.f0_gen = Generator(
return f0 Path(os.environ["rmvpe_root"]),
is_half,
def coarse_f0(self, f0): 0,
f0_mel = 1127 * np.log(1 + f0 / 700) device,
f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * ( hop_size,
self.f0_bin - 2 samplerate,
) / (self.f0_mel_max - self.f0_mel_min) + 1 )
# use 0 or 1 def go(self, paths, f0_method):
f0_mel[f0_mel <= 1] = 1 if len(paths) == 0:
f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1 printt("no-f0-todo")
f0_coarse = np.rint(f0_mel).astype(int) else:
assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, ( printt("todo-f0-%s" % len(paths))
f0_coarse.max(), n = max(len(paths) // 5, 1) # 每个进程最多打印5条
f0_coarse.min(), for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths):
) try:
return f0_coarse if idx % n == 0:
printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path))
def go(self, paths, f0_method): if (
if len(paths) == 0: os.path.exists(opt_path1 + ".npy") == True
printt("no-f0-todo") and os.path.exists(opt_path2 + ".npy") == True
else: ):
printt("todo-f0-%s" % len(paths)) continue
n = max(len(paths) // 5, 1) # 每个进程最多打印5条 x = load_audio(inp_path, self.fs)
for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths): coarse_pit, feature_pit = self.f0_gen.calculate(x, x.shape[0] // self.hop, 0, f0_method, None)
try: np.save(
if idx % n == 0: opt_path2,
printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path)) feature_pit,
if ( allow_pickle=False,
os.path.exists(opt_path1 + ".npy") == True ) # nsf
and os.path.exists(opt_path2 + ".npy") == True np.save(
): opt_path1,
continue coarse_pit,
featur_pit = self.compute_f0(inp_path, f0_method) allow_pickle=False,
np.save( ) # ori
opt_path2, except:
featur_pit, printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc()))
allow_pickle=False,
) # nsf
coarse_pit = self.coarse_f0(featur_pit) if __name__ == "__main__":
np.save( # exp_dir=r"E:\codes\py39\dataset\mi-test"
opt_path1, # n_p=16
coarse_pit, # f = open("%s/log_extract_f0.log"%exp_dir, "w")
allow_pickle=False, printt(" ".join(sys.argv))
) # ori featureInput = FeatureInput(is_half, device)
except: paths = []
printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())) inp_root = "%s/1_16k_wavs" % (exp_dir)
opt_root1 = "%s/2a_f0" % (exp_dir)
opt_root2 = "%s/2b-f0nsf" % (exp_dir)
if __name__ == "__main__":
# exp_dir=r"E:\codes\py39\dataset\mi-test" os.makedirs(opt_root1, exist_ok=True)
# n_p=16 os.makedirs(opt_root2, exist_ok=True)
# f = open("%s/log_extract_f0.log"%exp_dir, "w") for name in sorted(list(os.listdir(inp_root))):
printt(" ".join(sys.argv)) inp_path = "%s/%s" % (inp_root, name)
featureInput = FeatureInput() if "spec" in inp_path:
paths = [] continue
inp_root = "%s/1_16k_wavs" % (exp_dir) opt_path1 = "%s/%s" % (opt_root1, name)
opt_root1 = "%s/2a_f0" % (exp_dir) opt_path2 = "%s/%s" % (opt_root2, name)
opt_root2 = "%s/2b-f0nsf" % (exp_dir) paths.append([inp_path, opt_path1, opt_path2])
os.makedirs(opt_root1, exist_ok=True) ps = []
os.makedirs(opt_root2, exist_ok=True) for i in range(n_p):
for name in sorted(list(os.listdir(inp_root))): p = Process(
inp_path = "%s/%s" % (inp_root, name) target=featureInput.go,
if "spec" in inp_path: args=(
continue paths[i::n_p],
opt_path1 = "%s/%s" % (opt_root1, name) f0method,
opt_path2 = "%s/%s" % (opt_root2, name) ),
paths.append([inp_path, opt_path1, opt_path2]) )
try: ps.append(p)
featureInput.go(paths, "rmvpe") p.start()
except: for i in range(n_p):
printt("f0_all_fail-%s" % (traceback.format_exc())) ps[i].join()
# ps = []
# for i in range(n_p):
# p = Process(
# target=featureInput.go,
# args=(
# paths[i::n_p],
# f0method,
# ),
# )
# ps.append(p)
# p.start()
# for i in range(n_p):
# ps[i].join()

View File

@@ -5,6 +5,7 @@ import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from pathlib import Path
from time import time from time import time
import faiss import faiss
@@ -14,7 +15,7 @@ import torch
import torch.nn.functional as F import torch.nn.functional as F
from scipy import signal from scipy import signal
from rvc.f0 import PM, Harvest, RMVPE, CRePE, Dio, FCPE from rvc.f0 import Generator
now_dir = os.getcwd() now_dir = os.getcwd()
sys.path.append(now_dir) sys.path.append(now_dir)
@@ -63,95 +64,15 @@ class Pipeline(object):
self.t_max = self.sr * self.x_max # 免查询时长阈值 self.t_max = self.sr * self.x_max # 免查询时长阈值
self.device = config.device self.device = config.device
def get_f0( self.f0_gen = Generator(
self, Path(os.environ["rmvpe_root"]),
x, self.is_half,
p_len, self.x_pad,
f0_up_key, self.device,
f0_method, self.window,
filter_radius, self.sr,
inp_f0=None, )
):
f0_min = 50
f0_max = 1100
f0_mel_min = 1127 * np.log(1 + f0_min / 700)
f0_mel_max = 1127 * np.log(1 + f0_max / 700)
if f0_method == "pm":
if not hasattr(self, "pm"):
self.pm = PM(self.window, f0_min, f0_max, self.sr)
f0 = self.pm.compute_f0(x, p_len=p_len)
if f0_method == "dio":
if not hasattr(self, "dio"):
self.dio = Dio(self.window, f0_min, f0_max, self.sr)
f0 = self.dio.compute_f0(x, p_len=p_len)
elif f0_method == "harvest":
if not hasattr(self, "harvest"):
self.harvest = Harvest(self.window, f0_min, f0_max, self.sr)
f0 = self.harvest.compute_f0(x, p_len=p_len, filter_radius=filter_radius)
elif f0_method == "crepe":
if not hasattr(self, "crepe"):
self.crepe = CRePE(
self.window,
f0_min,
f0_max,
self.sr,
self.device,
)
f0 = self.crepe.compute_f0(x, p_len=p_len)
elif f0_method == "rmvpe":
if not hasattr(self, "rmvpe"):
logger.info(
"Loading rmvpe model %s" % "%s/rmvpe.pt" % os.environ["rmvpe_root"]
)
self.rmvpe = RMVPE(
"%s/rmvpe.pt" % os.environ["rmvpe_root"],
is_half=self.is_half,
device=self.device,
# use_jit=self.config.use_jit,
)
f0 = self.rmvpe.compute_f0(x, p_len=p_len, filter_radius=0.03)
if "privateuseone" in str(self.device): # clean ortruntime memory
del self.rmvpe.model
del self.rmvpe
logger.info("Cleaning ortruntime memory")
elif f0_method == "fcpe":
if not hasattr(self, "model_fcpe"):
logger.info("Loading fcpe model")
self.model_fcpe = FCPE(
self.window,
f0_min,
f0_max,
self.sr,
self.device,
)
f0 = self.model_fcpe.compute_f0(x, p_len=p_len)
f0 *= pow(2, f0_up_key / 12)
# with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
tf0 = self.sr // self.window # 每秒f0点数
if inp_f0 is not None:
delta_t = np.round(
(inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
).astype("int16")
replace_f0 = np.interp(
list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1]
)
shape = f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)].shape[0]
f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)] = replace_f0[
:shape
]
# with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
f0bak = f0.copy()
f0_mel = 1127 * np.log(1 + f0 / 700)
f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
f0_mel_max - f0_mel_min
) + 1
f0_mel[f0_mel <= 1] = 1
f0_mel[f0_mel > 255] = 255
f0_coarse = np.rint(f0_mel).astype(np.int32)
return f0_coarse, f0bak # 1-0
def vc( def vc(
self, self,
@@ -337,7 +258,7 @@ class Pipeline(object):
pitch, pitchf = None, None pitch, pitchf = None, None
if if_f0: if if_f0:
if if_f0 == 1: if if_f0 == 1:
pitch, pitchf = self.get_f0( pitch, pitchf = self.f0_gen.calculate(
audio_pad, audio_pad,
p_len, p_len,
f0_up_key, f0_up_key,

View File

@@ -1,10 +1 @@
from .f0 import F0Predictor from .gen import Generator
from .crepe import CRePE
from .dio import Dio
from .fcpe import FCPE
from .harvest import Harvest
from .pm import PM
from .rmvpe import RMVPE
__all__ = ["F0Predictor", "CRePE", "Dio", "FCPE", "Harvest", "PM", "RMVPE"]

127
rvc/f0/gen.py Normal file
View File

@@ -0,0 +1,127 @@
from math import log
from pathlib import Path
from typing import Optional, Union, Literal, Tuple
from numba import jit
import numpy as np
@jit(nopython=True)
def post_process(
    sr: int,
    window: int,
    f0: np.ndarray,
    f0_up_key: int,
    manual_x_pad: int,
    f0_mel_min: float,
    f0_mel_max: float,
    manual_f0: Optional[Union[np.ndarray, list]] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """Transpose an f0 curve, optionally overlay a manual curve, and quantise it.

    Args:
        sr: sample rate the f0 frames were computed at.
        window: hop size in samples; ``sr // window`` is the f0 frame rate.
        f0: f0 value per frame.
        f0_up_key: transposition in semitones (f0 is scaled by 2**(key/12)).
        manual_x_pad: offset, in seconds, at which the manual curve starts.
        f0_mel_min: mel value mapped to coarse bin 1.
        f0_mel_max: mel value mapped to coarse bin 255.
        manual_f0: optional 2-column array indexed as ``[:, 0]`` (time) and
            ``[:, 1]`` (f0); presumably time is in seconds -- confirm with
            the caller.

    Returns:
        ``(f0_coarse, f0)``: int32 mel bins clamped to ``[1, 255]`` and the
        transposed (and possibly overlaid) f0 curve.
    """
    f0 = np.multiply(f0, pow(2, f0_up_key / 12))
    tf0 = sr // window  # f0 frames per second
    if manual_f0 is not None:
        times = manual_f0[:, 0]
        # Plain int rather than int16: the frame count exceeds 32767 for
        # curves longer than ~5.5 minutes at 100 frames/s.
        delta_t = int(np.round((times.max() - times.min()) * tf0 + 1))
        # Scale the time axis by the actual frame rate (it used to be a fixed
        # 100, which only matches sr // window == 100).
        replace_f0 = np.interp(np.arange(delta_t), times * tf0, manual_f0[:, 1])
        start = manual_x_pad * tf0
        # Clip the overlay to whatever part of f0 is left after `start`.
        shape = f0[start : start + len(replace_f0)].shape[0]
        f0[start : start + len(replace_f0)] = replace_f0[:shape]

    f0_mel = 1127 * np.log(1 + f0 / 700)
    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
        f0_mel_max - f0_mel_min
    ) + 1
    # Clamp into the valid bin range before rounding.
    f0_mel[f0_mel <= 1] = 1
    f0_mel[f0_mel > 255] = 255
    f0_coarse = np.rint(f0_mel).astype(np.int32)
    return f0_coarse, f0
class Generator(object):
    """Single entry point for f0 extraction with several interchangeable methods.

    Predictors are imported and constructed lazily the first time their
    method is requested and are then cached on the instance.
    """

    def __init__(
        self,
        rmvpe_root: Path,
        is_half: bool,
        x_pad: int,
        device="cpu",
        window=160,
        sr=16000,
    ):
        """
        Args:
            rmvpe_root: directory holding ``rmvpe.pt``; may be ``None`` when
                the ``"rmvpe"`` method is never requested.
            is_half: forwarded to the RMVPE model as ``is_half``.
            x_pad: forwarded to ``post_process`` as ``manual_x_pad``.
            device: device passed to the crepe / rmvpe / fcpe predictors.
            window: hop size in samples.
            sr: sample rate of the audio passed to ``calculate``.
        """
        self.rmvpe_root = rmvpe_root
        self.is_half = is_half
        self.x_pad = x_pad
        self.device = device
        self.window = window
        self.sr = sr

    def calculate(
        self,
        x: np.ndarray,
        p_len: int,
        f0_up_key: int,
        f0_method: Literal["pm", "dio", "harvest", "crepe", "rmvpe", "fcpe"],
        filter_radius: Optional[Union[int, float]] = None,
        manual_f0: Optional[Union[np.ndarray, list]] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Compute f0 for ``x`` and return ``post_process``'s ``(coarse, f0)``.

        Args:
            x: audio samples.
            p_len: number of frames, passed to the predictor as ``p_len``.
            f0_up_key: transposition in semitones applied in ``post_process``.
            f0_method: which predictor to use.
            filter_radius: only forwarded to the ``"harvest"`` predictor.
            manual_f0: optional manual curve forwarded to ``post_process``.

        Raises:
            ValueError: for an unknown ``f0_method``, or when ``"rmvpe"`` is
                requested but no ``rmvpe_root`` was configured.
        """
        f0_min = 50
        f0_max = 1100
        if f0_method == "pm":
            if not hasattr(self, "pm"):
                from .pm import PM

                self.pm = PM(self.window, f0_min, f0_max, self.sr)
            f0 = self.pm.compute_f0(x, p_len=p_len)
        elif f0_method == "dio":
            if not hasattr(self, "dio"):
                from .dio import Dio

                self.dio = Dio(self.window, f0_min, f0_max, self.sr)
            f0 = self.dio.compute_f0(x, p_len=p_len)
        elif f0_method == "harvest":
            if not hasattr(self, "harvest"):
                from .harvest import Harvest

                self.harvest = Harvest(self.window, f0_min, f0_max, self.sr)
            f0 = self.harvest.compute_f0(x, p_len=p_len, filter_radius=filter_radius)
        elif f0_method == "crepe":
            if not hasattr(self, "crepe"):
                from .crepe import CRePE

                self.crepe = CRePE(
                    self.window,
                    f0_min,
                    f0_max,
                    self.sr,
                    self.device,
                )
            f0 = self.crepe.compute_f0(x, p_len=p_len)
        elif f0_method == "rmvpe":
            if not hasattr(self, "rmvpe"):
                # Without this check a missing root surfaces as an opaque
                # TypeError from `None / "rmvpe.pt"`.
                if self.rmvpe_root is None:
                    raise ValueError(
                        "rmvpe_root must be set to use the rmvpe f0 method"
                    )
                from .rmvpe import RMVPE

                self.rmvpe = RMVPE(
                    str(Path(self.rmvpe_root) / "rmvpe.pt"),
                    is_half=self.is_half,
                    device=self.device,
                )
            f0 = self.rmvpe.compute_f0(x, p_len=p_len, filter_radius=0.03)
            if "privateuseone" in str(self.device):
                # Free the runtime's memory after every call on this backend;
                # the model is rebuilt on the next request.
                del self.rmvpe.model
                del self.rmvpe
        elif f0_method == "fcpe":
            if not hasattr(self, "fcpe"):
                from .fcpe import FCPE

                self.fcpe = FCPE(
                    self.window,
                    f0_min,
                    f0_max,
                    self.sr,
                    self.device,
                )
            f0 = self.fcpe.compute_f0(x, p_len=p_len)
        else:
            raise ValueError(f"f0 method {f0_method} has not yet been supported")

        return post_process(
            self.sr,
            self.window,
            f0,
            f0_up_key,
            self.x_pad,
            1127 * log(1 + f0_min / 700),
            1127 * log(1 + f0_max / 700),
            manual_f0,
        )

View File

@@ -1,4 +1,4 @@
from typing import Any, Optional from typing import Optional
import numpy as np import numpy as np
import parselmouth import parselmouth

View File

@@ -5,12 +5,7 @@ import librosa
import numpy as np import numpy as np
import onnxruntime import onnxruntime
from rvc.f0 import ( from rvc.f0 import Generator
PM,
Harvest,
Dio,
F0Predictor,
)
class Model: class Model:
@@ -51,49 +46,28 @@ class ContentVec(Model):
return logits.transpose(0, 2, 1) return logits.transpose(0, 2, 1)
predictors: typing.Dict[str, F0Predictor] = {
"pm": PM,
"harvest": Harvest,
"dio": Dio,
}
def get_f0_predictor(
f0_method: str, hop_length: int, sampling_rate: int
) -> F0Predictor:
return predictors[f0_method](hop_length=hop_length, sampling_rate=sampling_rate)
class RVC(Model): class RVC(Model):
def __init__( def __init__(
self, self,
model_path: typing.Union[str, bytes, os.PathLike], model_path: typing.Union[str, bytes, os.PathLike],
hop_len=512, hop_len=512,
model_sr=40000,
vec_path: typing.Union[str, bytes, os.PathLike] = "vec-768-layer-12.onnx", vec_path: typing.Union[str, bytes, os.PathLike] = "vec-768-layer-12.onnx",
device: typing.Literal["cpu", "cuda", "dml"] = "cpu", device: typing.Literal["cpu", "cuda", "dml"] = "cpu",
): ):
super().__init__(model_path, device) super().__init__(model_path, device)
self.vec_model = ContentVec(vec_path, device) self.vec_model = ContentVec(vec_path, device)
self.hop_len = hop_len self.hop_len = hop_len
self.f0_gen = Generator(None, False, 0, window=hop_len, sr=model_sr)
def infer( def infer(
self, self,
wav: np.ndarray[typing.Any, np.dtype], wav: np.ndarray[typing.Any, np.dtype],
wav_sr: int, wav_sr: int,
model_sr: int = 40000,
sid: int = 0, sid: int = 0,
f0_method="dio", f0_method="dio",
f0_up_key=0, f0_up_key=0,
) -> np.ndarray[typing.Any, np.dtype[np.int16]]: ) -> np.ndarray[typing.Any, np.dtype[np.int16]]:
f0_min = 50
f0_max = 1100
f0_mel_min = 1127 * np.log(1 + f0_min / 700)
f0_mel_max = 1127 * np.log(1 + f0_max / 700)
f0_predictor = get_f0_predictor(
f0_method,
self.hop_len,
model_sr,
)
org_length = len(wav) org_length = len(wav)
if org_length / wav_sr > 50.0: if org_length / wav_sr > 50.0:
raise RuntimeError("wav max length exceeded") raise RuntimeError("wav max length exceeded")
@@ -102,16 +76,8 @@ class RVC(Model):
hubert = np.repeat(hubert, 2, axis=2).transpose(0, 2, 1).astype(np.float32) hubert = np.repeat(hubert, 2, axis=2).transpose(0, 2, 1).astype(np.float32)
hubert_length = hubert.shape[1] hubert_length = hubert.shape[1]
pitchf = f0_predictor.compute_f0(wav, hubert_length) pitch, pitchf = self.f0_gen.calculate(wav, hubert_length, f0_up_key, f0_method, None)
pitchf = pitchf * 2 ** (f0_up_key / 12) pitch = pitch.astype(np.int64)
pitch = pitchf.copy()
f0_mel = 1127 * np.log(1 + pitch / 700)
f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
f0_mel_max - f0_mel_min
) + 1
f0_mel[f0_mel <= 1] = 1
f0_mel[f0_mel > 255] = 255
pitch = np.rint(f0_mel).astype(np.int64)
pitchf = pitchf.reshape(1, len(pitchf)).astype(np.float32) pitchf = pitchf.reshape(1, len(pitchf)).astype(np.float32)
pitch = pitch.reshape(1, len(pitch)) pitch = pitch.reshape(1, len(pitch))

103
web.py
View File

@@ -264,28 +264,28 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
# but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2]) # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvpe): def extract_f0_feature(n_p, f0method, if_f0, exp_dir, version19):
gpus = gpus.split("-")
os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w") f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
f.close() f.close()
if if_f0: if if_f0:
if f0method != "rmvpe_gpu": if f0method != "rmvpe_gpu":
cmd = ( cmd = (
'"%s" infer/modules/train/extract/extract_f0_print.py "%s/logs/%s" %s %s' '"%s" infer/modules/train/extract_f0_print.py "%s/logs/%s" %s %s "%s" %s'
% ( % (
config.python_cmd, config.python_cmd,
now_dir, now_dir,
exp_dir, exp_dir,
n_p, n_p,
f0method, f0method,
config.device,
str(config.is_half),
) )
) )
logger.info("Execute: " + cmd) logger.info("Execute: " + cmd)
p = Popen( p = Popen(
cmd, shell=True, cwd=now_dir cmd, shell=True, cwd=now_dir
) # , stdin=PIPE, stdout=PIPE,stderr=PIPE ) # , stdin=PIPE, stdout=PIPE,stderr=PIPE
# 煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
done = [False] done = [False]
threading.Thread( threading.Thread(
target=if_done, target=if_done,
@@ -294,53 +294,6 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp
p, p,
), ),
).start() ).start()
else:
if gpus_rmvpe != "-":
gpus_rmvpe = gpus_rmvpe.split("-")
leng = len(gpus_rmvpe)
ps = []
for idx, n_g in enumerate(gpus_rmvpe):
cmd = (
'"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s '
% (
config.python_cmd,
leng,
idx,
n_g,
now_dir,
exp_dir,
config.is_half,
)
)
logger.info("Execute: " + cmd)
p = Popen(
cmd, shell=True, cwd=now_dir
) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
ps.append(p)
# 煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
done = [False]
threading.Thread(
target=if_done_multi, #
args=(
done,
ps,
),
).start()
else:
cmd = (
config.python_cmd
+ ' infer/modules/train/extract/extract_f0_rmvpe_dml.py "%s/logs/%s" '
% (
now_dir,
exp_dir,
)
)
logger.info("Execute: " + cmd)
p = Popen(
cmd, shell=True, cwd=now_dir
) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
p.wait()
done = [True]
while 1: while 1:
with open( with open(
"%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r"
@@ -464,7 +417,6 @@ def change_version19(sr2, if_f0_3, version19):
def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15 def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15
path_str = "" if version19 == "v1" else "_v2" path_str = "" if version19 == "v1" else "_v2"
return ( return (
{"visible": if_f0_3, "__type__": "update"},
{"visible": if_f0_3, "__type__": "update"}, {"visible": if_f0_3, "__type__": "update"},
*get_pretrained_models(path_str, "f0" if if_f0_3 == True else "", sr2), *get_pretrained_models(path_str, "f0" if if_f0_3 == True else "", sr2),
) )
@@ -719,11 +671,9 @@ def train1key(
if_save_latest13, if_save_latest13,
pretrained_G14, pretrained_G14,
pretrained_D15, pretrained_D15,
gpus16,
if_cache_gpu17, if_cache_gpu17,
if_save_every_weights18, if_save_every_weights18,
version19, version19,
gpus_rmvpe,
author, author,
): ):
infos = [] infos = []
@@ -741,7 +691,7 @@ def train1key(
[ [
get_info_str(_) get_info_str(_)
for _ in extract_f0_feature( for _ in extract_f0_feature(
gpus16, np7, f0method8, if_f0_3, exp_dir1, version19, gpus_rmvpe np7, f0method8, if_f0_3, exp_dir1, version19,
) )
] ]
@@ -792,17 +742,6 @@ def change_info_(ckpt_path):
return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
F0GPUVisible = config.dml == False
def change_f0_method(f0method8):
if f0method8 == "rmvpe_gpu":
visible = F0GPUVisible
else:
visible = False
return {"visible": visible, "__type__": "update"}
with gr.Blocks(title="RVC WebUI") as app: with gr.Blocks(title="RVC WebUI") as app:
gr.Markdown("## RVC WebUI") gr.Markdown("## RVC WebUI")
gr.Markdown( gr.Markdown(
@@ -1260,50 +1199,26 @@ with gr.Blocks(title="RVC WebUI") as app:
gpu_info9 = gr.Textbox( gpu_info9 = gr.Textbox(
label=i18n("GPU Information"), label=i18n("GPU Information"),
value=gpu_info, value=gpu_info,
visible=F0GPUVisible,
)
gpus6 = gr.Textbox(
label=i18n(
"Enter the GPU index(es) separated by '-', e.g., 0-1-2 to use GPU 0, 1, and 2"
),
value=gpus,
interactive=True,
visible=F0GPUVisible,
)
gpus_rmvpe = gr.Textbox(
label=i18n(
"Enter the GPU index(es) separated by '-', e.g., 0-0-1 to use 2 processes in GPU0 and 1 process in GPU1"
),
value="%s-%s" % (gpus, gpus),
interactive=True,
visible=F0GPUVisible,
) )
f0method8 = gr.Radio( f0method8 = gr.Radio(
label=i18n( label=i18n(
"Select the pitch extraction algorithm: when extracting singing, you can use 'pm' to speed up. For high-quality speech with fast performance, but worse CPU usage, you can use 'dio'. 'harvest' results in better quality but is slower. 'rmvpe' has the best results and consumes less CPU/GPU" "Select the pitch extraction algorithm: when extracting singing, you can use 'pm' to speed up. For high-quality speech with fast performance, but worse CPU usage, you can use 'dio'. 'harvest' results in better quality but is slower. 'rmvpe' has the best results and consumes less CPU/GPU"
), ),
choices=["pm", "harvest", "dio", "rmvpe", "rmvpe_gpu"], choices=["pm", "harvest", "dio", "rmvpe"],
value="rmvpe_gpu", value="rmvpe",
interactive=True, interactive=True,
) )
with gr.Column(): with gr.Column():
but2 = gr.Button(i18n("Feature extraction"), variant="primary") but2 = gr.Button(i18n("Feature extraction"), variant="primary")
info2 = gr.Textbox(label=i18n("Output information"), value="") info2 = gr.Textbox(label=i18n("Output information"), value="")
f0method8.change(
fn=change_f0_method,
inputs=[f0method8],
outputs=[gpus_rmvpe],
)
but2.click( but2.click(
extract_f0_feature, extract_f0_feature,
[ [
gpus6,
np7, np7,
f0method8, f0method8,
if_f0_3, if_f0_3,
exp_dir1, exp_dir1,
version19, version19,
gpus_rmvpe,
], ],
[info2], [info2],
api_name="train_extract_f0_feature", api_name="train_extract_f0_feature",
@@ -1394,7 +1309,7 @@ with gr.Blocks(title="RVC WebUI") as app:
if_f0_3.change( if_f0_3.change(
change_f0, change_f0,
[if_f0_3, sr2, version19], [if_f0_3, sr2, version19],
[f0method8, gpus_rmvpe, pretrained_G14, pretrained_D15], [f0method8, pretrained_G14, pretrained_D15],
) )
but3 = gr.Button(i18n("Train model"), variant="primary") but3 = gr.Button(i18n("Train model"), variant="primary")
@@ -1441,11 +1356,9 @@ with gr.Blocks(title="RVC WebUI") as app:
if_save_latest13, if_save_latest13,
pretrained_G14, pretrained_G14,
pretrained_D15, pretrained_D15,
gpus16,
if_cache_gpu17, if_cache_gpu17,
if_save_every_weights18, if_save_every_weights18,
version19, version19,
gpus_rmvpe,
author, author,
], ],
info3, info3,