1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-05 01:10:22 +08:00

optimize(onnx): move infer into rvc.onnx

This commit is contained in:
源文雨
2024-06-05 21:23:25 +09:00
parent 8dd06315ed
commit 6ff713c024
12 changed files with 39 additions and 127 deletions

17
.gitignore vendored
View File

@@ -4,25 +4,10 @@ __pycache__
*.pyd *.pyd
.venv .venv
/opt /opt
tools/aria2c/
tools/flag.txt
# Imported from huggingface.co/lj1995/VoiceConversionWebUI
/pretrained
/pretrained_v2
/uvr5_weights
hubert_base.pt
rmvpe.onnx
rmvpe.pt
# Generated by RVC # Generated by RVC
/logs /logs
/weights
# To set a Python version for the project
.tool-versions
/runtime
/assets/weights/* /assets/weights/*
ffmpeg.* ffmpeg.*
ffprobe.* ffprobe.*

View File

@@ -211,27 +211,6 @@ class Config:
x_max = 32 x_max = 32
if self.dml: if self.dml:
logger.info("Use DirectML instead") logger.info("Use DirectML instead")
if (
os.path.exists(
"runtime\Lib\site-packages\onnxruntime\capi\DirectML.dll"
)
== False
):
try:
os.rename(
"runtime\Lib\site-packages\onnxruntime",
"runtime\Lib\site-packages\onnxruntime-cuda",
)
except:
pass
try:
os.rename(
"runtime\Lib\site-packages\onnxruntime-dml",
"runtime\Lib\site-packages\onnxruntime",
)
except:
pass
# if self.device != "cpu":
import torch_directml import torch_directml
self.device = torch_directml.device(torch_directml.default_device()) self.device = torch_directml.device(torch_directml.default_device())
@@ -239,26 +218,6 @@ class Config:
else: else:
if self.instead: if self.instead:
logger.info(f"Use {self.instead} instead") logger.info(f"Use {self.instead} instead")
if (
os.path.exists(
"runtime\Lib\site-packages\onnxruntime\capi\onnxruntime_providers_cuda.dll"
)
== False
):
try:
os.rename(
"runtime\Lib\site-packages\onnxruntime",
"runtime\Lib\site-packages\onnxruntime-dml",
)
except:
pass
try:
os.rename(
"runtime\Lib\site-packages\onnxruntime-cuda",
"runtime\Lib\site-packages\onnxruntime",
)
except:
pass
logger.info( logger.info(
"Half-precision floating-point: %s, device: %s" "Half-precision floating-point: %s, device: %s"
% (self.is_half, self.device) % (self.is_half, self.device)

View File

@@ -36,15 +36,6 @@ for filename in glob.iglob("**/*.py", recursive=True):
print(filename, len(i18n_strings)) print(filename, len(i18n_strings))
strings.extend(i18n_strings) strings.extend(i18n_strings)
code_keys = set(strings) code_keys = set(strings)
"""
n_i18n.py
gui_v1.py 26
app.py 16
infer-web.py 147
scan_i18n.py 0
i18n.py 0
lib/train/process_ckpt.py 1
"""
print() print()
print("Total unique:", len(code_keys)) print("Total unique:", len(code_keys))

View File

@@ -0,0 +1,3 @@
from .dio import DioF0Predictor
from .harvest import HarvestF0Predictor
from .pm import PMF0Predictor

View File

@@ -1,7 +1,7 @@
import numpy as np import numpy as np
import pyworld import pyworld
from infer.lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor from .f0 import F0Predictor
class DioF0Predictor(F0Predictor): class DioF0Predictor(F0Predictor):

View File

@@ -1,7 +1,7 @@
import numpy as np import numpy as np
import pyworld import pyworld
from infer.lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor from .f0 import F0Predictor
class HarvestF0Predictor(F0Predictor): class HarvestF0Predictor(F0Predictor):

View File

@@ -1,7 +1,7 @@
import numpy as np import numpy as np
import parselmouth import parselmouth
from infer.lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor from .f0 import F0Predictor
class PMF0Predictor(F0Predictor): class PMF0Predictor(F0Predictor):

View File

@@ -1,16 +1,13 @@
import librosa import librosa
import numpy as np import numpy as np
import onnxruntime import onnxruntime
import soundfile
import logging
logger = logging.getLogger(__name__)
from onnx.f0predictor import PMF0Predictor
from onnx.f0predictor import HarvestF0Predictor
from onnx.f0predictor import DioF0Predictor
class ContentVec: class ContentVec:
def __init__(self, vec_path="pretrained/vec-768-layer-12.onnx", device=None): def __init__(self, vec_path: str, device=None):
logger.info("Load model(s) from {}".format(vec_path))
if device == "cpu" or device is None: if device == "cpu" or device is None:
providers = ["CPUExecutionProvider"] providers = ["CPUExecutionProvider"]
elif device == "cuda": elif device == "cuda":
@@ -25,52 +22,33 @@ class ContentVec:
return self.forward(wav) return self.forward(wav)
def forward(self, wav): def forward(self, wav):
feats = wav if wav.ndim == 2: # double channels
if feats.ndim == 2: # double channels wav = wav.mean(-1)
feats = feats.mean(-1) assert wav.ndim == 1, wav.ndim
assert feats.ndim == 1, feats.ndim wav = np.expand_dims(np.expand_dims(wav, 0), 0)
feats = np.expand_dims(np.expand_dims(feats, 0), 0) onnx_input = {self.model.get_inputs()[0].name: wav}
onnx_input = {self.model.get_inputs()[0].name: feats}
logits = self.model.run(None, onnx_input)[0] logits = self.model.run(None, onnx_input)[0]
return logits.transpose(0, 2, 1) return logits.transpose(0, 2, 1)
predicters = {
"pm": PMF0Predictor,
"harvest": HarvestF0Predictor,
"dio": DioF0Predictor,
}
def get_f0_predictor(f0_predictor, hop_length, sampling_rate, **kargs): def get_f0_predictor(f0_method, hop_length, sampling_rate):
if f0_predictor == "pm": return predicters[f0_method](hop_length=hop_length, sampling_rate=sampling_rate)
from lib.infer_pack.modules.F0Predictor.PMF0Predictor import PMF0Predictor
f0_predictor_object = PMF0Predictor(
hop_length=hop_length, sampling_rate=sampling_rate
)
elif f0_predictor == "harvest":
from lib.infer_pack.modules.F0Predictor.HarvestF0Predictor import (
HarvestF0Predictor,
)
f0_predictor_object = HarvestF0Predictor(
hop_length=hop_length, sampling_rate=sampling_rate
)
elif f0_predictor == "dio":
from lib.infer_pack.modules.F0Predictor.DioF0Predictor import DioF0Predictor
f0_predictor_object = DioF0Predictor(
hop_length=hop_length, sampling_rate=sampling_rate
)
else:
raise Exception("Unknown f0 predictor")
return f0_predictor_object
class OnnxRVC: class RVC:
def __init__( def __init__(
self, self,
model_path, model_path,
sr=40000, sr=40000,
hop_size=512, hop_size=512,
vec_path="vec-768-layer-12", vec_path="vec-768-layer-12.onnx",
device="cpu", device="cpu",
): ):
vec_path = f"pretrained/{vec_path}.onnx"
self.vec_model = ContentVec(vec_path, device) self.vec_model = ContentVec(vec_path, device)
if device == "cpu" or device is None: if device == "cpu" or device is None:
providers = ["CPUExecutionProvider"] providers = ["CPUExecutionProvider"]
@@ -97,12 +75,11 @@ class OnnxRVC:
def inference( def inference(
self, self,
raw_path, wav,
sr,
sid, sid,
f0_method="dio", f0_method="dio",
f0_up_key=0, f0_up_key=0,
pad_time=0.5,
cr_threshold=0.02,
): ):
f0_min = 50 f0_min = 50
f0_max = 1100 f0_max = 1100
@@ -110,16 +87,14 @@ class OnnxRVC:
f0_mel_max = 1127 * np.log(1 + f0_max / 700) f0_mel_max = 1127 * np.log(1 + f0_max / 700)
f0_predictor = get_f0_predictor( f0_predictor = get_f0_predictor(
f0_method, f0_method,
hop_length=self.hop_size, self.hop_size,
sampling_rate=self.sampling_rate, self.sampling_rate,
threshold=cr_threshold,
) )
wav, sr = librosa.load(raw_path, sr=self.sampling_rate)
org_length = len(wav) org_length = len(wav)
if org_length / sr > 50.0: if org_length / sr > 50.0:
raise RuntimeError("Reached Max Length") raise RuntimeError("Reached Max Length")
wav16k = librosa.resample(wav, orig_sr=self.sampling_rate, target_sr=16000) wav16k = librosa.resample(wav, orig_sr=sr, target_sr=16000)
wav16k = wav16k wav16k = wav16k
hubert = self.vec_model(wav16k) hubert = self.vec_model(wav16k)

View File

@@ -1,23 +1,24 @@
import soundfile import soundfile
import librosa
from infer.lib.infer_pack.onnx_inference import OnnxRVC from rvc.onnx.infer import RVC
hop_size = 512 hop_size = 512
sampling_rate = 40000 # 采样率 sampling_rate = 40000 # 采样率
f0_up_key = 0 # 升降调 f0_up_key = 0 # 升降调
sid = 0 # 角色ID sid = 0 # 角色ID
f0_method = "dio" # F0提取算法 f0_method = "dio" # F0提取算法
model_path = "ShirohaRVC.onnx" # 模型的完整路径 model_path = "exported_model.onnx" # 模型的完整路径
vec_name = ( vec_path = "vec-256-layer-9.onnx" # 需要onnx的vec模型
"vec-256-layer-9" # 内部自动补齐为 f"pretrained/{vec_name}.onnx" 需要onnx的vec模型
)
wav_path = "123.wav" # 输入路径或ByteIO实例 wav_path = "123.wav" # 输入路径或ByteIO实例
out_path = "out.wav" # 输出路径或ByteIO实例 out_path = "out.wav" # 输出路径或ByteIO实例
model = OnnxRVC( model = RVC(
model_path, vec_path=vec_name, sr=sampling_rate, hop_size=hop_size, device="cuda" model_path, vec_path=vec_path, sr=sampling_rate, hop_size=hop_size, device="cuda"
) )
audio = model.inference(wav_path, sid, f0_method=f0_method, f0_up_key=f0_up_key) wav, sr = librosa.load(wav_path, sr=sampling_rate)
audio = model.inference(wav, sr, sid, f0_method=f0_method, f0_up_key=f0_up_key)
soundfile.write(out_path, audio, sampling_rate) soundfile.write(out_path, audio, sampling_rate)

2
web.py
View File

@@ -44,8 +44,6 @@ logger = logging.getLogger(__name__)
tmp = os.path.join(now_dir, "TEMP") tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True) shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
os.makedirs(tmp, exist_ok=True) os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True) os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)