1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-05 01:10:22 +08:00

optimize(onnx): move infer into rvc.onnx

This commit is contained in:
源文雨
2024-06-05 21:23:25 +09:00
parent 8dd06315ed
commit 6ff713c024
12 changed files with 39 additions and 127 deletions

17
.gitignore vendored
View File

@@ -4,25 +4,10 @@ __pycache__
*.pyd
.venv
/opt
tools/aria2c/
tools/flag.txt
# Imported from huggingface.co/lj1995/VoiceConversionWebUI
/pretrained
/pretrained_v2
/uvr5_weights
hubert_base.pt
rmvpe.onnx
rmvpe.pt
# Generated by RVC
/logs
/weights
# To set a Python version for the project
.tool-versions
/runtime
/assets/weights/*
ffmpeg.*
ffprobe.*
ffprobe.*

View File

@@ -211,27 +211,6 @@ class Config:
x_max = 32
if self.dml:
logger.info("Use DirectML instead")
if (
os.path.exists(
"runtime\Lib\site-packages\onnxruntime\capi\DirectML.dll"
)
== False
):
try:
os.rename(
"runtime\Lib\site-packages\onnxruntime",
"runtime\Lib\site-packages\onnxruntime-cuda",
)
except:
pass
try:
os.rename(
"runtime\Lib\site-packages\onnxruntime-dml",
"runtime\Lib\site-packages\onnxruntime",
)
except:
pass
# if self.device != "cpu":
import torch_directml
self.device = torch_directml.device(torch_directml.default_device())
@@ -239,26 +218,6 @@ class Config:
else:
if self.instead:
logger.info(f"Use {self.instead} instead")
if (
os.path.exists(
"runtime\Lib\site-packages\onnxruntime\capi\onnxruntime_providers_cuda.dll"
)
== False
):
try:
os.rename(
"runtime\Lib\site-packages\onnxruntime",
"runtime\Lib\site-packages\onnxruntime-dml",
)
except:
pass
try:
os.rename(
"runtime\Lib\site-packages\onnxruntime-cuda",
"runtime\Lib\site-packages\onnxruntime",
)
except:
pass
logger.info(
"Half-precision floating-point: %s, device: %s"
% (self.is_half, self.device)

View File

@@ -36,15 +36,6 @@ for filename in glob.iglob("**/*.py", recursive=True):
print(filename, len(i18n_strings))
strings.extend(i18n_strings)
code_keys = set(strings)
"""
n_i18n.py
gui_v1.py 26
app.py 16
infer-web.py 147
scan_i18n.py 0
i18n.py 0
lib/train/process_ckpt.py 1
"""
print()
print("Total unique:", len(code_keys))

View File

@@ -0,0 +1,3 @@
from .dio import DioF0Predictor
from .harvest import HarvestF0Predictor
from .pm import PMF0Predictor

View File

@@ -1,7 +1,7 @@
import numpy as np
import pyworld
from infer.lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
from .f0 import F0Predictor
class DioF0Predictor(F0Predictor):

View File

@@ -1,7 +1,7 @@
import numpy as np
import pyworld
from infer.lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
from .f0 import F0Predictor
class HarvestF0Predictor(F0Predictor):

View File

@@ -1,7 +1,7 @@
import numpy as np
import parselmouth
from infer.lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
from .f0 import F0Predictor
class PMF0Predictor(F0Predictor):

View File

@@ -1,16 +1,13 @@
import librosa
import numpy as np
import onnxruntime
import soundfile
import logging
logger = logging.getLogger(__name__)
from onnx.f0predictor import PMF0Predictor
from onnx.f0predictor import HarvestF0Predictor
from onnx.f0predictor import DioF0Predictor
class ContentVec:
def __init__(self, vec_path="pretrained/vec-768-layer-12.onnx", device=None):
logger.info("Load model(s) from {}".format(vec_path))
def __init__(self, vec_path: str, device=None):
if device == "cpu" or device is None:
providers = ["CPUExecutionProvider"]
elif device == "cuda":
@@ -25,52 +22,33 @@ class ContentVec:
return self.forward(wav)
def forward(self, wav):
feats = wav
if feats.ndim == 2: # double channels
feats = feats.mean(-1)
assert feats.ndim == 1, feats.ndim
feats = np.expand_dims(np.expand_dims(feats, 0), 0)
onnx_input = {self.model.get_inputs()[0].name: feats}
if wav.ndim == 2: # double channels
wav = wav.mean(-1)
assert wav.ndim == 1, wav.ndim
wav = np.expand_dims(np.expand_dims(wav, 0), 0)
onnx_input = {self.model.get_inputs()[0].name: wav}
logits = self.model.run(None, onnx_input)[0]
return logits.transpose(0, 2, 1)
# Registry mapping each supported F0 method name to the predictor class
# that implements it; get_f0_predictor indexes this table by name.
predicters = {
"pm": PMF0Predictor,
"harvest": HarvestF0Predictor,
"dio": DioF0Predictor,
}
# if/elif factory for F0 predictors: each branch lazily imports one predictor
# class and constructs it with hop_length and sampling_rate.
# Extra keyword arguments (**kargs) are accepted but never forwarded.
# An unrecognised name raises a generic Exception("Unknown f0 predictor").
def get_f0_predictor(f0_predictor, hop_length, sampling_rate, **kargs):
if f0_predictor == "pm":
from lib.infer_pack.modules.F0Predictor.PMF0Predictor import PMF0Predictor
f0_predictor_object = PMF0Predictor(
hop_length=hop_length, sampling_rate=sampling_rate
)
elif f0_predictor == "harvest":
from lib.infer_pack.modules.F0Predictor.HarvestF0Predictor import (
HarvestF0Predictor,
)
f0_predictor_object = HarvestF0Predictor(
hop_length=hop_length, sampling_rate=sampling_rate
)
elif f0_predictor == "dio":
from lib.infer_pack.modules.F0Predictor.DioF0Predictor import DioF0Predictor
f0_predictor_object = DioF0Predictor(
hop_length=hop_length, sampling_rate=sampling_rate
)
else:
# Fail loudly on an unsupported method name.
raise Exception("Unknown f0 predictor")
return f0_predictor_object
def get_f0_predictor(f0_method, hop_length, sampling_rate):
    """Build the F0 predictor registered under ``f0_method``.

    Args:
        f0_method: Key into the module-level ``predicters`` table.
        hop_length: Passed to the predictor constructor as ``hop_length``.
        sampling_rate: Passed to the predictor constructor as
            ``sampling_rate``.

    Returns:
        A new instance of the selected predictor class.

    Raises:
        KeyError: If ``f0_method`` is not a key of ``predicters``.
    """
    try:
        predictor_cls = predicters[f0_method]
    except KeyError:
        # A bare KeyError only echoes the bad key; name the valid choices so
        # a typo in the method name is easy to diagnose. The exception type
        # stays KeyError so existing handlers keep working.
        supported = ", ".join(sorted(predicters))
        raise KeyError(
            f"Unknown f0 predictor {f0_method!r}; expected one of: {supported}"
        ) from None
    return predictor_cls(hop_length=hop_length, sampling_rate=sampling_rate)
class OnnxRVC:
class RVC:
def __init__(
self,
model_path,
sr=40000,
hop_size=512,
vec_path="vec-768-layer-12",
vec_path="vec-768-layer-12.onnx",
device="cpu",
):
vec_path = f"pretrained/{vec_path}.onnx"
self.vec_model = ContentVec(vec_path, device)
if device == "cpu" or device is None:
providers = ["CPUExecutionProvider"]
@@ -97,12 +75,11 @@ class OnnxRVC:
def inference(
self,
raw_path,
wav,
sr,
sid,
f0_method="dio",
f0_up_key=0,
pad_time=0.5,
cr_threshold=0.02,
):
f0_min = 50
f0_max = 1100
@@ -110,16 +87,14 @@ class OnnxRVC:
f0_mel_max = 1127 * np.log(1 + f0_max / 700)
f0_predictor = get_f0_predictor(
f0_method,
hop_length=self.hop_size,
sampling_rate=self.sampling_rate,
threshold=cr_threshold,
self.hop_size,
self.sampling_rate,
)
wav, sr = librosa.load(raw_path, sr=self.sampling_rate)
org_length = len(wav)
if org_length / sr > 50.0:
raise RuntimeError("Reached Max Length")
wav16k = librosa.resample(wav, orig_sr=self.sampling_rate, target_sr=16000)
wav16k = librosa.resample(wav, orig_sr=sr, target_sr=16000)
wav16k = wav16k
hubert = self.vec_model(wav16k)

View File

@@ -1,23 +1,24 @@
import soundfile
import librosa
from infer.lib.infer_pack.onnx_inference import OnnxRVC
from rvc.onnx.infer import RVC
# Example settings followed by a single inference run: build the model,
# convert the audio at wav_path, and write the result to out_path.
hop_size = 512
sampling_rate = 40000 # sampling rate
f0_up_key = 0 # pitch shift up/down
sid = 0 # speaker (character) ID
f0_method = "dio" # F0 extraction algorithm
model_path = "ShirohaRVC.onnx" # full path to the model
vec_name = (
"vec-256-layer-9" # expanded internally to f"pretrained/{vec_name}.onnx"; requires an ONNX vec model
)
model_path = "exported_model.onnx" # full path to the model
vec_path = "vec-256-layer-9.onnx" # requires an ONNX vec model
wav_path = "123.wav" # input path or ByteIO instance
out_path = "out.wav" # output path or ByteIO instance
model = OnnxRVC(
model_path, vec_path=vec_name, sr=sampling_rate, hop_size=hop_size, device="cuda"
model = RVC(
model_path, vec_path=vec_path, sr=sampling_rate, hop_size=hop_size, device="cuda"
)
audio = model.inference(wav_path, sid, f0_method=f0_method, f0_up_key=f0_up_key)
wav, sr = librosa.load(wav_path, sr=sampling_rate)
audio = model.inference(wav, sr, sid, f0_method=f0_method, f0_up_key=f0_up_key)
soundfile.write(out_path, audio, sampling_rate)

2
web.py
View File

@@ -44,8 +44,6 @@ logger = logging.getLogger(__name__)
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)