mirror of
https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git
synced 2026-06-05 01:10:22 +08:00
optimize(onnx): move infer into rvc.onnx
This commit is contained in:
17
.gitignore
vendored
17
.gitignore
vendored
@@ -4,25 +4,10 @@ __pycache__
|
||||
*.pyd
|
||||
.venv
|
||||
/opt
|
||||
tools/aria2c/
|
||||
tools/flag.txt
|
||||
|
||||
# Imported from huggingface.co/lj1995/VoiceConversionWebUI
|
||||
/pretrained
|
||||
/pretrained_v2
|
||||
/uvr5_weights
|
||||
hubert_base.pt
|
||||
rmvpe.onnx
|
||||
rmvpe.pt
|
||||
|
||||
# Generated by RVC
|
||||
/logs
|
||||
/weights
|
||||
|
||||
# To set a Python version for the project
|
||||
.tool-versions
|
||||
|
||||
/runtime
|
||||
/assets/weights/*
|
||||
ffmpeg.*
|
||||
ffprobe.*
|
||||
ffprobe.*
|
||||
|
||||
@@ -211,27 +211,6 @@ class Config:
|
||||
x_max = 32
|
||||
if self.dml:
|
||||
logger.info("Use DirectML instead")
|
||||
if (
|
||||
os.path.exists(
|
||||
"runtime\Lib\site-packages\onnxruntime\capi\DirectML.dll"
|
||||
)
|
||||
== False
|
||||
):
|
||||
try:
|
||||
os.rename(
|
||||
"runtime\Lib\site-packages\onnxruntime",
|
||||
"runtime\Lib\site-packages\onnxruntime-cuda",
|
||||
)
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
os.rename(
|
||||
"runtime\Lib\site-packages\onnxruntime-dml",
|
||||
"runtime\Lib\site-packages\onnxruntime",
|
||||
)
|
||||
except:
|
||||
pass
|
||||
# if self.device != "cpu":
|
||||
import torch_directml
|
||||
|
||||
self.device = torch_directml.device(torch_directml.default_device())
|
||||
@@ -239,26 +218,6 @@ class Config:
|
||||
else:
|
||||
if self.instead:
|
||||
logger.info(f"Use {self.instead} instead")
|
||||
if (
|
||||
os.path.exists(
|
||||
"runtime\Lib\site-packages\onnxruntime\capi\onnxruntime_providers_cuda.dll"
|
||||
)
|
||||
== False
|
||||
):
|
||||
try:
|
||||
os.rename(
|
||||
"runtime\Lib\site-packages\onnxruntime",
|
||||
"runtime\Lib\site-packages\onnxruntime-dml",
|
||||
)
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
os.rename(
|
||||
"runtime\Lib\site-packages\onnxruntime-cuda",
|
||||
"runtime\Lib\site-packages\onnxruntime",
|
||||
)
|
||||
except:
|
||||
pass
|
||||
logger.info(
|
||||
"Half-precision floating-point: %s, device: %s"
|
||||
% (self.is_half, self.device)
|
||||
|
||||
@@ -36,15 +36,6 @@ for filename in glob.iglob("**/*.py", recursive=True):
|
||||
print(filename, len(i18n_strings))
|
||||
strings.extend(i18n_strings)
|
||||
code_keys = set(strings)
|
||||
"""
|
||||
n_i18n.py
|
||||
gui_v1.py 26
|
||||
app.py 16
|
||||
infer-web.py 147
|
||||
scan_i18n.py 0
|
||||
i18n.py 0
|
||||
lib/train/process_ckpt.py 1
|
||||
"""
|
||||
print()
|
||||
print("Total unique:", len(code_keys))
|
||||
|
||||
|
||||
3
rvc/onnx/f0predictor/__init__.py
Normal file
3
rvc/onnx/f0predictor/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .dio import DioF0Predictor
|
||||
from .harvest import HarvestF0Predictor
|
||||
from .pm import PMF0Predictor
|
||||
@@ -1,7 +1,7 @@
|
||||
import numpy as np
|
||||
import pyworld
|
||||
|
||||
from infer.lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
|
||||
from .f0 import F0Predictor
|
||||
|
||||
|
||||
class DioF0Predictor(F0Predictor):
|
||||
@@ -1,7 +1,7 @@
|
||||
import numpy as np
|
||||
import pyworld
|
||||
|
||||
from infer.lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
|
||||
from .f0 import F0Predictor
|
||||
|
||||
|
||||
class HarvestF0Predictor(F0Predictor):
|
||||
@@ -1,7 +1,7 @@
|
||||
import numpy as np
|
||||
import parselmouth
|
||||
|
||||
from infer.lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
|
||||
from .f0 import F0Predictor
|
||||
|
||||
|
||||
class PMF0Predictor(F0Predictor):
|
||||
@@ -1,16 +1,13 @@
|
||||
import librosa
|
||||
import numpy as np
|
||||
import onnxruntime
|
||||
import soundfile
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from onnx.f0predictor import PMF0Predictor
|
||||
from onnx.f0predictor import HarvestF0Predictor
|
||||
from onnx.f0predictor import DioF0Predictor
|
||||
|
||||
class ContentVec:
|
||||
def __init__(self, vec_path="pretrained/vec-768-layer-12.onnx", device=None):
|
||||
logger.info("Load model(s) from {}".format(vec_path))
|
||||
def __init__(self, vec_path: str, device=None):
|
||||
if device == "cpu" or device is None:
|
||||
providers = ["CPUExecutionProvider"]
|
||||
elif device == "cuda":
|
||||
@@ -25,52 +22,33 @@ class ContentVec:
|
||||
return self.forward(wav)
|
||||
|
||||
def forward(self, wav):
|
||||
feats = wav
|
||||
if feats.ndim == 2: # double channels
|
||||
feats = feats.mean(-1)
|
||||
assert feats.ndim == 1, feats.ndim
|
||||
feats = np.expand_dims(np.expand_dims(feats, 0), 0)
|
||||
onnx_input = {self.model.get_inputs()[0].name: feats}
|
||||
if wav.ndim == 2: # double channels
|
||||
wav = wav.mean(-1)
|
||||
assert wav.ndim == 1, wav.ndim
|
||||
wav = np.expand_dims(np.expand_dims(wav, 0), 0)
|
||||
onnx_input = {self.model.get_inputs()[0].name: wav}
|
||||
logits = self.model.run(None, onnx_input)[0]
|
||||
return logits.transpose(0, 2, 1)
|
||||
|
||||
predicters = {
|
||||
"pm": PMF0Predictor,
|
||||
"harvest": HarvestF0Predictor,
|
||||
"dio": DioF0Predictor,
|
||||
}
|
||||
|
||||
def get_f0_predictor(f0_predictor, hop_length, sampling_rate, **kargs):
|
||||
if f0_predictor == "pm":
|
||||
from lib.infer_pack.modules.F0Predictor.PMF0Predictor import PMF0Predictor
|
||||
|
||||
f0_predictor_object = PMF0Predictor(
|
||||
hop_length=hop_length, sampling_rate=sampling_rate
|
||||
)
|
||||
elif f0_predictor == "harvest":
|
||||
from lib.infer_pack.modules.F0Predictor.HarvestF0Predictor import (
|
||||
HarvestF0Predictor,
|
||||
)
|
||||
|
||||
f0_predictor_object = HarvestF0Predictor(
|
||||
hop_length=hop_length, sampling_rate=sampling_rate
|
||||
)
|
||||
elif f0_predictor == "dio":
|
||||
from lib.infer_pack.modules.F0Predictor.DioF0Predictor import DioF0Predictor
|
||||
|
||||
f0_predictor_object = DioF0Predictor(
|
||||
hop_length=hop_length, sampling_rate=sampling_rate
|
||||
)
|
||||
else:
|
||||
raise Exception("Unknown f0 predictor")
|
||||
return f0_predictor_object
|
||||
def get_f0_predictor(f0_method, hop_length, sampling_rate):
|
||||
return predicters[f0_method](hop_length=hop_length, sampling_rate=sampling_rate)
|
||||
|
||||
|
||||
class OnnxRVC:
|
||||
class RVC:
|
||||
def __init__(
|
||||
self,
|
||||
model_path,
|
||||
sr=40000,
|
||||
hop_size=512,
|
||||
vec_path="vec-768-layer-12",
|
||||
vec_path="vec-768-layer-12.onnx",
|
||||
device="cpu",
|
||||
):
|
||||
vec_path = f"pretrained/{vec_path}.onnx"
|
||||
self.vec_model = ContentVec(vec_path, device)
|
||||
if device == "cpu" or device is None:
|
||||
providers = ["CPUExecutionProvider"]
|
||||
@@ -97,12 +75,11 @@ class OnnxRVC:
|
||||
|
||||
def inference(
|
||||
self,
|
||||
raw_path,
|
||||
wav,
|
||||
sr,
|
||||
sid,
|
||||
f0_method="dio",
|
||||
f0_up_key=0,
|
||||
pad_time=0.5,
|
||||
cr_threshold=0.02,
|
||||
):
|
||||
f0_min = 50
|
||||
f0_max = 1100
|
||||
@@ -110,16 +87,14 @@ class OnnxRVC:
|
||||
f0_mel_max = 1127 * np.log(1 + f0_max / 700)
|
||||
f0_predictor = get_f0_predictor(
|
||||
f0_method,
|
||||
hop_length=self.hop_size,
|
||||
sampling_rate=self.sampling_rate,
|
||||
threshold=cr_threshold,
|
||||
self.hop_size,
|
||||
self.sampling_rate,
|
||||
)
|
||||
wav, sr = librosa.load(raw_path, sr=self.sampling_rate)
|
||||
org_length = len(wav)
|
||||
if org_length / sr > 50.0:
|
||||
raise RuntimeError("Reached Max Length")
|
||||
|
||||
wav16k = librosa.resample(wav, orig_sr=self.sampling_rate, target_sr=16000)
|
||||
wav16k = librosa.resample(wav, orig_sr=sr, target_sr=16000)
|
||||
wav16k = wav16k
|
||||
|
||||
hubert = self.vec_model(wav16k)
|
||||
@@ -1,23 +1,24 @@
|
||||
import soundfile
|
||||
import librosa
|
||||
|
||||
from infer.lib.infer_pack.onnx_inference import OnnxRVC
|
||||
from rvc.onnx.infer import RVC
|
||||
|
||||
hop_size = 512
|
||||
sampling_rate = 40000 # 采样率
|
||||
f0_up_key = 0 # 升降调
|
||||
sid = 0 # 角色ID
|
||||
f0_method = "dio" # F0提取算法
|
||||
model_path = "ShirohaRVC.onnx" # 模型的完整路径
|
||||
vec_name = (
|
||||
"vec-256-layer-9" # 内部自动补齐为 f"pretrained/{vec_name}.onnx" 需要onnx的vec模型
|
||||
)
|
||||
model_path = "exported_model.onnx" # 模型的完整路径
|
||||
vec_path = "vec-256-layer-9.onnx" # 需要onnx的vec模型
|
||||
wav_path = "123.wav" # 输入路径或ByteIO实例
|
||||
out_path = "out.wav" # 输出路径或ByteIO实例
|
||||
|
||||
model = OnnxRVC(
|
||||
model_path, vec_path=vec_name, sr=sampling_rate, hop_size=hop_size, device="cuda"
|
||||
model = RVC(
|
||||
model_path, vec_path=vec_path, sr=sampling_rate, hop_size=hop_size, device="cuda"
|
||||
)
|
||||
|
||||
audio = model.inference(wav_path, sid, f0_method=f0_method, f0_up_key=f0_up_key)
|
||||
wav, sr = librosa.load(wav_path, sr=sampling_rate)
|
||||
|
||||
audio = model.inference(wav, sr, sid, f0_method=f0_method, f0_up_key=f0_up_key)
|
||||
|
||||
soundfile.write(out_path, audio, sampling_rate)
|
||||
|
||||
2
web.py
2
web.py
@@ -44,8 +44,6 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
tmp = os.path.join(now_dir, "TEMP")
|
||||
shutil.rmtree(tmp, ignore_errors=True)
|
||||
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
|
||||
shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
|
||||
os.makedirs(tmp, exist_ok=True)
|
||||
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
|
||||
os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)
|
||||
|
||||
Reference in New Issue
Block a user