mirror of
https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git
synced 2026-06-05 01:10:22 +08:00
optimize(rvc.onnx): more modelize
This commit is contained in:
1
rvc/onnx/__init__.py
Normal file
1
rvc/onnx/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .infer import RVC
|
||||
@@ -38,9 +38,9 @@ class ContentVec(Model):
|
||||
super().__init__(vec_path, device)
|
||||
|
||||
def __call__(self, wav: np.ndarray[typing.Any, np.dtype]):
|
||||
return self.forward(wav)
|
||||
return self.__forward(wav)
|
||||
|
||||
def forward(self, wav: np.ndarray[typing.Any, np.dtype]):
|
||||
def __forward(self, wav: np.ndarray[typing.Any, np.dtype]):
|
||||
if wav.ndim == 2: # double channels
|
||||
wav = wav.mean(-1)
|
||||
assert wav.ndim == 1, wav.ndim
|
||||
@@ -67,21 +67,20 @@ class RVC(Model):
|
||||
def __init__(
|
||||
self,
|
||||
model_path: str | bytes | os.PathLike,
|
||||
sr=40000,
|
||||
hop_size=512,
|
||||
hop_len=512,
|
||||
vec_path: str | bytes | os.PathLike = "vec-768-layer-12.onnx",
|
||||
device: typing.Literal["cpu", "cuda", "dml"] = "cpu",
|
||||
):
|
||||
super().__init__(model_path, device)
|
||||
self.vec_model = ContentVec(vec_path, device)
|
||||
self.sampling_rate = sr
|
||||
self.hop_size = hop_size
|
||||
self.hop_len = hop_len
|
||||
|
||||
def inference(
|
||||
self,
|
||||
wav: np.ndarray[typing.Any, np.dtype],
|
||||
sr: int,
|
||||
sid: int,
|
||||
wav_sr: int,
|
||||
model_sr: int = 40000,
|
||||
sid: int = 0,
|
||||
f0_method="dio",
|
||||
f0_up_key=0,
|
||||
) -> np.ndarray[typing.Any, np.dtype[np.int16]]:
|
||||
@@ -91,17 +90,14 @@ class RVC(Model):
|
||||
f0_mel_max = 1127 * np.log(1 + f0_max / 700)
|
||||
f0_predictor = get_f0_predictor(
|
||||
f0_method,
|
||||
self.hop_size,
|
||||
self.sampling_rate,
|
||||
self.hop_len,
|
||||
model_sr,
|
||||
)
|
||||
org_length = len(wav)
|
||||
if org_length / sr > 50.0:
|
||||
raise RuntimeError("Reached Max Length")
|
||||
if org_length / wav_sr > 50.0:
|
||||
raise RuntimeError("wav max length exceeded")
|
||||
|
||||
wav16k = librosa.resample(wav, orig_sr=sr, target_sr=16000)
|
||||
wav16k = wav16k
|
||||
|
||||
hubert = self.vec_model(wav16k)
|
||||
hubert = self.vec_model(librosa.resample(wav, orig_sr=wav_sr, target_sr=16000))
|
||||
hubert = np.repeat(hubert, 2, axis=2).transpose(0, 2, 1).astype(np.float32)
|
||||
hubert_length = hubert.shape[1]
|
||||
|
||||
@@ -126,7 +122,9 @@ class RVC(Model):
|
||||
out_wav = self.__forward(
|
||||
hubert, hubert_length, pitch, pitchf, ds, rnd
|
||||
).squeeze()
|
||||
out_wav = np.pad(out_wav, (0, 2 * self.hop_size), "constant")
|
||||
|
||||
out_wav = np.pad(out_wav, (0, 2 * self.hop_len), "constant")
|
||||
|
||||
return out_wav[0:org_length]
|
||||
|
||||
def __forward(
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import soundfile
|
||||
import librosa
|
||||
|
||||
from rvc.onnx.infer import RVC
|
||||
from rvc.onnx import RVC
|
||||
|
||||
hop_size = 512
|
||||
sampling_rate = 40000 # 采样率
|
||||
@@ -14,7 +14,7 @@ wav_path = "123.wav" # 输入路径或ByteIO实例
|
||||
out_path = "out.wav" # 输出路径或ByteIO实例
|
||||
|
||||
model = RVC(
|
||||
model_path, vec_path=vec_path, sr=sampling_rate, hop_size=hop_size, device="cuda"
|
||||
model_path, vec_path=vec_path, sr=sampling_rate, hop_len=hop_size, device="cuda"
|
||||
)
|
||||
|
||||
wav, sr = librosa.load(wav_path, sr=sampling_rate)
|
||||
Reference in New Issue
Block a user