1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-06 17:50:25 +08:00

optimize: some training optimizations (#95)

* optimize(train&uvr5): remove soundfile (sf) & simplify AudioPre

* fix(audio): too many mallocs

* feat(audio): load_audio supports stereo

* fix(audio): float32 wav saving

* fix(train): missing ckpt var
This commit is contained in:
源文雨
2024-11-28 03:20:14 +09:00
committed by GitHub
parent f4644ec1ec
commit a8783c6639
19 changed files with 163 additions and 433 deletions

View File

@@ -18,7 +18,6 @@ from time import time as ttime
# import pyworld
import librosa
import numpy as np
import soundfile as sf
import torch.nn.functional as F
from fairseq import checkpoint_utils
@@ -33,6 +32,7 @@ from scipy.io import wavfile
# from models import SynthesizerTrn256NSFsim as SynthesizerTrn256#hifigan_nsf
# from models import SynthesizerTrn256NSFsimFlow as SynthesizerTrn256#hifigan_nsf
from infer.lib.audio import load_audio
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_path = r"E:\codes\py39\vits_vc_gpu_train\assets\hubert\hubert_base.pt" #
@@ -132,7 +132,7 @@ for idx, name in enumerate(
): ##
wav_path = "todo-songs/%s" % name #
f0_up_key = -2 #
audio, sampling_rate = sf.read(wav_path)
audio, sampling_rate = load_audio(wav_path)
if len(audio.shape) > 1:
audio = librosa.to_mono(audio.transpose(1, 0))
if sampling_rate != 16000:

View File

@@ -1,8 +1,9 @@
import soundfile
import librosa
from rvc.onnx import RVC
from infer.lib.audio import save_audio
hop_size = 512
sampling_rate = 40000 # 采样率
f0_up_key = 0 # 升降调
@@ -19,4 +20,4 @@ wav, sr = librosa.load(wav_path, sr=sampling_rate)
audio = model.infer(wav, sr, sampling_rate, sid, f0_method, f0_up_key)
soundfile.write(out_path, audio, sampling_rate)
save_audio(out_path, audio, sampling_rate)