mirror of
https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git
synced 2026-06-06 17:50:25 +08:00
optimize: some training optimizations (#95)
* optimize(train&uvr5): rm sf & simp. AudioPre
* fix(audio): too many mallocs
* feat(audio): load_audio support stereo
* fix(audio): float32 wav saving
* fix(train): missing ckpt var
This commit is contained in:
@@ -18,7 +18,6 @@ from time import time as ttime
|
||||
# import pyworld
|
||||
import librosa
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch.nn.functional as F
|
||||
from fairseq import checkpoint_utils
|
||||
|
||||
@@ -33,6 +32,7 @@ from scipy.io import wavfile
|
||||
# from models import SynthesizerTrn256NSFsim as SynthesizerTrn256#hifigan_nsf
|
||||
# from models import SynthesizerTrn256NSFsimFlow as SynthesizerTrn256#hifigan_nsf
|
||||
|
||||
from infer.lib.audio import load_audio
|
||||
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
model_path = r"E:\codes\py39\vits_vc_gpu_train\assets\hubert\hubert_base.pt" #
|
||||
@@ -132,7 +132,7 @@ for idx, name in enumerate(
|
||||
): ##
|
||||
wav_path = "todo-songs/%s" % name #
|
||||
f0_up_key = -2 #
|
||||
audio, sampling_rate = sf.read(wav_path)
|
||||
audio, sampling_rate = load_audio(wav_path)
|
||||
if len(audio.shape) > 1:
|
||||
audio = librosa.to_mono(audio.transpose(1, 0))
|
||||
if sampling_rate != 16000:
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import soundfile
|
||||
import librosa
|
||||
|
||||
from rvc.onnx import RVC
|
||||
|
||||
from infer.lib.audio import save_audio
|
||||
|
||||
hop_size = 512
|
||||
sampling_rate = 40000 # 采样率
|
||||
f0_up_key = 0 # 升降调
|
||||
@@ -19,4 +20,4 @@ wav, sr = librosa.load(wav_path, sr=sampling_rate)
|
||||
|
||||
audio = model.infer(wav, sr, sampling_rate, sid, f0_method, f0_up_key)
|
||||
|
||||
soundfile.write(out_path, audio, sampling_rate)
|
||||
save_audio(out_path, audio, sampling_rate)
|
||||
Reference in New Issue
Block a user