1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-09 20:40:48 +08:00

optimize: some training optimizations (#95)

* optimize(train&uvr5): rm sf & simp. AudioPre

* fix(audio): too many mallocs

* feat(audio): load_audio support stereo

* fix(audio): float32 wav saving

* fix(train): missing ckpt var
This commit is contained in:
源文雨
2024-11-28 03:20:14 +09:00
committed by GitHub
parent f4644ec1ec
commit a8783c6639
19 changed files with 163 additions and 433 deletions

View File

@@ -5,12 +5,10 @@ logger = logging.getLogger(__name__)
import librosa
import numpy as np
import soundfile as sf
import torch
from tqdm import tqdm
import av
from infer.lib.audio import downsample_audio
from infer.lib.audio import downsample_audio, save_audio
cpu = torch.device("cpu")
@@ -210,15 +208,13 @@ class Predictor:
sources = self.demix(mix.T)
opt = sources[0].T
if format in ["wav", "flac"]:
sf.write(
"%s/%s_main_vocal.%s" % (vocal_root, basename, format), mix - opt, rate
)
sf.write("%s/%s_others.%s" % (others_root, basename, format), opt, rate)
save_audio("%s/vocal_%s.%s" % (vocal_root, basename, format), mix - opt, rate)
save_audio("%s/instrument_%s.%s" % (others_root, basename, format), opt, rate)
else:
path_vocal = "%s/%s_main_vocal.wav" % (vocal_root, basename)
path_other = "%s/%s_others.wav" % (others_root, basename)
sf.write(path_vocal, mix - opt, rate)
sf.write(path_other, opt, rate)
path_vocal = "%s/vocal_%s.wav" % (vocal_root, basename)
path_other = "%s/instrument_%s.wav" % (others_root, basename)
save_audio(path_vocal, opt, rate)
save_audio(path_other, opt, rate)
opt_path_vocal = path_vocal[:-4] + ".%s" % format
opt_path_other = path_other[:-4] + ".%s" % format
downsample_audio(path_vocal, opt_path_vocal, format)