1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-05 17:20:25 +08:00

optimize(uvr5): apply jit to spec_utils & fix flac save

also fix #85
This commit is contained in:
源文雨
2024-11-28 23:19:05 +09:00
parent 4582d4b49a
commit 5969314e8d
11 changed files with 104 additions and 581 deletions

View File

@@ -3,12 +3,11 @@ import logging
logger = logging.getLogger(__name__)
import librosa
import numpy as np
import torch
from tqdm import tqdm
from infer.lib.audio import downsample_audio, save_audio
from infer.lib.audio import load_audio, save_audio
cpu = torch.device("cpu")
@@ -201,29 +200,18 @@ class Predictor:
os.makedirs(vocal_root, exist_ok=True)
os.makedirs(others_root, exist_ok=True)
basename = os.path.basename(m)
mix, rate = librosa.load(m, mono=False, sr=44100)
mix, rate = load_audio(m, mono=False, sr=44100)
if mix.ndim == 1:
mix = np.asfortranarray([mix, mix])
mix = mix.T
sources = self.demix(mix.T)
opt = sources[0].T
if format in ["wav", "flac"]:
save_audio(
"%s/vocal_%s.%s" % (vocal_root, basename, format), mix - opt, rate
)
save_audio(
"%s/instrument_%s.%s" % (others_root, basename, format), opt, rate
)
else:
path_vocal = "%s/vocal_%s.wav" % (vocal_root, basename)
path_other = "%s/instrument_%s.wav" % (others_root, basename)
save_audio(path_vocal, opt, rate)
save_audio(path_other, opt, rate)
opt_path_vocal = path_vocal[:-4] + ".%s" % format
opt_path_other = path_other[:-4] + ".%s" % format
downsample_audio(path_vocal, opt_path_vocal, format)
downsample_audio(path_other, opt_path_other, format)
save_audio(
"%s/vocal_%s.%s" % (vocal_root, basename, format), mix - opt, rate, True, format=format,
)
save_audio(
"%s/instrument_%s.%s" % (others_root, basename, format), opt, rate, True, format=format,
)
class MDXNetDereverb:
def __init__(self, chunks, device):

View File

@@ -55,13 +55,17 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
done = 1
except Exception as e:
need_reformat = 1
print(f"Exception {e} occured. Will reformat")
logger.warning(f"Exception {e} occured. Will reformat")
if need_reformat == 1:
tmp_path = "%s/%s.reformatted.wav" % (
os.path.join(os.environ["TEMP"]),
os.path.basename(inp_path),
)
resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo")
try: # Remove the original file
os.remove(inp_path)
except Exception as e:
print(f"Failed to remove the original file: {e}")
inp_path = tmp_path
try:
if done == 0:

View File

@@ -5,7 +5,7 @@ logger = logging.getLogger(__name__)
import librosa
import numpy as np
from infer.lib.audio import downsample_audio, save_audio
from infer.lib.audio import save_audio
import torch
from infer.lib.uvr5_pack.lib_v5 import nets_123821KB as Nets
@@ -119,7 +119,7 @@ class AudioPre:
if ins_root is not None:
if self.data["high_end_process"].startswith("mirroring"):
input_high_end_ = spec_utils.mirroring(
self.data["high_end_process"], y_spec_m, input_high_end, self.mp
self.data["high_end_process"], y_spec_m, input_high_end, self.mp.param["pre_filter_start"]
)
wav_instrument = spec_utils.cmb_spectrogram_to_wave(
y_spec_m, self.mp, input_high_end_h, input_high_end_
@@ -131,23 +131,16 @@ class AudioPre:
head = "vocal_"
else:
head = "instrument_"
if format in ["wav", "flac"]:
save_audio(
os.path.join(
ins_root,
head + "{}_{}.{}".format(name, self.data["agg"], format),
),
wav_instrument,
self.mp.param["sr"],
)
else:
path = os.path.join(
ins_root, head + "{}_{}.wav".format(name, self.data["agg"])
)
save_audio(path, wav_instrument, self.mp.param["sr"])
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
downsample_audio(path, opt_format_path, format)
save_audio(
os.path.join(
ins_root,
head + "{}_{}.{}".format(name, self.data["agg"], format),
),
wav_instrument,
self.mp.param["sr"],
f32=True,
format=format
)
if vocal_root is not None:
if self.is_reverse:
head = "instrument_"
@@ -155,7 +148,7 @@ class AudioPre:
head = "vocal_"
if self.data["high_end_process"].startswith("mirroring"):
input_high_end_ = spec_utils.mirroring(
self.data["high_end_process"], v_spec_m, input_high_end, self.mp
self.data["high_end_process"], v_spec_m, input_high_end, self.mp.param["pre_filter_start"]
)
wav_vocals = spec_utils.cmb_spectrogram_to_wave(
v_spec_m, self.mp, input_high_end_h, input_high_end_
@@ -163,20 +156,13 @@ class AudioPre:
else:
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
logger.info("%s vocals done" % name)
if format in ["wav", "flac"]:
save_audio(
os.path.join(
vocal_root,
head + "{}_{}.{}".format(name, self.data["agg"], format),
),
wav_vocals,
self.mp.param["sr"],
)
else:
path = os.path.join(
vocal_root, head + "{}_{}.wav".format(name, self.data["agg"])
)
save_audio(path, wav_vocals, self.mp.param["sr"])
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
downsample_audio(path, opt_format_path, format)
save_audio(
os.path.join(
vocal_root,
head + "{}_{}.{}".format(name, self.data["agg"], format),
),
wav_vocals,
self.mp.param["sr"],
f32=True,
format=format
)

View File

@@ -251,25 +251,13 @@ class VC:
if "Success" in info:
try:
tgt_sr, audio_opt = opt
if format1 in ["wav", "flac"]:
save_audio(
"%s/%s.%s"
% (opt_root, os.path.basename(path), format1),
audio_opt,
tgt_sr,
)
else:
path = "%s/%s.%s" % (
opt_root,
os.path.basename(path),
format1,
)
with open(path, "wb") as outf:
wav2(
float_np_array_to_wav_buf(audio_opt, tgt_sr),
outf,
format1,
)
save_audio(
"%s/%s.%s"
% (opt_root, os.path.basename(path), format1),
audio_opt,
tgt_sr,
f32=True,
)
except:
info += traceback.format_exc()
infos.append("%s->%s" % (os.path.basename(path), info))