mirror of
https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git
synced 2026-06-05 17:20:25 +08:00
@@ -3,12 +3,11 @@ import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
from infer.lib.audio import downsample_audio, save_audio
|
||||
from infer.lib.audio import load_audio, save_audio
|
||||
|
||||
cpu = torch.device("cpu")
|
||||
|
||||
@@ -201,29 +200,18 @@ class Predictor:
|
||||
os.makedirs(vocal_root, exist_ok=True)
|
||||
os.makedirs(others_root, exist_ok=True)
|
||||
basename = os.path.basename(m)
|
||||
mix, rate = librosa.load(m, mono=False, sr=44100)
|
||||
mix, rate = load_audio(m, mono=False, sr=44100)
|
||||
if mix.ndim == 1:
|
||||
mix = np.asfortranarray([mix, mix])
|
||||
mix = mix.T
|
||||
sources = self.demix(mix.T)
|
||||
opt = sources[0].T
|
||||
if format in ["wav", "flac"]:
|
||||
save_audio(
|
||||
"%s/vocal_%s.%s" % (vocal_root, basename, format), mix - opt, rate
|
||||
)
|
||||
save_audio(
|
||||
"%s/instrument_%s.%s" % (others_root, basename, format), opt, rate
|
||||
)
|
||||
else:
|
||||
path_vocal = "%s/vocal_%s.wav" % (vocal_root, basename)
|
||||
path_other = "%s/instrument_%s.wav" % (others_root, basename)
|
||||
save_audio(path_vocal, opt, rate)
|
||||
save_audio(path_other, opt, rate)
|
||||
opt_path_vocal = path_vocal[:-4] + ".%s" % format
|
||||
opt_path_other = path_other[:-4] + ".%s" % format
|
||||
downsample_audio(path_vocal, opt_path_vocal, format)
|
||||
downsample_audio(path_other, opt_path_other, format)
|
||||
|
||||
save_audio(
|
||||
"%s/vocal_%s.%s" % (vocal_root, basename, format), mix - opt, rate, True, format=format,
|
||||
)
|
||||
save_audio(
|
||||
"%s/instrument_%s.%s" % (others_root, basename, format), opt, rate, True, format=format,
|
||||
)
|
||||
|
||||
class MDXNetDereverb:
|
||||
def __init__(self, chunks, device):
|
||||
|
||||
@@ -55,13 +55,17 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
|
||||
done = 1
|
||||
except Exception as e:
|
||||
need_reformat = 1
|
||||
print(f"Exception {e} occured. Will reformat")
|
||||
logger.warning(f"Exception {e} occured. Will reformat")
|
||||
if need_reformat == 1:
|
||||
tmp_path = "%s/%s.reformatted.wav" % (
|
||||
os.path.join(os.environ["TEMP"]),
|
||||
os.path.basename(inp_path),
|
||||
)
|
||||
resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo")
|
||||
try: # Remove the original file
|
||||
os.remove(inp_path)
|
||||
except Exception as e:
|
||||
print(f"Failed to remove the original file: {e}")
|
||||
inp_path = tmp_path
|
||||
try:
|
||||
if done == 0:
|
||||
|
||||
@@ -5,7 +5,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
from infer.lib.audio import downsample_audio, save_audio
|
||||
from infer.lib.audio import save_audio
|
||||
import torch
|
||||
|
||||
from infer.lib.uvr5_pack.lib_v5 import nets_123821KB as Nets
|
||||
@@ -119,7 +119,7 @@ class AudioPre:
|
||||
if ins_root is not None:
|
||||
if self.data["high_end_process"].startswith("mirroring"):
|
||||
input_high_end_ = spec_utils.mirroring(
|
||||
self.data["high_end_process"], y_spec_m, input_high_end, self.mp
|
||||
self.data["high_end_process"], y_spec_m, input_high_end, self.mp.param["pre_filter_start"]
|
||||
)
|
||||
wav_instrument = spec_utils.cmb_spectrogram_to_wave(
|
||||
y_spec_m, self.mp, input_high_end_h, input_high_end_
|
||||
@@ -131,23 +131,16 @@ class AudioPre:
|
||||
head = "vocal_"
|
||||
else:
|
||||
head = "instrument_"
|
||||
if format in ["wav", "flac"]:
|
||||
save_audio(
|
||||
os.path.join(
|
||||
ins_root,
|
||||
head + "{}_{}.{}".format(name, self.data["agg"], format),
|
||||
),
|
||||
wav_instrument,
|
||||
self.mp.param["sr"],
|
||||
)
|
||||
else:
|
||||
path = os.path.join(
|
||||
ins_root, head + "{}_{}.wav".format(name, self.data["agg"])
|
||||
)
|
||||
save_audio(path, wav_instrument, self.mp.param["sr"])
|
||||
if os.path.exists(path):
|
||||
opt_format_path = path[:-4] + ".%s" % format
|
||||
downsample_audio(path, opt_format_path, format)
|
||||
save_audio(
|
||||
os.path.join(
|
||||
ins_root,
|
||||
head + "{}_{}.{}".format(name, self.data["agg"], format),
|
||||
),
|
||||
wav_instrument,
|
||||
self.mp.param["sr"],
|
||||
f32=True,
|
||||
format=format
|
||||
)
|
||||
if vocal_root is not None:
|
||||
if self.is_reverse:
|
||||
head = "instrument_"
|
||||
@@ -155,7 +148,7 @@ class AudioPre:
|
||||
head = "vocal_"
|
||||
if self.data["high_end_process"].startswith("mirroring"):
|
||||
input_high_end_ = spec_utils.mirroring(
|
||||
self.data["high_end_process"], v_spec_m, input_high_end, self.mp
|
||||
self.data["high_end_process"], v_spec_m, input_high_end, self.mp.param["pre_filter_start"]
|
||||
)
|
||||
wav_vocals = spec_utils.cmb_spectrogram_to_wave(
|
||||
v_spec_m, self.mp, input_high_end_h, input_high_end_
|
||||
@@ -163,20 +156,13 @@ class AudioPre:
|
||||
else:
|
||||
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
|
||||
logger.info("%s vocals done" % name)
|
||||
if format in ["wav", "flac"]:
|
||||
save_audio(
|
||||
os.path.join(
|
||||
vocal_root,
|
||||
head + "{}_{}.{}".format(name, self.data["agg"], format),
|
||||
),
|
||||
wav_vocals,
|
||||
self.mp.param["sr"],
|
||||
)
|
||||
else:
|
||||
path = os.path.join(
|
||||
vocal_root, head + "{}_{}.wav".format(name, self.data["agg"])
|
||||
)
|
||||
save_audio(path, wav_vocals, self.mp.param["sr"])
|
||||
if os.path.exists(path):
|
||||
opt_format_path = path[:-4] + ".%s" % format
|
||||
downsample_audio(path, opt_format_path, format)
|
||||
save_audio(
|
||||
os.path.join(
|
||||
vocal_root,
|
||||
head + "{}_{}.{}".format(name, self.data["agg"], format),
|
||||
),
|
||||
wav_vocals,
|
||||
self.mp.param["sr"],
|
||||
f32=True,
|
||||
format=format
|
||||
)
|
||||
|
||||
@@ -251,25 +251,13 @@ class VC:
|
||||
if "Success" in info:
|
||||
try:
|
||||
tgt_sr, audio_opt = opt
|
||||
if format1 in ["wav", "flac"]:
|
||||
save_audio(
|
||||
"%s/%s.%s"
|
||||
% (opt_root, os.path.basename(path), format1),
|
||||
audio_opt,
|
||||
tgt_sr,
|
||||
)
|
||||
else:
|
||||
path = "%s/%s.%s" % (
|
||||
opt_root,
|
||||
os.path.basename(path),
|
||||
format1,
|
||||
)
|
||||
with open(path, "wb") as outf:
|
||||
wav2(
|
||||
float_np_array_to_wav_buf(audio_opt, tgt_sr),
|
||||
outf,
|
||||
format1,
|
||||
)
|
||||
save_audio(
|
||||
"%s/%s.%s"
|
||||
% (opt_root, os.path.basename(path), format1),
|
||||
audio_opt,
|
||||
tgt_sr,
|
||||
f32=True,
|
||||
)
|
||||
except:
|
||||
info += traceback.format_exc()
|
||||
infos.append("%s->%s" % (os.path.basename(path), info))
|
||||
|
||||
Reference in New Issue
Block a user