mirror of
https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git
synced 2026-06-08 20:10:44 +08:00
optimize(uvr5): remove redundant files
This commit is contained in:
@@ -5,8 +5,6 @@ import os
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def crop_center(h1, h2):
|
||||
@@ -520,153 +518,3 @@ def istft(spec, hl):
|
||||
wave_left = librosa.istft(spec_left, hop_length=hl)
|
||||
wave_right = librosa.istft(spec_right, hop_length=hl)
|
||||
wave = np.asfortranarray([wave_left, wave_right])
|
||||
|
||||
|
||||
def _parse_cli_args():
    """Parse the command line and validate the number of input files.

    Returns:
        The ``argparse.Namespace`` with ``algorithm``, ``model_params``,
        ``output_name``, ``vocals_only`` and ``input`` attributes.

    Raises:
        ValueError: if an ``invert*`` algorithm is not given exactly two
            inputs, or any other algorithm is given fewer than two.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--algorithm",
        "-a",
        type=str,
        choices=["invert", "invert_p", "min_mag", "max_mag", "deep", "align"],
        default="min_mag",
    )
    parser.add_argument(
        "--model_params",
        "-m",
        type=str,
        default=os.path.join("modelparams", "1band_sr44100_hl512.json"),
    )
    parser.add_argument("--output_name", "-o", type=str, default="output")
    parser.add_argument("--vocals_only", "-v", action="store_true")
    parser.add_argument("input", nargs="+")
    args = parser.parse_args()

    is_invert = args.algorithm.startswith("invert")
    if is_invert and len(args.input) != 2:
        raise ValueError("There should be two input files.")
    if not is_invert and len(args.input) < 2:
        raise ValueError("There must be at least two input files.")
    return args


def _load_combined_spectrogram(path, mp):
    """Load one audio file and return its combined multi-band spectrogram.

    The highest-numbered band is loaded from disk at that band's sample
    rate; every lower band is resampled from the band directly above it.
    Each band is converted with ``wave_to_spectrogram`` and the per-band
    results are merged with ``combine_spectrograms``.

    Args:
        path: Path of the audio file to load.
        mp: Model parameter object exposing the ``param`` mapping.

    Returns:
        Whatever ``combine_spectrograms`` returns for the per-band dict.
    """
    bands = mp.param["band"]
    n_bands = len(bands)
    wave, spec = {}, {}

    for d in range(n_bands, 0, -1):
        bp = bands[d]

        if d == n_bands:  # high-end band
            # Keyword arguments: recent librosa releases reject positional
            # ``sr`` / ``mono``; keywords are accepted by older ones too.
            wave[d], _ = librosa.load(
                path,
                sr=bp["sr"],
                mono=False,
                dtype=np.float32,
                res_type=bp["res_type"],
            )
            if wave[d].ndim == 1:  # mono to stereo
                wave[d] = np.array([wave[d], wave[d]])
        else:  # lower bands
            wave[d] = librosa.resample(
                wave[d + 1],
                orig_sr=bands[d + 1]["sr"],
                target_sr=bp["sr"],
                res_type=bp["res_type"],
            )

        spec[d] = wave_to_spectrogram(
            wave[d],
            bp["hl"],
            bp["n_fft"],
            mp.param["mid_side"],
            mp.param["mid_side_b2"],
            mp.param["reverse"],
        )

    return combine_spectrograms(spec, mp)


def _run_cli():
    """Command-line entry point: combine or invert spectrograms of inputs.

    Depending on ``--algorithm`` this writes ``<output>.wav`` ("deep"),
    ``<output>_v.wav`` plus optional ``_X`` / ``_y`` images and waves
    (``invert*``), or ``ensembled/<output>.wav`` (all other algorithms),
    and finally runs ``lib/align_tracks.py`` for "align".
    """
    import subprocess
    import sys

    import cv2
    from model_param_init import ModelParameters

    args = _parse_cli_args()
    mp = ModelParameters(args.model_params)
    sr = mp.param["sr"]
    out = args.output_name

    specs = {
        i: _load_combined_spectrogram(path, mp)
        for i, path in enumerate(args.input)
    }

    if args.algorithm == "deep":
        # Crop both inputs to their common length on axis 2 so the
        # element-wise comparison below cannot fail on a shape mismatch.
        ln = min(specs[0].shape[2], specs[1].shape[2])
        first = specs[0][:, :, :ln]
        second = specs[1][:, :, :ln]
        # NOTE(review): the previous code compared against ``spec[1]`` (a
        # single band of the last input) instead of ``specs[1]``; this
        # looks like a typo -- confirm against the intended algorithm.
        d_spec = np.where(np.abs(first) <= np.abs(second), first, second)
        v_spec = d_spec - second
        sf.write(
            "{}.wav".format(out),
            cmb_spectrogram_to_wave(v_spec, mp),
            sr,
        )
        return

    if args.algorithm.startswith("invert"):
        ln = min(specs[0].shape[2], specs[1].shape[2])
        specs[0] = specs[0][:, :, :ln]
        specs[1] = specs[1][:, :, :ln]

        if args.algorithm == "invert_p":
            X_mag = np.abs(specs[0])
            y_mag = np.abs(specs[1])
            max_mag = np.where(X_mag >= y_mag, X_mag, y_mag)
            v_spec = specs[1] - max_mag * np.exp(1.0j * np.angle(specs[0]))
        else:
            specs[1] = reduce_vocal_aggressively(specs[0], specs[1], 0.2)
            v_spec = specs[0] - specs[1]

        if not args.vocals_only:
            # Also dump magnitude images and waves of both inputs.
            for suffix, s in (("X", specs[0]), ("y", specs[1]), ("v", v_spec)):
                cv2.imwrite(
                    "{}_{}.png".format(out, suffix),
                    spectrogram_to_image(np.abs(s)),
                )
            for suffix, s in (("X", specs[0]), ("y", specs[1])):
                sf.write(
                    "{}_{}.wav".format(out, suffix),
                    cmb_spectrogram_to_wave(s, mp),
                    sr,
                )

        sf.write(
            "{}_v.wav".format(out),
            cmb_spectrogram_to_wave(v_spec, mp),
            sr,
        )
    else:
        # The output directory may not exist yet; sf.write does not create it.
        os.makedirs("ensembled", exist_ok=True)
        sf.write(
            os.path.join("ensembled", "{}.wav".format(out)),
            cmb_spectrogram_to_wave(ensembling(args.algorithm, specs), mp),
            sr,
        )

    if args.algorithm == "align":
        pairs = [(args.input[0], args.input[1])]
        for file1, file2 in tqdm(pairs, desc="Performing Alignment..."):
            # Argument list instead of a shell string: file names with
            # spaces or shell metacharacters are passed through verbatim.
            subprocess.run(
                [sys.executable, "lib/align_tracks.py", file1, file2],
                check=False,
            )


if __name__ == "__main__":
    _run_cli()
|
||||
|
||||
Reference in New Issue
Block a user