optimize(uvr5): remove redundant files

2026-06-08 20:10:44 +08:00 · 2024-06-06 21:34:45 +09:00
parent 53e596954c
commit 6f90ce3046
12 changed files with 139 additions and 1174 deletions
--- a/infer/lib/uvr5_pack/lib_v5/spec_utils.py
+++ b/infer/lib/uvr5_pack/lib_v5/spec_utils.py
@@ -5,8 +5,6 @@ import os

 import librosa
 import numpy as np
-import soundfile as sf
-from tqdm import tqdm


 def crop_center(h1, h2):
@@ -520,153 +518,3 @@ def istft(spec, hl):
    wave_left = librosa.istft(spec_left, hop_length=hl)
    wave_right = librosa.istft(spec_right, hop_length=hl)
    wave = np.asfortranarray([wave_left, wave_right])
-
-
-if __name__ == "__main__":
-    import argparse
-    import sys
-    import time
-
-    import cv2
-    from model_param_init import ModelParameters
-
-    p = argparse.ArgumentParser()
-    p.add_argument(
-        "--algorithm",
-        "-a",
-        type=str,
-        choices=["invert", "invert_p", "min_mag", "max_mag", "deep", "align"],
-        default="min_mag",
-    )
-    p.add_argument(
-        "--model_params",
-        "-m",
-        type=str,
-        default=os.path.join("modelparams", "1band_sr44100_hl512.json"),
-    )
-    p.add_argument("--output_name", "-o", type=str, default="output")
-    p.add_argument("--vocals_only", "-v", action="store_true")
-    p.add_argument("input", nargs="+")
-    args = p.parse_args()
-
-    start_time = time.time()
-
-    if args.algorithm.startswith("invert") and len(args.input) != 2:
-        raise ValueError("There should be two input files.")
-
-    if not args.algorithm.startswith("invert") and len(args.input) < 2:
-        raise ValueError("There must be at least two input files.")
-
-    wave, specs = {}, {}
-    mp = ModelParameters(args.model_params)
-
-    for i in range(len(args.input)):
-        spec = {}
-
-        for d in range(len(mp.param["band"]), 0, -1):
-            bp = mp.param["band"][d]
-
-            if d == len(mp.param["band"]):  # high-end band
-                wave[d], _ = librosa.load(
-                    args.input[i],
-                    bp["sr"],
-                    False,
-                    dtype=np.float32,
-                    res_type=bp["res_type"],
-                )
-
-                if len(wave[d].shape) == 1:  # mono to stereo
-                    wave[d] = np.array([wave[d], wave[d]])
-            else:  # lower bands
-                wave[d] = librosa.resample(
-                    wave[d + 1],
-                    mp.param["band"][d + 1]["sr"],
-                    bp["sr"],
-                    res_type=bp["res_type"],
-                )
-
-            spec[d] = wave_to_spectrogram(
-                wave[d],
-                bp["hl"],
-                bp["n_fft"],
-                mp.param["mid_side"],
-                mp.param["mid_side_b2"],
-                mp.param["reverse"],
-            )
-
-        specs[i] = combine_spectrograms(spec, mp)
-
-    del wave
-
-    if args.algorithm == "deep":
-        d_spec = np.where(np.abs(specs[0]) <= np.abs(spec[1]), specs[0], spec[1])
-        v_spec = d_spec - specs[1]
-        sf.write(
-            os.path.join("{}.wav".format(args.output_name)),
-            cmb_spectrogram_to_wave(v_spec, mp),
-            mp.param["sr"],
-        )
-
-    if args.algorithm.startswith("invert"):
-        ln = min([specs[0].shape[2], specs[1].shape[2]])
-        specs[0] = specs[0][:, :, :ln]
-        specs[1] = specs[1][:, :, :ln]
-
-        if "invert_p" == args.algorithm:
-            X_mag = np.abs(specs[0])
-            y_mag = np.abs(specs[1])
-            max_mag = np.where(X_mag >= y_mag, X_mag, y_mag)
-            v_spec = specs[1] - max_mag * np.exp(1.0j * np.angle(specs[0]))
-        else:
-            specs[1] = reduce_vocal_aggressively(specs[0], specs[1], 0.2)
-            v_spec = specs[0] - specs[1]
-
-            if not args.vocals_only:
-                X_mag = np.abs(specs[0])
-                y_mag = np.abs(specs[1])
-                v_mag = np.abs(v_spec)
-
-                X_image = spectrogram_to_image(X_mag)
-                y_image = spectrogram_to_image(y_mag)
-                v_image = spectrogram_to_image(v_mag)
-
-                cv2.imwrite("{}_X.png".format(args.output_name), X_image)
-                cv2.imwrite("{}_y.png".format(args.output_name), y_image)
-                cv2.imwrite("{}_v.png".format(args.output_name), v_image)
-
-                sf.write(
-                    "{}_X.wav".format(args.output_name),
-                    cmb_spectrogram_to_wave(specs[0], mp),
-                    mp.param["sr"],
-                )
-                sf.write(
-                    "{}_y.wav".format(args.output_name),
-                    cmb_spectrogram_to_wave(specs[1], mp),
-                    mp.param["sr"],
-                )
-
-        sf.write(
-            "{}_v.wav".format(args.output_name),
-            cmb_spectrogram_to_wave(v_spec, mp),
-            mp.param["sr"],
-        )
-    else:
-        if not args.algorithm == "deep":
-            sf.write(
-                os.path.join("ensembled", "{}.wav".format(args.output_name)),
-                cmb_spectrogram_to_wave(ensembling(args.algorithm, specs), mp),
-                mp.param["sr"],
-            )
-
-    if args.algorithm == "align":
-        trackalignment = [
-            {
-                "file1": '"{}"'.format(args.input[0]),
-                "file2": '"{}"'.format(args.input[1]),
-            }
-        ]
-
-        for i, e in tqdm(enumerate(trackalignment), desc="Performing Alignment..."):
-            os.system(f"python lib/align_tracks.py {e['file1']} {e['file2']}")
-
-    # print('Total time: {0:.{1}f}s'.format(time.time() - start_time, 1))