diff --git a/infer/lib/audio.py b/infer/lib/audio.py index 6c1b345..417876c 100644 --- a/infer/lib/audio.py +++ b/infer/lib/audio.py @@ -142,7 +142,7 @@ def load_audio( np.copyto(decoded_audio[..., offset:end_index], frame_data) offset += len(frame_data[0]) - + container.close() # Truncate the array to the actual size @@ -188,7 +188,6 @@ def resample_audio( output_container.close() - def get_audio_properties(input_path: str) -> Tuple[int, int]: container = av.open(input_path) audio_stream = next(s for s in container.streams if s.type == "audio") diff --git a/infer/lib/train/utils.py b/infer/lib/train/utils.py index 020490f..761bce6 100644 --- a/infer/lib/train/utils.py +++ b/infer/lib/train/utils.py @@ -104,7 +104,13 @@ def summarize( def latest_checkpoint_path(dir_path, regex="G_*.pth"): f_list = glob.glob(os.path.join(dir_path, regex)) - f_list.sort(key=lambda f: 999999999999 if isinstance(f, str) and f == "latest" else int("0"+"".join(filter(str.isdigit, f)))) + f_list.sort( + key=lambda f: ( + 999999999999 + if isinstance(f, str) and f == "latest" + else int("0" + "".join(filter(str.isdigit, f))) + ) + ) x = f_list[-1] logger.debug(x) return x diff --git a/infer/lib/uvr5_pack/lib_v5/spec_utils.py b/infer/lib/uvr5_pack/lib_v5/spec_utils.py index cd92581..842eacd 100644 --- a/infer/lib/uvr5_pack/lib_v5/spec_utils.py +++ b/infer/lib/uvr5_pack/lib_v5/spec_utils.py @@ -24,9 +24,7 @@ def crop_center(h1, h2): return h1 -def split_lr_waves( - wave, mid_side=False, mid_side_b2=False, reverse=False -): +def split_lr_waves(wave, mid_side=False, mid_side_b2=False, reverse=False): if reverse: wave_left = np.flip(np.asfortranarray(wave[0])) wave_right = np.flip(np.asfortranarray(wave[1])) @@ -48,17 +46,23 @@ def run_librosa_stft(wv, n_fft, hop_length, reverse): return librosa.stft(wv, n_fft=n_fft, hop_length=hop_length) return librosa.stft(np.asfortranarray(wv), n_fft=n_fft, hop_length=hop_length) + def wave_to_spectrogram_mt( wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False ): with ThreadPoolExecutor(max_workers=2) as tp: spec = np.asfortranarray( - [spec for spec in tp.map( - run_librosa_stft, - split_lr_waves(wave, mid_side, mid_side_b2, reverse), - [n_fft, n_fft], [hop_length, hop_length], [reverse, reverse] - )] + [ + spec + for spec in tp.map( + run_librosa_stft, + split_lr_waves(wave, mid_side, mid_side_b2, reverse), + [n_fft, n_fft], + [hop_length, hop_length], + [reverse, reverse], + ) + ] ) return spec @@ -144,10 +148,13 @@ def mask_silence(mag, ref, thres=0.2, min_range=64, fade_size=32): def run_librosa_istft(specx, hop_length): return librosa.istft(np.asfortranarray(specx), hop_length=hop_length) + def spectrogram_to_wave(spec, hop_length, mid_side, mid_side_b2, reverse): with ThreadPoolExecutor(max_workers=2) as tp: - wave_left, wave_right = tp.map(run_librosa_istft, spec, [hop_length, hop_length]) + wave_left, wave_right = tp.map( + run_librosa_istft, spec, [hop_length, hop_length] + ) if reverse: return np.asfortranarray([np.flip(wave_left), np.flip(wave_right)]) diff --git a/infer/modules/uvr5/mdxnet.py b/infer/modules/uvr5/mdxnet.py index 7b6c946..6582728 100644 --- a/infer/modules/uvr5/mdxnet.py +++ b/infer/modules/uvr5/mdxnet.py @@ -207,12 +207,21 @@ class Predictor: sources = self.demix(mix.T) opt = sources[0].T save_audio( - "%s/vocal_%s.%s" % (vocal_root, basename, format), mix - opt, rate, True, format=format, + "%s/vocal_%s.%s" % (vocal_root, basename, format), + mix - opt, + rate, + True, + format=format, ) save_audio( - "%s/instrument_%s.%s" % (others_root, basename, format), opt, rate, True, format=format, + "%s/instrument_%s.%s" % (others_root, basename, format), + opt, + rate, + True, + format=format, ) + class MDXNetDereverb: def __init__(self, chunks, device): self.onnx = "assets/uvr5_weights/onnx_dereverb_By_FoxJoy" diff --git a/infer/modules/uvr5/vr.py b/infer/modules/uvr5/vr.py index bfb9de2..999bc19 100644 --- a/infer/modules/uvr5/vr.py +++ b/infer/modules/uvr5/vr.py @@ -119,7 +119,10 @@ class AudioPre: if ins_root is not None: if self.data["high_end_process"].startswith("mirroring"): input_high_end_ = spec_utils.mirroring( - self.data["high_end_process"], y_spec_m, input_high_end, self.mp.param["pre_filter_start"] + self.data["high_end_process"], + y_spec_m, + input_high_end, + self.mp.param["pre_filter_start"], ) wav_instrument = spec_utils.cmb_spectrogram_to_wave( y_spec_m, self.mp, input_high_end_h, input_high_end_ @@ -139,7 +142,7 @@ class AudioPre: wav_instrument, self.mp.param["sr"], f32=True, - format=format + format=format, ) if vocal_root is not None: if self.is_reverse: @@ -148,7 +151,10 @@ class AudioPre: head = "vocal_" if self.data["high_end_process"].startswith("mirroring"): input_high_end_ = spec_utils.mirroring( - self.data["high_end_process"], v_spec_m, input_high_end, self.mp.param["pre_filter_start"] + self.data["high_end_process"], + v_spec_m, + input_high_end, + self.mp.param["pre_filter_start"], ) wav_vocals = spec_utils.cmb_spectrogram_to_wave( v_spec_m, self.mp, input_high_end_h, input_high_end_ @@ -164,5 +170,5 @@ class AudioPre: wav_vocals, self.mp.param["sr"], f32=True, - format=format + format=format, ) diff --git a/infer/modules/vc/modules.py b/infer/modules/vc/modules.py index 3f963ee..76e0c0d 100644 --- a/infer/modules/vc/modules.py +++ b/infer/modules/vc/modules.py @@ -252,8 +252,7 @@ class VC: try: tgt_sr, audio_opt = opt save_audio( - "%s/%s.%s" - % (opt_root, os.path.basename(path), format1), + "%s/%s.%s" % (opt_root, os.path.basename(path), format1), audio_opt, tgt_sr, f32=True,