diff --git a/infer/modules/vc/modules.py b/infer/modules/vc/modules.py index e739c9f..b67ae64 100644 --- a/infer/modules/vc/modules.py +++ b/infer/modules/vc/modules.py @@ -188,7 +188,6 @@ class VC: self.net_g, sid, audio, - input_audio_path, times, f0_up_key, f0_method, diff --git a/infer/modules/vc/pipeline.py b/infer/modules/vc/pipeline.py index fdfcc8c..78a542f 100644 --- a/infer/modules/vc/pipeline.py +++ b/infer/modules/vc/pipeline.py @@ -27,8 +27,8 @@ input_audio_path2wav = {} @lru_cache -def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period): - audio = input_audio_path2wav[input_audio_path] +def cache_harvest_f0(f0_cache_key, fs, f0max, f0min, frame_period): + audio = input_audio_path2wav[f0_cache_key] f0, t = pyworld.harvest( audio, fs=fs, @@ -83,7 +83,6 @@ class Pipeline(object): def get_f0( self, - input_audio_path, x, p_len, f0_up_key, @@ -114,8 +113,10 @@ class Pipeline(object): f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" ) elif f0_method == "harvest": - input_audio_path2wav[input_audio_path] = x.astype(np.double) - f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10) + from hashlib import md5 + f0_cache_key = md5(x.tobytes()).digest() + input_audio_path2wav[f0_cache_key] = x.astype(np.double) + f0 = cache_harvest_f0(f0_cache_key, self.sr, f0_max, f0_min, 10) if filter_radius > 2: f0 = signal.medfilt(f0, 3) elif f0_method == "crepe": @@ -304,7 +305,6 @@ class Pipeline(object): net_g, sid, audio, - input_audio_path, times, f0_up_key, f0_method, @@ -372,7 +372,6 @@ class Pipeline(object): pitch, pitchf = None, None if if_f0 == 1: pitch, pitchf = self.get_f0( - input_audio_path, audio_pad, p_len, f0_up_key,