diff --git a/gui.py b/gui.py index 6693d24..e1d3a4a 100644 --- a/gui.py +++ b/gui.py @@ -172,6 +172,7 @@ if __name__ == "__main__": data["sr_model"] = data["sr_type"] == "sr_model" data["sr_device"] = data["sr_type"] == "sr_device" data["pm"] = data["f0method"] == "pm" + data["dio"] = data["f0method"] == "dio" data["harvest"] = data["f0method"] == "harvest" data["crepe"] = data["f0method"] == "crepe" data["rmvpe"] = data["f0method"] == "rmvpe" @@ -228,6 +229,7 @@ if __name__ == "__main__": data["sr_model"] = data["sr_type"] == "sr_model" data["sr_device"] = data["sr_type"] == "sr_device" data["pm"] = data["f0method"] == "pm" + data["dio"] = data["f0method"] == "dio" data["harvest"] = data["f0method"] == "harvest" data["crepe"] = data["f0method"] == "crepe" data["rmvpe"] = data["f0method"] == "rmvpe" @@ -405,6 +407,13 @@ if __name__ == "__main__": default=data.get("pm", False), enable_events=True, ), + sg.Radio( + "dio", + "f0method", + key="dio", + default=data.get("dio", False), + enable_events=True, + ), sg.Radio( "harvest", "f0method", @@ -614,9 +623,10 @@ if __name__ == "__main__": # "use_jit": values["use_jit"], "use_jit": False, "use_pv": values["use_pv"], - "f0method": ["pm", "harvest", "crepe", "rmvpe", "fcpe"][ + "f0method": ["pm", "dio", "harvest", "crepe", "rmvpe", "fcpe"][ [ values["pm"], + values["dio"], values["harvest"], values["crepe"], values["rmvpe"], @@ -656,7 +666,7 @@ if __name__ == "__main__": self.rvc.set_index_rate(values["index_rate"]) elif event == "rms_mix_rate": self.gui_config.rms_mix_rate = values["rms_mix_rate"] - elif event in ["pm", "harvest", "crepe", "rmvpe", "fcpe"]: + elif event in ["pm", "dio", "harvest", "crepe", "rmvpe", "fcpe"]: self.gui_config.f0method = event elif event == "I_noise_reduce": self.gui_config.I_noise_reduce = values["I_noise_reduce"] @@ -718,9 +728,10 @@ if __name__ == "__main__": self.gui_config.rms_mix_rate = values["rms_mix_rate"] self.gui_config.index_rate = values["index_rate"] self.gui_config.n_cpu = values["n_cpu"] - self.gui_config.f0method = ["pm", "harvest", "crepe", "rmvpe", "fcpe"][ + self.gui_config.f0method = ["pm", "dio", "harvest", "crepe", "rmvpe", "fcpe"][ [ values["pm"], + values["dio"], values["harvest"], values["crepe"], values["rmvpe"], @@ -1003,7 +1014,7 @@ if __name__ == "__main__": sola_offset = sola_offset.item() else: sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0]) - printt("sola_offset = %d", int(sola_offset)) + # printt("sola_offset = %d", int(sola_offset)) infer_wav = infer_wav[sola_offset:] if "privateuseone" in str(self.config.device) or not self.gui_config.use_pv: infer_wav[: self.sola_buffer_frame] *= self.fade_in_window @@ -1030,7 +1041,7 @@ if __name__ == "__main__": total_time = time.perf_counter() - start_time if flag_vc: self.window["infer_time"].update(int(total_time * 1000)) - printt("Infer time: %.2f", total_time) + # printt("Infer time: %.2f", total_time) def update_devices(self, hostapi_name=None): """获取设备列表""" diff --git a/infer/lib/rtrvc.py b/infer/lib/rtrvc.py index cb3f375..275b5b7 100644 --- a/infer/lib/rtrvc.py +++ b/infer/lib/rtrvc.py @@ -213,7 +213,6 @@ class RVC: pitch, pitchf = self._get_f0( input_wav[-f0_extractor_frame:], self.f0_up_key - self.formant_shift, - self.n_cpu, 3, f0method, ) @@ -275,7 +274,7 @@ class RVC: self, x: torch.Tensor, f0_up_key: Union[int, float], - filter_radius: Union[int, float], + filter_radius: Optional[Union[int, float]] = None, method: Literal["crepe", "rmvpe", "fcpe", "pm", "harvest", "dio"] = "fcpe", ): if method not in self.f0_methods.keys(): @@ -302,7 +301,7 @@ class RVC: f0 = self.pm.compute_f0(x) return self._get_f0_post(f0, f0_up_key) - def _get_f0_harvest(self, x, f0_up_key, filter_radius): + def _get_f0_harvest(self, x, f0_up_key, filter_radius=3): if not hasattr(self, "harvest"): self.harvest = Harvest( self.window, @@ -310,6 +309,7 @@ class RVC: self.f0_max, self.sr, ) + if filter_radius is None: filter_radius=3 f0 = self.harvest.compute_f0(x, filter_radius=filter_radius) return self._get_f0_post(f0, f0_up_key) @@ -344,8 +344,9 @@ class RVC: device=self.device, use_jit=self.use_jit, ) + if filter_radius is None: filter_radius=0.03 return self._get_f0_post( - self.rmvpe.compute_f0(x, thred=filter_radius), + self.rmvpe.compute_f0(x, filter_radius=filter_radius), f0_up_key, ) diff --git a/rvc/layers/synthesizers.py b/rvc/layers/synthesizers.py index e4a178e..b397228 100644 --- a/rvc/layers/synthesizers.py +++ b/rvc/layers/synthesizers.py @@ -187,7 +187,7 @@ class SynthesizerTrnMsNSFsid(nn.Module): length = int(return_length.item()) flow_head = torch.clamp(skip_head - 24, min=0) dec_head = head - int(flow_head.item()) - m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths, flow_head) + m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths) z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask z = self.flow(z_p, x_mask, g=g, reverse=True) z = z[:, :, dec_head : dec_head + length]