optimize(infer.synthesizer): all synthesizer modules inherit from one common base class

2026-06-06 09:40:24 +08:00 · 2024-06-10 21:34:35 +09:00
parent b67050b2f7
commit e33ef19200
5 changed files with 127 additions and 231 deletions
--- a/infer/modules/train/train.py
+++ b/infer/modules/train/train.py
@@ -415,6 +415,7 @@ def train_and_evaluate(
    for batch_idx, info in data_iterator:
        # Data
        ## Unpack
+        pitch = pitchf = None
        if hps.if_f0 == 1:
            (
                phone,
@@ -444,22 +445,13 @@ def train_and_evaluate(

        # Calculate
        with autocast(enabled=hps.train.fp16_run):
-            if hps.if_f0 == 1:
-                (
-                    y_hat,
-                    ids_slice,
-                    x_mask,
-                    z_mask,
-                    (z, z_p, m_p, logs_p, m_q, logs_q),
-                ) = net_g(phone, phone_lengths, pitch, pitchf, spec, spec_lengths, sid)
-            else:
-                (
-                    y_hat,
-                    ids_slice,
-                    x_mask,
-                    z_mask,
-                    (z, z_p, m_p, logs_p, m_q, logs_q),
-                ) = net_g(phone, phone_lengths, spec, spec_lengths, sid)
+            (
+                y_hat,
+                ids_slice,
+                x_mask,
+                z_mask,
+                (z, z_p, m_p, logs_p, m_q, logs_q),
+            ) = net_g(phone, phone_lengths, spec, spec_lengths, sid, pitch, pitchf)
            mel = spec_to_mel_torch(
                spec,
                hps.data.filter_length,
--- a/infer/modules/vc/pipeline.py
+++ b/infer/modules/vc/pipeline.py
@@ -290,10 +290,15 @@ class Pipeline(object):
            feats = feats.to(feats0.dtype)
        p_len = torch.tensor([p_len], device=self.device).long()
        with torch.no_grad():
-            hasp = pitch is not None and pitchf is not None
-            arg = (feats, p_len, pitch, pitchf, sid) if hasp else (feats, p_len, sid)
-            audio1 = (net_g.infer(*arg)[0][0, 0]).data.cpu().float().numpy()
-            del arg
+            audio1 = (
+                net_g.infer(
+                    feats,
+                    p_len,
+                    sid,
+                    pitch=pitch,
+                    pitchf=pitchf,
+                )[0, 0]
+            ).data.cpu().float().numpy()
        del feats, p_len, padding_mask
        if torch.cuda.is_available():
            torch.cuda.empty_cache()