chore(i18n): sync locale on dev

2026-06-26 06:50:23 +08:00 · 2025-07-08 07:46:51 +00:00
35 changed files with 167 additions and 310 deletions
--- a/.github/workflows/checksum.yml
+++ b/.github/workflows/checksum.yml
@@ -15,7 +15,7 @@ jobs:

      - name: Run RVC-Models-Downloader
        run: |
-          wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb
+          wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
          sudo apt -y install ./rvcmd_linux_amd64.deb
          rm -f ./rvcmd_linux_amd64.deb
          rvcmd -notrs -w 1 -notui assets/rvc
--- a/.github/workflows/unitest.yml
+++ b/.github/workflows/unitest.yml
@@ -11,72 +11,20 @@ jobs:

    steps:
    - uses: actions/checkout@master
-    - name: Space cleanup
-      env:
-        DEBIAN_FRONTEND: noninteractive
-      run: |
-        df -h
-
-        # Source - https://stackoverflow.com/a
-        # Posted by Cosmin Bodnariuc
-        # Retrieved 2025-11-21, License - CC BY-SA 4.0
-        sudo rm -rf /usr/share/dotnet
-        sudo rm -rf /usr/local/share/boost
-        sudo rm -rf /usr/local/share/chromium
-        sudo rm -rf /usr/local/share/powershell
-        sudo rm -rf /usr/local/share/vcpkg
-        sudo rm -rf /usr/local/share/miniconda
-        sudo rm -rf /opt/ghc
-        sudo rm -rf /opt/hostedtoolcache/CodeQL
-        sudo rm -rf /opt/hostedtoolcache/go
-        sudo rm -rf /opt/hostedtoolcache/Python
-        sudo rm -rf /opt/hostedtoolcache/node
-        sudo rm -rf /opt/hostedtoolcache/R
-        sudo rm -rf /opt/hostedtoolcache/Java
-        sudo rm -rf /opt/hostedtoolcache/LLVM
-        sudo rm -rf /opt/hostedtoolcache/Swift
-        sudo rm -rf /opt/hostedtoolcache/Php
-        sudo rm -rf /opt/hostedtoolcache/Perl
-        sudo rm -rf /opt/hostedtoolcache/Scala
-        sudo rm -rf /opt/hostedtoolcache/Julia
-        sudo rm -rf /opt/hostedtoolcache/Mono
-        sudo rm -rf /opt/hostedtoolcache/PowerShell
-        sudo rm -rf /opt/hostedtoolcache/Crystal
-        sudo rm -rf /opt/hostedtoolcache/Elixir
-        sudo rm -rf /opt/hostedtoolcache/Erlang
-        sudo rm -rf /opt/hostedtoolcache/FSharp
-        sudo rm -rf /opt/hostedtoolcache/Haskell
-        sudo rm -rf /opt/hostedtoolcache/OCaml
-        sudo rm -rf /opt/hostedtoolcache/Rust
-        sudo rm -rf /opt/hostedtoolcache/Sbt
-        sudo rm -rf /opt/hostedtoolcache/Solidity
-        sudo rm -rf /opt/hostedtoolcache/VisualStudio
-        sudo rm -rf /opt/hostedtoolcache/WinAppDriver
-        sudo rm -rf /opt/hostedtoolcache/Xamarin
-        sudo rm -rf /opt/hostedtoolcache/Yarn
-        sudo rm -rf /opt/hostedtoolcache/Zephyr
-        sudo rm -rf /opt/hostedtoolcache/zig
-        sudo rm -rf /opt/hostedtoolcache/zulu
-        sudo rm -rf /opt/hostedtoolcache/azcopy  
-
-        sudo -E apt-get update
-        sudo -E apt-get -y autoremove --purge
-        sudo -E apt-get clean
-
-        df -h
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
-        wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb
+        sudo apt update
+        wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
        sudo apt -y install ./rvcmd_linux_amd64.deb
        pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552
        python -m pip install --upgrade setuptools
        python -m pip install --upgrade wheel
        pip install torch torchvision torchaudio
-        pip install -r requirements/cpu.txt
+        pip install -r requirements/main.txt
        rvcmd -notrs -w 1 -notui assets/rvc
    - name: Test step 1 & 2
      run: |
--- a/configs/config.py
+++ b/configs/config.py
@@ -4,7 +4,6 @@ import sys
 import json
 import shutil
 from multiprocessing import cpu_count
-import importlib.util

 import torch

@@ -47,10 +46,10 @@ class Config(metaclass=Singleton):
            self.global_link,
            self.noparallel,
            self.noautoopen,
+            self.dml,
            self.nocheck,
            self.update,
        ) = self.arg_parse()
-        self.dml = False
        self.instead = ""
        self.preprocess_per = 3.7
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
@@ -84,6 +83,11 @@ class Config(metaclass=Singleton):
            action="store_true",
            help="Do not open in browser automatically",
        )
+        parser.add_argument(
+            "--dml",
+            action="store_true",
+            help="torch_dml",
+        )
        parser.add_argument(
            "--nocheck", action="store_true", help="Run without checking assets"
        )
@@ -100,6 +104,7 @@ class Config(metaclass=Singleton):
            cmd_opts.global_link,
            cmd_opts.noparallel,
            cmd_opts.noautoopen,
+            cmd_opts.dml,
            cmd_opts.nocheck,
            cmd_opts.update,
        )
@@ -178,7 +183,7 @@ class Config(metaclass=Singleton):
            if self.has_xpu():
                self.device = self.instead = "xpu:0"
                self.is_half = True
-            i_device = int(str(self.device).split(":")[-1])
+            i_device = int(self.device.split(":")[-1])
            self.gpu_name = torch.cuda.get_device_name(i_device)
            if (
                ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
@@ -209,7 +214,7 @@ class Config(metaclass=Singleton):
            self.use_fp32_config()
        else:
            logger.info("No supported Nvidia GPU found")
-            self.device = self.instead = torch.get_default_device()
+            self.device = self.instead = "cpu"
            self.is_half = False
            self.use_fp32_config()

@@ -234,13 +239,12 @@ class Config(metaclass=Singleton):
            x_query = 5
            x_center = 30
            x_max = 32
-        if importlib.util.find_spec("torch_directml") is not None:
+        if self.dml:
            logger.info("Use DirectML instead")
            import torch_directml

            self.device = torch_directml.device(torch_directml.default_device())
            self.is_half = False
-            self.dml = True
        else:
            if self.instead:
                logger.info(f"Use {self.instead} instead")
--- a/i18n/locale/ja_JP.json
+++ b/i18n/locale/ja_JP.json
@@ -133,7 +133,7 @@
    "Takeover WASAPI device": "WASAPIデバイスを独占",
    "Target sample rate": "目標サンプリング率",
    "The audio file to be processed": "処理待ち音声",
-    "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本ソフトウェアは、GNU Affero General Public License バージョン3.0以降の下でライセンスされています。<br>作者は、このソフトウェアの使用に関して一切の管理権限や責任を負いません。<br>このソフトウェアを使用し、生成された音声やファイルを含むあらゆるコンテンツを配布するユーザーは、AGPL 3.0ライセンスの条項への準拠について全責任を負います。<br>これらの条項に同意しない場合、このソフトウェアパッケージに含まれるコードやファイルの使用、参照、配布は禁止されています。<br>詳細については、ルートディレクトリにあるLICENSEファイルを参照してください。",
+    "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
    "Total training epochs (total_epoch)": "総エポック数",
    "Train": "学習",
    "Train feature index": "特徴索引の学習",
--- a/i18n/locale/zh_CN.json
+++ b/i18n/locale/zh_CN.json
@@ -133,7 +133,7 @@
    "Takeover WASAPI device": "独占 WASAPI 设备",
    "Target sample rate": "目标采样率",
    "The audio file to be processed": "待处理音频文件",
-    "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本软件基于GNU Affero通用公共许可证3.0版本或更高版本进行许可。<br>作者对本软件的使用不承担任何控制权或责任。<br>使用本软件并分发任何内容（包括由其生成的声音或文件）的用户，需对遵守AGPL 3.0许可证条款承担全部责任。<br>如果您不接受这些条款，则禁止使用、引用或分发本软件包中包含的任何代码或文件。<br>请参阅位于根目录中的LICENSE文件以获取完整详情。",
+    "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
    "Total training epochs (total_epoch)": "总训练轮数total_epoch",
    "Train": "训练",
    "Train feature index": "训练特征索引",
--- a/infer/lib/audio.py
+++ b/infer/lib/audio.py
@@ -195,10 +195,7 @@ def get_audio_properties(input_path: str) -> Tuple[int, int]:
    container = av.open(input_path)
    audio_stream = next(s for s in container.streams if s.type == "audio")
    channels = 1 if audio_stream.layout == "mono" else 2
-    try:
-        rate = audio_stream.base_rate
-    except:
-        rate = audio_stream.sample_rate
+    rate = audio_stream.base_rate
    container.close()
    return channels, rate

--- a/infer/lib/rvcmd.py
+++ b/infer/lib/rvcmd.py
@@ -162,7 +162,15 @@ def download_and_extract_zip(url: str, folder: str):
        logger.info(f"extracted into {folder}")


-def download_all_assets(tmpdir: str, version="0.2.11"):
+def download_dns_yaml(url: str, folder: str):  # save the body fetched from `url` as <folder>/dns.yaml
+    logger.info(f"downloading {url}")
+    response = requests.get(url, stream=True, timeout=(5, 10))  # timeout tuple is (connect, read) seconds
+    with open(os.path.join(folder, "dns.yaml"), "wb") as out_file:
+        out_file.write(response.content)  # NOTE(review): status is never checked, so an HTTP error body would be saved too
+        logger.info(f"downloaded into {folder}")
+
+
+def download_all_assets(tmpdir: str, version="0.2.5"):
    import subprocess
    import platform

@@ -190,10 +198,44 @@ def download_all_assets(tmpdir: str, version="0.2.11"):
    suffix = "zip" if is_win else "tar.gz"
    RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}"
    cmdfile = os.path.join(tmpdir, "rvcmd")
-    if is_win:
-        download_and_extract_zip(RVCMD_URL, tmpdir)
-        cmdfile += ".exe"
-    else:
-        download_and_extract_tar_gz(RVCMD_URL, tmpdir)
-        os.chmod(cmdfile, 0o755)
-    subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])
+    try:
+        if is_win:
+            download_and_extract_zip(RVCMD_URL, tmpdir)
+            cmdfile += ".exe"
+        else:
+            download_and_extract_tar_gz(RVCMD_URL, tmpdir)
+            os.chmod(cmdfile, 0o755)
+        subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])
+    except Exception:
+        BASE_URL = "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/assets/"
+        suffix = {
+            "darwin_amd64": "555",
+            "darwin_arm64": "556",
+            "linux_386": "557",
+            "linux_amd64": "558",
+            "linux_arm64": "559",
+            "windows_386": "562",
+            "windows_amd64": "563",
+        }[f"{system_type}_{architecture}"]
+        RVCMD_URL = BASE_URL + suffix
+        download_dns_yaml(
+            "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/raw/main/dns.yaml",
+            tmpdir,
+        )
+        if is_win:
+            download_and_extract_zip(RVCMD_URL, tmpdir)
+            cmdfile += ".exe"
+        else:
+            download_and_extract_tar_gz(RVCMD_URL, tmpdir)
+            os.chmod(cmdfile, 0o755)
+        subprocess.run(
+            [
+                cmdfile,
+                "-notui",
+                "-w",
+                "0",
+                "-dns",
+                os.path.join(tmpdir, "dns.yaml"),
+                "assets/rvc",
+            ]
+        )
--- a/infer/lib/train/data_utils.py
+++ b/infer/lib/train/data_utils.py
@@ -303,7 +303,7 @@ class TextAudioLoader(torch.utils.data.Dataset):
        spec_filename = filename.replace(".wav", ".spec.pt")
        if os.path.exists(spec_filename):
            try:
-                spec = torch.load(spec_filename, weights_only=True)
+                spec = torch.load(spec_filename)
            except:
                logger.warning("%s %s", spec_filename, traceback.format_exc())
                spec = spectrogram_torch(
--- a/infer/lib/train/process_ckpt.py
+++ b/infer/lib/train/process_ckpt.py
@@ -59,7 +59,7 @@ def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps):

 def extract_small_model(path, name, author, sr, if_f0, info, version):
    try:
-        ckpt = torch.load(path, map_location="cpu", weights_only=True)
+        ckpt = torch.load(path, map_location="cpu")
        if "model" in ckpt:
            ckpt = ckpt["model"]
        opt = OrderedDict()
@@ -196,7 +196,7 @@ def extract_small_model(path, name, author, sr, if_f0, info, version):

 def change_info(path, info, name):
    try:
-        ckpt = torch.load(path, map_location="cpu", weights_only=True)
+        ckpt = torch.load(path, map_location="cpu")
        ckpt["info"] = info
        if name == "":
            name = os.path.basename(path)
@@ -229,8 +229,8 @@ def merge(path1, path2, alpha1, sr, f0, info, name, version):
                a2 = "Unknown"
            return f"{a1} & {a2}"

-        ckpt1 = torch.load(path1, map_location="cpu", weights_only=True)
-        ckpt2 = torch.load(path2, map_location="cpu", weights_only=True)
+        ckpt1 = torch.load(path1, map_location="cpu")
+        ckpt2 = torch.load(path2, map_location="cpu")
        cfg = ckpt1["config"]
        if "model" in ckpt1:
            ckpt1 = extract(ckpt1)
--- a/infer/modules/train/extract_f0_print.py
+++ b/infer/modules/train/extract_f0_print.py
@@ -2,7 +2,6 @@ import os
 import sys
 import traceback
 from pathlib import Path
-import importlib.util

 from dotenv import load_dotenv

@@ -39,9 +38,6 @@ f0method = sys.argv[3]
 device = sys.argv[4]
 is_half = sys.argv[5] == "True"

-if importlib.util.find_spec("torch_directml") is not None:
-    import torch_directml  # use side effect
-

 class FeatureInput(object):
    def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160):
@@ -106,12 +102,6 @@ if __name__ == "__main__":
    Config.use_insecure_load()

    printt(" ".join(sys.argv))
-    # GPU methods (rmvpe, fcpe, crepe, etc.) gain nothing from multiprocessing since
-    # all processes share one GPU. Spawning n_p processes each lazily loading
-    # the model onto the same CUDA device exhausts VRAM and causes deadlocks.
-    if "cuda" in device:
-        printt("WARN: use 1 thread since GPU is used.")
-        n_p = 1
    featureInput = FeatureInput(is_half, device)
    paths = []
    inp_root = "%s/1_16k_wavs" % (exp_dir)
--- a/infer/modules/train/extract_feature_print.py
+++ b/infer/modules/train/extract_feature_print.py
@@ -17,14 +17,7 @@ device = sys.argv[1]
 n_part = int(sys.argv[2])
 i_part = int(sys.argv[3])
 i_gpu = sys.argv[4]
-# CUDA_VISIBLE_DEVICES expects bare GPU indices (e.g. "0" or "0,1"),
-# but callers may pass "cuda:0", "cuda:0-cuda:1", etc.  Strip the prefix
-# and normalise separators so any combination works.
-import re
-
-i_gpu = re.sub(r"cuda:", "", str(i_gpu))
-i_gpu = i_gpu.replace("-", ",")
-os.environ["CUDA_VISIBLE_DEVICES"] = i_gpu
+os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
 exp_dir = sys.argv[5]
 version = sys.argv[6]
 is_half = sys.argv[7].lower() == "true"
--- a/infer/modules/train/train.py
+++ b/infer/modules/train/train.py
@@ -106,30 +106,23 @@ def main():
        # patch to unblock people without gpus. there is probably a better way.
        print("NO GPU DETECTED: falling back to CPU - this may take a while")
        n_gpus = 1
+    os.environ["MASTER_ADDR"] = "localhost"
+    os.environ["MASTER_PORT"] = str(randint(20000, 55555))
+    children = []
    logger = utils.get_logger(hps.model_dir)
-    if n_gpus == 1:
-        # Single GPU: run directly without distributed to avoid gloo issues on Windows
-        run(0, 1, hps, logger)
-    else:
-        master_port = str(randint(20000, 55555))
-        os.environ["MASTER_ADDR"] = "127.0.0.1"
-        os.environ["MASTER_PORT"] = master_port
-        children = []
-        for i in range(n_gpus):
-            subproc = mp.Process(
-                target=run,
-                args=(i, n_gpus, hps, logger, master_port),
-            )
-            children.append(subproc)
-            subproc.start()
+    for i in range(n_gpus):
+        subproc = mp.Process(
+            target=run,
+            args=(i, n_gpus, hps, logger),
+        )
+        children.append(subproc)
+        subproc.start()

-        for i in range(n_gpus):
-            children[i].join()
+    for i in range(n_gpus):
+        children[i].join()


-def run(
-    rank, n_gpus, hps: utils.HParams, logger: logging.Logger, master_port: str = "29500"
-):
+def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger):
    global global_step
    if rank == 0:
        # logger = utils.get_logger(hps.model_dir)
@@ -138,81 +131,24 @@ def run(
        writer = SummaryWriter(log_dir=hps.model_dir)
        writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))

-    use_distributed = n_gpus > 1
-    if use_distributed:
-        if os.name == "nt" or not torch.cuda.is_available():
-            # On Windows, gloo's create_device(hostname=...) is gated to Linux only
-            # in the C++ layer (makeDeviceForHostname). We must use the interface-
-            # based path instead: create_device(interface=...) calls
-            # makeDeviceForInterface which is not platform-gated.
-            import socket as _socket
-
-            try:
-                store = dist.TCPStore(
-                    host_name="127.0.0.1",
-                    port=int(master_port),
-                    world_size=n_gpus,
-                    is_master=(rank == 0),
-                )
-            except Exception:
-                store = dist.TCPStore(
-                    host_name="127.0.0.1",
-                    port=int(master_port),
-                    world_size=n_gpus,
-                    is_master=(rank == 0),
-                    use_libuv=False,
-                )
-
-            # Discover a working network interface for gloo device creation
-            gloo_device = None
-            try:
-                for idx, ifname in _socket.if_nameindex():
-                    try:
-                        gloo_device = dist.ProcessGroupGloo.create_device(
-                            interface=ifname
-                        )
-                        print("Try device", idx, "name", ifname)
-                        break
-                    except RuntimeError as e:
-                        print("Try device", idx, "name", ifname, "err:", e)
-                        continue
-            except (OSError, AttributeError) as e:
-                print(e.with_traceback(None))
-
-            if gloo_device is None:
-                raise RuntimeError(
-                    "Cannot create gloo device on Windows. "
-                    "No usable network interface found. "
-                    "Try adding your hostname to "
-                    "C:\\Windows\\System32\\drivers\\etc\\hosts "
-                    "with: 127.0.0.1  " + _socket.gethostname()
-                )
-
-            pg_options = dist.ProcessGroupGloo._Options()
-            pg_options._devices = [gloo_device]
-            dist.init_process_group(
-                backend="gloo",
-                store=store,
-                world_size=n_gpus,
-                rank=rank,
-                pg_options=pg_options,
-            )
-        else:
-            init_url = f"tcp://127.0.0.1:{master_port}"
-            try:
-                dist.init_process_group(
-                    backend="nccl",
-                    init_method=init_url,
-                    world_size=n_gpus,
-                    rank=rank,
-                )
-            except:
-                dist.init_process_group(
-                    backend="nccl",
-                    init_method=init_url + "?use_libuv=False",
-                    world_size=n_gpus,
-                    rank=rank,
-                )
+    try:
+        dist.init_process_group(
+            backend=(
+                "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
+            ),
+            init_method="env://",
+            world_size=n_gpus,
+            rank=rank,
+        )
+    except:
+        dist.init_process_group(
+            backend=(
+                "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
+            ),
+            init_method="env://?use_libuv=False",
+            world_size=n_gpus,
+            rank=rank,
+        )
    torch.manual_seed(hps.train.seed)
    if torch.cuda.is_available():
        torch.cuda.set_device(rank)
@@ -285,15 +221,14 @@ def run(
    )
    # net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
    # net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
-    if use_distributed:
-        if hasattr(torch, "xpu") and torch.xpu.is_available():
-            pass
-        elif torch.cuda.is_available():
-            net_g = DDP(net_g, device_ids=[rank])
-            net_d = DDP(net_d, device_ids=[rank])
-        else:
-            net_g = DDP(net_g)
-            net_d = DDP(net_d)
+    if hasattr(torch, "xpu") and torch.xpu.is_available():
+        pass
+    elif torch.cuda.is_available():
+        net_g = DDP(net_g, device_ids=[rank])
+        net_d = DDP(net_d, device_ids=[rank])
+    else:
+        net_g = DDP(net_g)
+        net_d = DDP(net_d)

    try:  # 如果能加载自动resume
        _, _, _, epoch_str = utils.load_checkpoint(
--- a/infer/modules/uvr5/modules.py
+++ b/infer/modules/uvr5/modules.py
@@ -62,6 +62,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
                    os.path.basename(inp_path),
                )
                resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo")
+                try:  # Remove the original file
+                    os.remove(inp_path)
+                except Exception as e:
+                    print(f"Failed to remove the original file: {e}")
                inp_path = tmp_path
            try:
                if done == 0:
--- a/infer/modules/uvr5/vr.py
+++ b/infer/modules/uvr5/vr.py
@@ -37,7 +37,7 @@ class AudioPre:
        else:
            mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
            model = Nets.CascadedASPPNet(mp.param["bins"] * 2)
-        cpk = torch.load(model_path, map_location="cpu", weights_only=True)
+        cpk = torch.load(model_path, map_location="cpu")
        model.load_state_dict(cpk)
        model.eval()
        if is_half:
--- a/infer/modules/vc/hash.py
+++ b/infer/modules/vc/hash.py
@@ -152,7 +152,7 @@ def model_hash_ckpt(cpt):


 def model_hash_from(path):
-    cpt = torch.load(path, map_location="cpu", weights_only=True)
+    cpt = torch.load(path, map_location="cpu")
    h = model_hash_ckpt(cpt)
    del cpt
    return h
--- a/infer/modules/vc/info.py
+++ b/infer/modules/vc/info.py
@@ -75,7 +75,7 @@ def show_info(path):
    try:
        if hasattr(path, "name"):
            path = path.name
-        a = torch.load(path, map_location="cpu", weights_only=True)
+        a = torch.load(path, map_location="cpu")
        txt = show_model_info(a, show_long_id=True)
        del a
    except:
--- a/requirements/amd.txt
+++ b/requirements/amd.txt
@@ -1,10 +1,11 @@
+tensorflow-rocm
 joblib>=1.1.0
 numba
 numpy
 scipy
 librosa>=0.10.2
 llvmlite
-fairseq @ git+https://github.com/fumiama/fairseq.git
+fairseq @ git+https://github.com/One-sixth/fairseq.git
 faiss-cpu
 gradio
 Cython
--- a/requirements/cpu.txt
+++ b/requirements/cpu.txt
@@ -1,49 +0,0 @@
-joblib>=1.1.0
-numba
-numpy
-scipy
-librosa>=0.10.2
-llvmlite
-fairseq @ git+https://github.com/fumiama/fairseq.git
-faiss-cpu
-gradio
-Cython
-pydub>=0.25.1
-tensorboardX
-Jinja2>=3.1.2
-json5
-Markdown
-matplotlib>=3.7.0
-matplotlib-inline>=0.1.3
-praat-parselmouth>=0.4.2
-Pillow>=9.1.1
-resampy>=0.4.2
-scikit-learn
-tensorboard
-tqdm>=4.63.1
-tornado>=6.1
-Werkzeug>=2.2.3
-uc-micro-py>=1.0.1
-sympy>=1.11.1
-tabulate>=0.8.10
-PyYAML>=6.0
-pyasn1>=0.4.8
-pyasn1-modules>=0.2.8
-fsspec>=2022.11.0
-absl-py>=1.2.0
-audioread
-uvicorn>=0.21.1
-colorama>=0.4.5
-pyworld==0.3.2
-httpx
-onnxruntime; sys_platform == 'darwin'
-torchcrepe>=0.0.23
-fastapi
-torchfcpe
-python-dotenv>=1.0.0
-av
-pybase16384
--extra-index-url https://download.pytorch.org/whl/cpu
-torch
-torchvision
-torchaudio
--- a/requirements/dml.txt
+++ b/requirements/dml.txt
@@ -4,7 +4,7 @@ numpy
 scipy
 librosa>=0.10.2
 llvmlite
-fairseq @ git+https://github.com/fumiama/fairseq.git
+fairseq @ git+https://github.com/One-sixth/fairseq.git
 faiss-cpu
 gradio
 Cython
@@ -43,6 +43,3 @@ python-dotenv>=1.0.0
 av
 torchfcpe
 pybase16384
-torch-directml
-torchvision
-torchaudio
--- a/requirements/ipex.txt
+++ b/requirements/ipex.txt
@@ -9,7 +9,7 @@ numpy
 scipy
 librosa>=0.10.2
 llvmlite==0.39.0
-fairseq @ git+https://github.com/fumiama/fairseq.git
+fairseq @ git+https://github.com/One-sixth/fairseq.git
 faiss-cpu
 gradio
 Cython
--- a/requirements/main.txt
+++ b/requirements/main.txt
@@ -4,7 +4,7 @@ numpy
 scipy
 librosa>=0.10.2
 llvmlite
-fairseq @ git+https://github.com/fumiama/fairseq.git
+fairseq @ git+https://github.com/One-sixth/fairseq.git
 faiss-cpu
 gradio
 Cython
--- a/rvc/f0/f0.py
+++ b/rvc/f0/f0.py
@@ -11,14 +11,14 @@ class F0Predictor(object):
        f0_min=50,
        f0_max=1100,
        sampling_rate=44100,
-        device: Optional[Union[str, torch.device]] = None,
+        device: Optional[str] = None,
    ):
        self.hop_length = hop_length
        self.f0_min = f0_min
        self.f0_max = f0_max
        self.sampling_rate = sampling_rate
-        if not device:
-            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        if device is None:
+            device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.device = device

    def compute_f0(
--- a/rvc/f0/mel.py
+++ b/rvc/f0/mel.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union
+from typing import Optional

 import torch
 import numpy as np
@@ -17,9 +17,9 @@ class MelSpectrogram(torch.nn.Module):
        hop_length: int,
        n_fft: Optional[int] = None,
        mel_fmin: int = 0,
-        mel_fmax: Optional[int] = None,
+        mel_fmax: int = None,
        clamp: float = 1e-5,
-        device: Union[str, torch.device] = torch.device("cpu"),
+        device=torch.device("cpu"),
    ):
        super().__init__()
        if n_fft is None:
--- a/rvc/f0/rmvpe.py
+++ b/rvc/f0/rmvpe.py
@@ -1,6 +1,6 @@
 from io import BytesIO
 import os
-from typing import Optional, Union
+from typing import Any, Optional, Union

 import numpy as np
 import torch
--- a/rvc/layers/attentions.py
+++ b/rvc/layers/attentions.py
@@ -12,8 +12,8 @@ class MultiHeadAttention(nn.Module):
        channels: int,
        out_channels: int,
        n_heads: int,
-        window_size: int,
        p_dropout: float = 0.0,
+        window_size: Optional[int] = None,
        heads_share: bool = True,
        block_length: Optional[int] = None,
        proximal_bias: bool = False,
--- a/rvc/layers/encoders.py
+++ b/rvc/layers/encoders.py
@@ -42,8 +42,8 @@ class Encoder(nn.Module):
                    hidden_channels,
                    hidden_channels,
                    n_heads,
-                    window_size,
                    p_dropout=p_dropout,
+                    window_size=window_size,
                )
            )
            self.norm_layers_1.append(LayerNorm(hidden_channels))
@@ -121,7 +121,7 @@ class TextEncoder(nn.Module):
    def __call__(
        self,
        phone: torch.Tensor,
-        pitch: Optional[torch.Tensor],
+        pitch: torch.Tensor,
        lengths: torch.Tensor,
        skip_head: Optional[int] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
@@ -135,7 +135,7 @@ class TextEncoder(nn.Module):
    def forward(
        self,
        phone: torch.Tensor,
-        pitch: Optional[torch.Tensor],
+        pitch: torch.Tensor,
        lengths: torch.Tensor,
        skip_head: Optional[int] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
--- a/rvc/layers/generators.py
+++ b/rvc/layers/generators.py
@@ -46,7 +46,6 @@ class Generator(torch.nn.Module):

        self.resblocks = nn.ModuleList()
        resblock_module = ResBlock1 if resblock == "1" else ResBlock2
-        ch = 0
        for i in range(len(self.ups)):
            ch = upsample_initial_channel // (2 ** (i + 1))
            for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):
--- a/rvc/layers/norms.py
+++ b/rvc/layers/norms.py
@@ -30,7 +30,7 @@ class WN(torch.nn.Module):
        dilation_rate: int,
        n_layers: int,
        gin_channels: int = 0,
-        p_dropout: float = 0,
+        p_dropout: int = 0,
    ):
        super(WN, self).__init__()
        assert kernel_size % 2 == 1
--- a/rvc/layers/nsf.py
+++ b/rvc/layers/nsf.py
@@ -1,4 +1,4 @@
-from typing import Optional, List, Union
+from typing import Optional, List
 import math

 import torch
@@ -83,7 +83,7 @@ class NSFGenerator(torch.nn.Module):
        self.conv_pre = Conv1d(
            initial_channel, upsample_initial_channel, 7, 1, padding=3
        )
-        resblockcls = ResBlock1 if resblock == "1" else ResBlock2
+        resblock = ResBlock1 if resblock == "1" else ResBlock2

        self.ups = nn.ModuleList()
        for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
@@ -114,13 +114,12 @@ class NSFGenerator(torch.nn.Module):
                self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))

        self.resblocks = nn.ModuleList()
-        ch = 0
        for i in range(len(self.ups)):
-            ch = upsample_initial_channel // (2 ** (i + 1))
+            ch: int = upsample_initial_channel // (2 ** (i + 1))
            for j, (k, d) in enumerate(
                zip(resblock_kernel_sizes, resblock_dilation_sizes)
            ):
-                self.resblocks.append(resblockcls(ch, k, d))
+                self.resblocks.append(resblock(ch, k, d))

        self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
        self.ups.apply(call_weight_data_normal_if_Conv)
--- a/rvc/layers/residuals.py
+++ b/rvc/layers/residuals.py
@@ -20,7 +20,7 @@ class ResBlock1(torch.nn.Module):
        self,
        channels: int,
        kernel_size: int = 3,
-        dilation: List[int] = [1, 3, 5],
+        dilation: List[int] = (1, 3, 5),
    ):
        super(ResBlock1, self).__init__()

@@ -117,7 +117,7 @@ class ResBlock2(torch.nn.Module):
        self,
        channels: int,
        kernel_size=3,
-        dilation: List[int] = [1, 3],
+        dilation: List[int] = (1, 3),
    ):
        super(ResBlock2, self).__init__()
        self.convs = nn.ModuleList()
@@ -182,7 +182,7 @@ class ResidualCouplingLayer(nn.Module):
        kernel_size: int,
        dilation_rate: int,
        n_layers: int,
-        p_dropout: float = 0,
+        p_dropout: int = 0,
        gin_channels: int = 0,
        mean_only: bool = False,
    ):
--- a/rvc/layers/synthesizers.py
+++ b/rvc/layers/synthesizers.py
@@ -34,7 +34,7 @@ class SynthesizerTrnMsNSFsid(nn.Module):
        upsample_kernel_sizes: List[int],
        spk_embed_dim: int,
        gin_channels: int,
-        sr: Union[str, int],
+        sr: Optional[Union[str, int]],
        encoder_dim: int,
        use_f0: bool,
    ):
@@ -143,7 +143,7 @@ class SynthesizerTrnMsNSFsid(nn.Module):
                    torch.nn.utils.remove_weight_norm(self.enc_q)
        return self

-    @torch.jit.ignore()
+    @torch.jit.ignore
    def forward(
        self,
        phone: torch.Tensor,
@@ -155,20 +155,18 @@ class SynthesizerTrnMsNSFsid(nn.Module):
        pitchf: Optional[torch.Tensor] = None,
    ):  # 这里ds是id，[bs,1]
        # print(1,pitch.shape)#[bs,t]
-        embg = self.emb_g(ds).unsqueeze(-1)  # [b, 256, 1]##1是t，广播的
+        g = self.emb_g(ds).unsqueeze(-1)  # [b, 256, 1]##1是t，广播的
        m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
-        z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=embg)
-        z_p = self.flow(z, y_mask, g=embg)
+        z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
+        z_p = self.flow(z, y_mask, g=g)
        z_slice, ids_slice = rand_slice_segments_on_last_dim(
            z, y_lengths, self.segment_size
        )
-        if pitchf is not None and isinstance(self.dec, NSFGenerator):
+        if pitchf is not None:
            pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size)
-            o = self.dec(z_slice, pitchf, g=embg)  # type: ignore
-        elif isinstance(self.dec, Generator):
-            o = self.dec(z_slice, g=embg)
+            o = self.dec(z_slice, pitchf, g=g)
        else:
-            raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
+            o = self.dec(z_slice, g=g)
        return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)

    @torch.jit.export
@@ -203,17 +201,15 @@ class SynthesizerTrnMsNSFsid(nn.Module):
            z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
            z = self.flow(z_p, x_mask, g=g, reverse=True)
        del z_p, m_p, logs_p
-        if pitchf is not None and isinstance(self.dec, NSFGenerator):
+        if pitchf is not None:
            o = self.dec(
                z * x_mask,
                pitchf,
                g=g,
                n_res=return_length2,
            )
-        elif isinstance(self.dec, Generator):
-            o = self.dec(z * x_mask, g=g, n_res=return_length2)
        else:
-            raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
+            o = self.dec(z * x_mask, g=g, n_res=return_length2)
        del x_mask, z
        return o  # , x_mask, (z, z_p, m_p, logs_p)

@@ -330,7 +326,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
        upsample_kernel_sizes: List[int],
        spk_embed_dim: int,
        gin_channels: int,
-        sr: Union[str, int],
+        sr=None,
    ):
        super().__init__(
            spec_channels,
@@ -350,7 +346,6 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
            upsample_kernel_sizes,
            spk_embed_dim,
            gin_channels,
-            sr,
            256,
            False,
        )
@@ -376,7 +371,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
        upsample_kernel_sizes: List[int],
        spk_embed_dim: int,
        gin_channels: int,
-        sr: Union[str, int],
+        sr=None,
    ):
        super().__init__(
            spec_channels,
@@ -396,7 +391,6 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
            upsample_kernel_sizes,
            spk_embed_dim,
            gin_channels,
-            sr,
            768,
            False,
        )
--- a/rvc/layers/utils.py
+++ b/rvc/layers/utils.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple, Iterator, Union
+from typing import List, Optional, Tuple, Iterator

 import torch

@@ -17,7 +17,7 @@ def get_padding(kernel_size: int, dilation=1) -> int:

 def slice_on_last_dim(
    x: torch.Tensor,
-    start_indices: Union[List[int], torch.Tensor],
+    start_indices: List[int],
    segment_size=4,
 ) -> torch.Tensor:
    new_shape = [*x.shape]
@@ -32,9 +32,9 @@ def slice_on_last_dim(

 def rand_slice_segments_on_last_dim(
    x: torch.Tensor,
-    x_lengths: Optional[Union[int, torch.Tensor]] = None,
+    x_lengths: int = None,
    segment_size=4,
-) -> Tuple[torch.Tensor, Union[List[int], torch.Tensor]]:
+) -> Tuple[torch.Tensor, List[int]]:
    b, _, t = x.size()
    if x_lengths is None:
        x_lengths = t
@@ -58,7 +58,7 @@ def activate_add_tanh_sigmoid_multiply(
 def sequence_mask(
    length: torch.Tensor,
    max_length: Optional[int] = None,
-):
+) -> torch.BoolTensor:
    if max_length is None:
        max_length = int(length.max())
    x = torch.arange(max_length, dtype=length.dtype, device=length.device)
--- a/rvc/onnx/synthesizer.py
+++ b/rvc/onnx/synthesizer.py
@@ -1,4 +1,4 @@
-from typing import List, Union
+from typing import List, Optional, Union

 import torch

@@ -25,7 +25,7 @@ class SynthesizerTrnMsNSFsid(SynthesizerBase):
        upsample_kernel_sizes: List[int],
        spk_embed_dim: int,
        gin_channels: int,
-        sr: Union[str, int],
+        sr: Optional[Union[str, int]],
        encoder_dim: int,
    ):
        super().__init__(
--- a/tools/web/infer-only.py
+++ b/tools/web/infer-only.py
@@ -40,9 +40,11 @@ app = gr.Blocks()
 with app:
    with gr.Tabs():
        with gr.TabItem("在线demo"):
-            gr.Markdown(value="""
+            gr.Markdown(
+                value="""
                RVC 在线demo
-                """)
+                """
+            )
            sid = gr.Dropdown(label=i18n("Inferencing voice"), choices=sorted(names))
            with gr.Column():
                spk_item = gr.Slider(
--- a/web.py
+++ b/web.py
@@ -36,6 +36,7 @@ import threading
 import shutil
 import logging

+
 logging.getLogger("numba").setLevel(logging.WARNING)
 logging.getLogger("httpx").setLevel(logging.WARNING)