1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-05 17:20:25 +08:00

1 Commits

Author SHA1 Message Date
github-actions[bot]
782c4f6473 chore(i18n): sync locale on dev 2025-07-08 07:46:51 +00:00
39 changed files with 319 additions and 419 deletions

View File

@@ -15,7 +15,7 @@ jobs:
- name: Run RVC-Models-Downloader - name: Run RVC-Models-Downloader
run: | run: |
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
sudo apt -y install ./rvcmd_linux_amd64.deb sudo apt -y install ./rvcmd_linux_amd64.deb
rm -f ./rvcmd_linux_amd64.deb rm -f ./rvcmd_linux_amd64.deb
rvcmd -notrs -w 1 -notui assets/rvc rvcmd -notrs -w 1 -notui assets/rvc

View File

@@ -12,7 +12,7 @@ jobs:
steps: steps:
- uses: actions/stale@v5 - uses: actions/stale@v5
with: with:
exempt-issue-labels: "help wanted,good first issue,documentation,following up,todo list,enhancement" exempt-issue-labels: "help wanted,good first issue,documentation,following up,todo list"
days-before-issue-stale: 30 days-before-issue-stale: 30
days-before-issue-close: 15 days-before-issue-close: 15
stale-issue-label: "stale" stale-issue-label: "stale"

View File

@@ -11,72 +11,20 @@ jobs:
steps: steps:
- uses: actions/checkout@master - uses: actions/checkout@master
- name: Space cleanup
env:
DEBIAN_FRONTEND: noninteractive
run: |
df -h
# Source - https://stackoverflow.com/a
# Posted by Cosmin Bodnariuc
# Retrieved 2025-11-21, License - CC BY-SA 4.0
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/share/vcpkg
sudo rm -rf /usr/local/share/miniconda
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /opt/hostedtoolcache/go
sudo rm -rf /opt/hostedtoolcache/Python
sudo rm -rf /opt/hostedtoolcache/node
sudo rm -rf /opt/hostedtoolcache/R
sudo rm -rf /opt/hostedtoolcache/Java
sudo rm -rf /opt/hostedtoolcache/LLVM
sudo rm -rf /opt/hostedtoolcache/Swift
sudo rm -rf /opt/hostedtoolcache/Php
sudo rm -rf /opt/hostedtoolcache/Perl
sudo rm -rf /opt/hostedtoolcache/Scala
sudo rm -rf /opt/hostedtoolcache/Julia
sudo rm -rf /opt/hostedtoolcache/Mono
sudo rm -rf /opt/hostedtoolcache/PowerShell
sudo rm -rf /opt/hostedtoolcache/Crystal
sudo rm -rf /opt/hostedtoolcache/Elixir
sudo rm -rf /opt/hostedtoolcache/Erlang
sudo rm -rf /opt/hostedtoolcache/FSharp
sudo rm -rf /opt/hostedtoolcache/Haskell
sudo rm -rf /opt/hostedtoolcache/OCaml
sudo rm -rf /opt/hostedtoolcache/Rust
sudo rm -rf /opt/hostedtoolcache/Sbt
sudo rm -rf /opt/hostedtoolcache/Solidity
sudo rm -rf /opt/hostedtoolcache/VisualStudio
sudo rm -rf /opt/hostedtoolcache/WinAppDriver
sudo rm -rf /opt/hostedtoolcache/Xamarin
sudo rm -rf /opt/hostedtoolcache/Yarn
sudo rm -rf /opt/hostedtoolcache/Zephyr
sudo rm -rf /opt/hostedtoolcache/zig
sudo rm -rf /opt/hostedtoolcache/zulu
sudo rm -rf /opt/hostedtoolcache/azcopy
sudo -E apt-get update
sudo -E apt-get -y autoremove --purge
sudo -E apt-get clean
df -h
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4 uses: actions/setup-python@v4
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- name: Install dependencies - name: Install dependencies
run: | run: |
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb sudo apt update
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
sudo apt -y install ./rvcmd_linux_amd64.deb sudo apt -y install ./rvcmd_linux_amd64.deb
pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552 pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552
python -m pip install --upgrade setuptools python -m pip install --upgrade setuptools
python -m pip install --upgrade wheel python -m pip install --upgrade wheel
pip install torch torchvision torchaudio pip install torch torchvision torchaudio
pip install -r requirements/cpu.txt pip install -r requirements/main.txt
rvcmd -notrs -w 1 -notui assets/rvc rvcmd -notrs -w 1 -notui assets/rvc
- name: Test step 1 & 2 - name: Test step 1 & 2
run: | run: |

View File

@@ -4,7 +4,6 @@ import sys
import json import json
import shutil import shutil
from multiprocessing import cpu_count from multiprocessing import cpu_count
import importlib.util
import torch import torch
@@ -47,10 +46,10 @@ class Config(metaclass=Singleton):
self.global_link, self.global_link,
self.noparallel, self.noparallel,
self.noautoopen, self.noautoopen,
self.dml,
self.nocheck, self.nocheck,
self.update, self.update,
) = self.arg_parse() ) = self.arg_parse()
self.dml = False
self.instead = "" self.instead = ""
self.preprocess_per = 3.7 self.preprocess_per = 3.7
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
@@ -84,6 +83,11 @@ class Config(metaclass=Singleton):
action="store_true", action="store_true",
help="Do not open in browser automatically", help="Do not open in browser automatically",
) )
parser.add_argument(
"--dml",
action="store_true",
help="torch_dml",
)
parser.add_argument( parser.add_argument(
"--nocheck", action="store_true", help="Run without checking assets" "--nocheck", action="store_true", help="Run without checking assets"
) )
@@ -100,6 +104,7 @@ class Config(metaclass=Singleton):
cmd_opts.global_link, cmd_opts.global_link,
cmd_opts.noparallel, cmd_opts.noparallel,
cmd_opts.noautoopen, cmd_opts.noautoopen,
cmd_opts.dml,
cmd_opts.nocheck, cmd_opts.nocheck,
cmd_opts.update, cmd_opts.update,
) )
@@ -178,7 +183,7 @@ class Config(metaclass=Singleton):
if self.has_xpu(): if self.has_xpu():
self.device = self.instead = "xpu:0" self.device = self.instead = "xpu:0"
self.is_half = True self.is_half = True
i_device = int(str(self.device).split(":")[-1]) i_device = int(self.device.split(":")[-1])
self.gpu_name = torch.cuda.get_device_name(i_device) self.gpu_name = torch.cuda.get_device_name(i_device)
if ( if (
("16" in self.gpu_name and "V100" not in self.gpu_name.upper()) ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
@@ -209,7 +214,7 @@ class Config(metaclass=Singleton):
self.use_fp32_config() self.use_fp32_config()
else: else:
logger.info("No supported Nvidia GPU found") logger.info("No supported Nvidia GPU found")
self.device = self.instead = torch.get_default_device() self.device = self.instead = "cpu"
self.is_half = False self.is_half = False
self.use_fp32_config() self.use_fp32_config()
@@ -234,13 +239,12 @@ class Config(metaclass=Singleton):
x_query = 5 x_query = 5
x_center = 30 x_center = 30
x_max = 32 x_max = 32
if importlib.util.find_spec("torch_directml") is not None: if self.dml:
logger.info("Use DirectML instead") logger.info("Use DirectML instead")
import torch_directml import torch_directml
self.device = torch_directml.device(torch_directml.default_device()) self.device = torch_directml.device(torch_directml.default_device())
self.is_half = False self.is_half = False
self.dml = True
else: else:
if self.instead: if self.instead:
logger.info(f"Use {self.instead} instead") logger.info(f"Use {self.instead} instead")

View File

@@ -133,7 +133,7 @@
"Takeover WASAPI device": "WASAPIデバイスを独占", "Takeover WASAPI device": "WASAPIデバイスを独占",
"Target sample rate": "目標サンプリング率", "Target sample rate": "目標サンプリング率",
"The audio file to be processed": "処理待ち音声", "The audio file to be processed": "処理待ち音声",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本ソフトウェアは、GNU Affero General Public License バージョン3.0以降の下でライセンスされています。<br>作者は、このソフトウェアの使用に関して一切の管理権限や責任を負いません。<br>このソフトウェアを使用し、生成された音声やファイルを含むあらゆるコンテンツを配布するユーザーは、AGPL 3.0ライセンスの条項への準拠について全責任を負います。<br>これらの条項に同意しない場合、このソフトウェアパッケージに含まれるコードやファイルの使用、参照、配布は禁止されています。<br>詳細については、ルートディレクトリにあるLICENSEファイルを参照してください。", "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
"Total training epochs (total_epoch)": "総エポック数", "Total training epochs (total_epoch)": "総エポック数",
"Train": "学習", "Train": "学習",
"Train feature index": "特徴索引の学習", "Train feature index": "特徴索引の学習",

View File

@@ -133,7 +133,7 @@
"Takeover WASAPI device": "独占 WASAPI 设备", "Takeover WASAPI device": "独占 WASAPI 设备",
"Target sample rate": "目标采样率", "Target sample rate": "目标采样率",
"The audio file to be processed": "待处理音频文件", "The audio file to be processed": "待处理音频文件",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本软件基于GNU Affero通用公共许可证3.0版本或更高版本进行许可。<br>作者对本软件的使用不承担任何控制权或责任。<br>使用本软件并分发任何内容包括由其生成的声音或文件的用户需对遵守AGPL 3.0许可证条款承担全部责任。<br>如果您不接受这些条款,则禁止使用、引用或分发本软件包中包含的任何代码或文件。<br>请参阅位于根目录中的LICENSE文件以获取完整详情。", "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
"Total training epochs (total_epoch)": "总训练轮数total_epoch", "Total training epochs (total_epoch)": "总训练轮数total_epoch",
"Train": "训练", "Train": "训练",
"Train feature index": "训练特征索引", "Train feature index": "训练特征索引",

View File

@@ -195,10 +195,7 @@ def get_audio_properties(input_path: str) -> Tuple[int, int]:
container = av.open(input_path) container = av.open(input_path)
audio_stream = next(s for s in container.streams if s.type == "audio") audio_stream = next(s for s in container.streams if s.type == "audio")
channels = 1 if audio_stream.layout == "mono" else 2 channels = 1 if audio_stream.layout == "mono" else 2
try:
rate = audio_stream.base_rate rate = audio_stream.base_rate
except:
rate = audio_stream.sample_rate
container.close() container.close()
return channels, rate return channels, rate

View File

@@ -162,7 +162,15 @@ def download_and_extract_zip(url: str, folder: str):
logger.info(f"extracted into {folder}") logger.info(f"extracted into {folder}")
def download_all_assets(tmpdir: str, version="0.2.11"): def download_dns_yaml(url: str, folder: str):
logger.info(f"downloading {url}")
response = requests.get(url, stream=True, timeout=(5, 10))
with open(os.path.join(folder, "dns.yaml"), "wb") as out_file:
out_file.write(response.content)
logger.info(f"downloaded into {folder}")
def download_all_assets(tmpdir: str, version="0.2.5"):
import subprocess import subprocess
import platform import platform
@@ -190,6 +198,7 @@ def download_all_assets(tmpdir: str, version="0.2.11"):
suffix = "zip" if is_win else "tar.gz" suffix = "zip" if is_win else "tar.gz"
RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}" RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}"
cmdfile = os.path.join(tmpdir, "rvcmd") cmdfile = os.path.join(tmpdir, "rvcmd")
try:
if is_win: if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir) download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe" cmdfile += ".exe"
@@ -197,3 +206,36 @@ def download_all_assets(tmpdir: str, version="0.2.11"):
download_and_extract_tar_gz(RVCMD_URL, tmpdir) download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755) os.chmod(cmdfile, 0o755)
subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"]) subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])
except Exception:
BASE_URL = "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/assets/"
suffix = {
"darwin_amd64": "555",
"darwin_arm64": "556",
"linux_386": "557",
"linux_amd64": "558",
"linux_arm64": "559",
"windows_386": "562",
"windows_amd64": "563",
}[f"{system_type}_{architecture}"]
RVCMD_URL = BASE_URL + suffix
download_dns_yaml(
"https://raw.gitcode.com/u011570312/RVC-Models-Downloader/raw/main/dns.yaml",
tmpdir,
)
if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe"
else:
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755)
subprocess.run(
[
cmdfile,
"-notui",
"-w",
"0",
"-dns",
os.path.join(tmpdir, "dns.yaml"),
"assets/rvc",
]
)

View File

@@ -303,7 +303,7 @@ class TextAudioLoader(torch.utils.data.Dataset):
spec_filename = filename.replace(".wav", ".spec.pt") spec_filename = filename.replace(".wav", ".spec.pt")
if os.path.exists(spec_filename): if os.path.exists(spec_filename):
try: try:
spec = torch.load(spec_filename, weights_only=True) spec = torch.load(spec_filename)
except: except:
logger.warning("%s %s", spec_filename, traceback.format_exc()) logger.warning("%s %s", spec_filename, traceback.format_exc())
spec = spectrogram_torch( spec = spectrogram_torch(

View File

@@ -59,7 +59,7 @@ def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps):
def extract_small_model(path, name, author, sr, if_f0, info, version): def extract_small_model(path, name, author, sr, if_f0, info, version):
try: try:
ckpt = torch.load(path, map_location="cpu", weights_only=True) ckpt = torch.load(path, map_location="cpu")
if "model" in ckpt: if "model" in ckpt:
ckpt = ckpt["model"] ckpt = ckpt["model"]
opt = OrderedDict() opt = OrderedDict()
@@ -196,7 +196,7 @@ def extract_small_model(path, name, author, sr, if_f0, info, version):
def change_info(path, info, name): def change_info(path, info, name):
try: try:
ckpt = torch.load(path, map_location="cpu", weights_only=True) ckpt = torch.load(path, map_location="cpu")
ckpt["info"] = info ckpt["info"] = info
if name == "": if name == "":
name = os.path.basename(path) name = os.path.basename(path)
@@ -229,8 +229,8 @@ def merge(path1, path2, alpha1, sr, f0, info, name, version):
a2 = "Unknown" a2 = "Unknown"
return f"{a1} & {a2}" return f"{a1} & {a2}"
ckpt1 = torch.load(path1, map_location="cpu", weights_only=True) ckpt1 = torch.load(path1, map_location="cpu")
ckpt2 = torch.load(path2, map_location="cpu", weights_only=True) ckpt2 = torch.load(path2, map_location="cpu")
cfg = ckpt1["config"] cfg = ckpt1["config"]
if "model" in ckpt1: if "model" in ckpt1:
ckpt1 = extract(ckpt1) ckpt1 = extract(ckpt1)

View File

@@ -29,24 +29,6 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
checkpoint_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True) checkpoint_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
saved_state_dict = checkpoint_dict["model"] saved_state_dict = checkpoint_dict["model"]
# Convert old-style weight_norm keys (weight_g/weight_v) to new
# parametrizations format (parametrizations.weight.original0/original1)
# so that checkpoints saved with the deprecated API can still be loaded.
_converted = {}
for k, v in list(saved_state_dict.items()):
if k.endswith(".weight_g"):
new_key = k[: -len(".weight_g")] + ".parametrizations.weight.original0"
_converted[new_key] = v
elif k.endswith(".weight_v"):
new_key = k[: -len(".weight_v")] + ".parametrizations.weight.original1"
_converted[new_key] = v
if _converted:
logger.info(
"Converting %d old-style weight_norm keys from checkpoint to new parametrizations format",
len(_converted),
)
saved_state_dict.update(_converted)
if hasattr(model, "module"): if hasattr(model, "module"):
state_dict = model.module.state_dict() state_dict = model.module.state_dict()
else: else:

View File

@@ -2,7 +2,6 @@ import os
import sys import sys
import traceback import traceback
from pathlib import Path from pathlib import Path
import importlib.util
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -39,9 +38,6 @@ f0method = sys.argv[3]
device = sys.argv[4] device = sys.argv[4]
is_half = sys.argv[5] == "True" is_half = sys.argv[5] == "True"
if importlib.util.find_spec("torch_directml") is not None:
import torch_directml # use side effect
class FeatureInput(object): class FeatureInput(object):
def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160): def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160):
@@ -106,12 +102,6 @@ if __name__ == "__main__":
Config.use_insecure_load() Config.use_insecure_load()
printt(" ".join(sys.argv)) printt(" ".join(sys.argv))
# GPU methods (rmvpe, fcpe, crepe, etc.) gain nothing from multiprocessing since
# all processes share one GPU. Spawning n_p processes each lazily loading
# the model onto the same CUDA device exhausts VRAM and causes deadlocks.
if "cuda" in device:
printt("WARN: use 1 thread since GPU is used.")
n_p = 1
featureInput = FeatureInput(is_half, device) featureInput = FeatureInput(is_half, device)
paths = [] paths = []
inp_root = "%s/1_16k_wavs" % (exp_dir) inp_root = "%s/1_16k_wavs" % (exp_dir)

View File

@@ -17,14 +17,7 @@ device = sys.argv[1]
n_part = int(sys.argv[2]) n_part = int(sys.argv[2])
i_part = int(sys.argv[3]) i_part = int(sys.argv[3])
i_gpu = sys.argv[4] i_gpu = sys.argv[4]
# CUDA_VISIBLE_DEVICES expects bare GPU indices (e.g. "0" or "0,1"), os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
# but callers may pass "cuda:0", "cuda:0-cuda:1", etc. Strip the prefix
# and normalise separators so any combination works.
import re
i_gpu = re.sub(r"cuda:", "", str(i_gpu))
i_gpu = i_gpu.replace("-", ",")
os.environ["CUDA_VISIBLE_DEVICES"] = i_gpu
exp_dir = sys.argv[5] exp_dir = sys.argv[5]
version = sys.argv[6] version = sys.argv[6]
is_half = sys.argv[7].lower() == "true" is_half = sys.argv[7].lower() == "true"

View File

@@ -29,12 +29,10 @@ try:
GradScaler = gradscaler_init() GradScaler = gradscaler_init()
ipex_init() ipex_init()
else:
from torch.cuda.amp import GradScaler, autocast
except Exception: except Exception:
pass from torch.cuda.amp import GradScaler, autocast
finally:
if not ("GradScaler" in globals() and "autocast" in globals()):
from torch.amp.grad_scaler import GradScaler
from torch.amp.autocast_mode import autocast
torch.backends.cudnn.deterministic = False torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False torch.backends.cudnn.benchmark = False
@@ -108,19 +106,14 @@ def main():
# patch to unblock people without gpus. there is probably a better way. # patch to unblock people without gpus. there is probably a better way.
print("NO GPU DETECTED: falling back to CPU - this may take a while") print("NO GPU DETECTED: falling back to CPU - this may take a while")
n_gpus = 1 n_gpus = 1
logger = utils.get_logger(hps.model_dir) os.environ["MASTER_ADDR"] = "localhost"
if n_gpus == 1: os.environ["MASTER_PORT"] = str(randint(20000, 55555))
# Single GPU: run directly without distributed to avoid gloo issues on Windows
run(0, 1, hps, logger)
else:
master_port = str(randint(20000, 55555))
os.environ["MASTER_ADDR"] = "127.0.0.1"
os.environ["MASTER_PORT"] = master_port
children = [] children = []
logger = utils.get_logger(hps.model_dir)
for i in range(n_gpus): for i in range(n_gpus):
subproc = mp.Process( subproc = mp.Process(
target=run, target=run,
args=(i, n_gpus, hps, logger, master_port), args=(i, n_gpus, hps, logger),
) )
children.append(subproc) children.append(subproc)
subproc.start() subproc.start()
@@ -129,9 +122,7 @@ def main():
children[i].join() children[i].join()
def run( def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger):
rank, n_gpus, hps: utils.HParams, logger: logging.Logger, master_port: str = "29500"
):
global global_step global global_step
if rank == 0: if rank == 0:
# logger = utils.get_logger(hps.model_dir) # logger = utils.get_logger(hps.model_dir)
@@ -140,78 +131,21 @@ def run(
writer = SummaryWriter(log_dir=hps.model_dir) writer = SummaryWriter(log_dir=hps.model_dir)
writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval")) writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
use_distributed = n_gpus > 1
if use_distributed:
if os.name == "nt" or not torch.cuda.is_available():
# On Windows, gloo's create_device(hostname=...) is gated to Linux only
# in the C++ layer (makeDeviceForHostname). We must use the interface-
# based path instead: create_device(interface=...) calls
# makeDeviceForInterface which is not platform-gated.
import socket as _socket
try:
store = dist.TCPStore(
host_name="127.0.0.1",
port=int(master_port),
world_size=n_gpus,
is_master=(rank == 0),
)
except Exception:
store = dist.TCPStore(
host_name="127.0.0.1",
port=int(master_port),
world_size=n_gpus,
is_master=(rank == 0),
use_libuv=False,
)
# Discover a working network interface for gloo device creation
gloo_device = None
try:
for idx, ifname in _socket.if_nameindex():
try:
gloo_device = dist.ProcessGroupGloo.create_device(
interface=ifname
)
print("Try device", idx, "name", ifname)
break
except RuntimeError as e:
print("Try device", idx, "name", ifname, "err:", e)
continue
except (OSError, AttributeError) as e:
print(e.with_traceback(None))
if gloo_device is None:
raise RuntimeError(
"Cannot create gloo device on Windows. "
"No usable network interface found. "
"Try adding your hostname to "
"C:\\Windows\\System32\\drivers\\etc\\hosts "
"with: 127.0.0.1 " + _socket.gethostname()
)
pg_options = dist.ProcessGroupGloo._Options()
pg_options._devices = [gloo_device]
dist.init_process_group(
backend="gloo",
store=store,
world_size=n_gpus,
rank=rank,
pg_options=pg_options,
)
else:
init_url = f"tcp://127.0.0.1:{master_port}"
try: try:
dist.init_process_group( dist.init_process_group(
backend="nccl", backend=(
init_method=init_url, "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
),
init_method="env://",
world_size=n_gpus, world_size=n_gpus,
rank=rank, rank=rank,
) )
except: except:
dist.init_process_group( dist.init_process_group(
backend="nccl", backend=(
init_method=init_url + "?use_libuv=False", "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
),
init_method="env://?use_libuv=False",
world_size=n_gpus, world_size=n_gpus,
rank=rank, rank=rank,
) )
@@ -287,7 +221,6 @@ def run(
) )
# net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True) # net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
# net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True) # net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
if use_distributed:
if hasattr(torch, "xpu") and torch.xpu.is_available(): if hasattr(torch, "xpu") and torch.xpu.is_available():
pass pass
elif torch.cuda.is_available(): elif torch.cuda.is_available():
@@ -537,7 +470,7 @@ def train_and_evaluate(
# wave_lengths = wave_lengths.cuda(rank, non_blocking=True) # wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
# Calculate # Calculate
with autocast(device_type="cuda", enabled=hps.train.fp16_run): with autocast(enabled=hps.train.fp16_run):
( (
y_hat, y_hat,
ids_slice, ids_slice,
@@ -556,7 +489,7 @@ def train_and_evaluate(
y_mel = slice_on_last_dim( y_mel = slice_on_last_dim(
mel, ids_slice, hps.train.segment_size // hps.data.hop_length mel, ids_slice, hps.train.segment_size // hps.data.hop_length
) )
with autocast(device_type="cuda", enabled=False): with autocast(enabled=False):
y_hat_mel = mel_spectrogram_torch( y_hat_mel = mel_spectrogram_torch(
y_hat.float().squeeze(1), y_hat.float().squeeze(1),
hps.data.filter_length, hps.data.filter_length,
@@ -575,7 +508,7 @@ def train_and_evaluate(
# Discriminator # Discriminator
y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach()) y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach())
with autocast(device_type="cuda", enabled=False): with autocast(enabled=False):
loss_disc, losses_disc_r, losses_disc_g = discriminator_loss( loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(
y_d_hat_r, y_d_hat_g y_d_hat_r, y_d_hat_g
) )
@@ -585,10 +518,10 @@ def train_and_evaluate(
grad_norm_d = total_grad_norm(net_d.parameters()) grad_norm_d = total_grad_norm(net_d.parameters())
scaler.step(optim_d) scaler.step(optim_d)
with autocast(device_type="cuda", enabled=hps.train.fp16_run): with autocast(enabled=hps.train.fp16_run):
# Generator # Generator
y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat) y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
with autocast(device_type="cuda", enabled=False): with autocast(enabled=False):
loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl
loss_fm = feature_loss(fmap_r, fmap_g) loss_fm = feature_loss(fmap_r, fmap_g)

View File

@@ -62,6 +62,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
os.path.basename(inp_path), os.path.basename(inp_path),
) )
resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo") resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo")
try: # Remove the original file
os.remove(inp_path)
except Exception as e:
print(f"Failed to remove the original file: {e}")
inp_path = tmp_path inp_path = tmp_path
try: try:
if done == 0: if done == 0:

View File

@@ -37,7 +37,7 @@ class AudioPre:
else: else:
mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json") mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
model = Nets.CascadedASPPNet(mp.param["bins"] * 2) model = Nets.CascadedASPPNet(mp.param["bins"] * 2)
cpk = torch.load(model_path, map_location="cpu", weights_only=True) cpk = torch.load(model_path, map_location="cpu")
model.load_state_dict(cpk) model.load_state_dict(cpk)
model.eval() model.eval()
if is_half: if is_half:

View File

@@ -10,6 +10,9 @@ from pybase16384 import encode_to_string, decode_from_string
from configs import CPUConfig from configs import CPUConfig
from rvc.synthesizer import get_synthesizer from rvc.synthesizer import get_synthesizer
from .pipeline import Pipeline
from .utils import load_hubert
class TorchSeedContext: class TorchSeedContext:
def __init__(self, seed): def __init__(self, seed):
@@ -92,9 +95,6 @@ def wave_hash(time_field):
def model_hash(config, tgt_sr, net_g, if_f0, version): def model_hash(config, tgt_sr, net_g, if_f0, version):
from .pipeline import Pipeline
from .utils import load_hubert
pipeline = Pipeline(tgt_sr, config) pipeline = Pipeline(tgt_sr, config)
audio = original_audio() audio = original_audio()
hbt = load_hubert(config.device, config.is_half) hbt = load_hubert(config.device, config.is_half)
@@ -152,7 +152,7 @@ def model_hash_ckpt(cpt):
def model_hash_from(path): def model_hash_from(path):
cpt = torch.load(path, map_location="cpu", weights_only=True) cpt = torch.load(path, map_location="cpu")
h = model_hash_ckpt(cpt) h = model_hash_ckpt(cpt)
del cpt del cpt
return h return h

View File

@@ -75,7 +75,7 @@ def show_info(path):
try: try:
if hasattr(path, "name"): if hasattr(path, "name"):
path = path.name path = path.name
a = torch.load(path, map_location="cpu", weights_only=True) a = torch.load(path, map_location="cpu")
txt = show_model_info(a, show_long_id=True) txt = show_model_info(a, show_long_id=True)
del a del a
except: except:

View File

@@ -1,7 +1,6 @@
import os, pathlib import os, pathlib
import torch from fairseq import checkpoint_utils
from fairseq import checkpoint_utils, data
def get_index_path_from_model(sid): def get_index_path_from_model(sid):
@@ -22,7 +21,6 @@ def get_index_path_from_model(sid):
def load_hubert(device, is_half): def load_hubert(device, is_half):
with torch.serialization.safe_globals([data.dictionary.Dictionary]):
models, _, _ = checkpoint_utils.load_model_ensemble_and_task( models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
["assets/hubert/hubert_base.pt"], ["assets/hubert/hubert_base.pt"],
suffix="", suffix="",

View File

@@ -1,10 +1,11 @@
tensorflow-rocm
joblib>=1.1.0 joblib>=1.1.0
numba numba
numpy numpy
scipy scipy
librosa>=0.10.2 librosa>=0.10.2
llvmlite llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu faiss-cpu
gradio gradio
Cython Cython

View File

@@ -1,49 +0,0 @@
joblib>=1.1.0
numba
numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git
faiss-cpu
gradio
Cython
pydub>=0.25.1
tensorboardX
Jinja2>=3.1.2
json5
Markdown
matplotlib>=3.7.0
matplotlib-inline>=0.1.3
praat-parselmouth>=0.4.2
Pillow>=9.1.1
resampy>=0.4.2
scikit-learn
tensorboard
tqdm>=4.63.1
tornado>=6.1
Werkzeug>=2.2.3
uc-micro-py>=1.0.1
sympy>=1.11.1
tabulate>=0.8.10
PyYAML>=6.0
pyasn1>=0.4.8
pyasn1-modules>=0.2.8
fsspec>=2022.11.0
absl-py>=1.2.0
audioread
uvicorn>=0.21.1
colorama>=0.4.5
pyworld==0.3.2
httpx
onnxruntime; sys_platform == 'darwin'
torchcrepe>=0.0.23
fastapi
torchfcpe
python-dotenv>=1.0.0
av
pybase16384
--extra-index-url https://download.pytorch.org/whl/cpu
torch
torchvision
torchaudio

View File

@@ -4,7 +4,7 @@ numpy
scipy scipy
librosa>=0.10.2 librosa>=0.10.2
llvmlite llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu faiss-cpu
gradio gradio
Cython Cython
@@ -43,6 +43,3 @@ python-dotenv>=1.0.0
av av
torchfcpe torchfcpe
pybase16384 pybase16384
torch-directml
torchvision
torchaudio

View File

@@ -9,7 +9,7 @@ numpy
scipy scipy
librosa>=0.10.2 librosa>=0.10.2
llvmlite==0.39.0 llvmlite==0.39.0
fairseq @ git+https://github.com/fumiama/fairseq.git fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu faiss-cpu
gradio gradio
Cython Cython

View File

@@ -4,7 +4,7 @@ numpy
scipy scipy
librosa>=0.10.2 librosa>=0.10.2
llvmlite llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu faiss-cpu
gradio gradio
Cython Cython

View File

@@ -11,14 +11,14 @@ class F0Predictor(object):
f0_min=50, f0_min=50,
f0_max=1100, f0_max=1100,
sampling_rate=44100, sampling_rate=44100,
device: Optional[Union[str, torch.device]] = None, device: Optional[str] = None,
): ):
self.hop_length = hop_length self.hop_length = hop_length
self.f0_min = f0_min self.f0_min = f0_min
self.f0_max = f0_max self.f0_max = f0_max
self.sampling_rate = sampling_rate self.sampling_rate = sampling_rate
if not device: if device is None:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") device = "cuda:0" if torch.cuda.is_available() else "cpu"
self.device = device self.device = device
def compute_f0( def compute_f0(

View File

@@ -1,4 +1,4 @@
from typing import Optional, Union from typing import Optional
import torch import torch
import numpy as np import numpy as np
@@ -17,9 +17,9 @@ class MelSpectrogram(torch.nn.Module):
hop_length: int, hop_length: int,
n_fft: Optional[int] = None, n_fft: Optional[int] = None,
mel_fmin: int = 0, mel_fmin: int = 0,
mel_fmax: Optional[int] = None, mel_fmax: int = None,
clamp: float = 1e-5, clamp: float = 1e-5,
device: Union[str, torch.device] = torch.device("cpu"), device=torch.device("cpu"),
): ):
super().__init__() super().__init__()
if n_fft is None: if n_fft is None:

View File

@@ -1,6 +1,6 @@
from io import BytesIO from io import BytesIO
import os import os
from typing import Optional, Union from typing import Any, Optional, Union
import numpy as np import numpy as np
import torch import torch

View File

@@ -12,8 +12,8 @@ class MultiHeadAttention(nn.Module):
channels: int, channels: int,
out_channels: int, out_channels: int,
n_heads: int, n_heads: int,
window_size: int,
p_dropout: float = 0.0, p_dropout: float = 0.0,
window_size: Optional[int] = None,
heads_share: bool = True, heads_share: bool = True,
block_length: Optional[int] = None, block_length: Optional[int] = None,
proximal_bias: bool = False, proximal_bias: bool = False,

View File

@@ -4,8 +4,7 @@ import torch
from torch import nn from torch import nn
from torch.nn import Conv1d, Conv2d from torch.nn import Conv1d, Conv2d
from torch.nn import functional as F from torch.nn import functional as F
from torch.nn.utils import spectral_norm from torch.nn.utils import spectral_norm, weight_norm
from torch.nn.utils.parametrizations import weight_norm
from .residuals import LRELU_SLOPE from .residuals import LRELU_SLOPE
from .utils import get_padding from .utils import get_padding

View File

@@ -42,8 +42,8 @@ class Encoder(nn.Module):
hidden_channels, hidden_channels,
hidden_channels, hidden_channels,
n_heads, n_heads,
window_size,
p_dropout=p_dropout, p_dropout=p_dropout,
window_size=window_size,
) )
) )
self.norm_layers_1.append(LayerNorm(hidden_channels)) self.norm_layers_1.append(LayerNorm(hidden_channels))
@@ -121,7 +121,7 @@ class TextEncoder(nn.Module):
def __call__( def __call__(
self, self,
phone: torch.Tensor, phone: torch.Tensor,
pitch: Optional[torch.Tensor], pitch: torch.Tensor,
lengths: torch.Tensor, lengths: torch.Tensor,
skip_head: Optional[int] = None, skip_head: Optional[int] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
@@ -135,7 +135,7 @@ class TextEncoder(nn.Module):
def forward( def forward(
self, self,
phone: torch.Tensor, phone: torch.Tensor,
pitch: Optional[torch.Tensor], pitch: torch.Tensor,
lengths: torch.Tensor, lengths: torch.Tensor,
skip_head: Optional[int] = None, skip_head: Optional[int] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
@@ -212,8 +212,10 @@ class PosteriorEncoder(nn.Module):
self.enc.remove_weight_norm() self.enc.remove_weight_norm()
def __prepare_scriptable__(self): def __prepare_scriptable__(self):
from torch.nn.utils import parametrize for hook in self.enc._forward_pre_hooks.values():
if (
if parametrize.is_parametrized(self.enc, "weight"): hook.__module__ == "torch.nn.utils.weight_norm"
parametrize.remove_parametrizations(self.enc, "weight") and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.enc)
return self return self

View File

@@ -4,8 +4,7 @@ import torch
from torch import nn from torch import nn
from torch.nn import Conv1d, ConvTranspose1d from torch.nn import Conv1d, ConvTranspose1d
from torch.nn import functional as F from torch.nn import functional as F
from torch.nn.utils.parametrizations import weight_norm from torch.nn.utils import remove_weight_norm, weight_norm
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
from .utils import call_weight_data_normal_if_Conv from .utils import call_weight_data_normal_if_Conv
@@ -47,7 +46,6 @@ class Generator(torch.nn.Module):
self.resblocks = nn.ModuleList() self.resblocks = nn.ModuleList()
resblock_module = ResBlock1 if resblock == "1" else ResBlock2 resblock_module = ResBlock1 if resblock == "1" else ResBlock2
ch = 0
for i in range(len(self.ups)): for i in range(len(self.ups)):
ch = upsample_initial_channel // (2 ** (i + 1)) ch = upsample_initial_channel // (2 ** (i + 1))
for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes): for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):
@@ -99,16 +97,29 @@ class Generator(torch.nn.Module):
def __prepare_scriptable__(self): def __prepare_scriptable__(self):
for l in self.ups: for l in self.ups:
if is_parametrized(l, "weight"): for hook in l._forward_pre_hooks.values():
remove_parametrizations(l, "weight") # The hook we want to remove is an instance of WeightNorm class, so
# normally we would do `if isinstance(...)` but this class is not accessible
# because of shadowing, so we check the module name directly.
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
for l in self.resblocks: for l in self.resblocks:
if is_parametrized(l, "weight"): for hook in l._forward_pre_hooks.values():
remove_parametrizations(l, "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self return self
def remove_weight_norm(self): def remove_weight_norm(self):
for l in self.ups: for l in self.ups:
remove_parametrizations(l, "weight") remove_weight_norm(l)
for l in self.resblocks: for l in self.resblocks:
l.remove_weight_norm() l.remove_weight_norm()

View File

@@ -6,8 +6,6 @@ from torch.nn import functional as F
from .utils import activate_add_tanh_sigmoid_multiply from .utils import activate_add_tanh_sigmoid_multiply
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
class LayerNorm(nn.Module): class LayerNorm(nn.Module):
def __init__(self, channels: int, eps: float = 1e-5): def __init__(self, channels: int, eps: float = 1e-5):
@@ -32,7 +30,7 @@ class WN(torch.nn.Module):
dilation_rate: int, dilation_rate: int,
n_layers: int, n_layers: int,
gin_channels: int = 0, gin_channels: int = 0,
p_dropout: float = 0, p_dropout: int = 0,
): ):
super(WN, self).__init__() super(WN, self).__init__()
assert kernel_size % 2 == 1 assert kernel_size % 2 == 1
@@ -51,9 +49,7 @@ class WN(torch.nn.Module):
cond_layer = torch.nn.Conv1d( cond_layer = torch.nn.Conv1d(
gin_channels, 2 * hidden_channels * n_layers, 1 gin_channels, 2 * hidden_channels * n_layers, 1
) )
self.cond_layer = torch.nn.utils.parametrizations.weight_norm( self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name="weight")
cond_layer, name="weight"
)
for i in range(n_layers): for i in range(n_layers):
dilation = dilation_rate**i dilation = dilation_rate**i
@@ -65,9 +61,7 @@ class WN(torch.nn.Module):
dilation=dilation, dilation=dilation,
padding=padding, padding=padding,
) )
in_layer = torch.nn.utils.parametrizations.weight_norm( in_layer = torch.nn.utils.weight_norm(in_layer, name="weight")
in_layer, name="weight"
)
self.in_layers.append(in_layer) self.in_layers.append(in_layer)
# last one is not necessary # last one is not necessary
@@ -77,9 +71,7 @@ class WN(torch.nn.Module):
res_skip_channels = hidden_channels res_skip_channels = hidden_channels
res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1) res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
res_skip_layer = torch.nn.utils.parametrizations.weight_norm( res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight")
res_skip_layer, name="weight"
)
self.res_skip_layers.append(res_skip_layer) self.res_skip_layers.append(res_skip_layer)
def __call__( def __call__(
@@ -125,20 +117,32 @@ class WN(torch.nn.Module):
def remove_weight_norm(self): def remove_weight_norm(self):
if self.gin_channels != 0: if self.gin_channels != 0:
remove_parametrizations(self.cond_layer, "weight") torch.nn.utils.remove_weight_norm(self.cond_layer)
for l in self.in_layers: for l in self.in_layers:
remove_parametrizations(l, "weight") torch.nn.utils.remove_weight_norm(l)
for l in self.res_skip_layers: for l in self.res_skip_layers:
remove_parametrizations(l, "weight") torch.nn.utils.remove_weight_norm(l)
def __prepare_scriptable__(self): def __prepare_scriptable__(self):
if self.gin_channels != 0: if self.gin_channels != 0:
if is_parametrized(self.cond_layer, "weight"): for hook in self.cond_layer._forward_pre_hooks.values():
remove_parametrizations(self.cond_layer, "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.cond_layer)
for l in self.in_layers: for l in self.in_layers:
if is_parametrized(l, "weight"): for hook in l._forward_pre_hooks.values():
remove_parametrizations(l, "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
for l in self.res_skip_layers: for l in self.res_skip_layers:
if is_parametrized(l, "weight"): for hook in l._forward_pre_hooks.values():
remove_parametrizations(l, "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self return self

View File

@@ -1,12 +1,11 @@
from typing import Optional, List, Union from typing import Optional, List
import math import math
import torch import torch
from torch import nn from torch import nn
from torch.nn import Conv1d, ConvTranspose1d from torch.nn import Conv1d, ConvTranspose1d
from torch.nn import functional as F from torch.nn import functional as F
from torch.nn.utils.parametrizations import weight_norm from torch.nn.utils import remove_weight_norm, weight_norm
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
from .generators import SineGenerator from .generators import SineGenerator
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
@@ -84,7 +83,7 @@ class NSFGenerator(torch.nn.Module):
self.conv_pre = Conv1d( self.conv_pre = Conv1d(
initial_channel, upsample_initial_channel, 7, 1, padding=3 initial_channel, upsample_initial_channel, 7, 1, padding=3
) )
resblockcls = ResBlock1 if resblock == "1" else ResBlock2 resblock = ResBlock1 if resblock == "1" else ResBlock2
self.ups = nn.ModuleList() self.ups = nn.ModuleList()
for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
@@ -115,13 +114,12 @@ class NSFGenerator(torch.nn.Module):
self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1)) self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
self.resblocks = nn.ModuleList() self.resblocks = nn.ModuleList()
ch = 0
for i in range(len(self.ups)): for i in range(len(self.ups)):
ch = upsample_initial_channel // (2 ** (i + 1)) ch: int = upsample_initial_channel // (2 ** (i + 1))
for j, (k, d) in enumerate( for j, (k, d) in enumerate(
zip(resblock_kernel_sizes, resblock_dilation_sizes) zip(resblock_kernel_sizes, resblock_dilation_sizes)
): ):
self.resblocks.append(resblockcls(ch, k, d)) self.resblocks.append(resblock(ch, k, d))
self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False) self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
self.ups.apply(call_weight_data_normal_if_Conv) self.ups.apply(call_weight_data_normal_if_Conv)
@@ -192,15 +190,27 @@ class NSFGenerator(torch.nn.Module):
def remove_weight_norm(self): def remove_weight_norm(self):
for l in self.ups: for l in self.ups:
remove_parametrizations(l, "weight") remove_weight_norm(l)
for l in self.resblocks: for l in self.resblocks:
l.remove_weight_norm() l.remove_weight_norm()
def __prepare_scriptable__(self): def __prepare_scriptable__(self):
for l in self.ups: for l in self.ups:
if is_parametrized(l, "weight"): for hook in l._forward_pre_hooks.values():
remove_parametrizations(l, "weight") # The hook we want to remove is an instance of WeightNorm class, so
# normally we would do `if isinstance(...)` but this class is not accessible
# because of shadowing, so we check the module name directly.
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
for l in self.resblocks: for l in self.resblocks:
if is_parametrized(l, "weight"): for hook in self.resblocks._forward_pre_hooks.values():
remove_parametrizations(l, "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self return self

View File

@@ -4,8 +4,7 @@ import torch
from torch import nn from torch import nn
from torch.nn import Conv1d from torch.nn import Conv1d
from torch.nn import functional as F from torch.nn import functional as F
from torch.nn.utils.parametrizations import weight_norm from torch.nn.utils import remove_weight_norm, weight_norm
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
from .norms import WN from .norms import WN
from .utils import ( from .utils import (
@@ -21,7 +20,7 @@ class ResBlock1(torch.nn.Module):
self, self,
channels: int, channels: int,
kernel_size: int = 3, kernel_size: int = 3,
dilation: List[int] = [1, 3, 5], dilation: List[int] = (1, 3, 5),
): ):
super(ResBlock1, self).__init__() super(ResBlock1, self).__init__()
@@ -86,17 +85,25 @@ class ResBlock1(torch.nn.Module):
def remove_weight_norm(self): def remove_weight_norm(self):
for l in self.convs1: for l in self.convs1:
remove_parametrizations(l, "weight") remove_weight_norm(l)
for l in self.convs2: for l in self.convs2:
remove_parametrizations(l, "weight") remove_weight_norm(l)
def __prepare_scriptable__(self): def __prepare_scriptable__(self):
for l in self.convs1: for l in self.convs1:
if is_parametrized(l, "weight"): for hook in l._forward_pre_hooks.values():
remove_parametrizations(l, "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
for l in self.convs2: for l in self.convs2:
if is_parametrized(l, "weight"): for hook in l._forward_pre_hooks.values():
remove_parametrizations(l, "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self return self
@@ -110,7 +117,7 @@ class ResBlock2(torch.nn.Module):
self, self,
channels: int, channels: int,
kernel_size=3, kernel_size=3,
dilation: List[int] = [1, 3], dilation: List[int] = (1, 3),
): ):
super(ResBlock2, self).__init__() super(ResBlock2, self).__init__()
self.convs = nn.ModuleList() self.convs = nn.ModuleList()
@@ -154,12 +161,16 @@ class ResBlock2(torch.nn.Module):
def remove_weight_norm(self): def remove_weight_norm(self):
for l in self.convs: for l in self.convs:
remove_parametrizations(l, "weight") remove_weight_norm(l)
def __prepare_scriptable__(self): def __prepare_scriptable__(self):
for l in self.convs: for l in self.convs:
if is_parametrized(l, "weight"): for hook in l._forward_pre_hooks.values():
remove_parametrizations(l, "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self return self
@@ -171,7 +182,7 @@ class ResidualCouplingLayer(nn.Module):
kernel_size: int, kernel_size: int,
dilation_rate: int, dilation_rate: int,
n_layers: int, n_layers: int,
p_dropout: float = 0, p_dropout: int = 0,
gin_channels: int = 0, gin_channels: int = 0,
mean_only: bool = False, mean_only: bool = False,
): ):
@@ -238,8 +249,12 @@ class ResidualCouplingLayer(nn.Module):
self.enc.remove_weight_norm() self.enc.remove_weight_norm()
def __prepare_scriptable__(self): def __prepare_scriptable__(self):
if is_parametrized(self.enc, "weight"): for hook in self.enc._forward_pre_hooks.values():
remove_parametrizations(self.enc, "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.enc)
return self return self
@@ -329,6 +344,10 @@ class ResidualCouplingBlock(nn.Module):
def __prepare_scriptable__(self): def __prepare_scriptable__(self):
for i in range(self.n_flows): for i in range(self.n_flows):
if is_parametrized(self.flows[i * 2], "weight"): for hook in self.flows[i * 2]._forward_pre_hooks.values():
remove_parametrizations(self.flows[i * 2], "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.flows[i * 2])
return self return self

View File

@@ -2,7 +2,6 @@ from typing import Optional, List, Union
import torch import torch
from torch import nn from torch import nn
from torch.nn.utils import parametrize
from .encoders import TextEncoder, PosteriorEncoder from .encoders import TextEncoder, PosteriorEncoder
@@ -35,7 +34,7 @@ class SynthesizerTrnMsNSFsid(nn.Module):
upsample_kernel_sizes: List[int], upsample_kernel_sizes: List[int],
spk_embed_dim: int, spk_embed_dim: int,
gin_channels: int, gin_channels: int,
sr: Union[str, int], sr: Optional[Union[str, int]],
encoder_dim: int, encoder_dim: int,
use_f0: bool, use_f0: bool,
): ):
@@ -119,16 +118,32 @@ class SynthesizerTrnMsNSFsid(nn.Module):
self.enc_q.remove_weight_norm() self.enc_q.remove_weight_norm()
def __prepare_scriptable__(self): def __prepare_scriptable__(self):
if parametrize.is_parametrized(self.dec, "weight"): for hook in self.dec._forward_pre_hooks.values():
parametrize.remove_parametrizations(self.dec, "weight") # The hook we want to remove is an instance of WeightNorm class, so
if parametrize.is_parametrized(self.flow, "weight"): # normally we would do `if isinstance(...)` but this class is not accessible
parametrize.remove_parametrizations(self.flow, "weight") # because of shadowing, so we check the module name directly.
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.dec)
for hook in self.flow._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.flow)
if hasattr(self, "enc_q"): if hasattr(self, "enc_q"):
if parametrize.is_parametrized(self.enc_q, "weight"): for hook in self.enc_q._forward_pre_hooks.values():
parametrize.remove_parametrizations(self.enc_q, "weight") if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.enc_q)
return self return self
@torch.jit.ignore() @torch.jit.ignore
def forward( def forward(
self, self,
phone: torch.Tensor, phone: torch.Tensor,
@@ -140,20 +155,18 @@ class SynthesizerTrnMsNSFsid(nn.Module):
pitchf: Optional[torch.Tensor] = None, pitchf: Optional[torch.Tensor] = None,
): # 这里ds是id[bs,1] ): # 这里ds是id[bs,1]
# print(1,pitch.shape)#[bs,t] # print(1,pitch.shape)#[bs,t]
embg = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t广播的 g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t广播的
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths) m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=embg) z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
z_p = self.flow(z, y_mask, g=embg) z_p = self.flow(z, y_mask, g=g)
z_slice, ids_slice = rand_slice_segments_on_last_dim( z_slice, ids_slice = rand_slice_segments_on_last_dim(
z, y_lengths, self.segment_size z, y_lengths, self.segment_size
) )
if pitchf is not None and isinstance(self.dec, NSFGenerator): if pitchf is not None:
pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size) pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size)
o = self.dec(z_slice, pitchf, g=embg) # type: ignore o = self.dec(z_slice, pitchf, g=g)
elif isinstance(self.dec, Generator):
o = self.dec(z_slice, g=embg)
else: else:
raise KeyError(f"unknown dec type: {type(self.dec).__name__}") o = self.dec(z_slice, g=g)
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q) return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
@torch.jit.export @torch.jit.export
@@ -188,17 +201,15 @@ class SynthesizerTrnMsNSFsid(nn.Module):
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
z = self.flow(z_p, x_mask, g=g, reverse=True) z = self.flow(z_p, x_mask, g=g, reverse=True)
del z_p, m_p, logs_p del z_p, m_p, logs_p
if pitchf is not None and isinstance(self.dec, NSFGenerator): if pitchf is not None:
o = self.dec( o = self.dec(
z * x_mask, z * x_mask,
pitchf, pitchf,
g=g, g=g,
n_res=return_length2, n_res=return_length2,
) )
elif isinstance(self.dec, Generator):
o = self.dec(z * x_mask, g=g, n_res=return_length2)
else: else:
raise KeyError(f"unknown dec type: {type(self.dec).__name__}") o = self.dec(z * x_mask, g=g, n_res=return_length2)
del x_mask, z del x_mask, z
return o # , x_mask, (z, z_p, m_p, logs_p) return o # , x_mask, (z, z_p, m_p, logs_p)
@@ -315,7 +326,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes: List[int], upsample_kernel_sizes: List[int],
spk_embed_dim: int, spk_embed_dim: int,
gin_channels: int, gin_channels: int,
sr: Union[str, int], sr=None,
): ):
super().__init__( super().__init__(
spec_channels, spec_channels,
@@ -335,7 +346,6 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes, upsample_kernel_sizes,
spk_embed_dim, spk_embed_dim,
gin_channels, gin_channels,
sr,
256, 256,
False, False,
) )
@@ -361,7 +371,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes: List[int], upsample_kernel_sizes: List[int],
spk_embed_dim: int, spk_embed_dim: int,
gin_channels: int, gin_channels: int,
sr: Union[str, int], sr=None,
): ):
super().__init__( super().__init__(
spec_channels, spec_channels,
@@ -381,7 +391,6 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes, upsample_kernel_sizes,
spk_embed_dim, spk_embed_dim,
gin_channels, gin_channels,
sr,
768, 768,
False, False,
) )

View File

@@ -1,4 +1,4 @@
from typing import List, Optional, Tuple, Iterator, Union from typing import List, Optional, Tuple, Iterator
import torch import torch
@@ -17,7 +17,7 @@ def get_padding(kernel_size: int, dilation=1) -> int:
def slice_on_last_dim( def slice_on_last_dim(
x: torch.Tensor, x: torch.Tensor,
start_indices: Union[List[int], torch.Tensor], start_indices: List[int],
segment_size=4, segment_size=4,
) -> torch.Tensor: ) -> torch.Tensor:
new_shape = [*x.shape] new_shape = [*x.shape]
@@ -32,9 +32,9 @@ def slice_on_last_dim(
def rand_slice_segments_on_last_dim( def rand_slice_segments_on_last_dim(
x: torch.Tensor, x: torch.Tensor,
x_lengths: Optional[Union[int, torch.Tensor]] = None, x_lengths: int = None,
segment_size=4, segment_size=4,
) -> Tuple[torch.Tensor, Union[List[int], torch.Tensor]]: ) -> Tuple[torch.Tensor, List[int]]:
b, _, t = x.size() b, _, t = x.size()
if x_lengths is None: if x_lengths is None:
x_lengths = t x_lengths = t
@@ -58,7 +58,7 @@ def activate_add_tanh_sigmoid_multiply(
def sequence_mask( def sequence_mask(
length: torch.Tensor, length: torch.Tensor,
max_length: Optional[int] = None, max_length: Optional[int] = None,
): ) -> torch.BoolTensor:
if max_length is None: if max_length is None:
max_length = int(length.max()) max_length = int(length.max())
x = torch.arange(max_length, dtype=length.dtype, device=length.device) x = torch.arange(max_length, dtype=length.dtype, device=length.device)

View File

@@ -1,4 +1,4 @@
from typing import List, Union from typing import List, Optional, Union
import torch import torch
@@ -25,7 +25,7 @@ class SynthesizerTrnMsNSFsid(SynthesizerBase):
upsample_kernel_sizes: List[int], upsample_kernel_sizes: List[int],
spk_embed_dim: int, spk_embed_dim: int,
gin_channels: int, gin_channels: int,
sr: Union[str, int], sr: Optional[Union[str, int]],
encoder_dim: int, encoder_dim: int,
): ):
super().__init__( super().__init__(

View File

@@ -40,9 +40,11 @@ app = gr.Blocks()
with app: with app:
with gr.Tabs(): with gr.Tabs():
with gr.TabItem("在线demo"): with gr.TabItem("在线demo"):
gr.Markdown(value=""" gr.Markdown(
value="""
RVC 在线demo RVC 在线demo
""") """
)
sid = gr.Dropdown(label=i18n("Inferencing voice"), choices=sorted(names)) sid = gr.Dropdown(label=i18n("Inferencing voice"), choices=sorted(names))
with gr.Column(): with gr.Column():
spk_item = gr.Slider( spk_item = gr.Slider(

20
web.py
View File

@@ -36,6 +36,7 @@ import threading
import shutil import shutil
import logging import logging
logging.getLogger("numba").setLevel(logging.WARNING) logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING) logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -88,24 +89,23 @@ index_paths = [""]
def lookup_names(weight_root): def lookup_names(weight_root):
names = [] global names
for name in os.listdir(weight_root): for name in os.listdir(weight_root):
if name.endswith(".pth"): if name.endswith(".pth"):
names.append(name) names.append(name)
return names
def lookup_indices(index_root): def lookup_indices(index_root):
index_paths = [] global index_paths
for root, _, files in os.walk(index_root, topdown=False): for root, _, files in os.walk(index_root, topdown=False):
for name in files: for name in files:
if name.endswith(".index") and "trained" not in name: if name.endswith(".index") and "trained" not in name:
index_paths.append(str(pathlib.Path(root, name))) index_paths.append(str(pathlib.Path(root, name)))
return index_paths
names = [""] + lookup_names(weight_root) lookup_names(weight_root)
index_paths = [""] + lookup_indices(index_root) + lookup_indices(outside_index_root) lookup_indices(index_root)
lookup_indices(outside_index_root)
uvr5_names = [] uvr5_names = []
for name in os.listdir(weight_uvr5_root): for name in os.listdir(weight_uvr5_root):
if name.endswith(".pth") or "onnx" in name: if name.endswith(".pth") or "onnx" in name:
@@ -113,8 +113,12 @@ for name in os.listdir(weight_uvr5_root):
def change_choices(): def change_choices():
names = [""] + lookup_names(weight_root) global index_paths, names
index_paths = [""] + lookup_indices(index_root) + lookup_indices(outside_index_root) names = [""]
lookup_names(weight_root)
index_paths = [""]
lookup_indices(index_root)
lookup_indices(outside_index_root)
return {"choices": sorted(names), "__type__": "update"}, { return {"choices": sorted(names), "__type__": "update"}, {
"choices": sorted(index_paths), "choices": sorted(index_paths),
"__type__": "update", "__type__": "update",