1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-05 17:20:25 +08:00

1 Commits

Author SHA1 Message Date
github-actions[bot]
782c4f6473 chore(i18n): sync locale on dev 2025-07-08 07:46:51 +00:00
39 changed files with 319 additions and 419 deletions

View File

@@ -15,7 +15,7 @@ jobs:
- name: Run RVC-Models-Downloader
run: |
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
sudo apt -y install ./rvcmd_linux_amd64.deb
rm -f ./rvcmd_linux_amd64.deb
rvcmd -notrs -w 1 -notui assets/rvc

View File

@@ -12,7 +12,7 @@ jobs:
steps:
- uses: actions/stale@v5
with:
exempt-issue-labels: "help wanted,good first issue,documentation,following up,todo list,enhancement"
exempt-issue-labels: "help wanted,good first issue,documentation,following up,todo list"
days-before-issue-stale: 30
days-before-issue-close: 15
stale-issue-label: "stale"

View File

@@ -11,72 +11,20 @@ jobs:
steps:
- uses: actions/checkout@master
- name: Space cleanup
env:
DEBIAN_FRONTEND: noninteractive
run: |
df -h
# Source - https://stackoverflow.com/a
# Posted by Cosmin Bodnariuc
# Retrieved 2025-11-21, License - CC BY-SA 4.0
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/share/vcpkg
sudo rm -rf /usr/local/share/miniconda
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /opt/hostedtoolcache/go
sudo rm -rf /opt/hostedtoolcache/Python
sudo rm -rf /opt/hostedtoolcache/node
sudo rm -rf /opt/hostedtoolcache/R
sudo rm -rf /opt/hostedtoolcache/Java
sudo rm -rf /opt/hostedtoolcache/LLVM
sudo rm -rf /opt/hostedtoolcache/Swift
sudo rm -rf /opt/hostedtoolcache/Php
sudo rm -rf /opt/hostedtoolcache/Perl
sudo rm -rf /opt/hostedtoolcache/Scala
sudo rm -rf /opt/hostedtoolcache/Julia
sudo rm -rf /opt/hostedtoolcache/Mono
sudo rm -rf /opt/hostedtoolcache/PowerShell
sudo rm -rf /opt/hostedtoolcache/Crystal
sudo rm -rf /opt/hostedtoolcache/Elixir
sudo rm -rf /opt/hostedtoolcache/Erlang
sudo rm -rf /opt/hostedtoolcache/FSharp
sudo rm -rf /opt/hostedtoolcache/Haskell
sudo rm -rf /opt/hostedtoolcache/OCaml
sudo rm -rf /opt/hostedtoolcache/Rust
sudo rm -rf /opt/hostedtoolcache/Sbt
sudo rm -rf /opt/hostedtoolcache/Solidity
sudo rm -rf /opt/hostedtoolcache/VisualStudio
sudo rm -rf /opt/hostedtoolcache/WinAppDriver
sudo rm -rf /opt/hostedtoolcache/Xamarin
sudo rm -rf /opt/hostedtoolcache/Yarn
sudo rm -rf /opt/hostedtoolcache/Zephyr
sudo rm -rf /opt/hostedtoolcache/zig
sudo rm -rf /opt/hostedtoolcache/zulu
sudo rm -rf /opt/hostedtoolcache/azcopy
sudo -E apt-get update
sudo -E apt-get -y autoremove --purge
sudo -E apt-get clean
df -h
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb
sudo apt update
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
sudo apt -y install ./rvcmd_linux_amd64.deb
pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552
python -m pip install --upgrade setuptools
python -m pip install --upgrade wheel
pip install torch torchvision torchaudio
pip install -r requirements/cpu.txt
pip install -r requirements/main.txt
rvcmd -notrs -w 1 -notui assets/rvc
- name: Test step 1 & 2
run: |

View File

@@ -4,7 +4,6 @@ import sys
import json
import shutil
from multiprocessing import cpu_count
import importlib.util
import torch
@@ -47,10 +46,10 @@ class Config(metaclass=Singleton):
self.global_link,
self.noparallel,
self.noautoopen,
self.dml,
self.nocheck,
self.update,
) = self.arg_parse()
self.dml = False
self.instead = ""
self.preprocess_per = 3.7
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
@@ -84,6 +83,11 @@ class Config(metaclass=Singleton):
action="store_true",
help="Do not open in browser automatically",
)
parser.add_argument(
"--dml",
action="store_true",
help="torch_dml",
)
parser.add_argument(
"--nocheck", action="store_true", help="Run without checking assets"
)
@@ -100,6 +104,7 @@ class Config(metaclass=Singleton):
cmd_opts.global_link,
cmd_opts.noparallel,
cmd_opts.noautoopen,
cmd_opts.dml,
cmd_opts.nocheck,
cmd_opts.update,
)
@@ -178,7 +183,7 @@ class Config(metaclass=Singleton):
if self.has_xpu():
self.device = self.instead = "xpu:0"
self.is_half = True
i_device = int(str(self.device).split(":")[-1])
i_device = int(self.device.split(":")[-1])
self.gpu_name = torch.cuda.get_device_name(i_device)
if (
("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
@@ -209,7 +214,7 @@ class Config(metaclass=Singleton):
self.use_fp32_config()
else:
logger.info("No supported Nvidia GPU found")
self.device = self.instead = torch.get_default_device()
self.device = self.instead = "cpu"
self.is_half = False
self.use_fp32_config()
@@ -234,13 +239,12 @@ class Config(metaclass=Singleton):
x_query = 5
x_center = 30
x_max = 32
if importlib.util.find_spec("torch_directml") is not None:
if self.dml:
logger.info("Use DirectML instead")
import torch_directml
self.device = torch_directml.device(torch_directml.default_device())
self.is_half = False
self.dml = True
else:
if self.instead:
logger.info(f"Use {self.instead} instead")

View File

@@ -133,7 +133,7 @@
"Takeover WASAPI device": "WASAPIデバイスを独占",
"Target sample rate": "目標サンプリング率",
"The audio file to be processed": "処理待ち音声",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本ソフトウェアは、GNU Affero General Public License バージョン3.0以降の下でライセンスされています。<br>作者は、このソフトウェアの使用に関して一切の管理権限や責任を負いません。<br>このソフトウェアを使用し、生成された音声やファイルを含むあらゆるコンテンツを配布するユーザーは、AGPL 3.0ライセンスの条項への準拠について全責任を負います。<br>これらの条項に同意しない場合、このソフトウェアパッケージに含まれるコードやファイルの使用、参照、配布は禁止されています。<br>詳細については、ルートディレクトリにあるLICENSEファイルを参照してください。",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
"Total training epochs (total_epoch)": "総エポック数",
"Train": "学習",
"Train feature index": "特徴索引の学習",

View File

@@ -133,7 +133,7 @@
"Takeover WASAPI device": "独占 WASAPI 设备",
"Target sample rate": "目标采样率",
"The audio file to be processed": "待处理音频文件",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本软件基于GNU Affero通用公共许可证3.0版本或更高版本进行许可。<br>作者对本软件的使用不承担任何控制权或责任。<br>使用本软件并分发任何内容包括由其生成的声音或文件的用户需对遵守AGPL 3.0许可证条款承担全部责任。<br>如果您不接受这些条款,则禁止使用、引用或分发本软件包中包含的任何代码或文件。<br>请参阅位于根目录中的LICENSE文件以获取完整详情。",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
"Total training epochs (total_epoch)": "总训练轮数total_epoch",
"Train": "训练",
"Train feature index": "训练特征索引",

View File

@@ -195,10 +195,7 @@ def get_audio_properties(input_path: str) -> Tuple[int, int]:
container = av.open(input_path)
audio_stream = next(s for s in container.streams if s.type == "audio")
channels = 1 if audio_stream.layout == "mono" else 2
try:
rate = audio_stream.base_rate
except:
rate = audio_stream.sample_rate
rate = audio_stream.base_rate
container.close()
return channels, rate

View File

@@ -162,7 +162,15 @@ def download_and_extract_zip(url: str, folder: str):
logger.info(f"extracted into {folder}")
def download_all_assets(tmpdir: str, version="0.2.11"):
def download_dns_yaml(url: str, folder: str):
    """Fetch ``url`` and store the response body as ``dns.yaml`` inside ``folder``.

    Args:
        url: Address of the YAML file to download.
        folder: Existing directory that receives the ``dns.yaml`` file.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never saved as if it were the YAML file.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the target file cannot be written.
    """
    logger.info(f"downloading {url}")
    target_path = os.path.join(folder, "dns.yaml")
    # (5, 10) = connect timeout, read timeout in seconds.
    # The context manager returns the connection to the pool even on failure.
    with requests.get(url, stream=True, timeout=(5, 10)) as response:
        # Fail loudly on HTTP errors before touching the filesystem.
        response.raise_for_status()
        with open(target_path, "wb") as out_file:
            # stream=True only pays off when the body is consumed in chunks.
            for chunk in response.iter_content(chunk_size=8192):
                out_file.write(chunk)
    logger.info(f"downloaded into {folder}")
def download_all_assets(tmpdir: str, version="0.2.5"):
import subprocess
import platform
@@ -190,10 +198,44 @@ def download_all_assets(tmpdir: str, version="0.2.11"):
suffix = "zip" if is_win else "tar.gz"
RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}"
cmdfile = os.path.join(tmpdir, "rvcmd")
if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe"
else:
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755)
subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])
try:
if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe"
else:
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755)
subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])
except Exception:
BASE_URL = "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/assets/"
suffix = {
"darwin_amd64": "555",
"darwin_arm64": "556",
"linux_386": "557",
"linux_amd64": "558",
"linux_arm64": "559",
"windows_386": "562",
"windows_amd64": "563",
}[f"{system_type}_{architecture}"]
RVCMD_URL = BASE_URL + suffix
download_dns_yaml(
"https://raw.gitcode.com/u011570312/RVC-Models-Downloader/raw/main/dns.yaml",
tmpdir,
)
if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe"
else:
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755)
subprocess.run(
[
cmdfile,
"-notui",
"-w",
"0",
"-dns",
os.path.join(tmpdir, "dns.yaml"),
"assets/rvc",
]
)

View File

@@ -303,7 +303,7 @@ class TextAudioLoader(torch.utils.data.Dataset):
spec_filename = filename.replace(".wav", ".spec.pt")
if os.path.exists(spec_filename):
try:
spec = torch.load(spec_filename, weights_only=True)
spec = torch.load(spec_filename)
except:
logger.warning("%s %s", spec_filename, traceback.format_exc())
spec = spectrogram_torch(

View File

@@ -59,7 +59,7 @@ def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps):
def extract_small_model(path, name, author, sr, if_f0, info, version):
try:
ckpt = torch.load(path, map_location="cpu", weights_only=True)
ckpt = torch.load(path, map_location="cpu")
if "model" in ckpt:
ckpt = ckpt["model"]
opt = OrderedDict()
@@ -196,7 +196,7 @@ def extract_small_model(path, name, author, sr, if_f0, info, version):
def change_info(path, info, name):
try:
ckpt = torch.load(path, map_location="cpu", weights_only=True)
ckpt = torch.load(path, map_location="cpu")
ckpt["info"] = info
if name == "":
name = os.path.basename(path)
@@ -229,8 +229,8 @@ def merge(path1, path2, alpha1, sr, f0, info, name, version):
a2 = "Unknown"
return f"{a1} & {a2}"
ckpt1 = torch.load(path1, map_location="cpu", weights_only=True)
ckpt2 = torch.load(path2, map_location="cpu", weights_only=True)
ckpt1 = torch.load(path1, map_location="cpu")
ckpt2 = torch.load(path2, map_location="cpu")
cfg = ckpt1["config"]
if "model" in ckpt1:
ckpt1 = extract(ckpt1)

View File

@@ -29,24 +29,6 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
checkpoint_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
saved_state_dict = checkpoint_dict["model"]
# Convert old-style weight_norm keys (weight_g/weight_v) to new
# parametrizations format (parametrizations.weight.original0/original1)
# so that checkpoints saved with the deprecated API can still be loaded.
_converted = {}
for k, v in list(saved_state_dict.items()):
if k.endswith(".weight_g"):
new_key = k[: -len(".weight_g")] + ".parametrizations.weight.original0"
_converted[new_key] = v
elif k.endswith(".weight_v"):
new_key = k[: -len(".weight_v")] + ".parametrizations.weight.original1"
_converted[new_key] = v
if _converted:
logger.info(
"Converting %d old-style weight_norm keys from checkpoint to new parametrizations format",
len(_converted),
)
saved_state_dict.update(_converted)
if hasattr(model, "module"):
state_dict = model.module.state_dict()
else:

View File

@@ -2,7 +2,6 @@ import os
import sys
import traceback
from pathlib import Path
import importlib.util
from dotenv import load_dotenv
@@ -39,9 +38,6 @@ f0method = sys.argv[3]
device = sys.argv[4]
is_half = sys.argv[5] == "True"
if importlib.util.find_spec("torch_directml") is not None:
import torch_directml # use side effect
class FeatureInput(object):
def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160):
@@ -106,12 +102,6 @@ if __name__ == "__main__":
Config.use_insecure_load()
printt(" ".join(sys.argv))
# GPU methods (rmvpe, fcpe, crepe, etc.) gain nothing from multiprocessing since
# all processes share one GPU. Spawning n_p processes each lazily loading
# the model onto the same CUDA device exhausts VRAM and causes deadlocks.
if "cuda" in device:
printt("WARN: use 1 thread since GPU is used.")
n_p = 1
featureInput = FeatureInput(is_half, device)
paths = []
inp_root = "%s/1_16k_wavs" % (exp_dir)

View File

@@ -17,14 +17,7 @@ device = sys.argv[1]
n_part = int(sys.argv[2])
i_part = int(sys.argv[3])
i_gpu = sys.argv[4]
# CUDA_VISIBLE_DEVICES expects bare GPU indices (e.g. "0" or "0,1"),
# but callers may pass "cuda:0", "cuda:0-cuda:1", etc. Strip the prefix
# and normalise separators so any combination works.
import re
i_gpu = re.sub(r"cuda:", "", str(i_gpu))
i_gpu = i_gpu.replace("-", ",")
os.environ["CUDA_VISIBLE_DEVICES"] = i_gpu
os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
exp_dir = sys.argv[5]
version = sys.argv[6]
is_half = sys.argv[7].lower() == "true"

View File

@@ -29,12 +29,10 @@ try:
GradScaler = gradscaler_init()
ipex_init()
else:
from torch.cuda.amp import GradScaler, autocast
except Exception:
pass
finally:
if not ("GradScaler" in globals() and "autocast" in globals()):
from torch.amp.grad_scaler import GradScaler
from torch.amp.autocast_mode import autocast
from torch.cuda.amp import GradScaler, autocast
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False
@@ -108,30 +106,23 @@ def main():
# patch to unblock people without gpus. there is probably a better way.
print("NO GPU DETECTED: falling back to CPU - this may take a while")
n_gpus = 1
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = str(randint(20000, 55555))
children = []
logger = utils.get_logger(hps.model_dir)
if n_gpus == 1:
# Single GPU: run directly without distributed to avoid gloo issues on Windows
run(0, 1, hps, logger)
else:
master_port = str(randint(20000, 55555))
os.environ["MASTER_ADDR"] = "127.0.0.1"
os.environ["MASTER_PORT"] = master_port
children = []
for i in range(n_gpus):
subproc = mp.Process(
target=run,
args=(i, n_gpus, hps, logger, master_port),
)
children.append(subproc)
subproc.start()
for i in range(n_gpus):
subproc = mp.Process(
target=run,
args=(i, n_gpus, hps, logger),
)
children.append(subproc)
subproc.start()
for i in range(n_gpus):
children[i].join()
for i in range(n_gpus):
children[i].join()
def run(
rank, n_gpus, hps: utils.HParams, logger: logging.Logger, master_port: str = "29500"
):
def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger):
global global_step
if rank == 0:
# logger = utils.get_logger(hps.model_dir)
@@ -140,81 +131,24 @@ def run(
writer = SummaryWriter(log_dir=hps.model_dir)
writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
use_distributed = n_gpus > 1
if use_distributed:
if os.name == "nt" or not torch.cuda.is_available():
# On Windows, gloo's create_device(hostname=...) is gated to Linux only
# in the C++ layer (makeDeviceForHostname). We must use the interface-
# based path instead: create_device(interface=...) calls
# makeDeviceForInterface which is not platform-gated.
import socket as _socket
try:
store = dist.TCPStore(
host_name="127.0.0.1",
port=int(master_port),
world_size=n_gpus,
is_master=(rank == 0),
)
except Exception:
store = dist.TCPStore(
host_name="127.0.0.1",
port=int(master_port),
world_size=n_gpus,
is_master=(rank == 0),
use_libuv=False,
)
# Discover a working network interface for gloo device creation
gloo_device = None
try:
for idx, ifname in _socket.if_nameindex():
try:
gloo_device = dist.ProcessGroupGloo.create_device(
interface=ifname
)
print("Try device", idx, "name", ifname)
break
except RuntimeError as e:
print("Try device", idx, "name", ifname, "err:", e)
continue
except (OSError, AttributeError) as e:
print(e.with_traceback(None))
if gloo_device is None:
raise RuntimeError(
"Cannot create gloo device on Windows. "
"No usable network interface found. "
"Try adding your hostname to "
"C:\\Windows\\System32\\drivers\\etc\\hosts "
"with: 127.0.0.1 " + _socket.gethostname()
)
pg_options = dist.ProcessGroupGloo._Options()
pg_options._devices = [gloo_device]
dist.init_process_group(
backend="gloo",
store=store,
world_size=n_gpus,
rank=rank,
pg_options=pg_options,
)
else:
init_url = f"tcp://127.0.0.1:{master_port}"
try:
dist.init_process_group(
backend="nccl",
init_method=init_url,
world_size=n_gpus,
rank=rank,
)
except:
dist.init_process_group(
backend="nccl",
init_method=init_url + "?use_libuv=False",
world_size=n_gpus,
rank=rank,
)
try:
dist.init_process_group(
backend=(
"gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
),
init_method="env://",
world_size=n_gpus,
rank=rank,
)
except:
dist.init_process_group(
backend=(
"gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
),
init_method="env://?use_libuv=False",
world_size=n_gpus,
rank=rank,
)
torch.manual_seed(hps.train.seed)
if torch.cuda.is_available():
torch.cuda.set_device(rank)
@@ -287,15 +221,14 @@ def run(
)
# net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
# net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
if use_distributed:
if hasattr(torch, "xpu") and torch.xpu.is_available():
pass
elif torch.cuda.is_available():
net_g = DDP(net_g, device_ids=[rank])
net_d = DDP(net_d, device_ids=[rank])
else:
net_g = DDP(net_g)
net_d = DDP(net_d)
if hasattr(torch, "xpu") and torch.xpu.is_available():
pass
elif torch.cuda.is_available():
net_g = DDP(net_g, device_ids=[rank])
net_d = DDP(net_d, device_ids=[rank])
else:
net_g = DDP(net_g)
net_d = DDP(net_d)
try: # 如果能加载自动resume
_, _, _, epoch_str = utils.load_checkpoint(
@@ -537,7 +470,7 @@ def train_and_evaluate(
# wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
# Calculate
with autocast(device_type="cuda", enabled=hps.train.fp16_run):
with autocast(enabled=hps.train.fp16_run):
(
y_hat,
ids_slice,
@@ -556,7 +489,7 @@ def train_and_evaluate(
y_mel = slice_on_last_dim(
mel, ids_slice, hps.train.segment_size // hps.data.hop_length
)
with autocast(device_type="cuda", enabled=False):
with autocast(enabled=False):
y_hat_mel = mel_spectrogram_torch(
y_hat.float().squeeze(1),
hps.data.filter_length,
@@ -575,7 +508,7 @@ def train_and_evaluate(
# Discriminator
y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach())
with autocast(device_type="cuda", enabled=False):
with autocast(enabled=False):
loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(
y_d_hat_r, y_d_hat_g
)
@@ -585,10 +518,10 @@ def train_and_evaluate(
grad_norm_d = total_grad_norm(net_d.parameters())
scaler.step(optim_d)
with autocast(device_type="cuda", enabled=hps.train.fp16_run):
with autocast(enabled=hps.train.fp16_run):
# Generator
y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
with autocast(device_type="cuda", enabled=False):
with autocast(enabled=False):
loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl
loss_fm = feature_loss(fmap_r, fmap_g)

View File

@@ -62,6 +62,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
os.path.basename(inp_path),
)
resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo")
try: # Remove the original file
os.remove(inp_path)
except Exception as e:
print(f"Failed to remove the original file: {e}")
inp_path = tmp_path
try:
if done == 0:

View File

@@ -37,7 +37,7 @@ class AudioPre:
else:
mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
model = Nets.CascadedASPPNet(mp.param["bins"] * 2)
cpk = torch.load(model_path, map_location="cpu", weights_only=True)
cpk = torch.load(model_path, map_location="cpu")
model.load_state_dict(cpk)
model.eval()
if is_half:

View File

@@ -10,6 +10,9 @@ from pybase16384 import encode_to_string, decode_from_string
from configs import CPUConfig
from rvc.synthesizer import get_synthesizer
from .pipeline import Pipeline
from .utils import load_hubert
class TorchSeedContext:
def __init__(self, seed):
@@ -92,9 +95,6 @@ def wave_hash(time_field):
def model_hash(config, tgt_sr, net_g, if_f0, version):
from .pipeline import Pipeline
from .utils import load_hubert
pipeline = Pipeline(tgt_sr, config)
audio = original_audio()
hbt = load_hubert(config.device, config.is_half)
@@ -152,7 +152,7 @@ def model_hash_ckpt(cpt):
def model_hash_from(path):
cpt = torch.load(path, map_location="cpu", weights_only=True)
cpt = torch.load(path, map_location="cpu")
h = model_hash_ckpt(cpt)
del cpt
return h

View File

@@ -75,7 +75,7 @@ def show_info(path):
try:
if hasattr(path, "name"):
path = path.name
a = torch.load(path, map_location="cpu", weights_only=True)
a = torch.load(path, map_location="cpu")
txt = show_model_info(a, show_long_id=True)
del a
except:

View File

@@ -1,7 +1,6 @@
import os, pathlib
import torch
from fairseq import checkpoint_utils, data
from fairseq import checkpoint_utils
def get_index_path_from_model(sid):
@@ -22,11 +21,10 @@ def get_index_path_from_model(sid):
def load_hubert(device, is_half):
with torch.serialization.safe_globals([data.dictionary.Dictionary]):
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
["assets/hubert/hubert_base.pt"],
suffix="",
)
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
["assets/hubert/hubert_base.pt"],
suffix="",
)
hubert_model = models[0]
hubert_model = hubert_model.to(device)
if is_half:

View File

@@ -1,10 +1,11 @@
tensorflow-rocm
joblib>=1.1.0
numba
numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git
fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu
gradio
Cython

View File

@@ -1,49 +0,0 @@
joblib>=1.1.0
numba
numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git
faiss-cpu
gradio
Cython
pydub>=0.25.1
tensorboardX
Jinja2>=3.1.2
json5
Markdown
matplotlib>=3.7.0
matplotlib-inline>=0.1.3
praat-parselmouth>=0.4.2
Pillow>=9.1.1
resampy>=0.4.2
scikit-learn
tensorboard
tqdm>=4.63.1
tornado>=6.1
Werkzeug>=2.2.3
uc-micro-py>=1.0.1
sympy>=1.11.1
tabulate>=0.8.10
PyYAML>=6.0
pyasn1>=0.4.8
pyasn1-modules>=0.2.8
fsspec>=2022.11.0
absl-py>=1.2.0
audioread
uvicorn>=0.21.1
colorama>=0.4.5
pyworld==0.3.2
httpx
onnxruntime; sys_platform == 'darwin'
torchcrepe>=0.0.23
fastapi
torchfcpe
python-dotenv>=1.0.0
av
pybase16384
--extra-index-url https://download.pytorch.org/whl/cpu
torch
torchvision
torchaudio

View File

@@ -4,7 +4,7 @@ numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git
fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu
gradio
Cython
@@ -43,6 +43,3 @@ python-dotenv>=1.0.0
av
torchfcpe
pybase16384
torch-directml
torchvision
torchaudio

View File

@@ -9,7 +9,7 @@ numpy
scipy
librosa>=0.10.2
llvmlite==0.39.0
fairseq @ git+https://github.com/fumiama/fairseq.git
fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu
gradio
Cython

View File

@@ -4,7 +4,7 @@ numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git
fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu
gradio
Cython

View File

@@ -11,14 +11,14 @@ class F0Predictor(object):
f0_min=50,
f0_max=1100,
sampling_rate=44100,
device: Optional[Union[str, torch.device]] = None,
device: Optional[str] = None,
):
self.hop_length = hop_length
self.f0_min = f0_min
self.f0_max = f0_max
self.sampling_rate = sampling_rate
if not device:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device is None:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
self.device = device
def compute_f0(

View File

@@ -1,4 +1,4 @@
from typing import Optional, Union
from typing import Optional
import torch
import numpy as np
@@ -17,9 +17,9 @@ class MelSpectrogram(torch.nn.Module):
hop_length: int,
n_fft: Optional[int] = None,
mel_fmin: int = 0,
mel_fmax: Optional[int] = None,
mel_fmax: int = None,
clamp: float = 1e-5,
device: Union[str, torch.device] = torch.device("cpu"),
device=torch.device("cpu"),
):
super().__init__()
if n_fft is None:

View File

@@ -1,6 +1,6 @@
from io import BytesIO
import os
from typing import Optional, Union
from typing import Any, Optional, Union
import numpy as np
import torch

View File

@@ -12,8 +12,8 @@ class MultiHeadAttention(nn.Module):
channels: int,
out_channels: int,
n_heads: int,
window_size: int,
p_dropout: float = 0.0,
window_size: Optional[int] = None,
heads_share: bool = True,
block_length: Optional[int] = None,
proximal_bias: bool = False,

View File

@@ -4,8 +4,7 @@ import torch
from torch import nn
from torch.nn import Conv1d, Conv2d
from torch.nn import functional as F
from torch.nn.utils import spectral_norm
from torch.nn.utils.parametrizations import weight_norm
from torch.nn.utils import spectral_norm, weight_norm
from .residuals import LRELU_SLOPE
from .utils import get_padding

View File

@@ -42,8 +42,8 @@ class Encoder(nn.Module):
hidden_channels,
hidden_channels,
n_heads,
window_size,
p_dropout=p_dropout,
window_size=window_size,
)
)
self.norm_layers_1.append(LayerNorm(hidden_channels))
@@ -121,7 +121,7 @@ class TextEncoder(nn.Module):
def __call__(
self,
phone: torch.Tensor,
pitch: Optional[torch.Tensor],
pitch: torch.Tensor,
lengths: torch.Tensor,
skip_head: Optional[int] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
@@ -135,7 +135,7 @@ class TextEncoder(nn.Module):
def forward(
self,
phone: torch.Tensor,
pitch: Optional[torch.Tensor],
pitch: torch.Tensor,
lengths: torch.Tensor,
skip_head: Optional[int] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
@@ -212,8 +212,10 @@ class PosteriorEncoder(nn.Module):
self.enc.remove_weight_norm()
def __prepare_scriptable__(self):
from torch.nn.utils import parametrize
if parametrize.is_parametrized(self.enc, "weight"):
parametrize.remove_parametrizations(self.enc, "weight")
for hook in self.enc._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.enc)
return self

View File

@@ -4,8 +4,7 @@ import torch
from torch import nn
from torch.nn import Conv1d, ConvTranspose1d
from torch.nn import functional as F
from torch.nn.utils.parametrizations import weight_norm
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
from torch.nn.utils import remove_weight_norm, weight_norm
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
from .utils import call_weight_data_normal_if_Conv
@@ -47,7 +46,6 @@ class Generator(torch.nn.Module):
self.resblocks = nn.ModuleList()
resblock_module = ResBlock1 if resblock == "1" else ResBlock2
ch = 0
for i in range(len(self.ups)):
ch = upsample_initial_channel // (2 ** (i + 1))
for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):
@@ -99,16 +97,29 @@ class Generator(torch.nn.Module):
def __prepare_scriptable__(self):
for l in self.ups:
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for hook in l._forward_pre_hooks.values():
# The hook we want to remove is an instance of WeightNorm class, so
# normally we would do `if isinstance(...)` but this class is not accessible
# because of shadowing, so we check the module name directly.
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
for l in self.resblocks:
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self
def remove_weight_norm(self):
for l in self.ups:
remove_parametrizations(l, "weight")
remove_weight_norm(l)
for l in self.resblocks:
l.remove_weight_norm()

View File

@@ -6,8 +6,6 @@ from torch.nn import functional as F
from .utils import activate_add_tanh_sigmoid_multiply
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
class LayerNorm(nn.Module):
def __init__(self, channels: int, eps: float = 1e-5):
@@ -32,7 +30,7 @@ class WN(torch.nn.Module):
dilation_rate: int,
n_layers: int,
gin_channels: int = 0,
p_dropout: float = 0,
p_dropout: int = 0,
):
super(WN, self).__init__()
assert kernel_size % 2 == 1
@@ -51,9 +49,7 @@ class WN(torch.nn.Module):
cond_layer = torch.nn.Conv1d(
gin_channels, 2 * hidden_channels * n_layers, 1
)
self.cond_layer = torch.nn.utils.parametrizations.weight_norm(
cond_layer, name="weight"
)
self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name="weight")
for i in range(n_layers):
dilation = dilation_rate**i
@@ -65,9 +61,7 @@ class WN(torch.nn.Module):
dilation=dilation,
padding=padding,
)
in_layer = torch.nn.utils.parametrizations.weight_norm(
in_layer, name="weight"
)
in_layer = torch.nn.utils.weight_norm(in_layer, name="weight")
self.in_layers.append(in_layer)
# last one is not necessary
@@ -77,9 +71,7 @@ class WN(torch.nn.Module):
res_skip_channels = hidden_channels
res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
res_skip_layer = torch.nn.utils.parametrizations.weight_norm(
res_skip_layer, name="weight"
)
res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight")
self.res_skip_layers.append(res_skip_layer)
def __call__(
@@ -125,20 +117,32 @@ class WN(torch.nn.Module):
def remove_weight_norm(self):
if self.gin_channels != 0:
remove_parametrizations(self.cond_layer, "weight")
torch.nn.utils.remove_weight_norm(self.cond_layer)
for l in self.in_layers:
remove_parametrizations(l, "weight")
torch.nn.utils.remove_weight_norm(l)
for l in self.res_skip_layers:
remove_parametrizations(l, "weight")
torch.nn.utils.remove_weight_norm(l)
def __prepare_scriptable__(self):
if self.gin_channels != 0:
if is_parametrized(self.cond_layer, "weight"):
remove_parametrizations(self.cond_layer, "weight")
for hook in self.cond_layer._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.cond_layer)
for l in self.in_layers:
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
for l in self.res_skip_layers:
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self

View File

@@ -1,12 +1,11 @@
from typing import Optional, List, Union
from typing import Optional, List
import math
import torch
from torch import nn
from torch.nn import Conv1d, ConvTranspose1d
from torch.nn import functional as F
from torch.nn.utils.parametrizations import weight_norm
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
from torch.nn.utils import remove_weight_norm, weight_norm
from .generators import SineGenerator
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
@@ -84,7 +83,7 @@ class NSFGenerator(torch.nn.Module):
self.conv_pre = Conv1d(
initial_channel, upsample_initial_channel, 7, 1, padding=3
)
resblockcls = ResBlock1 if resblock == "1" else ResBlock2
resblock = ResBlock1 if resblock == "1" else ResBlock2
self.ups = nn.ModuleList()
for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
@@ -115,13 +114,12 @@ class NSFGenerator(torch.nn.Module):
self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
self.resblocks = nn.ModuleList()
ch = 0
for i in range(len(self.ups)):
ch = upsample_initial_channel // (2 ** (i + 1))
ch: int = upsample_initial_channel // (2 ** (i + 1))
for j, (k, d) in enumerate(
zip(resblock_kernel_sizes, resblock_dilation_sizes)
):
self.resblocks.append(resblockcls(ch, k, d))
self.resblocks.append(resblock(ch, k, d))
self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
self.ups.apply(call_weight_data_normal_if_Conv)
@@ -192,15 +190,27 @@ class NSFGenerator(torch.nn.Module):
def remove_weight_norm(self):
for l in self.ups:
remove_parametrizations(l, "weight")
remove_weight_norm(l)
for l in self.resblocks:
l.remove_weight_norm()
def __prepare_scriptable__(self):
for l in self.ups:
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for hook in l._forward_pre_hooks.values():
# The hook we want to remove is an instance of WeightNorm class, so
# normally we would do `if isinstance(...)` but this class is not accessible
# because of shadowing, so we check the module name directly.
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
for l in self.resblocks:
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for hook in self.resblocks._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self

View File

@@ -4,8 +4,7 @@ import torch
from torch import nn
from torch.nn import Conv1d
from torch.nn import functional as F
from torch.nn.utils.parametrizations import weight_norm
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
from torch.nn.utils import remove_weight_norm, weight_norm
from .norms import WN
from .utils import (
@@ -21,7 +20,7 @@ class ResBlock1(torch.nn.Module):
self,
channels: int,
kernel_size: int = 3,
dilation: List[int] = [1, 3, 5],
dilation: List[int] = (1, 3, 5),
):
super(ResBlock1, self).__init__()
@@ -86,17 +85,25 @@ class ResBlock1(torch.nn.Module):
def remove_weight_norm(self):
for l in self.convs1:
remove_parametrizations(l, "weight")
remove_weight_norm(l)
for l in self.convs2:
remove_parametrizations(l, "weight")
remove_weight_norm(l)
def __prepare_scriptable__(self):
for l in self.convs1:
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
for l in self.convs2:
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self
@@ -110,7 +117,7 @@ class ResBlock2(torch.nn.Module):
self,
channels: int,
kernel_size=3,
dilation: List[int] = [1, 3],
dilation: List[int] = (1, 3),
):
super(ResBlock2, self).__init__()
self.convs = nn.ModuleList()
@@ -154,12 +161,16 @@ class ResBlock2(torch.nn.Module):
def remove_weight_norm(self):
for l in self.convs:
remove_parametrizations(l, "weight")
remove_weight_norm(l)
def __prepare_scriptable__(self):
for l in self.convs:
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
return self
@@ -171,7 +182,7 @@ class ResidualCouplingLayer(nn.Module):
kernel_size: int,
dilation_rate: int,
n_layers: int,
p_dropout: float = 0,
p_dropout: int = 0,
gin_channels: int = 0,
mean_only: bool = False,
):
@@ -238,8 +249,12 @@ class ResidualCouplingLayer(nn.Module):
self.enc.remove_weight_norm()
def __prepare_scriptable__(self):
if is_parametrized(self.enc, "weight"):
remove_parametrizations(self.enc, "weight")
for hook in self.enc._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.enc)
return self
@@ -329,6 +344,10 @@ class ResidualCouplingBlock(nn.Module):
def __prepare_scriptable__(self):
for i in range(self.n_flows):
if is_parametrized(self.flows[i * 2], "weight"):
remove_parametrizations(self.flows[i * 2], "weight")
for hook in self.flows[i * 2]._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.flows[i * 2])
return self

View File

@@ -2,7 +2,6 @@ from typing import Optional, List, Union
import torch
from torch import nn
from torch.nn.utils import parametrize
from .encoders import TextEncoder, PosteriorEncoder
@@ -35,7 +34,7 @@ class SynthesizerTrnMsNSFsid(nn.Module):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr: Union[str, int],
sr: Optional[Union[str, int]],
encoder_dim: int,
use_f0: bool,
):
@@ -119,16 +118,32 @@ class SynthesizerTrnMsNSFsid(nn.Module):
self.enc_q.remove_weight_norm()
def __prepare_scriptable__(self):
if parametrize.is_parametrized(self.dec, "weight"):
parametrize.remove_parametrizations(self.dec, "weight")
if parametrize.is_parametrized(self.flow, "weight"):
parametrize.remove_parametrizations(self.flow, "weight")
for hook in self.dec._forward_pre_hooks.values():
# The hook we want to remove is an instance of WeightNorm class, so
# normally we would do `if isinstance(...)` but this class is not accessible
# because of shadowing, so we check the module name directly.
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.dec)
for hook in self.flow._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.flow)
if hasattr(self, "enc_q"):
if parametrize.is_parametrized(self.enc_q, "weight"):
parametrize.remove_parametrizations(self.enc_q, "weight")
for hook in self.enc_q._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.enc_q)
return self
@torch.jit.ignore()
@torch.jit.ignore
def forward(
self,
phone: torch.Tensor,
@@ -140,20 +155,18 @@ class SynthesizerTrnMsNSFsid(nn.Module):
pitchf: Optional[torch.Tensor] = None,
): # 这里ds是id[bs,1]
# print(1,pitch.shape)#[bs,t]
embg = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t广播的
g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t广播的
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=embg)
z_p = self.flow(z, y_mask, g=embg)
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
z_p = self.flow(z, y_mask, g=g)
z_slice, ids_slice = rand_slice_segments_on_last_dim(
z, y_lengths, self.segment_size
)
if pitchf is not None and isinstance(self.dec, NSFGenerator):
if pitchf is not None:
pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size)
o = self.dec(z_slice, pitchf, g=embg) # type: ignore
elif isinstance(self.dec, Generator):
o = self.dec(z_slice, g=embg)
o = self.dec(z_slice, pitchf, g=g)
else:
raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
o = self.dec(z_slice, g=g)
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
@torch.jit.export
@@ -188,17 +201,15 @@ class SynthesizerTrnMsNSFsid(nn.Module):
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
z = self.flow(z_p, x_mask, g=g, reverse=True)
del z_p, m_p, logs_p
if pitchf is not None and isinstance(self.dec, NSFGenerator):
if pitchf is not None:
o = self.dec(
z * x_mask,
pitchf,
g=g,
n_res=return_length2,
)
elif isinstance(self.dec, Generator):
o = self.dec(z * x_mask, g=g, n_res=return_length2)
else:
raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
o = self.dec(z * x_mask, g=g, n_res=return_length2)
del x_mask, z
return o # , x_mask, (z, z_p, m_p, logs_p)
@@ -315,7 +326,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr: Union[str, int],
sr=None,
):
super().__init__(
spec_channels,
@@ -335,7 +346,6 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes,
spk_embed_dim,
gin_channels,
sr,
256,
False,
)
@@ -361,7 +371,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr: Union[str, int],
sr=None,
):
super().__init__(
spec_channels,
@@ -381,7 +391,6 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes,
spk_embed_dim,
gin_channels,
sr,
768,
False,
)

View File

@@ -1,4 +1,4 @@
from typing import List, Optional, Tuple, Iterator, Union
from typing import List, Optional, Tuple, Iterator
import torch
@@ -17,7 +17,7 @@ def get_padding(kernel_size: int, dilation=1) -> int:
def slice_on_last_dim(
x: torch.Tensor,
start_indices: Union[List[int], torch.Tensor],
start_indices: List[int],
segment_size=4,
) -> torch.Tensor:
new_shape = [*x.shape]
@@ -32,9 +32,9 @@ def slice_on_last_dim(
def rand_slice_segments_on_last_dim(
x: torch.Tensor,
x_lengths: Optional[Union[int, torch.Tensor]] = None,
x_lengths: int = None,
segment_size=4,
) -> Tuple[torch.Tensor, Union[List[int], torch.Tensor]]:
) -> Tuple[torch.Tensor, List[int]]:
b, _, t = x.size()
if x_lengths is None:
x_lengths = t
@@ -58,7 +58,7 @@ def activate_add_tanh_sigmoid_multiply(
def sequence_mask(
length: torch.Tensor,
max_length: Optional[int] = None,
):
) -> torch.BoolTensor:
if max_length is None:
max_length = int(length.max())
x = torch.arange(max_length, dtype=length.dtype, device=length.device)

View File

@@ -1,4 +1,4 @@
from typing import List, Union
from typing import List, Optional, Union
import torch
@@ -25,7 +25,7 @@ class SynthesizerTrnMsNSFsid(SynthesizerBase):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr: Union[str, int],
sr: Optional[Union[str, int]],
encoder_dim: int,
):
super().__init__(

View File

@@ -40,9 +40,11 @@ app = gr.Blocks()
with app:
with gr.Tabs():
with gr.TabItem("在线demo"):
gr.Markdown(value="""
gr.Markdown(
value="""
RVC 在线demo
""")
"""
)
sid = gr.Dropdown(label=i18n("Inferencing voice"), choices=sorted(names))
with gr.Column():
spk_item = gr.Slider(

20
web.py
View File

@@ -36,6 +36,7 @@ import threading
import shutil
import logging
logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -88,24 +89,23 @@ index_paths = [""]
def lookup_names(weight_root):
names = []
global names
for name in os.listdir(weight_root):
if name.endswith(".pth"):
names.append(name)
return names
def lookup_indices(index_root):
index_paths = []
global index_paths
for root, _, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append(str(pathlib.Path(root, name)))
return index_paths
names = [""] + lookup_names(weight_root)
index_paths = [""] + lookup_indices(index_root) + lookup_indices(outside_index_root)
lookup_names(weight_root)
lookup_indices(index_root)
lookup_indices(outside_index_root)
uvr5_names = []
for name in os.listdir(weight_uvr5_root):
if name.endswith(".pth") or "onnx" in name:
@@ -113,8 +113,12 @@ for name in os.listdir(weight_uvr5_root):
def change_choices():
names = [""] + lookup_names(weight_root)
index_paths = [""] + lookup_indices(index_root) + lookup_indices(outside_index_root)
global index_paths, names
names = [""]
lookup_names(weight_root)
index_paths = [""]
lookup_indices(index_root)
lookup_indices(outside_index_root)
return {"choices": sorted(names), "__type__": "update"}, {
"choices": sorted(index_paths),
"__type__": "update",