1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-05 09:10:25 +08:00

1 Commits

Author SHA1 Message Date
github-actions[bot]
782c4f6473 chore(i18n): sync locale on dev 2025-07-08 07:46:51 +00:00
35 changed files with 167 additions and 310 deletions

View File

@@ -15,7 +15,7 @@ jobs:
- name: Run RVC-Models-Downloader
run: |
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
sudo apt -y install ./rvcmd_linux_amd64.deb
rm -f ./rvcmd_linux_amd64.deb
rvcmd -notrs -w 1 -notui assets/rvc

View File

@@ -11,72 +11,20 @@ jobs:
steps:
- uses: actions/checkout@master
- name: Space cleanup
env:
DEBIAN_FRONTEND: noninteractive
run: |
df -h
# Source - https://stackoverflow.com/a
# Posted by Cosmin Bodnariuc
# Retrieved 2025-11-21, License - CC BY-SA 4.0
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/share/vcpkg
sudo rm -rf /usr/local/share/miniconda
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /opt/hostedtoolcache/go
sudo rm -rf /opt/hostedtoolcache/Python
sudo rm -rf /opt/hostedtoolcache/node
sudo rm -rf /opt/hostedtoolcache/R
sudo rm -rf /opt/hostedtoolcache/Java
sudo rm -rf /opt/hostedtoolcache/LLVM
sudo rm -rf /opt/hostedtoolcache/Swift
sudo rm -rf /opt/hostedtoolcache/Php
sudo rm -rf /opt/hostedtoolcache/Perl
sudo rm -rf /opt/hostedtoolcache/Scala
sudo rm -rf /opt/hostedtoolcache/Julia
sudo rm -rf /opt/hostedtoolcache/Mono
sudo rm -rf /opt/hostedtoolcache/PowerShell
sudo rm -rf /opt/hostedtoolcache/Crystal
sudo rm -rf /opt/hostedtoolcache/Elixir
sudo rm -rf /opt/hostedtoolcache/Erlang
sudo rm -rf /opt/hostedtoolcache/FSharp
sudo rm -rf /opt/hostedtoolcache/Haskell
sudo rm -rf /opt/hostedtoolcache/OCaml
sudo rm -rf /opt/hostedtoolcache/Rust
sudo rm -rf /opt/hostedtoolcache/Sbt
sudo rm -rf /opt/hostedtoolcache/Solidity
sudo rm -rf /opt/hostedtoolcache/VisualStudio
sudo rm -rf /opt/hostedtoolcache/WinAppDriver
sudo rm -rf /opt/hostedtoolcache/Xamarin
sudo rm -rf /opt/hostedtoolcache/Yarn
sudo rm -rf /opt/hostedtoolcache/Zephyr
sudo rm -rf /opt/hostedtoolcache/zig
sudo rm -rf /opt/hostedtoolcache/zulu
sudo rm -rf /opt/hostedtoolcache/azcopy
sudo -E apt-get update
sudo -E apt-get -y autoremove --purge
sudo -E apt-get clean
df -h
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb
sudo apt update
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
sudo apt -y install ./rvcmd_linux_amd64.deb
pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552
python -m pip install --upgrade setuptools
python -m pip install --upgrade wheel
pip install torch torchvision torchaudio
pip install -r requirements/cpu.txt
pip install -r requirements/main.txt
rvcmd -notrs -w 1 -notui assets/rvc
- name: Test step 1 & 2
run: |

View File

@@ -4,7 +4,6 @@ import sys
import json
import shutil
from multiprocessing import cpu_count
import importlib.util
import torch
@@ -47,10 +46,10 @@ class Config(metaclass=Singleton):
self.global_link,
self.noparallel,
self.noautoopen,
self.dml,
self.nocheck,
self.update,
) = self.arg_parse()
self.dml = False
self.instead = ""
self.preprocess_per = 3.7
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
@@ -84,6 +83,11 @@ class Config(metaclass=Singleton):
action="store_true",
help="Do not open in browser automatically",
)
parser.add_argument(
"--dml",
action="store_true",
help="torch_dml",
)
parser.add_argument(
"--nocheck", action="store_true", help="Run without checking assets"
)
@@ -100,6 +104,7 @@ class Config(metaclass=Singleton):
cmd_opts.global_link,
cmd_opts.noparallel,
cmd_opts.noautoopen,
cmd_opts.dml,
cmd_opts.nocheck,
cmd_opts.update,
)
@@ -178,7 +183,7 @@ class Config(metaclass=Singleton):
if self.has_xpu():
self.device = self.instead = "xpu:0"
self.is_half = True
i_device = int(str(self.device).split(":")[-1])
i_device = int(self.device.split(":")[-1])
self.gpu_name = torch.cuda.get_device_name(i_device)
if (
("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
@@ -209,7 +214,7 @@ class Config(metaclass=Singleton):
self.use_fp32_config()
else:
logger.info("No supported Nvidia GPU found")
self.device = self.instead = torch.get_default_device()
self.device = self.instead = "cpu"
self.is_half = False
self.use_fp32_config()
@@ -234,13 +239,12 @@ class Config(metaclass=Singleton):
x_query = 5
x_center = 30
x_max = 32
if importlib.util.find_spec("torch_directml") is not None:
if self.dml:
logger.info("Use DirectML instead")
import torch_directml
self.device = torch_directml.device(torch_directml.default_device())
self.is_half = False
self.dml = True
else:
if self.instead:
logger.info(f"Use {self.instead} instead")

View File

@@ -133,7 +133,7 @@
"Takeover WASAPI device": "WASAPIデバイスを独占",
"Target sample rate": "目標サンプリング率",
"The audio file to be processed": "処理待ち音声",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本ソフトウェアは、GNU Affero General Public License バージョン3.0以降の下でライセンスされています。<br>作者は、このソフトウェアの使用に関して一切の管理権限や責任を負いません。<br>このソフトウェアを使用し、生成された音声やファイルを含むあらゆるコンテンツを配布するユーザーは、AGPL 3.0ライセンスの条項への準拠について全責任を負います。<br>これらの条項に同意しない場合、このソフトウェアパッケージに含まれるコードやファイルの使用、参照、配布は禁止されています。<br>詳細については、ルートディレクトリにあるLICENSEファイルを参照してください。",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
"Total training epochs (total_epoch)": "総エポック数",
"Train": "学習",
"Train feature index": "特徴索引の学習",

View File

@@ -133,7 +133,7 @@
"Takeover WASAPI device": "独占 WASAPI 设备",
"Target sample rate": "目标采样率",
"The audio file to be processed": "待处理音频文件",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本软件基于GNU Affero通用公共许可证3.0版本或更高版本进行许可。<br>作者对本软件的使用不承担任何控制权或责任。<br>使用本软件并分发任何内容包括由其生成的声音或文件的用户需对遵守AGPL 3.0许可证条款承担全部责任。<br>如果您不接受这些条款,则禁止使用、引用或分发本软件包中包含的任何代码或文件。<br>请参阅位于根目录中的LICENSE文件以获取完整详情。",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
"Total training epochs (total_epoch)": "总训练轮数total_epoch",
"Train": "训练",
"Train feature index": "训练特征索引",

View File

@@ -195,10 +195,7 @@ def get_audio_properties(input_path: str) -> Tuple[int, int]:
container = av.open(input_path)
audio_stream = next(s for s in container.streams if s.type == "audio")
channels = 1 if audio_stream.layout == "mono" else 2
try:
rate = audio_stream.base_rate
except:
rate = audio_stream.sample_rate
rate = audio_stream.base_rate
container.close()
return channels, rate

View File

@@ -162,7 +162,15 @@ def download_and_extract_zip(url: str, folder: str):
logger.info(f"extracted into {folder}")
def download_all_assets(tmpdir: str, version="0.2.11"):
def download_dns_yaml(url: str, folder: str):
    """Download the DNS configuration at ``url`` and store it as ``dns.yaml``.

    Args:
        url: HTTP(S) address of the YAML file to fetch.
        folder: Directory that receives the file; it is written to
            ``<folder>/dns.yaml`` and overwrites any existing file there.

    Raises:
        requests.RequestException: If the connection fails, times out, or the
            server answers with a 4xx/5xx status.
        OSError: If ``dns.yaml`` cannot be written inside ``folder``.
    """
    logger.info(f"downloading {url}")
    # The whole body is needed at once, so a plain (non-streaming) request is
    # used; timeout is (connect, read) in seconds.
    response = requests.get(url, timeout=(5, 10))
    # Fail here rather than saving an HTTP error page as dns.yaml, which would
    # only surface later as a confusing parse error in the downloader.
    response.raise_for_status()
    with open(os.path.join(folder, "dns.yaml"), "wb") as out_file:
        out_file.write(response.content)
    logger.info(f"downloaded into {folder}")
def download_all_assets(tmpdir: str, version="0.2.5"):
import subprocess
import platform
@@ -190,10 +198,44 @@ def download_all_assets(tmpdir: str, version="0.2.11"):
suffix = "zip" if is_win else "tar.gz"
RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}"
cmdfile = os.path.join(tmpdir, "rvcmd")
if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe"
else:
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755)
subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])
try:
if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe"
else:
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755)
subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])
except Exception:
BASE_URL = "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/assets/"
suffix = {
"darwin_amd64": "555",
"darwin_arm64": "556",
"linux_386": "557",
"linux_amd64": "558",
"linux_arm64": "559",
"windows_386": "562",
"windows_amd64": "563",
}[f"{system_type}_{architecture}"]
RVCMD_URL = BASE_URL + suffix
download_dns_yaml(
"https://raw.gitcode.com/u011570312/RVC-Models-Downloader/raw/main/dns.yaml",
tmpdir,
)
if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe"
else:
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755)
subprocess.run(
[
cmdfile,
"-notui",
"-w",
"0",
"-dns",
os.path.join(tmpdir, "dns.yaml"),
"assets/rvc",
]
)

View File

@@ -303,7 +303,7 @@ class TextAudioLoader(torch.utils.data.Dataset):
spec_filename = filename.replace(".wav", ".spec.pt")
if os.path.exists(spec_filename):
try:
spec = torch.load(spec_filename, weights_only=True)
spec = torch.load(spec_filename)
except:
logger.warning("%s %s", spec_filename, traceback.format_exc())
spec = spectrogram_torch(

View File

@@ -59,7 +59,7 @@ def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps):
def extract_small_model(path, name, author, sr, if_f0, info, version):
try:
ckpt = torch.load(path, map_location="cpu", weights_only=True)
ckpt = torch.load(path, map_location="cpu")
if "model" in ckpt:
ckpt = ckpt["model"]
opt = OrderedDict()
@@ -196,7 +196,7 @@ def extract_small_model(path, name, author, sr, if_f0, info, version):
def change_info(path, info, name):
try:
ckpt = torch.load(path, map_location="cpu", weights_only=True)
ckpt = torch.load(path, map_location="cpu")
ckpt["info"] = info
if name == "":
name = os.path.basename(path)
@@ -229,8 +229,8 @@ def merge(path1, path2, alpha1, sr, f0, info, name, version):
a2 = "Unknown"
return f"{a1} & {a2}"
ckpt1 = torch.load(path1, map_location="cpu", weights_only=True)
ckpt2 = torch.load(path2, map_location="cpu", weights_only=True)
ckpt1 = torch.load(path1, map_location="cpu")
ckpt2 = torch.load(path2, map_location="cpu")
cfg = ckpt1["config"]
if "model" in ckpt1:
ckpt1 = extract(ckpt1)

View File

@@ -2,7 +2,6 @@ import os
import sys
import traceback
from pathlib import Path
import importlib.util
from dotenv import load_dotenv
@@ -39,9 +38,6 @@ f0method = sys.argv[3]
device = sys.argv[4]
is_half = sys.argv[5] == "True"
if importlib.util.find_spec("torch_directml") is not None:
import torch_directml # use side effect
class FeatureInput(object):
def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160):
@@ -106,12 +102,6 @@ if __name__ == "__main__":
Config.use_insecure_load()
printt(" ".join(sys.argv))
# GPU methods (rmvpe, fcpe, crepe, etc.) gain nothing from multiprocessing since
# all processes share one GPU. Spawning n_p processes each lazily loading
# the model onto the same CUDA device exhausts VRAM and causes deadlocks.
if "cuda" in device:
printt("WARN: use 1 thread since GPU is used.")
n_p = 1
featureInput = FeatureInput(is_half, device)
paths = []
inp_root = "%s/1_16k_wavs" % (exp_dir)

View File

@@ -17,14 +17,7 @@ device = sys.argv[1]
n_part = int(sys.argv[2])
i_part = int(sys.argv[3])
i_gpu = sys.argv[4]
# CUDA_VISIBLE_DEVICES expects bare GPU indices (e.g. "0" or "0,1"),
# but callers may pass "cuda:0", "cuda:0-cuda:1", etc. Strip the prefix
# and normalise separators so any combination works.
import re
i_gpu = re.sub(r"cuda:", "", str(i_gpu))
i_gpu = i_gpu.replace("-", ",")
os.environ["CUDA_VISIBLE_DEVICES"] = i_gpu
os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
exp_dir = sys.argv[5]
version = sys.argv[6]
is_half = sys.argv[7].lower() == "true"

View File

@@ -106,30 +106,23 @@ def main():
# patch to unblock people without gpus. there is probably a better way.
print("NO GPU DETECTED: falling back to CPU - this may take a while")
n_gpus = 1
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = str(randint(20000, 55555))
children = []
logger = utils.get_logger(hps.model_dir)
if n_gpus == 1:
# Single GPU: run directly without distributed to avoid gloo issues on Windows
run(0, 1, hps, logger)
else:
master_port = str(randint(20000, 55555))
os.environ["MASTER_ADDR"] = "127.0.0.1"
os.environ["MASTER_PORT"] = master_port
children = []
for i in range(n_gpus):
subproc = mp.Process(
target=run,
args=(i, n_gpus, hps, logger, master_port),
)
children.append(subproc)
subproc.start()
for i in range(n_gpus):
subproc = mp.Process(
target=run,
args=(i, n_gpus, hps, logger),
)
children.append(subproc)
subproc.start()
for i in range(n_gpus):
children[i].join()
for i in range(n_gpus):
children[i].join()
def run(
rank, n_gpus, hps: utils.HParams, logger: logging.Logger, master_port: str = "29500"
):
def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger):
global global_step
if rank == 0:
# logger = utils.get_logger(hps.model_dir)
@@ -138,81 +131,24 @@ def run(
writer = SummaryWriter(log_dir=hps.model_dir)
writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
use_distributed = n_gpus > 1
if use_distributed:
if os.name == "nt" or not torch.cuda.is_available():
# On Windows, gloo's create_device(hostname=...) is gated to Linux only
# in the C++ layer (makeDeviceForHostname). We must use the interface-
# based path instead: create_device(interface=...) calls
# makeDeviceForInterface which is not platform-gated.
import socket as _socket
try:
store = dist.TCPStore(
host_name="127.0.0.1",
port=int(master_port),
world_size=n_gpus,
is_master=(rank == 0),
)
except Exception:
store = dist.TCPStore(
host_name="127.0.0.1",
port=int(master_port),
world_size=n_gpus,
is_master=(rank == 0),
use_libuv=False,
)
# Discover a working network interface for gloo device creation
gloo_device = None
try:
for idx, ifname in _socket.if_nameindex():
try:
gloo_device = dist.ProcessGroupGloo.create_device(
interface=ifname
)
print("Try device", idx, "name", ifname)
break
except RuntimeError as e:
print("Try device", idx, "name", ifname, "err:", e)
continue
except (OSError, AttributeError) as e:
print(e.with_traceback(None))
if gloo_device is None:
raise RuntimeError(
"Cannot create gloo device on Windows. "
"No usable network interface found. "
"Try adding your hostname to "
"C:\\Windows\\System32\\drivers\\etc\\hosts "
"with: 127.0.0.1 " + _socket.gethostname()
)
pg_options = dist.ProcessGroupGloo._Options()
pg_options._devices = [gloo_device]
dist.init_process_group(
backend="gloo",
store=store,
world_size=n_gpus,
rank=rank,
pg_options=pg_options,
)
else:
init_url = f"tcp://127.0.0.1:{master_port}"
try:
dist.init_process_group(
backend="nccl",
init_method=init_url,
world_size=n_gpus,
rank=rank,
)
except:
dist.init_process_group(
backend="nccl",
init_method=init_url + "?use_libuv=False",
world_size=n_gpus,
rank=rank,
)
try:
dist.init_process_group(
backend=(
"gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
),
init_method="env://",
world_size=n_gpus,
rank=rank,
)
except:
dist.init_process_group(
backend=(
"gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
),
init_method="env://?use_libuv=False",
world_size=n_gpus,
rank=rank,
)
torch.manual_seed(hps.train.seed)
if torch.cuda.is_available():
torch.cuda.set_device(rank)
@@ -285,15 +221,14 @@ def run(
)
# net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
# net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
if use_distributed:
if hasattr(torch, "xpu") and torch.xpu.is_available():
pass
elif torch.cuda.is_available():
net_g = DDP(net_g, device_ids=[rank])
net_d = DDP(net_d, device_ids=[rank])
else:
net_g = DDP(net_g)
net_d = DDP(net_d)
if hasattr(torch, "xpu") and torch.xpu.is_available():
pass
elif torch.cuda.is_available():
net_g = DDP(net_g, device_ids=[rank])
net_d = DDP(net_d, device_ids=[rank])
else:
net_g = DDP(net_g)
net_d = DDP(net_d)
try: # 如果能加载自动resume
_, _, _, epoch_str = utils.load_checkpoint(

View File

@@ -62,6 +62,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
os.path.basename(inp_path),
)
resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo")
try: # Remove the original file
os.remove(inp_path)
except Exception as e:
print(f"Failed to remove the original file: {e}")
inp_path = tmp_path
try:
if done == 0:

View File

@@ -37,7 +37,7 @@ class AudioPre:
else:
mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
model = Nets.CascadedASPPNet(mp.param["bins"] * 2)
cpk = torch.load(model_path, map_location="cpu", weights_only=True)
cpk = torch.load(model_path, map_location="cpu")
model.load_state_dict(cpk)
model.eval()
if is_half:

View File

@@ -152,7 +152,7 @@ def model_hash_ckpt(cpt):
def model_hash_from(path):
cpt = torch.load(path, map_location="cpu", weights_only=True)
cpt = torch.load(path, map_location="cpu")
h = model_hash_ckpt(cpt)
del cpt
return h

View File

@@ -75,7 +75,7 @@ def show_info(path):
try:
if hasattr(path, "name"):
path = path.name
a = torch.load(path, map_location="cpu", weights_only=True)
a = torch.load(path, map_location="cpu")
txt = show_model_info(a, show_long_id=True)
del a
except:

View File

@@ -1,10 +1,11 @@
tensorflow-rocm
joblib>=1.1.0
numba
numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git
fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu
gradio
Cython

View File

@@ -1,49 +0,0 @@
joblib>=1.1.0
numba
numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git
faiss-cpu
gradio
Cython
pydub>=0.25.1
tensorboardX
Jinja2>=3.1.2
json5
Markdown
matplotlib>=3.7.0
matplotlib-inline>=0.1.3
praat-parselmouth>=0.4.2
Pillow>=9.1.1
resampy>=0.4.2
scikit-learn
tensorboard
tqdm>=4.63.1
tornado>=6.1
Werkzeug>=2.2.3
uc-micro-py>=1.0.1
sympy>=1.11.1
tabulate>=0.8.10
PyYAML>=6.0
pyasn1>=0.4.8
pyasn1-modules>=0.2.8
fsspec>=2022.11.0
absl-py>=1.2.0
audioread
uvicorn>=0.21.1
colorama>=0.4.5
pyworld==0.3.2
httpx
onnxruntime; sys_platform == 'darwin'
torchcrepe>=0.0.23
fastapi
torchfcpe
python-dotenv>=1.0.0
av
pybase16384
--extra-index-url https://download.pytorch.org/whl/cpu
torch
torchvision
torchaudio

View File

@@ -4,7 +4,7 @@ numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git
fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu
gradio
Cython
@@ -43,6 +43,3 @@ python-dotenv>=1.0.0
av
torchfcpe
pybase16384
torch-directml
torchvision
torchaudio

View File

@@ -9,7 +9,7 @@ numpy
scipy
librosa>=0.10.2
llvmlite==0.39.0
fairseq @ git+https://github.com/fumiama/fairseq.git
fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu
gradio
Cython

View File

@@ -4,7 +4,7 @@ numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git
fairseq @ git+https://github.com/One-sixth/fairseq.git
faiss-cpu
gradio
Cython

View File

@@ -11,14 +11,14 @@ class F0Predictor(object):
f0_min=50,
f0_max=1100,
sampling_rate=44100,
device: Optional[Union[str, torch.device]] = None,
device: Optional[str] = None,
):
self.hop_length = hop_length
self.f0_min = f0_min
self.f0_max = f0_max
self.sampling_rate = sampling_rate
if not device:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device is None:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
self.device = device
def compute_f0(

View File

@@ -1,4 +1,4 @@
from typing import Optional, Union
from typing import Optional
import torch
import numpy as np
@@ -17,9 +17,9 @@ class MelSpectrogram(torch.nn.Module):
hop_length: int,
n_fft: Optional[int] = None,
mel_fmin: int = 0,
mel_fmax: Optional[int] = None,
mel_fmax: int = None,
clamp: float = 1e-5,
device: Union[str, torch.device] = torch.device("cpu"),
device=torch.device("cpu"),
):
super().__init__()
if n_fft is None:

View File

@@ -1,6 +1,6 @@
from io import BytesIO
import os
from typing import Optional, Union
from typing import Any, Optional, Union
import numpy as np
import torch

View File

@@ -12,8 +12,8 @@ class MultiHeadAttention(nn.Module):
channels: int,
out_channels: int,
n_heads: int,
window_size: int,
p_dropout: float = 0.0,
window_size: Optional[int] = None,
heads_share: bool = True,
block_length: Optional[int] = None,
proximal_bias: bool = False,

View File

@@ -42,8 +42,8 @@ class Encoder(nn.Module):
hidden_channels,
hidden_channels,
n_heads,
window_size,
p_dropout=p_dropout,
window_size=window_size,
)
)
self.norm_layers_1.append(LayerNorm(hidden_channels))
@@ -121,7 +121,7 @@ class TextEncoder(nn.Module):
def __call__(
self,
phone: torch.Tensor,
pitch: Optional[torch.Tensor],
pitch: torch.Tensor,
lengths: torch.Tensor,
skip_head: Optional[int] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
@@ -135,7 +135,7 @@ class TextEncoder(nn.Module):
def forward(
self,
phone: torch.Tensor,
pitch: Optional[torch.Tensor],
pitch: torch.Tensor,
lengths: torch.Tensor,
skip_head: Optional[int] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:

View File

@@ -46,7 +46,6 @@ class Generator(torch.nn.Module):
self.resblocks = nn.ModuleList()
resblock_module = ResBlock1 if resblock == "1" else ResBlock2
ch = 0
for i in range(len(self.ups)):
ch = upsample_initial_channel // (2 ** (i + 1))
for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):

View File

@@ -30,7 +30,7 @@ class WN(torch.nn.Module):
dilation_rate: int,
n_layers: int,
gin_channels: int = 0,
p_dropout: float = 0,
p_dropout: int = 0,
):
super(WN, self).__init__()
assert kernel_size % 2 == 1

View File

@@ -1,4 +1,4 @@
from typing import Optional, List, Union
from typing import Optional, List
import math
import torch
@@ -83,7 +83,7 @@ class NSFGenerator(torch.nn.Module):
self.conv_pre = Conv1d(
initial_channel, upsample_initial_channel, 7, 1, padding=3
)
resblockcls = ResBlock1 if resblock == "1" else ResBlock2
resblock = ResBlock1 if resblock == "1" else ResBlock2
self.ups = nn.ModuleList()
for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
@@ -114,13 +114,12 @@ class NSFGenerator(torch.nn.Module):
self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
self.resblocks = nn.ModuleList()
ch = 0
for i in range(len(self.ups)):
ch = upsample_initial_channel // (2 ** (i + 1))
ch: int = upsample_initial_channel // (2 ** (i + 1))
for j, (k, d) in enumerate(
zip(resblock_kernel_sizes, resblock_dilation_sizes)
):
self.resblocks.append(resblockcls(ch, k, d))
self.resblocks.append(resblock(ch, k, d))
self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
self.ups.apply(call_weight_data_normal_if_Conv)

View File

@@ -20,7 +20,7 @@ class ResBlock1(torch.nn.Module):
self,
channels: int,
kernel_size: int = 3,
dilation: List[int] = [1, 3, 5],
dilation: List[int] = (1, 3, 5),
):
super(ResBlock1, self).__init__()
@@ -117,7 +117,7 @@ class ResBlock2(torch.nn.Module):
self,
channels: int,
kernel_size=3,
dilation: List[int] = [1, 3],
dilation: List[int] = (1, 3),
):
super(ResBlock2, self).__init__()
self.convs = nn.ModuleList()
@@ -182,7 +182,7 @@ class ResidualCouplingLayer(nn.Module):
kernel_size: int,
dilation_rate: int,
n_layers: int,
p_dropout: float = 0,
p_dropout: int = 0,
gin_channels: int = 0,
mean_only: bool = False,
):

View File

@@ -34,7 +34,7 @@ class SynthesizerTrnMsNSFsid(nn.Module):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr: Union[str, int],
sr: Optional[Union[str, int]],
encoder_dim: int,
use_f0: bool,
):
@@ -143,7 +143,7 @@ class SynthesizerTrnMsNSFsid(nn.Module):
torch.nn.utils.remove_weight_norm(self.enc_q)
return self
@torch.jit.ignore()
@torch.jit.ignore
def forward(
self,
phone: torch.Tensor,
@@ -155,20 +155,18 @@ class SynthesizerTrnMsNSFsid(nn.Module):
pitchf: Optional[torch.Tensor] = None,
): # 这里ds是id[bs,1]
# print(1,pitch.shape)#[bs,t]
embg = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t广播的
g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t广播的
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=embg)
z_p = self.flow(z, y_mask, g=embg)
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
z_p = self.flow(z, y_mask, g=g)
z_slice, ids_slice = rand_slice_segments_on_last_dim(
z, y_lengths, self.segment_size
)
if pitchf is not None and isinstance(self.dec, NSFGenerator):
if pitchf is not None:
pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size)
o = self.dec(z_slice, pitchf, g=embg) # type: ignore
elif isinstance(self.dec, Generator):
o = self.dec(z_slice, g=embg)
o = self.dec(z_slice, pitchf, g=g)
else:
raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
o = self.dec(z_slice, g=g)
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
@torch.jit.export
@@ -203,17 +201,15 @@ class SynthesizerTrnMsNSFsid(nn.Module):
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
z = self.flow(z_p, x_mask, g=g, reverse=True)
del z_p, m_p, logs_p
if pitchf is not None and isinstance(self.dec, NSFGenerator):
if pitchf is not None:
o = self.dec(
z * x_mask,
pitchf,
g=g,
n_res=return_length2,
)
elif isinstance(self.dec, Generator):
o = self.dec(z * x_mask, g=g, n_res=return_length2)
else:
raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
o = self.dec(z * x_mask, g=g, n_res=return_length2)
del x_mask, z
return o # , x_mask, (z, z_p, m_p, logs_p)
@@ -330,7 +326,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr: Union[str, int],
sr=None,
):
super().__init__(
spec_channels,
@@ -350,7 +346,6 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes,
spk_embed_dim,
gin_channels,
sr,
256,
False,
)
@@ -376,7 +371,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr: Union[str, int],
sr=None,
):
super().__init__(
spec_channels,
@@ -396,7 +391,6 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes,
spk_embed_dim,
gin_channels,
sr,
768,
False,
)

View File

@@ -1,4 +1,4 @@
from typing import List, Optional, Tuple, Iterator, Union
from typing import List, Optional, Tuple, Iterator
import torch
@@ -17,7 +17,7 @@ def get_padding(kernel_size: int, dilation=1) -> int:
def slice_on_last_dim(
x: torch.Tensor,
start_indices: Union[List[int], torch.Tensor],
start_indices: List[int],
segment_size=4,
) -> torch.Tensor:
new_shape = [*x.shape]
@@ -32,9 +32,9 @@ def slice_on_last_dim(
def rand_slice_segments_on_last_dim(
x: torch.Tensor,
x_lengths: Optional[Union[int, torch.Tensor]] = None,
x_lengths: int = None,
segment_size=4,
) -> Tuple[torch.Tensor, Union[List[int], torch.Tensor]]:
) -> Tuple[torch.Tensor, List[int]]:
b, _, t = x.size()
if x_lengths is None:
x_lengths = t
@@ -58,7 +58,7 @@ def activate_add_tanh_sigmoid_multiply(
def sequence_mask(
length: torch.Tensor,
max_length: Optional[int] = None,
):
) -> torch.BoolTensor:
if max_length is None:
max_length = int(length.max())
x = torch.arange(max_length, dtype=length.dtype, device=length.device)

View File

@@ -1,4 +1,4 @@
from typing import List, Union
from typing import List, Optional, Union
import torch
@@ -25,7 +25,7 @@ class SynthesizerTrnMsNSFsid(SynthesizerBase):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr: Union[str, int],
sr: Optional[Union[str, int]],
encoder_dim: int,
):
super().__init__(

View File

@@ -40,9 +40,11 @@ app = gr.Blocks()
with app:
with gr.Tabs():
with gr.TabItem("在线demo"):
gr.Markdown(value="""
gr.Markdown(
value="""
RVC 在线demo
""")
"""
)
sid = gr.Dropdown(label=i18n("Inferencing voice"), choices=sorted(names))
with gr.Column():
spk_item = gr.Slider(

1
web.py
View File

@@ -36,6 +36,7 @@ import threading
import shutil
import logging
logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)