mirror of
https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git
synced 2026-06-05 17:20:25 +08:00
Compare commits
1 Commits
dev
...
genlocale-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
782c4f6473 |
2
.github/workflows/checksum.yml
vendored
2
.github/workflows/checksum.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Run RVC-Models-Downloader
|
- name: Run RVC-Models-Downloader
|
||||||
run: |
|
run: |
|
||||||
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb
|
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
|
||||||
sudo apt -y install ./rvcmd_linux_amd64.deb
|
sudo apt -y install ./rvcmd_linux_amd64.deb
|
||||||
rm -f ./rvcmd_linux_amd64.deb
|
rm -f ./rvcmd_linux_amd64.deb
|
||||||
rvcmd -notrs -w 1 -notui assets/rvc
|
rvcmd -notrs -w 1 -notui assets/rvc
|
||||||
|
|||||||
2
.github/workflows/close-issue.yml
vendored
2
.github/workflows/close-issue.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/stale@v5
|
- uses: actions/stale@v5
|
||||||
with:
|
with:
|
||||||
exempt-issue-labels: "help wanted,good first issue,documentation,following up,todo list,enhancement"
|
exempt-issue-labels: "help wanted,good first issue,documentation,following up,todo list"
|
||||||
days-before-issue-stale: 30
|
days-before-issue-stale: 30
|
||||||
days-before-issue-close: 15
|
days-before-issue-close: 15
|
||||||
stale-issue-label: "stale"
|
stale-issue-label: "stale"
|
||||||
|
|||||||
58
.github/workflows/unitest.yml
vendored
58
.github/workflows/unitest.yml
vendored
@@ -11,72 +11,20 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@master
|
- uses: actions/checkout@master
|
||||||
- name: Space cleanup
|
|
||||||
env:
|
|
||||||
DEBIAN_FRONTEND: noninteractive
|
|
||||||
run: |
|
|
||||||
df -h
|
|
||||||
|
|
||||||
# Source - https://stackoverflow.com/a
|
|
||||||
# Posted by Cosmin Bodnariuc
|
|
||||||
# Retrieved 2025-11-21, License - CC BY-SA 4.0
|
|
||||||
sudo rm -rf /usr/share/dotnet
|
|
||||||
sudo rm -rf /usr/local/share/boost
|
|
||||||
sudo rm -rf /usr/local/share/chromium
|
|
||||||
sudo rm -rf /usr/local/share/powershell
|
|
||||||
sudo rm -rf /usr/local/share/vcpkg
|
|
||||||
sudo rm -rf /usr/local/share/miniconda
|
|
||||||
sudo rm -rf /opt/ghc
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/CodeQL
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/go
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Python
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/node
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/R
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Java
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/LLVM
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Swift
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Php
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Perl
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Scala
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Julia
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Mono
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/PowerShell
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Crystal
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Elixir
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Erlang
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/FSharp
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Haskell
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/OCaml
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Rust
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Sbt
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Solidity
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/VisualStudio
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/WinAppDriver
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Xamarin
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Yarn
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/Zephyr
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/zig
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/zulu
|
|
||||||
sudo rm -rf /opt/hostedtoolcache/azcopy
|
|
||||||
|
|
||||||
sudo -E apt-get update
|
|
||||||
sudo -E apt-get -y autoremove --purge
|
|
||||||
sudo -E apt-get clean
|
|
||||||
|
|
||||||
df -h
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v4
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb
|
sudo apt update
|
||||||
|
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
|
||||||
sudo apt -y install ./rvcmd_linux_amd64.deb
|
sudo apt -y install ./rvcmd_linux_amd64.deb
|
||||||
pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552
|
pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552
|
||||||
python -m pip install --upgrade setuptools
|
python -m pip install --upgrade setuptools
|
||||||
python -m pip install --upgrade wheel
|
python -m pip install --upgrade wheel
|
||||||
pip install torch torchvision torchaudio
|
pip install torch torchvision torchaudio
|
||||||
pip install -r requirements/cpu.txt
|
pip install -r requirements/main.txt
|
||||||
rvcmd -notrs -w 1 -notui assets/rvc
|
rvcmd -notrs -w 1 -notui assets/rvc
|
||||||
- name: Test step 1 & 2
|
- name: Test step 1 & 2
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import sys
|
|||||||
import json
|
import json
|
||||||
import shutil
|
import shutil
|
||||||
from multiprocessing import cpu_count
|
from multiprocessing import cpu_count
|
||||||
import importlib.util
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@@ -47,10 +46,10 @@ class Config(metaclass=Singleton):
|
|||||||
self.global_link,
|
self.global_link,
|
||||||
self.noparallel,
|
self.noparallel,
|
||||||
self.noautoopen,
|
self.noautoopen,
|
||||||
|
self.dml,
|
||||||
self.nocheck,
|
self.nocheck,
|
||||||
self.update,
|
self.update,
|
||||||
) = self.arg_parse()
|
) = self.arg_parse()
|
||||||
self.dml = False
|
|
||||||
self.instead = ""
|
self.instead = ""
|
||||||
self.preprocess_per = 3.7
|
self.preprocess_per = 3.7
|
||||||
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
|
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
|
||||||
@@ -84,6 +83,11 @@ class Config(metaclass=Singleton):
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Do not open in browser automatically",
|
help="Do not open in browser automatically",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--dml",
|
||||||
|
action="store_true",
|
||||||
|
help="torch_dml",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--nocheck", action="store_true", help="Run without checking assets"
|
"--nocheck", action="store_true", help="Run without checking assets"
|
||||||
)
|
)
|
||||||
@@ -100,6 +104,7 @@ class Config(metaclass=Singleton):
|
|||||||
cmd_opts.global_link,
|
cmd_opts.global_link,
|
||||||
cmd_opts.noparallel,
|
cmd_opts.noparallel,
|
||||||
cmd_opts.noautoopen,
|
cmd_opts.noautoopen,
|
||||||
|
cmd_opts.dml,
|
||||||
cmd_opts.nocheck,
|
cmd_opts.nocheck,
|
||||||
cmd_opts.update,
|
cmd_opts.update,
|
||||||
)
|
)
|
||||||
@@ -178,7 +183,7 @@ class Config(metaclass=Singleton):
|
|||||||
if self.has_xpu():
|
if self.has_xpu():
|
||||||
self.device = self.instead = "xpu:0"
|
self.device = self.instead = "xpu:0"
|
||||||
self.is_half = True
|
self.is_half = True
|
||||||
i_device = int(str(self.device).split(":")[-1])
|
i_device = int(self.device.split(":")[-1])
|
||||||
self.gpu_name = torch.cuda.get_device_name(i_device)
|
self.gpu_name = torch.cuda.get_device_name(i_device)
|
||||||
if (
|
if (
|
||||||
("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
|
("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
|
||||||
@@ -209,7 +214,7 @@ class Config(metaclass=Singleton):
|
|||||||
self.use_fp32_config()
|
self.use_fp32_config()
|
||||||
else:
|
else:
|
||||||
logger.info("No supported Nvidia GPU found")
|
logger.info("No supported Nvidia GPU found")
|
||||||
self.device = self.instead = torch.get_default_device()
|
self.device = self.instead = "cpu"
|
||||||
self.is_half = False
|
self.is_half = False
|
||||||
self.use_fp32_config()
|
self.use_fp32_config()
|
||||||
|
|
||||||
@@ -234,13 +239,12 @@ class Config(metaclass=Singleton):
|
|||||||
x_query = 5
|
x_query = 5
|
||||||
x_center = 30
|
x_center = 30
|
||||||
x_max = 32
|
x_max = 32
|
||||||
if importlib.util.find_spec("torch_directml") is not None:
|
if self.dml:
|
||||||
logger.info("Use DirectML instead")
|
logger.info("Use DirectML instead")
|
||||||
import torch_directml
|
import torch_directml
|
||||||
|
|
||||||
self.device = torch_directml.device(torch_directml.default_device())
|
self.device = torch_directml.device(torch_directml.default_device())
|
||||||
self.is_half = False
|
self.is_half = False
|
||||||
self.dml = True
|
|
||||||
else:
|
else:
|
||||||
if self.instead:
|
if self.instead:
|
||||||
logger.info(f"Use {self.instead} instead")
|
logger.info(f"Use {self.instead} instead")
|
||||||
|
|||||||
@@ -133,7 +133,7 @@
|
|||||||
"Takeover WASAPI device": "WASAPIデバイスを独占",
|
"Takeover WASAPI device": "WASAPIデバイスを独占",
|
||||||
"Target sample rate": "目標サンプリング率",
|
"Target sample rate": "目標サンプリング率",
|
||||||
"The audio file to be processed": "処理待ち音声",
|
"The audio file to be processed": "処理待ち音声",
|
||||||
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本ソフトウェアは、GNU Affero General Public License バージョン3.0以降の下でライセンスされています。<br>作者は、このソフトウェアの使用に関して一切の管理権限や責任を負いません。<br>このソフトウェアを使用し、生成された音声やファイルを含むあらゆるコンテンツを配布するユーザーは、AGPL 3.0ライセンスの条項への準拠について全責任を負います。<br>これらの条項に同意しない場合、このソフトウェアパッケージに含まれるコードやファイルの使用、参照、配布は禁止されています。<br>詳細については、ルートディレクトリにあるLICENSEファイルを参照してください。",
|
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
|
||||||
"Total training epochs (total_epoch)": "総エポック数",
|
"Total training epochs (total_epoch)": "総エポック数",
|
||||||
"Train": "学習",
|
"Train": "学習",
|
||||||
"Train feature index": "特徴索引の学習",
|
"Train feature index": "特徴索引の学習",
|
||||||
|
|||||||
@@ -133,7 +133,7 @@
|
|||||||
"Takeover WASAPI device": "独占 WASAPI 设备",
|
"Takeover WASAPI device": "独占 WASAPI 设备",
|
||||||
"Target sample rate": "目标采样率",
|
"Target sample rate": "目标采样率",
|
||||||
"The audio file to be processed": "待处理音频文件",
|
"The audio file to be processed": "待处理音频文件",
|
||||||
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本软件基于GNU Affero通用公共许可证3.0版本或更高版本进行许可。<br>作者对本软件的使用不承担任何控制权或责任。<br>使用本软件并分发任何内容(包括由其生成的声音或文件)的用户,需对遵守AGPL 3.0许可证条款承担全部责任。<br>如果您不接受这些条款,则禁止使用、引用或分发本软件包中包含的任何代码或文件。<br>请参阅位于根目录中的LICENSE文件以获取完整详情。",
|
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
|
||||||
"Total training epochs (total_epoch)": "总训练轮数total_epoch",
|
"Total training epochs (total_epoch)": "总训练轮数total_epoch",
|
||||||
"Train": "训练",
|
"Train": "训练",
|
||||||
"Train feature index": "训练特征索引",
|
"Train feature index": "训练特征索引",
|
||||||
|
|||||||
@@ -195,10 +195,7 @@ def get_audio_properties(input_path: str) -> Tuple[int, int]:
|
|||||||
container = av.open(input_path)
|
container = av.open(input_path)
|
||||||
audio_stream = next(s for s in container.streams if s.type == "audio")
|
audio_stream = next(s for s in container.streams if s.type == "audio")
|
||||||
channels = 1 if audio_stream.layout == "mono" else 2
|
channels = 1 if audio_stream.layout == "mono" else 2
|
||||||
try:
|
|
||||||
rate = audio_stream.base_rate
|
rate = audio_stream.base_rate
|
||||||
except:
|
|
||||||
rate = audio_stream.sample_rate
|
|
||||||
container.close()
|
container.close()
|
||||||
return channels, rate
|
return channels, rate
|
||||||
|
|
||||||
|
|||||||
@@ -162,7 +162,15 @@ def download_and_extract_zip(url: str, folder: str):
|
|||||||
logger.info(f"extracted into {folder}")
|
logger.info(f"extracted into {folder}")
|
||||||
|
|
||||||
|
|
||||||
def download_all_assets(tmpdir: str, version="0.2.11"):
|
def download_dns_yaml(url: str, folder: str):
|
||||||
|
logger.info(f"downloading {url}")
|
||||||
|
response = requests.get(url, stream=True, timeout=(5, 10))
|
||||||
|
with open(os.path.join(folder, "dns.yaml"), "wb") as out_file:
|
||||||
|
out_file.write(response.content)
|
||||||
|
logger.info(f"downloaded into {folder}")
|
||||||
|
|
||||||
|
|
||||||
|
def download_all_assets(tmpdir: str, version="0.2.5"):
|
||||||
import subprocess
|
import subprocess
|
||||||
import platform
|
import platform
|
||||||
|
|
||||||
@@ -190,6 +198,7 @@ def download_all_assets(tmpdir: str, version="0.2.11"):
|
|||||||
suffix = "zip" if is_win else "tar.gz"
|
suffix = "zip" if is_win else "tar.gz"
|
||||||
RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}"
|
RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}"
|
||||||
cmdfile = os.path.join(tmpdir, "rvcmd")
|
cmdfile = os.path.join(tmpdir, "rvcmd")
|
||||||
|
try:
|
||||||
if is_win:
|
if is_win:
|
||||||
download_and_extract_zip(RVCMD_URL, tmpdir)
|
download_and_extract_zip(RVCMD_URL, tmpdir)
|
||||||
cmdfile += ".exe"
|
cmdfile += ".exe"
|
||||||
@@ -197,3 +206,36 @@ def download_all_assets(tmpdir: str, version="0.2.11"):
|
|||||||
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
|
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
|
||||||
os.chmod(cmdfile, 0o755)
|
os.chmod(cmdfile, 0o755)
|
||||||
subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])
|
subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])
|
||||||
|
except Exception:
|
||||||
|
BASE_URL = "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/assets/"
|
||||||
|
suffix = {
|
||||||
|
"darwin_amd64": "555",
|
||||||
|
"darwin_arm64": "556",
|
||||||
|
"linux_386": "557",
|
||||||
|
"linux_amd64": "558",
|
||||||
|
"linux_arm64": "559",
|
||||||
|
"windows_386": "562",
|
||||||
|
"windows_amd64": "563",
|
||||||
|
}[f"{system_type}_{architecture}"]
|
||||||
|
RVCMD_URL = BASE_URL + suffix
|
||||||
|
download_dns_yaml(
|
||||||
|
"https://raw.gitcode.com/u011570312/RVC-Models-Downloader/raw/main/dns.yaml",
|
||||||
|
tmpdir,
|
||||||
|
)
|
||||||
|
if is_win:
|
||||||
|
download_and_extract_zip(RVCMD_URL, tmpdir)
|
||||||
|
cmdfile += ".exe"
|
||||||
|
else:
|
||||||
|
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
|
||||||
|
os.chmod(cmdfile, 0o755)
|
||||||
|
subprocess.run(
|
||||||
|
[
|
||||||
|
cmdfile,
|
||||||
|
"-notui",
|
||||||
|
"-w",
|
||||||
|
"0",
|
||||||
|
"-dns",
|
||||||
|
os.path.join(tmpdir, "dns.yaml"),
|
||||||
|
"assets/rvc",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|||||||
@@ -303,7 +303,7 @@ class TextAudioLoader(torch.utils.data.Dataset):
|
|||||||
spec_filename = filename.replace(".wav", ".spec.pt")
|
spec_filename = filename.replace(".wav", ".spec.pt")
|
||||||
if os.path.exists(spec_filename):
|
if os.path.exists(spec_filename):
|
||||||
try:
|
try:
|
||||||
spec = torch.load(spec_filename, weights_only=True)
|
spec = torch.load(spec_filename)
|
||||||
except:
|
except:
|
||||||
logger.warning("%s %s", spec_filename, traceback.format_exc())
|
logger.warning("%s %s", spec_filename, traceback.format_exc())
|
||||||
spec = spectrogram_torch(
|
spec = spectrogram_torch(
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps):
|
|||||||
|
|
||||||
def extract_small_model(path, name, author, sr, if_f0, info, version):
|
def extract_small_model(path, name, author, sr, if_f0, info, version):
|
||||||
try:
|
try:
|
||||||
ckpt = torch.load(path, map_location="cpu", weights_only=True)
|
ckpt = torch.load(path, map_location="cpu")
|
||||||
if "model" in ckpt:
|
if "model" in ckpt:
|
||||||
ckpt = ckpt["model"]
|
ckpt = ckpt["model"]
|
||||||
opt = OrderedDict()
|
opt = OrderedDict()
|
||||||
@@ -196,7 +196,7 @@ def extract_small_model(path, name, author, sr, if_f0, info, version):
|
|||||||
|
|
||||||
def change_info(path, info, name):
|
def change_info(path, info, name):
|
||||||
try:
|
try:
|
||||||
ckpt = torch.load(path, map_location="cpu", weights_only=True)
|
ckpt = torch.load(path, map_location="cpu")
|
||||||
ckpt["info"] = info
|
ckpt["info"] = info
|
||||||
if name == "":
|
if name == "":
|
||||||
name = os.path.basename(path)
|
name = os.path.basename(path)
|
||||||
@@ -229,8 +229,8 @@ def merge(path1, path2, alpha1, sr, f0, info, name, version):
|
|||||||
a2 = "Unknown"
|
a2 = "Unknown"
|
||||||
return f"{a1} & {a2}"
|
return f"{a1} & {a2}"
|
||||||
|
|
||||||
ckpt1 = torch.load(path1, map_location="cpu", weights_only=True)
|
ckpt1 = torch.load(path1, map_location="cpu")
|
||||||
ckpt2 = torch.load(path2, map_location="cpu", weights_only=True)
|
ckpt2 = torch.load(path2, map_location="cpu")
|
||||||
cfg = ckpt1["config"]
|
cfg = ckpt1["config"]
|
||||||
if "model" in ckpt1:
|
if "model" in ckpt1:
|
||||||
ckpt1 = extract(ckpt1)
|
ckpt1 = extract(ckpt1)
|
||||||
|
|||||||
@@ -29,24 +29,6 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
|
|||||||
checkpoint_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
|
checkpoint_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
|
||||||
|
|
||||||
saved_state_dict = checkpoint_dict["model"]
|
saved_state_dict = checkpoint_dict["model"]
|
||||||
# Convert old-style weight_norm keys (weight_g/weight_v) to new
|
|
||||||
# parametrizations format (parametrizations.weight.original0/original1)
|
|
||||||
# so that checkpoints saved with the deprecated API can still be loaded.
|
|
||||||
_converted = {}
|
|
||||||
for k, v in list(saved_state_dict.items()):
|
|
||||||
if k.endswith(".weight_g"):
|
|
||||||
new_key = k[: -len(".weight_g")] + ".parametrizations.weight.original0"
|
|
||||||
_converted[new_key] = v
|
|
||||||
elif k.endswith(".weight_v"):
|
|
||||||
new_key = k[: -len(".weight_v")] + ".parametrizations.weight.original1"
|
|
||||||
_converted[new_key] = v
|
|
||||||
if _converted:
|
|
||||||
logger.info(
|
|
||||||
"Converting %d old-style weight_norm keys from checkpoint to new parametrizations format",
|
|
||||||
len(_converted),
|
|
||||||
)
|
|
||||||
saved_state_dict.update(_converted)
|
|
||||||
|
|
||||||
if hasattr(model, "module"):
|
if hasattr(model, "module"):
|
||||||
state_dict = model.module.state_dict()
|
state_dict = model.module.state_dict()
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import importlib.util
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
@@ -39,9 +38,6 @@ f0method = sys.argv[3]
|
|||||||
device = sys.argv[4]
|
device = sys.argv[4]
|
||||||
is_half = sys.argv[5] == "True"
|
is_half = sys.argv[5] == "True"
|
||||||
|
|
||||||
if importlib.util.find_spec("torch_directml") is not None:
|
|
||||||
import torch_directml # use side effect
|
|
||||||
|
|
||||||
|
|
||||||
class FeatureInput(object):
|
class FeatureInput(object):
|
||||||
def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160):
|
def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160):
|
||||||
@@ -106,12 +102,6 @@ if __name__ == "__main__":
|
|||||||
Config.use_insecure_load()
|
Config.use_insecure_load()
|
||||||
|
|
||||||
printt(" ".join(sys.argv))
|
printt(" ".join(sys.argv))
|
||||||
# GPU methods (rmvpe, fcpe, crepe, etc.) gain nothing from multiprocessing since
|
|
||||||
# all processes share one GPU. Spawning n_p processes each lazily loading
|
|
||||||
# the model onto the same CUDA device exhausts VRAM and causes deadlocks.
|
|
||||||
if "cuda" in device:
|
|
||||||
printt("WARN: use 1 thread since GPU is used.")
|
|
||||||
n_p = 1
|
|
||||||
featureInput = FeatureInput(is_half, device)
|
featureInput = FeatureInput(is_half, device)
|
||||||
paths = []
|
paths = []
|
||||||
inp_root = "%s/1_16k_wavs" % (exp_dir)
|
inp_root = "%s/1_16k_wavs" % (exp_dir)
|
||||||
|
|||||||
@@ -17,14 +17,7 @@ device = sys.argv[1]
|
|||||||
n_part = int(sys.argv[2])
|
n_part = int(sys.argv[2])
|
||||||
i_part = int(sys.argv[3])
|
i_part = int(sys.argv[3])
|
||||||
i_gpu = sys.argv[4]
|
i_gpu = sys.argv[4]
|
||||||
# CUDA_VISIBLE_DEVICES expects bare GPU indices (e.g. "0" or "0,1"),
|
os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
|
||||||
# but callers may pass "cuda:0", "cuda:0-cuda:1", etc. Strip the prefix
|
|
||||||
# and normalise separators so any combination works.
|
|
||||||
import re
|
|
||||||
|
|
||||||
i_gpu = re.sub(r"cuda:", "", str(i_gpu))
|
|
||||||
i_gpu = i_gpu.replace("-", ",")
|
|
||||||
os.environ["CUDA_VISIBLE_DEVICES"] = i_gpu
|
|
||||||
exp_dir = sys.argv[5]
|
exp_dir = sys.argv[5]
|
||||||
version = sys.argv[6]
|
version = sys.argv[6]
|
||||||
is_half = sys.argv[7].lower() == "true"
|
is_half = sys.argv[7].lower() == "true"
|
||||||
|
|||||||
@@ -29,12 +29,10 @@ try:
|
|||||||
|
|
||||||
GradScaler = gradscaler_init()
|
GradScaler = gradscaler_init()
|
||||||
ipex_init()
|
ipex_init()
|
||||||
|
else:
|
||||||
|
from torch.cuda.amp import GradScaler, autocast
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
from torch.cuda.amp import GradScaler, autocast
|
||||||
finally:
|
|
||||||
if not ("GradScaler" in globals() and "autocast" in globals()):
|
|
||||||
from torch.amp.grad_scaler import GradScaler
|
|
||||||
from torch.amp.autocast_mode import autocast
|
|
||||||
|
|
||||||
torch.backends.cudnn.deterministic = False
|
torch.backends.cudnn.deterministic = False
|
||||||
torch.backends.cudnn.benchmark = False
|
torch.backends.cudnn.benchmark = False
|
||||||
@@ -108,19 +106,14 @@ def main():
|
|||||||
# patch to unblock people without gpus. there is probably a better way.
|
# patch to unblock people without gpus. there is probably a better way.
|
||||||
print("NO GPU DETECTED: falling back to CPU - this may take a while")
|
print("NO GPU DETECTED: falling back to CPU - this may take a while")
|
||||||
n_gpus = 1
|
n_gpus = 1
|
||||||
logger = utils.get_logger(hps.model_dir)
|
os.environ["MASTER_ADDR"] = "localhost"
|
||||||
if n_gpus == 1:
|
os.environ["MASTER_PORT"] = str(randint(20000, 55555))
|
||||||
# Single GPU: run directly without distributed to avoid gloo issues on Windows
|
|
||||||
run(0, 1, hps, logger)
|
|
||||||
else:
|
|
||||||
master_port = str(randint(20000, 55555))
|
|
||||||
os.environ["MASTER_ADDR"] = "127.0.0.1"
|
|
||||||
os.environ["MASTER_PORT"] = master_port
|
|
||||||
children = []
|
children = []
|
||||||
|
logger = utils.get_logger(hps.model_dir)
|
||||||
for i in range(n_gpus):
|
for i in range(n_gpus):
|
||||||
subproc = mp.Process(
|
subproc = mp.Process(
|
||||||
target=run,
|
target=run,
|
||||||
args=(i, n_gpus, hps, logger, master_port),
|
args=(i, n_gpus, hps, logger),
|
||||||
)
|
)
|
||||||
children.append(subproc)
|
children.append(subproc)
|
||||||
subproc.start()
|
subproc.start()
|
||||||
@@ -129,9 +122,7 @@ def main():
|
|||||||
children[i].join()
|
children[i].join()
|
||||||
|
|
||||||
|
|
||||||
def run(
|
def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger):
|
||||||
rank, n_gpus, hps: utils.HParams, logger: logging.Logger, master_port: str = "29500"
|
|
||||||
):
|
|
||||||
global global_step
|
global global_step
|
||||||
if rank == 0:
|
if rank == 0:
|
||||||
# logger = utils.get_logger(hps.model_dir)
|
# logger = utils.get_logger(hps.model_dir)
|
||||||
@@ -140,78 +131,21 @@ def run(
|
|||||||
writer = SummaryWriter(log_dir=hps.model_dir)
|
writer = SummaryWriter(log_dir=hps.model_dir)
|
||||||
writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
|
writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
|
||||||
|
|
||||||
use_distributed = n_gpus > 1
|
|
||||||
if use_distributed:
|
|
||||||
if os.name == "nt" or not torch.cuda.is_available():
|
|
||||||
# On Windows, gloo's create_device(hostname=...) is gated to Linux only
|
|
||||||
# in the C++ layer (makeDeviceForHostname). We must use the interface-
|
|
||||||
# based path instead: create_device(interface=...) calls
|
|
||||||
# makeDeviceForInterface which is not platform-gated.
|
|
||||||
import socket as _socket
|
|
||||||
|
|
||||||
try:
|
|
||||||
store = dist.TCPStore(
|
|
||||||
host_name="127.0.0.1",
|
|
||||||
port=int(master_port),
|
|
||||||
world_size=n_gpus,
|
|
||||||
is_master=(rank == 0),
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
store = dist.TCPStore(
|
|
||||||
host_name="127.0.0.1",
|
|
||||||
port=int(master_port),
|
|
||||||
world_size=n_gpus,
|
|
||||||
is_master=(rank == 0),
|
|
||||||
use_libuv=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Discover a working network interface for gloo device creation
|
|
||||||
gloo_device = None
|
|
||||||
try:
|
|
||||||
for idx, ifname in _socket.if_nameindex():
|
|
||||||
try:
|
|
||||||
gloo_device = dist.ProcessGroupGloo.create_device(
|
|
||||||
interface=ifname
|
|
||||||
)
|
|
||||||
print("Try device", idx, "name", ifname)
|
|
||||||
break
|
|
||||||
except RuntimeError as e:
|
|
||||||
print("Try device", idx, "name", ifname, "err:", e)
|
|
||||||
continue
|
|
||||||
except (OSError, AttributeError) as e:
|
|
||||||
print(e.with_traceback(None))
|
|
||||||
|
|
||||||
if gloo_device is None:
|
|
||||||
raise RuntimeError(
|
|
||||||
"Cannot create gloo device on Windows. "
|
|
||||||
"No usable network interface found. "
|
|
||||||
"Try adding your hostname to "
|
|
||||||
"C:\\Windows\\System32\\drivers\\etc\\hosts "
|
|
||||||
"with: 127.0.0.1 " + _socket.gethostname()
|
|
||||||
)
|
|
||||||
|
|
||||||
pg_options = dist.ProcessGroupGloo._Options()
|
|
||||||
pg_options._devices = [gloo_device]
|
|
||||||
dist.init_process_group(
|
|
||||||
backend="gloo",
|
|
||||||
store=store,
|
|
||||||
world_size=n_gpus,
|
|
||||||
rank=rank,
|
|
||||||
pg_options=pg_options,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
init_url = f"tcp://127.0.0.1:{master_port}"
|
|
||||||
try:
|
try:
|
||||||
dist.init_process_group(
|
dist.init_process_group(
|
||||||
backend="nccl",
|
backend=(
|
||||||
init_method=init_url,
|
"gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
|
||||||
|
),
|
||||||
|
init_method="env://",
|
||||||
world_size=n_gpus,
|
world_size=n_gpus,
|
||||||
rank=rank,
|
rank=rank,
|
||||||
)
|
)
|
||||||
except:
|
except:
|
||||||
dist.init_process_group(
|
dist.init_process_group(
|
||||||
backend="nccl",
|
backend=(
|
||||||
init_method=init_url + "?use_libuv=False",
|
"gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
|
||||||
|
),
|
||||||
|
init_method="env://?use_libuv=False",
|
||||||
world_size=n_gpus,
|
world_size=n_gpus,
|
||||||
rank=rank,
|
rank=rank,
|
||||||
)
|
)
|
||||||
@@ -287,7 +221,6 @@ def run(
|
|||||||
)
|
)
|
||||||
# net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
|
# net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
|
||||||
# net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
|
# net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
|
||||||
if use_distributed:
|
|
||||||
if hasattr(torch, "xpu") and torch.xpu.is_available():
|
if hasattr(torch, "xpu") and torch.xpu.is_available():
|
||||||
pass
|
pass
|
||||||
elif torch.cuda.is_available():
|
elif torch.cuda.is_available():
|
||||||
@@ -537,7 +470,7 @@ def train_and_evaluate(
|
|||||||
# wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
|
# wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
|
||||||
|
|
||||||
# Calculate
|
# Calculate
|
||||||
with autocast(device_type="cuda", enabled=hps.train.fp16_run):
|
with autocast(enabled=hps.train.fp16_run):
|
||||||
(
|
(
|
||||||
y_hat,
|
y_hat,
|
||||||
ids_slice,
|
ids_slice,
|
||||||
@@ -556,7 +489,7 @@ def train_and_evaluate(
|
|||||||
y_mel = slice_on_last_dim(
|
y_mel = slice_on_last_dim(
|
||||||
mel, ids_slice, hps.train.segment_size // hps.data.hop_length
|
mel, ids_slice, hps.train.segment_size // hps.data.hop_length
|
||||||
)
|
)
|
||||||
with autocast(device_type="cuda", enabled=False):
|
with autocast(enabled=False):
|
||||||
y_hat_mel = mel_spectrogram_torch(
|
y_hat_mel = mel_spectrogram_torch(
|
||||||
y_hat.float().squeeze(1),
|
y_hat.float().squeeze(1),
|
||||||
hps.data.filter_length,
|
hps.data.filter_length,
|
||||||
@@ -575,7 +508,7 @@ def train_and_evaluate(
|
|||||||
|
|
||||||
# Discriminator
|
# Discriminator
|
||||||
y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach())
|
y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach())
|
||||||
with autocast(device_type="cuda", enabled=False):
|
with autocast(enabled=False):
|
||||||
loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(
|
loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(
|
||||||
y_d_hat_r, y_d_hat_g
|
y_d_hat_r, y_d_hat_g
|
||||||
)
|
)
|
||||||
@@ -585,10 +518,10 @@ def train_and_evaluate(
|
|||||||
grad_norm_d = total_grad_norm(net_d.parameters())
|
grad_norm_d = total_grad_norm(net_d.parameters())
|
||||||
scaler.step(optim_d)
|
scaler.step(optim_d)
|
||||||
|
|
||||||
with autocast(device_type="cuda", enabled=hps.train.fp16_run):
|
with autocast(enabled=hps.train.fp16_run):
|
||||||
# Generator
|
# Generator
|
||||||
y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
|
y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
|
||||||
with autocast(device_type="cuda", enabled=False):
|
with autocast(enabled=False):
|
||||||
loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
|
loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
|
||||||
loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl
|
loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl
|
||||||
loss_fm = feature_loss(fmap_r, fmap_g)
|
loss_fm = feature_loss(fmap_r, fmap_g)
|
||||||
|
|||||||
@@ -62,6 +62,10 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
|
|||||||
os.path.basename(inp_path),
|
os.path.basename(inp_path),
|
||||||
)
|
)
|
||||||
resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo")
|
resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo")
|
||||||
|
try: # Remove the original file
|
||||||
|
os.remove(inp_path)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Failed to remove the original file: {e}")
|
||||||
inp_path = tmp_path
|
inp_path = tmp_path
|
||||||
try:
|
try:
|
||||||
if done == 0:
|
if done == 0:
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ class AudioPre:
|
|||||||
else:
|
else:
|
||||||
mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
|
mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
|
||||||
model = Nets.CascadedASPPNet(mp.param["bins"] * 2)
|
model = Nets.CascadedASPPNet(mp.param["bins"] * 2)
|
||||||
cpk = torch.load(model_path, map_location="cpu", weights_only=True)
|
cpk = torch.load(model_path, map_location="cpu")
|
||||||
model.load_state_dict(cpk)
|
model.load_state_dict(cpk)
|
||||||
model.eval()
|
model.eval()
|
||||||
if is_half:
|
if is_half:
|
||||||
|
|||||||
@@ -10,6 +10,9 @@ from pybase16384 import encode_to_string, decode_from_string
|
|||||||
from configs import CPUConfig
|
from configs import CPUConfig
|
||||||
from rvc.synthesizer import get_synthesizer
|
from rvc.synthesizer import get_synthesizer
|
||||||
|
|
||||||
|
from .pipeline import Pipeline
|
||||||
|
from .utils import load_hubert
|
||||||
|
|
||||||
|
|
||||||
class TorchSeedContext:
|
class TorchSeedContext:
|
||||||
def __init__(self, seed):
|
def __init__(self, seed):
|
||||||
@@ -92,9 +95,6 @@ def wave_hash(time_field):
|
|||||||
|
|
||||||
|
|
||||||
def model_hash(config, tgt_sr, net_g, if_f0, version):
|
def model_hash(config, tgt_sr, net_g, if_f0, version):
|
||||||
from .pipeline import Pipeline
|
|
||||||
from .utils import load_hubert
|
|
||||||
|
|
||||||
pipeline = Pipeline(tgt_sr, config)
|
pipeline = Pipeline(tgt_sr, config)
|
||||||
audio = original_audio()
|
audio = original_audio()
|
||||||
hbt = load_hubert(config.device, config.is_half)
|
hbt = load_hubert(config.device, config.is_half)
|
||||||
@@ -152,7 +152,7 @@ def model_hash_ckpt(cpt):
|
|||||||
|
|
||||||
|
|
||||||
def model_hash_from(path):
|
def model_hash_from(path):
|
||||||
cpt = torch.load(path, map_location="cpu", weights_only=True)
|
cpt = torch.load(path, map_location="cpu")
|
||||||
h = model_hash_ckpt(cpt)
|
h = model_hash_ckpt(cpt)
|
||||||
del cpt
|
del cpt
|
||||||
return h
|
return h
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ def show_info(path):
|
|||||||
try:
|
try:
|
||||||
if hasattr(path, "name"):
|
if hasattr(path, "name"):
|
||||||
path = path.name
|
path = path.name
|
||||||
a = torch.load(path, map_location="cpu", weights_only=True)
|
a = torch.load(path, map_location="cpu")
|
||||||
txt = show_model_info(a, show_long_id=True)
|
txt = show_model_info(a, show_long_id=True)
|
||||||
del a
|
del a
|
||||||
except:
|
except:
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import os, pathlib
|
import os, pathlib
|
||||||
|
|
||||||
import torch
|
from fairseq import checkpoint_utils
|
||||||
from fairseq import checkpoint_utils, data
|
|
||||||
|
|
||||||
|
|
||||||
def get_index_path_from_model(sid):
|
def get_index_path_from_model(sid):
|
||||||
@@ -22,7 +21,6 @@ def get_index_path_from_model(sid):
|
|||||||
|
|
||||||
|
|
||||||
def load_hubert(device, is_half):
|
def load_hubert(device, is_half):
|
||||||
with torch.serialization.safe_globals([data.dictionary.Dictionary]):
|
|
||||||
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
|
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
|
||||||
["assets/hubert/hubert_base.pt"],
|
["assets/hubert/hubert_base.pt"],
|
||||||
suffix="",
|
suffix="",
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
|
tensorflow-rocm
|
||||||
joblib>=1.1.0
|
joblib>=1.1.0
|
||||||
numba
|
numba
|
||||||
numpy
|
numpy
|
||||||
scipy
|
scipy
|
||||||
librosa>=0.10.2
|
librosa>=0.10.2
|
||||||
llvmlite
|
llvmlite
|
||||||
fairseq @ git+https://github.com/fumiama/fairseq.git
|
fairseq @ git+https://github.com/One-sixth/fairseq.git
|
||||||
faiss-cpu
|
faiss-cpu
|
||||||
gradio
|
gradio
|
||||||
Cython
|
Cython
|
||||||
|
|||||||
@@ -1,49 +0,0 @@
|
|||||||
joblib>=1.1.0
|
|
||||||
numba
|
|
||||||
numpy
|
|
||||||
scipy
|
|
||||||
librosa>=0.10.2
|
|
||||||
llvmlite
|
|
||||||
fairseq @ git+https://github.com/fumiama/fairseq.git
|
|
||||||
faiss-cpu
|
|
||||||
gradio
|
|
||||||
Cython
|
|
||||||
pydub>=0.25.1
|
|
||||||
tensorboardX
|
|
||||||
Jinja2>=3.1.2
|
|
||||||
json5
|
|
||||||
Markdown
|
|
||||||
matplotlib>=3.7.0
|
|
||||||
matplotlib-inline>=0.1.3
|
|
||||||
praat-parselmouth>=0.4.2
|
|
||||||
Pillow>=9.1.1
|
|
||||||
resampy>=0.4.2
|
|
||||||
scikit-learn
|
|
||||||
tensorboard
|
|
||||||
tqdm>=4.63.1
|
|
||||||
tornado>=6.1
|
|
||||||
Werkzeug>=2.2.3
|
|
||||||
uc-micro-py>=1.0.1
|
|
||||||
sympy>=1.11.1
|
|
||||||
tabulate>=0.8.10
|
|
||||||
PyYAML>=6.0
|
|
||||||
pyasn1>=0.4.8
|
|
||||||
pyasn1-modules>=0.2.8
|
|
||||||
fsspec>=2022.11.0
|
|
||||||
absl-py>=1.2.0
|
|
||||||
audioread
|
|
||||||
uvicorn>=0.21.1
|
|
||||||
colorama>=0.4.5
|
|
||||||
pyworld==0.3.2
|
|
||||||
httpx
|
|
||||||
onnxruntime; sys_platform == 'darwin'
|
|
||||||
torchcrepe>=0.0.23
|
|
||||||
fastapi
|
|
||||||
torchfcpe
|
|
||||||
python-dotenv>=1.0.0
|
|
||||||
av
|
|
||||||
pybase16384
|
|
||||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
|
||||||
torch
|
|
||||||
torchvision
|
|
||||||
torchaudio
|
|
||||||
@@ -4,7 +4,7 @@ numpy
|
|||||||
scipy
|
scipy
|
||||||
librosa>=0.10.2
|
librosa>=0.10.2
|
||||||
llvmlite
|
llvmlite
|
||||||
fairseq @ git+https://github.com/fumiama/fairseq.git
|
fairseq @ git+https://github.com/One-sixth/fairseq.git
|
||||||
faiss-cpu
|
faiss-cpu
|
||||||
gradio
|
gradio
|
||||||
Cython
|
Cython
|
||||||
@@ -43,6 +43,3 @@ python-dotenv>=1.0.0
|
|||||||
av
|
av
|
||||||
torchfcpe
|
torchfcpe
|
||||||
pybase16384
|
pybase16384
|
||||||
torch-directml
|
|
||||||
torchvision
|
|
||||||
torchaudio
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ numpy
|
|||||||
scipy
|
scipy
|
||||||
librosa>=0.10.2
|
librosa>=0.10.2
|
||||||
llvmlite==0.39.0
|
llvmlite==0.39.0
|
||||||
fairseq @ git+https://github.com/fumiama/fairseq.git
|
fairseq @ git+https://github.com/One-sixth/fairseq.git
|
||||||
faiss-cpu
|
faiss-cpu
|
||||||
gradio
|
gradio
|
||||||
Cython
|
Cython
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ numpy
|
|||||||
scipy
|
scipy
|
||||||
librosa>=0.10.2
|
librosa>=0.10.2
|
||||||
llvmlite
|
llvmlite
|
||||||
fairseq @ git+https://github.com/fumiama/fairseq.git
|
fairseq @ git+https://github.com/One-sixth/fairseq.git
|
||||||
faiss-cpu
|
faiss-cpu
|
||||||
gradio
|
gradio
|
||||||
Cython
|
Cython
|
||||||
|
|||||||
@@ -11,14 +11,14 @@ class F0Predictor(object):
|
|||||||
f0_min=50,
|
f0_min=50,
|
||||||
f0_max=1100,
|
f0_max=1100,
|
||||||
sampling_rate=44100,
|
sampling_rate=44100,
|
||||||
device: Optional[Union[str, torch.device]] = None,
|
device: Optional[str] = None,
|
||||||
):
|
):
|
||||||
self.hop_length = hop_length
|
self.hop_length = hop_length
|
||||||
self.f0_min = f0_min
|
self.f0_min = f0_min
|
||||||
self.f0_max = f0_max
|
self.f0_max = f0_max
|
||||||
self.sampling_rate = sampling_rate
|
self.sampling_rate = sampling_rate
|
||||||
if not device:
|
if device is None:
|
||||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
||||||
self.device = device
|
self.device = device
|
||||||
|
|
||||||
def compute_f0(
|
def compute_f0(
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from typing import Optional, Union
|
from typing import Optional
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -17,9 +17,9 @@ class MelSpectrogram(torch.nn.Module):
|
|||||||
hop_length: int,
|
hop_length: int,
|
||||||
n_fft: Optional[int] = None,
|
n_fft: Optional[int] = None,
|
||||||
mel_fmin: int = 0,
|
mel_fmin: int = 0,
|
||||||
mel_fmax: Optional[int] = None,
|
mel_fmax: int = None,
|
||||||
clamp: float = 1e-5,
|
clamp: float = 1e-5,
|
||||||
device: Union[str, torch.device] = torch.device("cpu"),
|
device=torch.device("cpu"),
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
if n_fft is None:
|
if n_fft is None:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import os
|
import os
|
||||||
from typing import Optional, Union
|
from typing import Any, Optional, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|||||||
@@ -12,8 +12,8 @@ class MultiHeadAttention(nn.Module):
|
|||||||
channels: int,
|
channels: int,
|
||||||
out_channels: int,
|
out_channels: int,
|
||||||
n_heads: int,
|
n_heads: int,
|
||||||
window_size: int,
|
|
||||||
p_dropout: float = 0.0,
|
p_dropout: float = 0.0,
|
||||||
|
window_size: Optional[int] = None,
|
||||||
heads_share: bool = True,
|
heads_share: bool = True,
|
||||||
block_length: Optional[int] = None,
|
block_length: Optional[int] = None,
|
||||||
proximal_bias: bool = False,
|
proximal_bias: bool = False,
|
||||||
|
|||||||
@@ -4,8 +4,7 @@ import torch
|
|||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import Conv1d, Conv2d
|
from torch.nn import Conv1d, Conv2d
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
from torch.nn.utils import spectral_norm
|
from torch.nn.utils import spectral_norm, weight_norm
|
||||||
from torch.nn.utils.parametrizations import weight_norm
|
|
||||||
|
|
||||||
from .residuals import LRELU_SLOPE
|
from .residuals import LRELU_SLOPE
|
||||||
from .utils import get_padding
|
from .utils import get_padding
|
||||||
|
|||||||
@@ -42,8 +42,8 @@ class Encoder(nn.Module):
|
|||||||
hidden_channels,
|
hidden_channels,
|
||||||
hidden_channels,
|
hidden_channels,
|
||||||
n_heads,
|
n_heads,
|
||||||
window_size,
|
|
||||||
p_dropout=p_dropout,
|
p_dropout=p_dropout,
|
||||||
|
window_size=window_size,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.norm_layers_1.append(LayerNorm(hidden_channels))
|
self.norm_layers_1.append(LayerNorm(hidden_channels))
|
||||||
@@ -121,7 +121,7 @@ class TextEncoder(nn.Module):
|
|||||||
def __call__(
|
def __call__(
|
||||||
self,
|
self,
|
||||||
phone: torch.Tensor,
|
phone: torch.Tensor,
|
||||||
pitch: Optional[torch.Tensor],
|
pitch: torch.Tensor,
|
||||||
lengths: torch.Tensor,
|
lengths: torch.Tensor,
|
||||||
skip_head: Optional[int] = None,
|
skip_head: Optional[int] = None,
|
||||||
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
||||||
@@ -135,7 +135,7 @@ class TextEncoder(nn.Module):
|
|||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
phone: torch.Tensor,
|
phone: torch.Tensor,
|
||||||
pitch: Optional[torch.Tensor],
|
pitch: torch.Tensor,
|
||||||
lengths: torch.Tensor,
|
lengths: torch.Tensor,
|
||||||
skip_head: Optional[int] = None,
|
skip_head: Optional[int] = None,
|
||||||
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
||||||
@@ -212,8 +212,10 @@ class PosteriorEncoder(nn.Module):
|
|||||||
self.enc.remove_weight_norm()
|
self.enc.remove_weight_norm()
|
||||||
|
|
||||||
def __prepare_scriptable__(self):
|
def __prepare_scriptable__(self):
|
||||||
from torch.nn.utils import parametrize
|
for hook in self.enc._forward_pre_hooks.values():
|
||||||
|
if (
|
||||||
if parametrize.is_parametrized(self.enc, "weight"):
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
parametrize.remove_parametrizations(self.enc, "weight")
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(self.enc)
|
||||||
return self
|
return self
|
||||||
|
|||||||
@@ -4,8 +4,7 @@ import torch
|
|||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import Conv1d, ConvTranspose1d
|
from torch.nn import Conv1d, ConvTranspose1d
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
from torch.nn.utils.parametrizations import weight_norm
|
from torch.nn.utils import remove_weight_norm, weight_norm
|
||||||
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
|
|
||||||
|
|
||||||
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
|
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
|
||||||
from .utils import call_weight_data_normal_if_Conv
|
from .utils import call_weight_data_normal_if_Conv
|
||||||
@@ -47,7 +46,6 @@ class Generator(torch.nn.Module):
|
|||||||
|
|
||||||
self.resblocks = nn.ModuleList()
|
self.resblocks = nn.ModuleList()
|
||||||
resblock_module = ResBlock1 if resblock == "1" else ResBlock2
|
resblock_module = ResBlock1 if resblock == "1" else ResBlock2
|
||||||
ch = 0
|
|
||||||
for i in range(len(self.ups)):
|
for i in range(len(self.ups)):
|
||||||
ch = upsample_initial_channel // (2 ** (i + 1))
|
ch = upsample_initial_channel // (2 ** (i + 1))
|
||||||
for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):
|
for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):
|
||||||
@@ -99,16 +97,29 @@ class Generator(torch.nn.Module):
|
|||||||
|
|
||||||
def __prepare_scriptable__(self):
|
def __prepare_scriptable__(self):
|
||||||
for l in self.ups:
|
for l in self.ups:
|
||||||
if is_parametrized(l, "weight"):
|
for hook in l._forward_pre_hooks.values():
|
||||||
remove_parametrizations(l, "weight")
|
# The hook we want to remove is an instance of WeightNorm class, so
|
||||||
|
# normally we would do `if isinstance(...)` but this class is not accessible
|
||||||
|
# because of shadowing, so we check the module name directly.
|
||||||
|
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
|
||||||
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
|
|
||||||
for l in self.resblocks:
|
for l in self.resblocks:
|
||||||
if is_parametrized(l, "weight"):
|
for hook in l._forward_pre_hooks.values():
|
||||||
remove_parametrizations(l, "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def remove_weight_norm(self):
|
def remove_weight_norm(self):
|
||||||
for l in self.ups:
|
for l in self.ups:
|
||||||
remove_parametrizations(l, "weight")
|
remove_weight_norm(l)
|
||||||
for l in self.resblocks:
|
for l in self.resblocks:
|
||||||
l.remove_weight_norm()
|
l.remove_weight_norm()
|
||||||
|
|
||||||
|
|||||||
@@ -6,8 +6,6 @@ from torch.nn import functional as F
|
|||||||
|
|
||||||
from .utils import activate_add_tanh_sigmoid_multiply
|
from .utils import activate_add_tanh_sigmoid_multiply
|
||||||
|
|
||||||
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
|
|
||||||
|
|
||||||
|
|
||||||
class LayerNorm(nn.Module):
|
class LayerNorm(nn.Module):
|
||||||
def __init__(self, channels: int, eps: float = 1e-5):
|
def __init__(self, channels: int, eps: float = 1e-5):
|
||||||
@@ -32,7 +30,7 @@ class WN(torch.nn.Module):
|
|||||||
dilation_rate: int,
|
dilation_rate: int,
|
||||||
n_layers: int,
|
n_layers: int,
|
||||||
gin_channels: int = 0,
|
gin_channels: int = 0,
|
||||||
p_dropout: float = 0,
|
p_dropout: int = 0,
|
||||||
):
|
):
|
||||||
super(WN, self).__init__()
|
super(WN, self).__init__()
|
||||||
assert kernel_size % 2 == 1
|
assert kernel_size % 2 == 1
|
||||||
@@ -51,9 +49,7 @@ class WN(torch.nn.Module):
|
|||||||
cond_layer = torch.nn.Conv1d(
|
cond_layer = torch.nn.Conv1d(
|
||||||
gin_channels, 2 * hidden_channels * n_layers, 1
|
gin_channels, 2 * hidden_channels * n_layers, 1
|
||||||
)
|
)
|
||||||
self.cond_layer = torch.nn.utils.parametrizations.weight_norm(
|
self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name="weight")
|
||||||
cond_layer, name="weight"
|
|
||||||
)
|
|
||||||
|
|
||||||
for i in range(n_layers):
|
for i in range(n_layers):
|
||||||
dilation = dilation_rate**i
|
dilation = dilation_rate**i
|
||||||
@@ -65,9 +61,7 @@ class WN(torch.nn.Module):
|
|||||||
dilation=dilation,
|
dilation=dilation,
|
||||||
padding=padding,
|
padding=padding,
|
||||||
)
|
)
|
||||||
in_layer = torch.nn.utils.parametrizations.weight_norm(
|
in_layer = torch.nn.utils.weight_norm(in_layer, name="weight")
|
||||||
in_layer, name="weight"
|
|
||||||
)
|
|
||||||
self.in_layers.append(in_layer)
|
self.in_layers.append(in_layer)
|
||||||
|
|
||||||
# last one is not necessary
|
# last one is not necessary
|
||||||
@@ -77,9 +71,7 @@ class WN(torch.nn.Module):
|
|||||||
res_skip_channels = hidden_channels
|
res_skip_channels = hidden_channels
|
||||||
|
|
||||||
res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
|
res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
|
||||||
res_skip_layer = torch.nn.utils.parametrizations.weight_norm(
|
res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight")
|
||||||
res_skip_layer, name="weight"
|
|
||||||
)
|
|
||||||
self.res_skip_layers.append(res_skip_layer)
|
self.res_skip_layers.append(res_skip_layer)
|
||||||
|
|
||||||
def __call__(
|
def __call__(
|
||||||
@@ -125,20 +117,32 @@ class WN(torch.nn.Module):
|
|||||||
|
|
||||||
def remove_weight_norm(self):
|
def remove_weight_norm(self):
|
||||||
if self.gin_channels != 0:
|
if self.gin_channels != 0:
|
||||||
remove_parametrizations(self.cond_layer, "weight")
|
torch.nn.utils.remove_weight_norm(self.cond_layer)
|
||||||
for l in self.in_layers:
|
for l in self.in_layers:
|
||||||
remove_parametrizations(l, "weight")
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
for l in self.res_skip_layers:
|
for l in self.res_skip_layers:
|
||||||
remove_parametrizations(l, "weight")
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
|
|
||||||
def __prepare_scriptable__(self):
|
def __prepare_scriptable__(self):
|
||||||
if self.gin_channels != 0:
|
if self.gin_channels != 0:
|
||||||
if is_parametrized(self.cond_layer, "weight"):
|
for hook in self.cond_layer._forward_pre_hooks.values():
|
||||||
remove_parametrizations(self.cond_layer, "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(self.cond_layer)
|
||||||
for l in self.in_layers:
|
for l in self.in_layers:
|
||||||
if is_parametrized(l, "weight"):
|
for hook in l._forward_pre_hooks.values():
|
||||||
remove_parametrizations(l, "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
for l in self.res_skip_layers:
|
for l in self.res_skip_layers:
|
||||||
if is_parametrized(l, "weight"):
|
for hook in l._forward_pre_hooks.values():
|
||||||
remove_parametrizations(l, "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
return self
|
return self
|
||||||
|
|||||||
@@ -1,12 +1,11 @@
|
|||||||
from typing import Optional, List, Union
|
from typing import Optional, List
|
||||||
import math
|
import math
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import Conv1d, ConvTranspose1d
|
from torch.nn import Conv1d, ConvTranspose1d
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
from torch.nn.utils.parametrizations import weight_norm
|
from torch.nn.utils import remove_weight_norm, weight_norm
|
||||||
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
|
|
||||||
|
|
||||||
from .generators import SineGenerator
|
from .generators import SineGenerator
|
||||||
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
|
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
|
||||||
@@ -84,7 +83,7 @@ class NSFGenerator(torch.nn.Module):
|
|||||||
self.conv_pre = Conv1d(
|
self.conv_pre = Conv1d(
|
||||||
initial_channel, upsample_initial_channel, 7, 1, padding=3
|
initial_channel, upsample_initial_channel, 7, 1, padding=3
|
||||||
)
|
)
|
||||||
resblockcls = ResBlock1 if resblock == "1" else ResBlock2
|
resblock = ResBlock1 if resblock == "1" else ResBlock2
|
||||||
|
|
||||||
self.ups = nn.ModuleList()
|
self.ups = nn.ModuleList()
|
||||||
for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
|
for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
|
||||||
@@ -115,13 +114,12 @@ class NSFGenerator(torch.nn.Module):
|
|||||||
self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
|
self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
|
||||||
|
|
||||||
self.resblocks = nn.ModuleList()
|
self.resblocks = nn.ModuleList()
|
||||||
ch = 0
|
|
||||||
for i in range(len(self.ups)):
|
for i in range(len(self.ups)):
|
||||||
ch = upsample_initial_channel // (2 ** (i + 1))
|
ch: int = upsample_initial_channel // (2 ** (i + 1))
|
||||||
for j, (k, d) in enumerate(
|
for j, (k, d) in enumerate(
|
||||||
zip(resblock_kernel_sizes, resblock_dilation_sizes)
|
zip(resblock_kernel_sizes, resblock_dilation_sizes)
|
||||||
):
|
):
|
||||||
self.resblocks.append(resblockcls(ch, k, d))
|
self.resblocks.append(resblock(ch, k, d))
|
||||||
|
|
||||||
self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
|
self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
|
||||||
self.ups.apply(call_weight_data_normal_if_Conv)
|
self.ups.apply(call_weight_data_normal_if_Conv)
|
||||||
@@ -192,15 +190,27 @@ class NSFGenerator(torch.nn.Module):
|
|||||||
|
|
||||||
def remove_weight_norm(self):
|
def remove_weight_norm(self):
|
||||||
for l in self.ups:
|
for l in self.ups:
|
||||||
remove_parametrizations(l, "weight")
|
remove_weight_norm(l)
|
||||||
for l in self.resblocks:
|
for l in self.resblocks:
|
||||||
l.remove_weight_norm()
|
l.remove_weight_norm()
|
||||||
|
|
||||||
def __prepare_scriptable__(self):
|
def __prepare_scriptable__(self):
|
||||||
for l in self.ups:
|
for l in self.ups:
|
||||||
if is_parametrized(l, "weight"):
|
for hook in l._forward_pre_hooks.values():
|
||||||
remove_parametrizations(l, "weight")
|
# The hook we want to remove is an instance of WeightNorm class, so
|
||||||
|
# normally we would do `if isinstance(...)` but this class is not accessible
|
||||||
|
# because of shadowing, so we check the module name directly.
|
||||||
|
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
|
||||||
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
for l in self.resblocks:
|
for l in self.resblocks:
|
||||||
if is_parametrized(l, "weight"):
|
for hook in self.resblocks._forward_pre_hooks.values():
|
||||||
remove_parametrizations(l, "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
return self
|
return self
|
||||||
|
|||||||
@@ -4,8 +4,7 @@ import torch
|
|||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import Conv1d
|
from torch.nn import Conv1d
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
from torch.nn.utils.parametrizations import weight_norm
|
from torch.nn.utils import remove_weight_norm, weight_norm
|
||||||
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
|
|
||||||
|
|
||||||
from .norms import WN
|
from .norms import WN
|
||||||
from .utils import (
|
from .utils import (
|
||||||
@@ -21,7 +20,7 @@ class ResBlock1(torch.nn.Module):
|
|||||||
self,
|
self,
|
||||||
channels: int,
|
channels: int,
|
||||||
kernel_size: int = 3,
|
kernel_size: int = 3,
|
||||||
dilation: List[int] = [1, 3, 5],
|
dilation: List[int] = (1, 3, 5),
|
||||||
):
|
):
|
||||||
super(ResBlock1, self).__init__()
|
super(ResBlock1, self).__init__()
|
||||||
|
|
||||||
@@ -86,17 +85,25 @@ class ResBlock1(torch.nn.Module):
|
|||||||
|
|
||||||
def remove_weight_norm(self):
|
def remove_weight_norm(self):
|
||||||
for l in self.convs1:
|
for l in self.convs1:
|
||||||
remove_parametrizations(l, "weight")
|
remove_weight_norm(l)
|
||||||
for l in self.convs2:
|
for l in self.convs2:
|
||||||
remove_parametrizations(l, "weight")
|
remove_weight_norm(l)
|
||||||
|
|
||||||
def __prepare_scriptable__(self):
|
def __prepare_scriptable__(self):
|
||||||
for l in self.convs1:
|
for l in self.convs1:
|
||||||
if is_parametrized(l, "weight"):
|
for hook in l._forward_pre_hooks.values():
|
||||||
remove_parametrizations(l, "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
for l in self.convs2:
|
for l in self.convs2:
|
||||||
if is_parametrized(l, "weight"):
|
for hook in l._forward_pre_hooks.values():
|
||||||
remove_parametrizations(l, "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
@@ -110,7 +117,7 @@ class ResBlock2(torch.nn.Module):
|
|||||||
self,
|
self,
|
||||||
channels: int,
|
channels: int,
|
||||||
kernel_size=3,
|
kernel_size=3,
|
||||||
dilation: List[int] = [1, 3],
|
dilation: List[int] = (1, 3),
|
||||||
):
|
):
|
||||||
super(ResBlock2, self).__init__()
|
super(ResBlock2, self).__init__()
|
||||||
self.convs = nn.ModuleList()
|
self.convs = nn.ModuleList()
|
||||||
@@ -154,12 +161,16 @@ class ResBlock2(torch.nn.Module):
|
|||||||
|
|
||||||
def remove_weight_norm(self):
|
def remove_weight_norm(self):
|
||||||
for l in self.convs:
|
for l in self.convs:
|
||||||
remove_parametrizations(l, "weight")
|
remove_weight_norm(l)
|
||||||
|
|
||||||
def __prepare_scriptable__(self):
|
def __prepare_scriptable__(self):
|
||||||
for l in self.convs:
|
for l in self.convs:
|
||||||
if is_parametrized(l, "weight"):
|
for hook in l._forward_pre_hooks.values():
|
||||||
remove_parametrizations(l, "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(l)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
@@ -171,7 +182,7 @@ class ResidualCouplingLayer(nn.Module):
|
|||||||
kernel_size: int,
|
kernel_size: int,
|
||||||
dilation_rate: int,
|
dilation_rate: int,
|
||||||
n_layers: int,
|
n_layers: int,
|
||||||
p_dropout: float = 0,
|
p_dropout: int = 0,
|
||||||
gin_channels: int = 0,
|
gin_channels: int = 0,
|
||||||
mean_only: bool = False,
|
mean_only: bool = False,
|
||||||
):
|
):
|
||||||
@@ -238,8 +249,12 @@ class ResidualCouplingLayer(nn.Module):
|
|||||||
self.enc.remove_weight_norm()
|
self.enc.remove_weight_norm()
|
||||||
|
|
||||||
def __prepare_scriptable__(self):
|
def __prepare_scriptable__(self):
|
||||||
if is_parametrized(self.enc, "weight"):
|
for hook in self.enc._forward_pre_hooks.values():
|
||||||
remove_parametrizations(self.enc, "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(self.enc)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
@@ -329,6 +344,10 @@ class ResidualCouplingBlock(nn.Module):
|
|||||||
|
|
||||||
def __prepare_scriptable__(self):
|
def __prepare_scriptable__(self):
|
||||||
for i in range(self.n_flows):
|
for i in range(self.n_flows):
|
||||||
if is_parametrized(self.flows[i * 2], "weight"):
|
for hook in self.flows[i * 2]._forward_pre_hooks.values():
|
||||||
remove_parametrizations(self.flows[i * 2], "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(self.flows[i * 2])
|
||||||
return self
|
return self
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ from typing import Optional, List, Union
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn.utils import parametrize
|
|
||||||
|
|
||||||
|
|
||||||
from .encoders import TextEncoder, PosteriorEncoder
|
from .encoders import TextEncoder, PosteriorEncoder
|
||||||
@@ -35,7 +34,7 @@ class SynthesizerTrnMsNSFsid(nn.Module):
|
|||||||
upsample_kernel_sizes: List[int],
|
upsample_kernel_sizes: List[int],
|
||||||
spk_embed_dim: int,
|
spk_embed_dim: int,
|
||||||
gin_channels: int,
|
gin_channels: int,
|
||||||
sr: Union[str, int],
|
sr: Optional[Union[str, int]],
|
||||||
encoder_dim: int,
|
encoder_dim: int,
|
||||||
use_f0: bool,
|
use_f0: bool,
|
||||||
):
|
):
|
||||||
@@ -119,16 +118,32 @@ class SynthesizerTrnMsNSFsid(nn.Module):
|
|||||||
self.enc_q.remove_weight_norm()
|
self.enc_q.remove_weight_norm()
|
||||||
|
|
||||||
def __prepare_scriptable__(self):
|
def __prepare_scriptable__(self):
|
||||||
if parametrize.is_parametrized(self.dec, "weight"):
|
for hook in self.dec._forward_pre_hooks.values():
|
||||||
parametrize.remove_parametrizations(self.dec, "weight")
|
# The hook we want to remove is an instance of WeightNorm class, so
|
||||||
if parametrize.is_parametrized(self.flow, "weight"):
|
# normally we would do `if isinstance(...)` but this class is not accessible
|
||||||
parametrize.remove_parametrizations(self.flow, "weight")
|
# because of shadowing, so we check the module name directly.
|
||||||
|
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
|
||||||
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(self.dec)
|
||||||
|
for hook in self.flow._forward_pre_hooks.values():
|
||||||
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(self.flow)
|
||||||
if hasattr(self, "enc_q"):
|
if hasattr(self, "enc_q"):
|
||||||
if parametrize.is_parametrized(self.enc_q, "weight"):
|
for hook in self.enc_q._forward_pre_hooks.values():
|
||||||
parametrize.remove_parametrizations(self.enc_q, "weight")
|
if (
|
||||||
|
hook.__module__ == "torch.nn.utils.weight_norm"
|
||||||
|
and hook.__class__.__name__ == "WeightNorm"
|
||||||
|
):
|
||||||
|
torch.nn.utils.remove_weight_norm(self.enc_q)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@torch.jit.ignore()
|
@torch.jit.ignore
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
phone: torch.Tensor,
|
phone: torch.Tensor,
|
||||||
@@ -140,20 +155,18 @@ class SynthesizerTrnMsNSFsid(nn.Module):
|
|||||||
pitchf: Optional[torch.Tensor] = None,
|
pitchf: Optional[torch.Tensor] = None,
|
||||||
): # 这里ds是id,[bs,1]
|
): # 这里ds是id,[bs,1]
|
||||||
# print(1,pitch.shape)#[bs,t]
|
# print(1,pitch.shape)#[bs,t]
|
||||||
embg = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t,广播的
|
g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t,广播的
|
||||||
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
|
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
|
||||||
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=embg)
|
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
|
||||||
z_p = self.flow(z, y_mask, g=embg)
|
z_p = self.flow(z, y_mask, g=g)
|
||||||
z_slice, ids_slice = rand_slice_segments_on_last_dim(
|
z_slice, ids_slice = rand_slice_segments_on_last_dim(
|
||||||
z, y_lengths, self.segment_size
|
z, y_lengths, self.segment_size
|
||||||
)
|
)
|
||||||
if pitchf is not None and isinstance(self.dec, NSFGenerator):
|
if pitchf is not None:
|
||||||
pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size)
|
pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size)
|
||||||
o = self.dec(z_slice, pitchf, g=embg) # type: ignore
|
o = self.dec(z_slice, pitchf, g=g)
|
||||||
elif isinstance(self.dec, Generator):
|
|
||||||
o = self.dec(z_slice, g=embg)
|
|
||||||
else:
|
else:
|
||||||
raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
|
o = self.dec(z_slice, g=g)
|
||||||
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
|
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
|
||||||
|
|
||||||
@torch.jit.export
|
@torch.jit.export
|
||||||
@@ -188,17 +201,15 @@ class SynthesizerTrnMsNSFsid(nn.Module):
|
|||||||
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
|
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
|
||||||
z = self.flow(z_p, x_mask, g=g, reverse=True)
|
z = self.flow(z_p, x_mask, g=g, reverse=True)
|
||||||
del z_p, m_p, logs_p
|
del z_p, m_p, logs_p
|
||||||
if pitchf is not None and isinstance(self.dec, NSFGenerator):
|
if pitchf is not None:
|
||||||
o = self.dec(
|
o = self.dec(
|
||||||
z * x_mask,
|
z * x_mask,
|
||||||
pitchf,
|
pitchf,
|
||||||
g=g,
|
g=g,
|
||||||
n_res=return_length2,
|
n_res=return_length2,
|
||||||
)
|
)
|
||||||
elif isinstance(self.dec, Generator):
|
|
||||||
o = self.dec(z * x_mask, g=g, n_res=return_length2)
|
|
||||||
else:
|
else:
|
||||||
raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
|
o = self.dec(z * x_mask, g=g, n_res=return_length2)
|
||||||
del x_mask, z
|
del x_mask, z
|
||||||
return o # , x_mask, (z, z_p, m_p, logs_p)
|
return o # , x_mask, (z, z_p, m_p, logs_p)
|
||||||
|
|
||||||
@@ -315,7 +326,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
|
|||||||
upsample_kernel_sizes: List[int],
|
upsample_kernel_sizes: List[int],
|
||||||
spk_embed_dim: int,
|
spk_embed_dim: int,
|
||||||
gin_channels: int,
|
gin_channels: int,
|
||||||
sr: Union[str, int],
|
sr=None,
|
||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
spec_channels,
|
spec_channels,
|
||||||
@@ -335,7 +346,6 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
|
|||||||
upsample_kernel_sizes,
|
upsample_kernel_sizes,
|
||||||
spk_embed_dim,
|
spk_embed_dim,
|
||||||
gin_channels,
|
gin_channels,
|
||||||
sr,
|
|
||||||
256,
|
256,
|
||||||
False,
|
False,
|
||||||
)
|
)
|
||||||
@@ -361,7 +371,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
|
|||||||
upsample_kernel_sizes: List[int],
|
upsample_kernel_sizes: List[int],
|
||||||
spk_embed_dim: int,
|
spk_embed_dim: int,
|
||||||
gin_channels: int,
|
gin_channels: int,
|
||||||
sr: Union[str, int],
|
sr=None,
|
||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
spec_channels,
|
spec_channels,
|
||||||
@@ -381,7 +391,6 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
|
|||||||
upsample_kernel_sizes,
|
upsample_kernel_sizes,
|
||||||
spk_embed_dim,
|
spk_embed_dim,
|
||||||
gin_channels,
|
gin_channels,
|
||||||
sr,
|
|
||||||
768,
|
768,
|
||||||
False,
|
False,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from typing import List, Optional, Tuple, Iterator, Union
|
from typing import List, Optional, Tuple, Iterator
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@@ -17,7 +17,7 @@ def get_padding(kernel_size: int, dilation=1) -> int:
|
|||||||
|
|
||||||
def slice_on_last_dim(
|
def slice_on_last_dim(
|
||||||
x: torch.Tensor,
|
x: torch.Tensor,
|
||||||
start_indices: Union[List[int], torch.Tensor],
|
start_indices: List[int],
|
||||||
segment_size=4,
|
segment_size=4,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
new_shape = [*x.shape]
|
new_shape = [*x.shape]
|
||||||
@@ -32,9 +32,9 @@ def slice_on_last_dim(
|
|||||||
|
|
||||||
def rand_slice_segments_on_last_dim(
|
def rand_slice_segments_on_last_dim(
|
||||||
x: torch.Tensor,
|
x: torch.Tensor,
|
||||||
x_lengths: Optional[Union[int, torch.Tensor]] = None,
|
x_lengths: int = None,
|
||||||
segment_size=4,
|
segment_size=4,
|
||||||
) -> Tuple[torch.Tensor, Union[List[int], torch.Tensor]]:
|
) -> Tuple[torch.Tensor, List[int]]:
|
||||||
b, _, t = x.size()
|
b, _, t = x.size()
|
||||||
if x_lengths is None:
|
if x_lengths is None:
|
||||||
x_lengths = t
|
x_lengths = t
|
||||||
@@ -58,7 +58,7 @@ def activate_add_tanh_sigmoid_multiply(
|
|||||||
def sequence_mask(
|
def sequence_mask(
|
||||||
length: torch.Tensor,
|
length: torch.Tensor,
|
||||||
max_length: Optional[int] = None,
|
max_length: Optional[int] = None,
|
||||||
):
|
) -> torch.BoolTensor:
|
||||||
if max_length is None:
|
if max_length is None:
|
||||||
max_length = int(length.max())
|
max_length = int(length.max())
|
||||||
x = torch.arange(max_length, dtype=length.dtype, device=length.device)
|
x = torch.arange(max_length, dtype=length.dtype, device=length.device)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from typing import List, Union
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@@ -25,7 +25,7 @@ class SynthesizerTrnMsNSFsid(SynthesizerBase):
|
|||||||
upsample_kernel_sizes: List[int],
|
upsample_kernel_sizes: List[int],
|
||||||
spk_embed_dim: int,
|
spk_embed_dim: int,
|
||||||
gin_channels: int,
|
gin_channels: int,
|
||||||
sr: Union[str, int],
|
sr: Optional[Union[str, int]],
|
||||||
encoder_dim: int,
|
encoder_dim: int,
|
||||||
):
|
):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
|
|||||||
@@ -40,9 +40,11 @@ app = gr.Blocks()
|
|||||||
with app:
|
with app:
|
||||||
with gr.Tabs():
|
with gr.Tabs():
|
||||||
with gr.TabItem("在线demo"):
|
with gr.TabItem("在线demo"):
|
||||||
gr.Markdown(value="""
|
gr.Markdown(
|
||||||
|
value="""
|
||||||
RVC 在线demo
|
RVC 在线demo
|
||||||
""")
|
"""
|
||||||
|
)
|
||||||
sid = gr.Dropdown(label=i18n("Inferencing voice"), choices=sorted(names))
|
sid = gr.Dropdown(label=i18n("Inferencing voice"), choices=sorted(names))
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
spk_item = gr.Slider(
|
spk_item = gr.Slider(
|
||||||
|
|||||||
20
web.py
20
web.py
@@ -36,6 +36,7 @@ import threading
|
|||||||
import shutil
|
import shutil
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
||||||
logging.getLogger("numba").setLevel(logging.WARNING)
|
logging.getLogger("numba").setLevel(logging.WARNING)
|
||||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||||
|
|
||||||
@@ -88,24 +89,23 @@ index_paths = [""]
|
|||||||
|
|
||||||
|
|
||||||
def lookup_names(weight_root):
|
def lookup_names(weight_root):
|
||||||
names = []
|
global names
|
||||||
for name in os.listdir(weight_root):
|
for name in os.listdir(weight_root):
|
||||||
if name.endswith(".pth"):
|
if name.endswith(".pth"):
|
||||||
names.append(name)
|
names.append(name)
|
||||||
return names
|
|
||||||
|
|
||||||
|
|
||||||
def lookup_indices(index_root):
|
def lookup_indices(index_root):
|
||||||
index_paths = []
|
global index_paths
|
||||||
for root, _, files in os.walk(index_root, topdown=False):
|
for root, _, files in os.walk(index_root, topdown=False):
|
||||||
for name in files:
|
for name in files:
|
||||||
if name.endswith(".index") and "trained" not in name:
|
if name.endswith(".index") and "trained" not in name:
|
||||||
index_paths.append(str(pathlib.Path(root, name)))
|
index_paths.append(str(pathlib.Path(root, name)))
|
||||||
return index_paths
|
|
||||||
|
|
||||||
|
|
||||||
names = [""] + lookup_names(weight_root)
|
lookup_names(weight_root)
|
||||||
index_paths = [""] + lookup_indices(index_root) + lookup_indices(outside_index_root)
|
lookup_indices(index_root)
|
||||||
|
lookup_indices(outside_index_root)
|
||||||
uvr5_names = []
|
uvr5_names = []
|
||||||
for name in os.listdir(weight_uvr5_root):
|
for name in os.listdir(weight_uvr5_root):
|
||||||
if name.endswith(".pth") or "onnx" in name:
|
if name.endswith(".pth") or "onnx" in name:
|
||||||
@@ -113,8 +113,12 @@ for name in os.listdir(weight_uvr5_root):
|
|||||||
|
|
||||||
|
|
||||||
def change_choices():
|
def change_choices():
|
||||||
names = [""] + lookup_names(weight_root)
|
global index_paths, names
|
||||||
index_paths = [""] + lookup_indices(index_root) + lookup_indices(outside_index_root)
|
names = [""]
|
||||||
|
lookup_names(weight_root)
|
||||||
|
index_paths = [""]
|
||||||
|
lookup_indices(index_root)
|
||||||
|
lookup_indices(outside_index_root)
|
||||||
return {"choices": sorted(names), "__type__": "update"}, {
|
return {"choices": sorted(names), "__type__": "update"}, {
|
||||||
"choices": sorted(index_paths),
|
"choices": sorted(index_paths),
|
||||||
"__type__": "update",
|
"__type__": "update",
|
||||||
|
|||||||
Reference in New Issue
Block a user