1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-06 01:30:24 +08:00

19 Commits

Author SHA1 Message Date
github-actions[bot]
d56c03eb74 chore(format): run black on dev 2026-04-18 11:04:30 +00:00
源文雨
f9ae0b5d32 fix(fairseq): hubert load model error 2026-04-18 19:04:13 +08:00
源文雨
8ded36e9e1 Merge branch 'dev' of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI 2026-04-18 17:39:34 +08:00
github-actions[bot]
645ce27dcc chore(format): run black on dev (#143)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2026-04-18 17:36:44 +08:00
源文雨
3affc9415d fix(train): unsupported gloo device on win 2026-04-18 17:30:48 +08:00
源文雨
cc50ede4fb fix(train): extract f0 & feature hang 2026-04-18 17:03:52 +08:00
github-actions[bot]
96f7bccb12 chore(format): run black on dev (#136)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-11-26 13:06:55 +08:00
源文雨
43d19eb00e fix(dml): train extract_f0_print error
ModuleNotFoundError: No module named 'torch.privateuseone' due to new process
2025-11-21 16:52:17 +08:00
源文雨
7fa122045f fix(config): dml load & av codec ctx has no base_rate attr 2025-11-21 15:48:03 +08:00
源文雨
71cc31a96c fix: make ci happy 2025-11-21 15:36:50 +08:00
源文雨
e71b6c4408 fix: make ci happy 2025-11-21 15:24:43 +08:00
源文雨
5f198f31c3 fix: make ci happy 2025-11-21 15:14:22 +08:00
源文雨
7096797eaf fix: make ci happy 2025-11-21 15:12:26 +08:00
源文雨
57bee42fc9 fix: make ci happy 2025-11-21 15:10:40 +08:00
源文雨
53d54681cf fix: i18n & dl & tests 2025-11-21 15:06:04 +08:00
github-actions[bot]
8ab9fe3dee chore(i18n): sync locale on dev (#124)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-11-21 14:32:55 +08:00
源文雨
215a3edcef fix: remove outdated codes & add weights_only=True 2025-11-18 13:57:03 +08:00
fumiama
6a9b026a53 fix(ci): mis-closing of some useful issues 2025-11-18 13:21:30 +08:00
源文雨
34f28d97e3 deps: fix fairseq install in .venv 2025-11-09 18:30:11 +08:00
50 changed files with 419 additions and 332 deletions

View File

@@ -15,7 +15,7 @@ jobs:
- name: Run RVC-Models-Downloader
run: |
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb
sudo apt -y install ./rvcmd_linux_amd64.deb
rm -f ./rvcmd_linux_amd64.deb
rvcmd -notrs -w 1 -notui assets/rvc

View File

@@ -12,7 +12,7 @@ jobs:
steps:
- uses: actions/stale@v5
with:
exempt-issue-labels: "help wanted,good first issue,documentation,following up,todo list"
exempt-issue-labels: "help wanted,good first issue,documentation,following up,todo list,enhancement"
days-before-issue-stale: 30
days-before-issue-close: 15
stale-issue-label: "stale"

View File

@@ -11,20 +11,72 @@ jobs:
steps:
- uses: actions/checkout@master
- name: Space cleanup
env:
DEBIAN_FRONTEND: noninteractive
run: |
df -h
# Source - https://stackoverflow.com/a
# Posted by Cosmin Bodnariuc
# Retrieved 2025-11-21, License - CC BY-SA 4.0
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/local/share/chromium
sudo rm -rf /usr/local/share/powershell
sudo rm -rf /usr/local/share/vcpkg
sudo rm -rf /usr/local/share/miniconda
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo rm -rf /opt/hostedtoolcache/go
sudo rm -rf /opt/hostedtoolcache/Python
sudo rm -rf /opt/hostedtoolcache/node
sudo rm -rf /opt/hostedtoolcache/R
sudo rm -rf /opt/hostedtoolcache/Java
sudo rm -rf /opt/hostedtoolcache/LLVM
sudo rm -rf /opt/hostedtoolcache/Swift
sudo rm -rf /opt/hostedtoolcache/Php
sudo rm -rf /opt/hostedtoolcache/Perl
sudo rm -rf /opt/hostedtoolcache/Scala
sudo rm -rf /opt/hostedtoolcache/Julia
sudo rm -rf /opt/hostedtoolcache/Mono
sudo rm -rf /opt/hostedtoolcache/PowerShell
sudo rm -rf /opt/hostedtoolcache/Crystal
sudo rm -rf /opt/hostedtoolcache/Elixir
sudo rm -rf /opt/hostedtoolcache/Erlang
sudo rm -rf /opt/hostedtoolcache/FSharp
sudo rm -rf /opt/hostedtoolcache/Haskell
sudo rm -rf /opt/hostedtoolcache/OCaml
sudo rm -rf /opt/hostedtoolcache/Rust
sudo rm -rf /opt/hostedtoolcache/Sbt
sudo rm -rf /opt/hostedtoolcache/Solidity
sudo rm -rf /opt/hostedtoolcache/VisualStudio
sudo rm -rf /opt/hostedtoolcache/WinAppDriver
sudo rm -rf /opt/hostedtoolcache/Xamarin
sudo rm -rf /opt/hostedtoolcache/Yarn
sudo rm -rf /opt/hostedtoolcache/Zephyr
sudo rm -rf /opt/hostedtoolcache/zig
sudo rm -rf /opt/hostedtoolcache/zulu
sudo rm -rf /opt/hostedtoolcache/azcopy
sudo -E apt-get update
sudo -E apt-get -y autoremove --purge
sudo -E apt-get clean
df -h
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
sudo apt update
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb
wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb
sudo apt -y install ./rvcmd_linux_amd64.deb
pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552
python -m pip install --upgrade setuptools
python -m pip install --upgrade wheel
pip install torch torchvision torchaudio
pip install -r requirements/main.txt
pip install -r requirements/cpu.txt
rvcmd -notrs -w 1 -notui assets/rvc
- name: Test step 1 & 2
run: |

View File

@@ -4,6 +4,7 @@ import sys
import json
import shutil
from multiprocessing import cpu_count
import importlib.util
import torch
@@ -46,10 +47,10 @@ class Config(metaclass=Singleton):
self.global_link,
self.noparallel,
self.noautoopen,
self.dml,
self.nocheck,
self.update,
) = self.arg_parse()
self.dml = False
self.instead = ""
self.preprocess_per = 3.7
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
@@ -83,11 +84,6 @@ class Config(metaclass=Singleton):
action="store_true",
help="Do not open in browser automatically",
)
parser.add_argument(
"--dml",
action="store_true",
help="torch_dml",
)
parser.add_argument(
"--nocheck", action="store_true", help="Run without checking assets"
)
@@ -104,7 +100,6 @@ class Config(metaclass=Singleton):
cmd_opts.global_link,
cmd_opts.noparallel,
cmd_opts.noautoopen,
cmd_opts.dml,
cmd_opts.nocheck,
cmd_opts.update,
)
@@ -183,7 +178,7 @@ class Config(metaclass=Singleton):
if self.has_xpu():
self.device = self.instead = "xpu:0"
self.is_half = True
i_device = int(self.device.split(":")[-1])
i_device = int(str(self.device).split(":")[-1])
self.gpu_name = torch.cuda.get_device_name(i_device)
if (
("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
@@ -214,7 +209,7 @@ class Config(metaclass=Singleton):
self.use_fp32_config()
else:
logger.info("No supported Nvidia GPU found")
self.device = self.instead = "cpu"
self.device = self.instead = torch.get_default_device()
self.is_half = False
self.use_fp32_config()
@@ -239,12 +234,13 @@ class Config(metaclass=Singleton):
x_query = 5
x_center = 30
x_max = 32
if self.dml:
if importlib.util.find_spec("torch_directml") is not None:
logger.info("Use DirectML instead")
import torch_directml
self.device = torch_directml.device(torch_directml.default_device())
self.is_half = False
self.dml = True
else:
if self.instead:
logger.info(f"Use {self.instead} instead")

View File

@@ -140,7 +140,6 @@
"Train model": "Train model",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)",
"Unfortunately, there is no compatible GPU available to support your training.": "Unfortunately, there is no compatible GPU available to support your training.",
"Unknown": "Unknown",
"Unload model to save GPU memory": "Unload model to save GPU memory",
"Version": "Version",

View File

@@ -140,7 +140,6 @@
"Train model": "Entrenar Modelo",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Entrenamiento finalizado, puede ver el registro de entrenamiento en la consola o en el archivo train.log en la carpeta del experimento",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Cambio de tono (entero, número de semitonos, subir una octava +12 o bajar una octava -12)",
"Unfortunately, there is no compatible GPU available to support your training.": "Lamentablemente, no tiene una tarjeta gráfica adecuada para soportar su entrenamiento",
"Unknown": "Desconocido",
"Unload model to save GPU memory": "Descargue la voz para ahorrar memoria GPU",
"Version": "Versión",

View File

@@ -140,7 +140,6 @@
"Train model": "Entraîner le modèle",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Entraînement terminé. Vous pouvez consulter les rapports d'entraînement dans la console ou dans le fichier 'train.log' situé dans le dossier de l'expérience.",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Transposer (entier, nombre de demi-tons, monter d'une octave : 12, descendre d'une octave : -12) :",
"Unfortunately, there is no compatible GPU available to support your training.": "Malheureusement, il n'y a pas de GPU compatible disponible pour prendre en charge votre entrainement.",
"Unknown": "Inconnu",
"Unload model to save GPU memory": "Décharger la voix pour économiser la mémoire GPU.",
"Version": "Version",

View File

@@ -140,7 +140,6 @@
"Train model": "Addestra modello",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Addestramento completato. ",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Trasposizione (numero intero, numero di semitoni, alza di un'ottava: 12, abbassa di un'ottava: -12):",
"Unfortunately, there is no compatible GPU available to support your training.": "Sfortunatamente, non è disponibile alcuna GPU compatibile per supportare l'addestramento.",
"Unknown": "Unknown",
"Unload model to save GPU memory": "Scarica la voce per risparmiare memoria della GPU:",
"Version": "Versione",

View File

@@ -133,14 +133,13 @@
"Takeover WASAPI device": "WASAPIデバイスを独占",
"Target sample rate": "目標サンプリング率",
"The audio file to be processed": "処理待ち音声",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本ソフトウェアは、GNU Affero General Public License バージョン3.0以降の下でライセンスされています。<br>作者は、このソフトウェアの使用に関して一切の管理権限や責任を負いません。<br>このソフトウェアを使用し、生成された音声やファイルを含むあらゆるコンテンツを配布するユーザーは、AGPL 3.0ライセンスの条項への準拠について全責任を負います。<br>これらの条項に同意しない場合、このソフトウェアパッケージに含まれるコードやファイルの使用、参照、配布は禁止されています。<br>詳細については、ルートディレクトリにあるLICENSEファイルを参照してください。",
"Total training epochs (total_epoch)": "総エポック数",
"Train": "学習",
"Train feature index": "特徴索引の学習",
"Train model": "モデルの学習",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "学習終了時に、学習ログやフォルダ内のtrain.logを確認することができます",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "ピッチ変更(整数、半音数、上下オクターブ12-12)",
"Unfortunately, there is no compatible GPU available to support your training.": "学習に対応したGPUが動作しないのは残念です。",
"Unknown": "未知",
"Unload model to save GPU memory": "音源を削除してメモリを節約",
"Version": "バージョン",

View File

@@ -140,7 +140,6 @@
"Train model": "모델 훈련",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "훈련 완료, 콘솔 훈련 로그 또는 실험 폴더 내의 train.log 확인 가능",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "키 변경(정수, 반음 수, 옥타브 상승 12, 옥타브 하강 -12)",
"Unfortunately, there is no compatible GPU available to support your training.": "사용 가능한 그래픽 카드가 없어 훈련을 지원할 수 없습니다",
"Unknown": "Unknown",
"Unload model to save GPU memory": "음색 언로드로 디스플레이 메모리 절약",
"Version": "버전",

View File

@@ -140,7 +140,6 @@
"Train model": "Treinar Modelo",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Após o término do treinamento, você pode verificar o log de treinamento do console ou train.log na pasta de experimentos",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Mude o tom aqui. Se a voz for do mesmo sexo, não é necessario alterar (12 caso seja Masculino para feminino, -12 caso seja ao contrário).",
"Unfortunately, there is no compatible GPU available to support your training.": "Infelizmente, não há GPU compatível disponível para apoiar o seu treinamento.",
"Unknown": "Unknown",
"Unload model to save GPU memory": "Descarregue a voz para liberar a memória da GPU:",
"Version": "Versão",

View File

@@ -140,7 +140,6 @@
"Train model": "Обучить модель",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Обучение модели завершено. Журнал обучения можно просмотреть в консоли или в файле 'train.log' в папке с моделью.",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Изменить высоту голоса (укажите количество полутонов; чтобы поднять голос на октаву, выберите 12, понизить на октаву — -12):",
"Unfortunately, there is no compatible GPU available to support your training.": "К сожалению, у вас нету графического процессора, который поддерживает обучение моделей.",
"Unknown": "Unknown",
"Unload model to save GPU memory": "Выгрузить модель из памяти GPU для освобождения ресурсов",
"Version": "Версия архитектуры модели:",

View File

@@ -140,7 +140,6 @@
"Train model": "Modeli Eğit",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Eğitim tamamlandı. Eğitim günlüklerini konsolda veya deney klasörü altındaki train.log dosyasında kontrol edebilirsiniz.",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Transpoze et (tamsayı, yarıton sayısıyla; bir oktav yükseltmek için: 12, bir oktav düşürmek için: -12):",
"Unfortunately, there is no compatible GPU available to support your training.": "Maalesef, eğitiminizi desteklemek için uyumlu bir GPU bulunmamaktadır.",
"Unknown": "Unknown",
"Unload model to save GPU memory": "GPU bellek kullanımını azaltmak için sesi kaldır",
"Version": "Sürüm",

View File

@@ -133,14 +133,13 @@
"Takeover WASAPI device": "独占 WASAPI 设备",
"Target sample rate": "目标采样率",
"The audio file to be processed": "待处理音频文件",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.",
"This software is licensed under the GNU Affero General Public License, version 3.0 or later.<br>The author has no control or responsibility regarding the use of this software.<br>Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.<br>If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.<br>Refer to the LICENSE file located in the root directory for full details.": "本软件基于GNU Affero通用公共许可证3.0版本或更高版本进行许可。<br>作者对本软件的使用不承担任何控制权或责任。<br>使用本软件并分发任何内容包括由其生成的声音或文件的用户需对遵守AGPL 3.0许可证条款承担全部责任。<br>如果您不接受这些条款,则禁止使用、引用或分发本软件包中包含的任何代码或文件。<br>请参阅位于根目录中的LICENSE文件以获取完整详情。",
"Total training epochs (total_epoch)": "总训练轮数total_epoch",
"Train": "训练",
"Train feature index": "训练特征索引",
"Train model": "训练模型",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "变调(整数, 半音数量, 升八度12降八度-12)",
"Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练",
"Unknown": "未知",
"Unload model to save GPU memory": "卸载音色省显存",
"Version": "版本",

View File

@@ -140,7 +140,6 @@
"Train model": "訓練模型",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "變調(整數、半音數量、升八度12降八度-12)",
"Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练",
"Unknown": "Unknown",
"Unload model to save GPU memory": "卸載音色節省 VRAM",
"Version": "版本",

View File

@@ -140,7 +140,6 @@
"Train model": "訓練模型",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "變調(整數、半音數量、升八度12降八度-12)",
"Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练",
"Unknown": "Unknown",
"Unload model to save GPU memory": "卸載音色節省 VRAM",
"Version": "版本",

View File

@@ -140,7 +140,6 @@
"Train model": "訓練模型",
"Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "變調(整數、半音數量、升八度12降八度-12)",
"Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练",
"Unknown": "Unknown",
"Unload model to save GPU memory": "卸載音色節省 VRAM",
"Version": "版本",

View File

@@ -195,7 +195,10 @@ def get_audio_properties(input_path: str) -> Tuple[int, int]:
container = av.open(input_path)
audio_stream = next(s for s in container.streams if s.type == "audio")
channels = 1 if audio_stream.layout == "mono" else 2
rate = audio_stream.base_rate
try:
rate = audio_stream.base_rate
except:
rate = audio_stream.sample_rate
container.close()
return channels, rate

View File

@@ -162,15 +162,7 @@ def download_and_extract_zip(url: str, folder: str):
logger.info(f"extracted into {folder}")
def download_dns_yaml(url: str, folder: str):
logger.info(f"downloading {url}")
response = requests.get(url, stream=True, timeout=(5, 10))
with open(os.path.join(folder, "dns.yaml"), "wb") as out_file:
out_file.write(response.content)
logger.info(f"downloaded into {folder}")
def download_all_assets(tmpdir: str, version="0.2.5"):
def download_all_assets(tmpdir: str, version="0.2.11"):
import subprocess
import platform
@@ -198,44 +190,10 @@ def download_all_assets(tmpdir: str, version="0.2.5"):
suffix = "zip" if is_win else "tar.gz"
RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}"
cmdfile = os.path.join(tmpdir, "rvcmd")
try:
if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe"
else:
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755)
subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])
except Exception:
BASE_URL = "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/assets/"
suffix = {
"darwin_amd64": "555",
"darwin_arm64": "556",
"linux_386": "557",
"linux_amd64": "558",
"linux_arm64": "559",
"windows_386": "562",
"windows_amd64": "563",
}[f"{system_type}_{architecture}"]
RVCMD_URL = BASE_URL + suffix
download_dns_yaml(
"https://raw.gitcode.com/u011570312/RVC-Models-Downloader/raw/main/dns.yaml",
tmpdir,
)
if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe"
else:
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755)
subprocess.run(
[
cmdfile,
"-notui",
"-w",
"0",
"-dns",
os.path.join(tmpdir, "dns.yaml"),
"assets/rvc",
]
)
if is_win:
download_and_extract_zip(RVCMD_URL, tmpdir)
cmdfile += ".exe"
else:
download_and_extract_tar_gz(RVCMD_URL, tmpdir)
os.chmod(cmdfile, 0o755)
subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"])

View File

@@ -303,7 +303,7 @@ class TextAudioLoader(torch.utils.data.Dataset):
spec_filename = filename.replace(".wav", ".spec.pt")
if os.path.exists(spec_filename):
try:
spec = torch.load(spec_filename)
spec = torch.load(spec_filename, weights_only=True)
except:
logger.warning("%s %s", spec_filename, traceback.format_exc())
spec = spectrogram_torch(

View File

@@ -59,7 +59,7 @@ def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps):
def extract_small_model(path, name, author, sr, if_f0, info, version):
try:
ckpt = torch.load(path, map_location="cpu")
ckpt = torch.load(path, map_location="cpu", weights_only=True)
if "model" in ckpt:
ckpt = ckpt["model"]
opt = OrderedDict()
@@ -196,7 +196,7 @@ def extract_small_model(path, name, author, sr, if_f0, info, version):
def change_info(path, info, name):
try:
ckpt = torch.load(path, map_location="cpu")
ckpt = torch.load(path, map_location="cpu", weights_only=True)
ckpt["info"] = info
if name == "":
name = os.path.basename(path)
@@ -229,8 +229,8 @@ def merge(path1, path2, alpha1, sr, f0, info, name, version):
a2 = "Unknown"
return f"{a1} & {a2}"
ckpt1 = torch.load(path1, map_location="cpu")
ckpt2 = torch.load(path2, map_location="cpu")
ckpt1 = torch.load(path1, map_location="cpu", weights_only=True)
ckpt2 = torch.load(path2, map_location="cpu", weights_only=True)
cfg = ckpt1["config"]
if "model" in ckpt1:
ckpt1 = extract(ckpt1)

View File

@@ -29,6 +29,24 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
checkpoint_dict = torch.load(checkpoint_path, map_location="cpu", weights_only=True)
saved_state_dict = checkpoint_dict["model"]
# Convert old-style weight_norm keys (weight_g/weight_v) to new
# parametrizations format (parametrizations.weight.original0/original1)
# so that checkpoints saved with the deprecated API can still be loaded.
_converted = {}
for k, v in list(saved_state_dict.items()):
if k.endswith(".weight_g"):
new_key = k[: -len(".weight_g")] + ".parametrizations.weight.original0"
_converted[new_key] = v
elif k.endswith(".weight_v"):
new_key = k[: -len(".weight_v")] + ".parametrizations.weight.original1"
_converted[new_key] = v
if _converted:
logger.info(
"Converting %d old-style weight_norm keys from checkpoint to new parametrizations format",
len(_converted),
)
saved_state_dict.update(_converted)
if hasattr(model, "module"):
state_dict = model.module.state_dict()
else:

View File

@@ -2,6 +2,7 @@ import os
import sys
import traceback
from pathlib import Path
import importlib.util
from dotenv import load_dotenv
@@ -38,6 +39,9 @@ f0method = sys.argv[3]
device = sys.argv[4]
is_half = sys.argv[5] == "True"
if importlib.util.find_spec("torch_directml") is not None:
import torch_directml # use side effect
class FeatureInput(object):
def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160):
@@ -102,6 +106,12 @@ if __name__ == "__main__":
Config.use_insecure_load()
printt(" ".join(sys.argv))
# GPU methods (rmvpe, fcpe, crepe, etc.) gain nothing from multiprocessing since
# all processes share one GPU. Spawning n_p processes each lazily loading
# the model onto the same CUDA device exhausts VRAM and causes deadlocks.
if "cuda" in device:
printt("WARN: use 1 thread since GPU is used.")
n_p = 1
featureInput = FeatureInput(is_half, device)
paths = []
inp_root = "%s/1_16k_wavs" % (exp_dir)

View File

@@ -17,7 +17,14 @@ device = sys.argv[1]
n_part = int(sys.argv[2])
i_part = int(sys.argv[3])
i_gpu = sys.argv[4]
os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
# CUDA_VISIBLE_DEVICES expects bare GPU indices (e.g. "0" or "0,1"),
# but callers may pass "cuda:0", "cuda:0-cuda:1", etc. Strip the prefix
# and normalise separators so any combination works.
import re
i_gpu = re.sub(r"cuda:", "", str(i_gpu))
i_gpu = i_gpu.replace("-", ",")
os.environ["CUDA_VISIBLE_DEVICES"] = i_gpu
exp_dir = sys.argv[5]
version = sys.argv[6]
is_half = sys.argv[7].lower() == "true"

View File

@@ -29,10 +29,12 @@ try:
GradScaler = gradscaler_init()
ipex_init()
else:
from torch.cuda.amp import GradScaler, autocast
except Exception:
from torch.cuda.amp import GradScaler, autocast
pass
finally:
if not ("GradScaler" in globals() and "autocast" in globals()):
from torch.amp.grad_scaler import GradScaler
from torch.amp.autocast_mode import autocast
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False
@@ -106,23 +108,30 @@ def main():
# patch to unblock people without gpus. there is probably a better way.
print("NO GPU DETECTED: falling back to CPU - this may take a while")
n_gpus = 1
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = str(randint(20000, 55555))
children = []
logger = utils.get_logger(hps.model_dir)
for i in range(n_gpus):
subproc = mp.Process(
target=run,
args=(i, n_gpus, hps, logger),
)
children.append(subproc)
subproc.start()
if n_gpus == 1:
# Single GPU: run directly without distributed to avoid gloo issues on Windows
run(0, 1, hps, logger)
else:
master_port = str(randint(20000, 55555))
os.environ["MASTER_ADDR"] = "127.0.0.1"
os.environ["MASTER_PORT"] = master_port
children = []
for i in range(n_gpus):
subproc = mp.Process(
target=run,
args=(i, n_gpus, hps, logger, master_port),
)
children.append(subproc)
subproc.start()
for i in range(n_gpus):
children[i].join()
for i in range(n_gpus):
children[i].join()
def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger):
def run(
rank, n_gpus, hps: utils.HParams, logger: logging.Logger, master_port: str = "29500"
):
global global_step
if rank == 0:
# logger = utils.get_logger(hps.model_dir)
@@ -131,24 +140,81 @@ def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger):
writer = SummaryWriter(log_dir=hps.model_dir)
writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
try:
dist.init_process_group(
backend=(
"gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
),
init_method="env://",
world_size=n_gpus,
rank=rank,
)
except:
dist.init_process_group(
backend=(
"gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl"
),
init_method="env://?use_libuv=False",
world_size=n_gpus,
rank=rank,
)
use_distributed = n_gpus > 1
if use_distributed:
if os.name == "nt" or not torch.cuda.is_available():
# On Windows, gloo's create_device(hostname=...) is gated to Linux only
# in the C++ layer (makeDeviceForHostname). We must use the interface-
# based path instead: create_device(interface=...) calls
# makeDeviceForInterface which is not platform-gated.
import socket as _socket
try:
store = dist.TCPStore(
host_name="127.0.0.1",
port=int(master_port),
world_size=n_gpus,
is_master=(rank == 0),
)
except Exception:
store = dist.TCPStore(
host_name="127.0.0.1",
port=int(master_port),
world_size=n_gpus,
is_master=(rank == 0),
use_libuv=False,
)
# Discover a working network interface for gloo device creation
gloo_device = None
try:
for idx, ifname in _socket.if_nameindex():
try:
gloo_device = dist.ProcessGroupGloo.create_device(
interface=ifname
)
print("Try device", idx, "name", ifname)
break
except RuntimeError as e:
print("Try device", idx, "name", ifname, "err:", e)
continue
except (OSError, AttributeError) as e:
print(e.with_traceback(None))
if gloo_device is None:
raise RuntimeError(
"Cannot create gloo device on Windows. "
"No usable network interface found. "
"Try adding your hostname to "
"C:\\Windows\\System32\\drivers\\etc\\hosts "
"with: 127.0.0.1 " + _socket.gethostname()
)
pg_options = dist.ProcessGroupGloo._Options()
pg_options._devices = [gloo_device]
dist.init_process_group(
backend="gloo",
store=store,
world_size=n_gpus,
rank=rank,
pg_options=pg_options,
)
else:
init_url = f"tcp://127.0.0.1:{master_port}"
try:
dist.init_process_group(
backend="nccl",
init_method=init_url,
world_size=n_gpus,
rank=rank,
)
except:
dist.init_process_group(
backend="nccl",
init_method=init_url + "?use_libuv=False",
world_size=n_gpus,
rank=rank,
)
torch.manual_seed(hps.train.seed)
if torch.cuda.is_available():
torch.cuda.set_device(rank)
@@ -221,14 +287,15 @@ def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger):
)
# net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
# net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
if hasattr(torch, "xpu") and torch.xpu.is_available():
pass
elif torch.cuda.is_available():
net_g = DDP(net_g, device_ids=[rank])
net_d = DDP(net_d, device_ids=[rank])
else:
net_g = DDP(net_g)
net_d = DDP(net_d)
if use_distributed:
if hasattr(torch, "xpu") and torch.xpu.is_available():
pass
elif torch.cuda.is_available():
net_g = DDP(net_g, device_ids=[rank])
net_d = DDP(net_d, device_ids=[rank])
else:
net_g = DDP(net_g)
net_d = DDP(net_d)
try: # 如果能加载自动resume
_, _, _, epoch_str = utils.load_checkpoint(
@@ -470,7 +537,7 @@ def train_and_evaluate(
# wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
# Calculate
with autocast(enabled=hps.train.fp16_run):
with autocast(device_type="cuda", enabled=hps.train.fp16_run):
(
y_hat,
ids_slice,
@@ -489,7 +556,7 @@ def train_and_evaluate(
y_mel = slice_on_last_dim(
mel, ids_slice, hps.train.segment_size // hps.data.hop_length
)
with autocast(enabled=False):
with autocast(device_type="cuda", enabled=False):
y_hat_mel = mel_spectrogram_torch(
y_hat.float().squeeze(1),
hps.data.filter_length,
@@ -508,7 +575,7 @@ def train_and_evaluate(
# Discriminator
y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach())
with autocast(enabled=False):
with autocast(device_type="cuda", enabled=False):
loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(
y_d_hat_r, y_d_hat_g
)
@@ -518,10 +585,10 @@ def train_and_evaluate(
grad_norm_d = total_grad_norm(net_d.parameters())
scaler.step(optim_d)
with autocast(enabled=hps.train.fp16_run):
with autocast(device_type="cuda", enabled=hps.train.fp16_run):
# Generator
y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
with autocast(enabled=False):
with autocast(device_type="cuda", enabled=False):
loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel
loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl
loss_fm = feature_loss(fmap_r, fmap_g)

View File

@@ -62,10 +62,6 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format
os.path.basename(inp_path),
)
resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo")
try: # Remove the original file
os.remove(inp_path)
except Exception as e:
print(f"Failed to remove the original file: {e}")
inp_path = tmp_path
try:
if done == 0:

View File

@@ -37,7 +37,7 @@ class AudioPre:
else:
mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
model = Nets.CascadedASPPNet(mp.param["bins"] * 2)
cpk = torch.load(model_path, map_location="cpu")
cpk = torch.load(model_path, map_location="cpu", weights_only=True)
model.load_state_dict(cpk)
model.eval()
if is_half:

View File

@@ -10,9 +10,6 @@ from pybase16384 import encode_to_string, decode_from_string
from configs import CPUConfig
from rvc.synthesizer import get_synthesizer
from .pipeline import Pipeline
from .utils import load_hubert
class TorchSeedContext:
def __init__(self, seed):
@@ -95,6 +92,9 @@ def wave_hash(time_field):
def model_hash(config, tgt_sr, net_g, if_f0, version):
from .pipeline import Pipeline
from .utils import load_hubert
pipeline = Pipeline(tgt_sr, config)
audio = original_audio()
hbt = load_hubert(config.device, config.is_half)
@@ -152,7 +152,7 @@ def model_hash_ckpt(cpt):
def model_hash_from(path):
cpt = torch.load(path, map_location="cpu")
cpt = torch.load(path, map_location="cpu", weights_only=True)
h = model_hash_ckpt(cpt)
del cpt
return h

View File

@@ -75,7 +75,7 @@ def show_info(path):
try:
if hasattr(path, "name"):
path = path.name
a = torch.load(path, map_location="cpu")
a = torch.load(path, map_location="cpu", weights_only=True)
txt = show_model_info(a, show_long_id=True)
del a
except:

View File

@@ -1,6 +1,7 @@
import os, pathlib
from fairseq import checkpoint_utils
import torch
from fairseq import checkpoint_utils, data
def get_index_path_from_model(sid):
@@ -21,10 +22,11 @@ def get_index_path_from_model(sid):
def load_hubert(device, is_half):
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
["assets/hubert/hubert_base.pt"],
suffix="",
)
with torch.serialization.safe_globals([data.dictionary.Dictionary]):
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
["assets/hubert/hubert_base.pt"],
suffix="",
)
hubert_model = models[0]
hubert_model = hubert_model.to(device)
if is_half:

View File

@@ -1,11 +1,10 @@
tensorflow-rocm
joblib>=1.1.0
numba
numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/One-sixth/fairseq.git
fairseq @ git+https://github.com/fumiama/fairseq.git
faiss-cpu
gradio
Cython

49
requirements/cpu.txt Normal file
View File

@@ -0,0 +1,49 @@
joblib>=1.1.0
numba
numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/fumiama/fairseq.git
faiss-cpu
gradio
Cython
pydub>=0.25.1
tensorboardX
Jinja2>=3.1.2
json5
Markdown
matplotlib>=3.7.0
matplotlib-inline>=0.1.3
praat-parselmouth>=0.4.2
Pillow>=9.1.1
resampy>=0.4.2
scikit-learn
tensorboard
tqdm>=4.63.1
tornado>=6.1
Werkzeug>=2.2.3
uc-micro-py>=1.0.1
sympy>=1.11.1
tabulate>=0.8.10
PyYAML>=6.0
pyasn1>=0.4.8
pyasn1-modules>=0.2.8
fsspec>=2022.11.0
absl-py>=1.2.0
audioread
uvicorn>=0.21.1
colorama>=0.4.5
pyworld==0.3.2
httpx
onnxruntime; sys_platform == 'darwin'
torchcrepe>=0.0.23
fastapi
torchfcpe
python-dotenv>=1.0.0
av
pybase16384
--extra-index-url https://download.pytorch.org/whl/cpu
torch
torchvision
torchaudio

View File

@@ -4,7 +4,7 @@ numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/One-sixth/fairseq.git
fairseq @ git+https://github.com/fumiama/fairseq.git
faiss-cpu
gradio
Cython
@@ -43,3 +43,6 @@ python-dotenv>=1.0.0
av
torchfcpe
pybase16384
torch-directml
torchvision
torchaudio

View File

@@ -9,7 +9,7 @@ numpy
scipy
librosa>=0.10.2
llvmlite==0.39.0
fairseq @ git+https://github.com/One-sixth/fairseq.git
fairseq @ git+https://github.com/fumiama/fairseq.git
faiss-cpu
gradio
Cython

View File

@@ -4,7 +4,7 @@ numpy
scipy
librosa>=0.10.2
llvmlite
fairseq @ git+https://github.com/One-sixth/fairseq.git
fairseq @ git+https://github.com/fumiama/fairseq.git
faiss-cpu
gradio
Cython

View File

@@ -11,14 +11,14 @@ class F0Predictor(object):
f0_min=50,
f0_max=1100,
sampling_rate=44100,
device: Optional[str] = None,
device: Optional[Union[str, torch.device]] = None,
):
self.hop_length = hop_length
self.f0_min = f0_min
self.f0_max = f0_max
self.sampling_rate = sampling_rate
if device is None:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
if not device:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
self.device = device
def compute_f0(

View File

@@ -1,4 +1,4 @@
from typing import Optional
from typing import Optional, Union
import torch
import numpy as np
@@ -17,9 +17,9 @@ class MelSpectrogram(torch.nn.Module):
hop_length: int,
n_fft: Optional[int] = None,
mel_fmin: int = 0,
mel_fmax: int = None,
mel_fmax: Optional[int] = None,
clamp: float = 1e-5,
device=torch.device("cpu"),
device: Union[str, torch.device] = torch.device("cpu"),
):
super().__init__()
if n_fft is None:

View File

@@ -1,6 +1,6 @@
from io import BytesIO
import os
from typing import Any, Optional, Union
from typing import Optional, Union
import numpy as np
import torch

View File

@@ -12,8 +12,8 @@ class MultiHeadAttention(nn.Module):
channels: int,
out_channels: int,
n_heads: int,
window_size: int,
p_dropout: float = 0.0,
window_size: Optional[int] = None,
heads_share: bool = True,
block_length: Optional[int] = None,
proximal_bias: bool = False,

View File

@@ -4,7 +4,8 @@ import torch
from torch import nn
from torch.nn import Conv1d, Conv2d
from torch.nn import functional as F
from torch.nn.utils import spectral_norm, weight_norm
from torch.nn.utils import spectral_norm
from torch.nn.utils.parametrizations import weight_norm
from .residuals import LRELU_SLOPE
from .utils import get_padding

View File

@@ -42,8 +42,8 @@ class Encoder(nn.Module):
hidden_channels,
hidden_channels,
n_heads,
window_size,
p_dropout=p_dropout,
window_size=window_size,
)
)
self.norm_layers_1.append(LayerNorm(hidden_channels))
@@ -121,7 +121,7 @@ class TextEncoder(nn.Module):
def __call__(
self,
phone: torch.Tensor,
pitch: torch.Tensor,
pitch: Optional[torch.Tensor],
lengths: torch.Tensor,
skip_head: Optional[int] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
@@ -135,7 +135,7 @@ class TextEncoder(nn.Module):
def forward(
self,
phone: torch.Tensor,
pitch: torch.Tensor,
pitch: Optional[torch.Tensor],
lengths: torch.Tensor,
skip_head: Optional[int] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
@@ -212,10 +212,8 @@ class PosteriorEncoder(nn.Module):
self.enc.remove_weight_norm()
def __prepare_scriptable__(self):
for hook in self.enc._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.enc)
from torch.nn.utils import parametrize
if parametrize.is_parametrized(self.enc, "weight"):
parametrize.remove_parametrizations(self.enc, "weight")
return self

View File

@@ -4,7 +4,8 @@ import torch
from torch import nn
from torch.nn import Conv1d, ConvTranspose1d
from torch.nn import functional as F
from torch.nn.utils import remove_weight_norm, weight_norm
from torch.nn.utils.parametrizations import weight_norm
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
from .utils import call_weight_data_normal_if_Conv
@@ -46,6 +47,7 @@ class Generator(torch.nn.Module):
self.resblocks = nn.ModuleList()
resblock_module = ResBlock1 if resblock == "1" else ResBlock2
ch = 0
for i in range(len(self.ups)):
ch = upsample_initial_channel // (2 ** (i + 1))
for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):
@@ -97,29 +99,16 @@ class Generator(torch.nn.Module):
def __prepare_scriptable__(self):
for l in self.ups:
for hook in l._forward_pre_hooks.values():
# The hook we want to remove is an instance of WeightNorm class, so
# normally we would do `if isinstance(...)` but this class is not accessible
# because of shadowing, so we check the module name directly.
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for l in self.resblocks:
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
return self
def remove_weight_norm(self):
for l in self.ups:
remove_weight_norm(l)
remove_parametrizations(l, "weight")
for l in self.resblocks:
l.remove_weight_norm()

View File

@@ -6,6 +6,8 @@ from torch.nn import functional as F
from .utils import activate_add_tanh_sigmoid_multiply
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
class LayerNorm(nn.Module):
def __init__(self, channels: int, eps: float = 1e-5):
@@ -30,7 +32,7 @@ class WN(torch.nn.Module):
dilation_rate: int,
n_layers: int,
gin_channels: int = 0,
p_dropout: int = 0,
p_dropout: float = 0,
):
super(WN, self).__init__()
assert kernel_size % 2 == 1
@@ -49,7 +51,9 @@ class WN(torch.nn.Module):
cond_layer = torch.nn.Conv1d(
gin_channels, 2 * hidden_channels * n_layers, 1
)
self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name="weight")
self.cond_layer = torch.nn.utils.parametrizations.weight_norm(
cond_layer, name="weight"
)
for i in range(n_layers):
dilation = dilation_rate**i
@@ -61,7 +65,9 @@ class WN(torch.nn.Module):
dilation=dilation,
padding=padding,
)
in_layer = torch.nn.utils.weight_norm(in_layer, name="weight")
in_layer = torch.nn.utils.parametrizations.weight_norm(
in_layer, name="weight"
)
self.in_layers.append(in_layer)
# last one is not necessary
@@ -71,7 +77,9 @@ class WN(torch.nn.Module):
res_skip_channels = hidden_channels
res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight")
res_skip_layer = torch.nn.utils.parametrizations.weight_norm(
res_skip_layer, name="weight"
)
self.res_skip_layers.append(res_skip_layer)
def __call__(
@@ -117,32 +125,20 @@ class WN(torch.nn.Module):
def remove_weight_norm(self):
if self.gin_channels != 0:
torch.nn.utils.remove_weight_norm(self.cond_layer)
remove_parametrizations(self.cond_layer, "weight")
for l in self.in_layers:
torch.nn.utils.remove_weight_norm(l)
remove_parametrizations(l, "weight")
for l in self.res_skip_layers:
torch.nn.utils.remove_weight_norm(l)
remove_parametrizations(l, "weight")
def __prepare_scriptable__(self):
if self.gin_channels != 0:
for hook in self.cond_layer._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.cond_layer)
if is_parametrized(self.cond_layer, "weight"):
remove_parametrizations(self.cond_layer, "weight")
for l in self.in_layers:
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for l in self.res_skip_layers:
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
return self

View File

@@ -1,11 +1,12 @@
from typing import Optional, List
from typing import Optional, List, Union
import math
import torch
from torch import nn
from torch.nn import Conv1d, ConvTranspose1d
from torch.nn import functional as F
from torch.nn.utils import remove_weight_norm, weight_norm
from torch.nn.utils.parametrizations import weight_norm
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
from .generators import SineGenerator
from .residuals import ResBlock1, ResBlock2, LRELU_SLOPE
@@ -83,7 +84,7 @@ class NSFGenerator(torch.nn.Module):
self.conv_pre = Conv1d(
initial_channel, upsample_initial_channel, 7, 1, padding=3
)
resblock = ResBlock1 if resblock == "1" else ResBlock2
resblockcls = ResBlock1 if resblock == "1" else ResBlock2
self.ups = nn.ModuleList()
for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
@@ -114,12 +115,13 @@ class NSFGenerator(torch.nn.Module):
self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
self.resblocks = nn.ModuleList()
ch = 0
for i in range(len(self.ups)):
ch: int = upsample_initial_channel // (2 ** (i + 1))
ch = upsample_initial_channel // (2 ** (i + 1))
for j, (k, d) in enumerate(
zip(resblock_kernel_sizes, resblock_dilation_sizes)
):
self.resblocks.append(resblock(ch, k, d))
self.resblocks.append(resblockcls(ch, k, d))
self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
self.ups.apply(call_weight_data_normal_if_Conv)
@@ -190,27 +192,15 @@ class NSFGenerator(torch.nn.Module):
def remove_weight_norm(self):
for l in self.ups:
remove_weight_norm(l)
remove_parametrizations(l, "weight")
for l in self.resblocks:
l.remove_weight_norm()
def __prepare_scriptable__(self):
for l in self.ups:
for hook in l._forward_pre_hooks.values():
# The hook we want to remove is an instance of WeightNorm class, so
# normally we would do `if isinstance(...)` but this class is not accessible
# because of shadowing, so we check the module name directly.
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for l in self.resblocks:
for hook in self.resblocks._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
return self

View File

@@ -4,7 +4,8 @@ import torch
from torch import nn
from torch.nn import Conv1d
from torch.nn import functional as F
from torch.nn.utils import remove_weight_norm, weight_norm
from torch.nn.utils.parametrizations import weight_norm
from torch.nn.utils.parametrize import is_parametrized, remove_parametrizations
from .norms import WN
from .utils import (
@@ -20,7 +21,7 @@ class ResBlock1(torch.nn.Module):
self,
channels: int,
kernel_size: int = 3,
dilation: List[int] = (1, 3, 5),
dilation: List[int] = [1, 3, 5],
):
super(ResBlock1, self).__init__()
@@ -85,25 +86,17 @@ class ResBlock1(torch.nn.Module):
def remove_weight_norm(self):
for l in self.convs1:
remove_weight_norm(l)
remove_parametrizations(l, "weight")
for l in self.convs2:
remove_weight_norm(l)
remove_parametrizations(l, "weight")
def __prepare_scriptable__(self):
for l in self.convs1:
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
for l in self.convs2:
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
return self
@@ -117,7 +110,7 @@ class ResBlock2(torch.nn.Module):
self,
channels: int,
kernel_size=3,
dilation: List[int] = (1, 3),
dilation: List[int] = [1, 3],
):
super(ResBlock2, self).__init__()
self.convs = nn.ModuleList()
@@ -161,16 +154,12 @@ class ResBlock2(torch.nn.Module):
def remove_weight_norm(self):
for l in self.convs:
remove_weight_norm(l)
remove_parametrizations(l, "weight")
def __prepare_scriptable__(self):
for l in self.convs:
for hook in l._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(l)
if is_parametrized(l, "weight"):
remove_parametrizations(l, "weight")
return self
@@ -182,7 +171,7 @@ class ResidualCouplingLayer(nn.Module):
kernel_size: int,
dilation_rate: int,
n_layers: int,
p_dropout: int = 0,
p_dropout: float = 0,
gin_channels: int = 0,
mean_only: bool = False,
):
@@ -249,12 +238,8 @@ class ResidualCouplingLayer(nn.Module):
self.enc.remove_weight_norm()
def __prepare_scriptable__(self):
for hook in self.enc._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.enc)
if is_parametrized(self.enc, "weight"):
remove_parametrizations(self.enc, "weight")
return self
@@ -344,10 +329,6 @@ class ResidualCouplingBlock(nn.Module):
def __prepare_scriptable__(self):
for i in range(self.n_flows):
for hook in self.flows[i * 2]._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.flows[i * 2])
if is_parametrized(self.flows[i * 2], "weight"):
remove_parametrizations(self.flows[i * 2], "weight")
return self

View File

@@ -2,6 +2,7 @@ from typing import Optional, List, Union
import torch
from torch import nn
from torch.nn.utils import parametrize
from .encoders import TextEncoder, PosteriorEncoder
@@ -34,7 +35,7 @@ class SynthesizerTrnMsNSFsid(nn.Module):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr: Optional[Union[str, int]],
sr: Union[str, int],
encoder_dim: int,
use_f0: bool,
):
@@ -118,32 +119,16 @@ class SynthesizerTrnMsNSFsid(nn.Module):
self.enc_q.remove_weight_norm()
def __prepare_scriptable__(self):
for hook in self.dec._forward_pre_hooks.values():
# The hook we want to remove is an instance of WeightNorm class, so
# normally we would do `if isinstance(...)` but this class is not accessible
# because of shadowing, so we check the module name directly.
# https://github.com/pytorch/pytorch/blob/be0ca00c5ce260eb5bcec3237357f7a30cc08983/torch/nn/utils/__init__.py#L3
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.dec)
for hook in self.flow._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.flow)
if parametrize.is_parametrized(self.dec, "weight"):
parametrize.remove_parametrizations(self.dec, "weight")
if parametrize.is_parametrized(self.flow, "weight"):
parametrize.remove_parametrizations(self.flow, "weight")
if hasattr(self, "enc_q"):
for hook in self.enc_q._forward_pre_hooks.values():
if (
hook.__module__ == "torch.nn.utils.weight_norm"
and hook.__class__.__name__ == "WeightNorm"
):
torch.nn.utils.remove_weight_norm(self.enc_q)
if parametrize.is_parametrized(self.enc_q, "weight"):
parametrize.remove_parametrizations(self.enc_q, "weight")
return self
@torch.jit.ignore
@torch.jit.ignore()
def forward(
self,
phone: torch.Tensor,
@@ -155,18 +140,20 @@ class SynthesizerTrnMsNSFsid(nn.Module):
pitchf: Optional[torch.Tensor] = None,
): # 这里ds是id[bs,1]
# print(1,pitch.shape)#[bs,t]
g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t广播的
embg = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t广播的
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
z_p = self.flow(z, y_mask, g=g)
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=embg)
z_p = self.flow(z, y_mask, g=embg)
z_slice, ids_slice = rand_slice_segments_on_last_dim(
z, y_lengths, self.segment_size
)
if pitchf is not None:
if pitchf is not None and isinstance(self.dec, NSFGenerator):
pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size)
o = self.dec(z_slice, pitchf, g=g)
o = self.dec(z_slice, pitchf, g=embg) # type: ignore
elif isinstance(self.dec, Generator):
o = self.dec(z_slice, g=embg)
else:
o = self.dec(z_slice, g=g)
raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
@torch.jit.export
@@ -201,15 +188,17 @@ class SynthesizerTrnMsNSFsid(nn.Module):
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
z = self.flow(z_p, x_mask, g=g, reverse=True)
del z_p, m_p, logs_p
if pitchf is not None:
if pitchf is not None and isinstance(self.dec, NSFGenerator):
o = self.dec(
z * x_mask,
pitchf,
g=g,
n_res=return_length2,
)
else:
elif isinstance(self.dec, Generator):
o = self.dec(z * x_mask, g=g, n_res=return_length2)
else:
raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
del x_mask, z
return o # , x_mask, (z, z_p, m_p, logs_p)
@@ -326,7 +315,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr=None,
sr: Union[str, int],
):
super().__init__(
spec_channels,
@@ -346,6 +335,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes,
spk_embed_dim,
gin_channels,
sr,
256,
False,
)
@@ -371,7 +361,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr=None,
sr: Union[str, int],
):
super().__init__(
spec_channels,
@@ -391,6 +381,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes,
spk_embed_dim,
gin_channels,
sr,
768,
False,
)

View File

@@ -1,4 +1,4 @@
from typing import List, Optional, Tuple, Iterator
from typing import List, Optional, Tuple, Iterator, Union
import torch
@@ -17,7 +17,7 @@ def get_padding(kernel_size: int, dilation=1) -> int:
def slice_on_last_dim(
x: torch.Tensor,
start_indices: List[int],
start_indices: Union[List[int], torch.Tensor],
segment_size=4,
) -> torch.Tensor:
new_shape = [*x.shape]
@@ -32,9 +32,9 @@ def slice_on_last_dim(
def rand_slice_segments_on_last_dim(
x: torch.Tensor,
x_lengths: int = None,
x_lengths: Optional[Union[int, torch.Tensor]] = None,
segment_size=4,
) -> Tuple[torch.Tensor, List[int]]:
) -> Tuple[torch.Tensor, Union[List[int], torch.Tensor]]:
b, _, t = x.size()
if x_lengths is None:
x_lengths = t
@@ -58,7 +58,7 @@ def activate_add_tanh_sigmoid_multiply(
def sequence_mask(
length: torch.Tensor,
max_length: Optional[int] = None,
) -> torch.BoolTensor:
):
if max_length is None:
max_length = int(length.max())
x = torch.arange(max_length, dtype=length.dtype, device=length.device)

View File

@@ -1,4 +1,4 @@
from typing import List, Optional, Union
from typing import List, Union
import torch
@@ -25,7 +25,7 @@ class SynthesizerTrnMsNSFsid(SynthesizerBase):
upsample_kernel_sizes: List[int],
spk_embed_dim: int,
gin_channels: int,
sr: Optional[Union[str, int]],
sr: Union[str, int],
encoder_dim: int,
):
super().__init__(

View File

@@ -40,11 +40,9 @@ app = gr.Blocks()
with app:
with gr.Tabs():
with gr.TabItem("在线demo"):
gr.Markdown(
value="""
gr.Markdown(value="""
RVC 在线demo
"""
)
""")
sid = gr.Dropdown(label=i18n("Inferencing voice"), choices=sorted(names))
with gr.Column():
spk_item = gr.Slider(

20
web.py
View File

@@ -36,7 +36,6 @@ import threading
import shutil
import logging
logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -89,23 +88,24 @@ index_paths = [""]
def lookup_names(weight_root):
global names
names = []
for name in os.listdir(weight_root):
if name.endswith(".pth"):
names.append(name)
return names
def lookup_indices(index_root):
global index_paths
index_paths = []
for root, _, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append(str(pathlib.Path(root, name)))
return index_paths
lookup_names(weight_root)
lookup_indices(index_root)
lookup_indices(outside_index_root)
names = [""] + lookup_names(weight_root)
index_paths = [""] + lookup_indices(index_root) + lookup_indices(outside_index_root)
uvr5_names = []
for name in os.listdir(weight_uvr5_root):
if name.endswith(".pth") or "onnx" in name:
@@ -113,12 +113,8 @@ for name in os.listdir(weight_uvr5_root):
def change_choices():
global index_paths, names
names = [""]
lookup_names(weight_root)
index_paths = [""]
lookup_indices(index_root)
lookup_indices(outside_index_root)
names = [""] + lookup_names(weight_root)
index_paths = [""] + lookup_indices(index_root) + lookup_indices(outside_index_root)
return {"choices": sorted(names), "__type__": "update"}, {
"choices": sorted(index_paths),
"__type__": "update",