diff --git a/.github/workflows/checksum.yml b/.github/workflows/checksum.yml index 1bf904b..1fa0fbc 100644 --- a/.github/workflows/checksum.yml +++ b/.github/workflows/checksum.yml @@ -15,7 +15,7 @@ jobs: - name: Run RVC-Models-Downloader run: | - wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb + wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb sudo apt -y install ./rvcmd_linux_amd64.deb rm -f ./rvcmd_linux_amd64.deb rvcmd -notrs -w 1 -notui assets/rvc diff --git a/.github/workflows/unitest.yml b/.github/workflows/unitest.yml index 7a4987b..db6273e 100644 --- a/.github/workflows/unitest.yml +++ b/.github/workflows/unitest.yml @@ -11,20 +11,72 @@ jobs: steps: - uses: actions/checkout@master + - name: Space cleanup + env: + DEBIAN_FRONTEND: noninteractive + run: | + df -h + + # Source - https://stackoverflow.com/a + # Posted by Cosmin Bodnariuc + # Retrieved 2025-11-21, License - CC BY-SA 4.0 + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/share/boost + sudo rm -rf /usr/local/share/chromium + sudo rm -rf /usr/local/share/powershell + sudo rm -rf /usr/local/share/vcpkg + sudo rm -rf /usr/local/share/miniconda + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo rm -rf /opt/hostedtoolcache/go + sudo rm -rf /opt/hostedtoolcache/Python + sudo rm -rf /opt/hostedtoolcache/node + sudo rm -rf /opt/hostedtoolcache/R + sudo rm -rf /opt/hostedtoolcache/Java + sudo rm -rf /opt/hostedtoolcache/LLVM + sudo rm -rf /opt/hostedtoolcache/Swift + sudo rm -rf /opt/hostedtoolcache/Php + sudo rm -rf /opt/hostedtoolcache/Perl + sudo rm -rf /opt/hostedtoolcache/Scala + sudo rm -rf /opt/hostedtoolcache/Julia + sudo rm -rf /opt/hostedtoolcache/Mono + sudo rm -rf /opt/hostedtoolcache/PowerShell + sudo rm -rf /opt/hostedtoolcache/Crystal + sudo rm -rf /opt/hostedtoolcache/Elixir + sudo rm -rf /opt/hostedtoolcache/Erlang + sudo rm -rf /opt/hostedtoolcache/FSharp + 
sudo rm -rf /opt/hostedtoolcache/Haskell + sudo rm -rf /opt/hostedtoolcache/OCaml + sudo rm -rf /opt/hostedtoolcache/Rust + sudo rm -rf /opt/hostedtoolcache/Sbt + sudo rm -rf /opt/hostedtoolcache/Solidity + sudo rm -rf /opt/hostedtoolcache/VisualStudio + sudo rm -rf /opt/hostedtoolcache/WinAppDriver + sudo rm -rf /opt/hostedtoolcache/Xamarin + sudo rm -rf /opt/hostedtoolcache/Yarn + sudo rm -rf /opt/hostedtoolcache/Zephyr + sudo rm -rf /opt/hostedtoolcache/zig + sudo rm -rf /opt/hostedtoolcache/zulu + sudo rm -rf /opt/hostedtoolcache/azcopy + + sudo -E apt-get update + sudo -E apt-get -y autoremove --purge + sudo -E apt-get clean + + df -h - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - sudo apt update - wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb + wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb sudo apt -y install ./rvcmd_linux_amd64.deb pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552 python -m pip install --upgrade setuptools python -m pip install --upgrade wheel pip install torch torchvision torchaudio - pip install -r requirements/main.txt + pip install -r requirements/cpu.txt rvcmd -notrs -w 1 -notui assets/rvc - name: Test step 1 & 2 run: | diff --git a/configs/config.py b/configs/config.py index 3fb3410..2476efb 100644 --- a/configs/config.py +++ b/configs/config.py @@ -4,6 +4,7 @@ import sys import json import shutil from multiprocessing import cpu_count +import importlib.util import torch @@ -46,13 +47,14 @@ class Config(metaclass=Singleton): self.global_link, self.noparallel, self.noautoopen, - self.dml, self.nocheck, self.update, ) = self.arg_parse() + self.dml = False self.instead = "" self.preprocess_per = 3.7 self.x_pad, self.x_query, self.x_center, 
self.x_max = self.device_config() + self.default_batch_size = self.get_default_batch_size() @staticmethod def load_config_json() -> dict: @@ -82,11 +84,6 @@ class Config(metaclass=Singleton): action="store_true", help="Do not open in browser automatically", ) - parser.add_argument( - "--dml", - action="store_true", - help="torch_dml", - ) parser.add_argument( "--nocheck", action="store_true", help="Run without checking assets" ) @@ -103,7 +100,6 @@ class Config(metaclass=Singleton): cmd_opts.global_link, cmd_opts.noparallel, cmd_opts.noautoopen, - cmd_opts.dml, cmd_opts.nocheck, cmd_opts.update, ) @@ -137,6 +133,35 @@ class Config(metaclass=Singleton): except AttributeError: pass + @staticmethod + def get_default_batch_size() -> int: + """Return a default training batch size based on CUDA device memory. + + Returns 1 when CUDA is unavailable or no CUDA device is visible. + Otherwise returns half of the smallest device's memory in GiB + (each device rounded as int(GiB + 0.4)), clamped to at least 1. + """ + if not torch.cuda.is_available(): + # TODO: add non-cuda multicards + return 1 + # Check whether any NVIDIA card usable for training and faster inference exists + ngpu = torch.cuda.device_count() + if not ngpu: + return 1 + # Total memory of every visible card, as int(GiB + 0.4) + mem = [ + int( + torch.cuda.get_device_properties(i).total_memory + / 1024 + / 1024 + / 1024 + + 0.4 + ) + for i in range(ngpu) + ] + # Clamp to 1: a card rounding to under 2 GiB would otherwise yield a batch size of 0 + return max(1, min(mem) // 2) + def use_fp32_config(self): for config_file in version_config_list: self.json_config[config_file]["train"]["fp16_run"] = False @@ -153,7 +178,7 @@ class Config(metaclass=Singleton): if self.has_xpu(): self.device = self.instead = "xpu:0" self.is_half = True - i_device = int(self.device.split(":")[-1]) + i_device = int(str(self.device).split(":")[-1]) self.gpu_name = torch.cuda.get_device_name(i_device) if ( ("16" in self.gpu_name and "V100" not in self.gpu_name.upper()) @@ -184,7 +209,7 @@ class Config(metaclass=Singleton): self.use_fp32_config() else: logger.info("No supported Nvidia GPU found") - self.device = self.instead = "cpu" + self.device = self.instead = torch.get_default_device() self.is_half = False self.use_fp32_config() @@ -209,12 +234,13 @@ class
Config(metaclass=Singleton): x_query = 5 x_center = 30 x_max = 32 - if self.dml: + if importlib.util.find_spec("torch_directml") is not None: logger.info("Use DirectML instead") import torch_directml self.device = torch_directml.device(torch_directml.default_device()) self.is_half = False + self.dml = True else: if self.instead: logger.info(f"Use {self.instead} instead") diff --git a/i18n/locale/en_US.json b/i18n/locale/en_US.json index 7cc92d3..44c8da0 100644 --- a/i18n/locale/en_US.json +++ b/i18n/locale/en_US.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "Target sample rate", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Total training epochs (total_epoch)", "Train": "Train", "Train feature index": "Train feature index", "Train model": "Train model", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)", - "Unfortunately, there is no compatible GPU available to support your training.": "Unfortunately, there is no compatible GPU available to support your training.", "Unknown": "Unknown", "Unload model to save GPU memory": "Unload model to save GPU memory", "Version": "Version", diff --git a/i18n/locale/es_ES.json b/i18n/locale/es_ES.json index d4102fa..f21209e 100644 --- a/i18n/locale/es_ES.json +++ b/i18n/locale/es_ES.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "Adquisición del dispositivo WASAPI", "Target sample rate": "Tasa de muestreo objetivo", "The audio file to be processed": "El archivo de audio a procesar", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "Este software es de código abierto bajo la licencia MIT, el autor no tiene ningún control sobre el software, y aquellos que usan el software y difunden los sonidos exportados por el software son los únicos responsables.
Si no está de acuerdo con esta cláusula , no puede utilizar ni citar ningún código ni archivo del paquete de software Consulte el directorio raíz Agreement-LICENSE.txt para obtener más información.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Total de épocas de entrenamiento (total_epoch)", "Train": "Entrenamiento", "Train feature index": "Índice de características", "Train model": "Entrenar Modelo", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Entrenamiento finalizado, puede ver el registro de entrenamiento en la consola o en el archivo train.log en la carpeta del experimento", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Cambio de tono (entero, número de semitonos, subir una octava +12 o bajar una octava -12)", - "Unfortunately, there is no compatible GPU available to support your training.": "Lamentablemente, no tiene una tarjeta gráfica adecuada para soportar su entrenamiento", "Unknown": "Desconocido", "Unload model to save GPU memory": "Descargue la voz para ahorrar memoria GPU", "Version": "Versión", diff --git a/i18n/locale/fr_FR.json b/i18n/locale/fr_FR.json index fbbd471..d55454f 100644 --- a/i18n/locale/fr_FR.json +++ b/i18n/locale/fr_FR.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "Reprise du périphérique WASAPI", "Target sample rate": "Taux d'échantillonnage cible :", "The audio file to be processed": "Le fichier audio à traiter", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "Ce logiciel est open source sous la licence MIT. L'auteur n'a aucun contrôle sur le logiciel. Les utilisateurs qui utilisent le logiciel et distribuent les sons exportés par le logiciel en sont entièrement responsables.
Si vous n'acceptez pas cette clause, vous ne pouvez pas utiliser ou faire référence à aucun code ni fichier contenu dans le package logiciel. Consultez le fichier Agreement-LICENSE.txt dans le répertoire racine pour plus de détails.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Nombre total d'époques d'entraînement (total_epoch) :", "Train": "Entraîner", "Train feature index": "Entraîner l'index des caractéristiques", "Train model": "Entraîner le modèle", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Entraînement terminé. Vous pouvez consulter les rapports d'entraînement dans la console ou dans le fichier 'train.log' situé dans le dossier de l'expérience.", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Transposer (entier, nombre de demi-tons, monter d'une octave : 12, descendre d'une octave : -12) :", - "Unfortunately, there is no compatible GPU available to support your training.": "Malheureusement, il n'y a pas de GPU compatible disponible pour prendre en charge votre entrainement.", "Unknown": "Inconnu", "Unload model to save GPU memory": "Décharger la voix pour économiser la mémoire GPU.", "Version": "Version", diff --git a/i18n/locale/it_IT.json b/i18n/locale/it_IT.json index 821dbb4..c28b7f1 100644 --- a/i18n/locale/it_IT.json +++ b/i18n/locale/it_IT.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "Frequenza di campionamento target:", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "Questo software è open source con licenza MIT.
Se non si accetta questa clausola, non è possibile utilizzare o fare riferimento a codici e file all'interno del pacchetto software. Contratto-LICENZA.txt per dettagli.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Epoch totali di addestramento (total_epoch):", "Train": "Addestramento", "Train feature index": "Addestra indice delle caratteristiche", "Train model": "Addestra modello", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Addestramento completato. ", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Trasposizione (numero intero, numero di semitoni, alza di un'ottava: 12, abbassa di un'ottava: -12):", - "Unfortunately, there is no compatible GPU available to support your training.": "Sfortunatamente, non è disponibile alcuna GPU compatibile per supportare l'addestramento.", "Unknown": "Unknown", "Unload model to save GPU memory": "Scarica la voce per risparmiare memoria della GPU:", "Version": "Versione", diff --git a/i18n/locale/ja_JP.json b/i18n/locale/ja_JP.json index 1391409..c5c2a62 100644 --- a/i18n/locale/ja_JP.json +++ b/i18n/locale/ja_JP.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "WASAPIデバイスを独占", "Target sample rate": "目標サンプリング率", "The audio file to be processed": "処理待ち音声", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "本ソフトウェアはMITライセンスに基づくオープンソースであり、製作者は本ソフトウェアに対していかなる責任を持ちません。本ソフトウェアの利用者および本ソフトウェアから派生した音源(成果物)を配布する者は、本ソフトウェアに対して自身で責任を負うものとします。
この条項に同意しない場合、パッケージ内のコードやファイルを使用や参照を禁じます。詳しくはLICENSEをご覧ください。", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "本ソフトウェアは、GNU Affero General Public License バージョン3.0以降の下でライセンスされています。
作者は、このソフトウェアの使用に関して一切の管理権限や責任を負いません。
このソフトウェアを使用し、生成された音声やファイルを含むあらゆるコンテンツを配布するユーザーは、AGPL 3.0ライセンスの条項への準拠について全責任を負います。
これらの条項に同意しない場合、このソフトウェアパッケージに含まれるコードやファイルの使用、参照、配布は禁止されています。
詳細については、ルートディレクトリにあるLICENSEファイルを参照してください。", "Total training epochs (total_epoch)": "総エポック数", "Train": "学習", "Train feature index": "特徴索引の学習", "Train model": "モデルの学習", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "学習終了時に、学習ログやフォルダ内のtrain.logを確認することができます", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "ピッチ変更(整数、半音数、上下オクターブ12-12)", - "Unfortunately, there is no compatible GPU available to support your training.": "学習に対応したGPUが動作しないのは残念です。", "Unknown": "未知", "Unload model to save GPU memory": "音源を削除してメモリを節約", "Version": "バージョン", diff --git a/i18n/locale/ko_KR.json b/i18n/locale/ko_KR.json index 49596af..8207271 100644 --- a/i18n/locale/ko_KR.json +++ b/i18n/locale/ko_KR.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "목표 샘플링률", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "이 소프트웨어는 MIT 라이선스로 공개되며, 저자는 소프트웨어에 대해 어떠한 통제권도 가지지 않습니다. 모든 귀책사유는 소프트웨어 사용자 및 소프트웨어에서 생성된 결과물을 사용하는 당사자에게 있습니다.
해당 조항을 인정하지 않는 경우, 소프트웨어 패키지의 어떠한 코드나 파일도 사용하거나 인용할 수 없습니다. 자세한 내용은 루트 디렉토리의 LICENSE를 참조하세요.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "총 훈련 라운드 수 total_epoch", "Train": "훈련", "Train feature index": "특징 인덱스 훈련", "Train model": "모델 훈련", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "훈련 완료, 콘솔 훈련 로그 또는 실험 폴더 내의 train.log 확인 가능", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "키 변경(정수, 반음 수, 옥타브 상승 12, 옥타브 하강 -12)", - "Unfortunately, there is no compatible GPU available to support your training.": "사용 가능한 그래픽 카드가 없어 훈련을 지원할 수 없습니다", "Unknown": "Unknown", "Unload model to save GPU memory": "음색 언로드로 디스플레이 메모리 절약", "Version": "버전", diff --git a/i18n/locale/pt_BR.json b/i18n/locale/pt_BR.json index 50e4ef5..3a9c6f7 100644 --- a/i18n/locale/pt_BR.json +++ b/i18n/locale/pt_BR.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "Taxa de amostragem:", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "
The Mangio-RVC 💻 | Tradução por Krisp e Rafael Godoy Ebert | AI HUB BRASIL
Este software é de código aberto sob a licença MIT. O autor não tem qualquer controle sobre o software. Aqueles que usam o software e divulgam os sons exportados pelo software são totalmente responsáveis.
Se você não concorda com este termo, você não pode usar ou citar nenhum código e arquivo no pacote de software. Para obter detalhes, consulte o diretório raiz O acordo a ser seguido para uso LICENSE
", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Número total de ciclos(epoch) de treino (se escolher um valor alto demais, o seu modelo parecerá terrivelmente sobretreinado):", "Train": "Treinar", "Train feature index": "Treinar Index", "Train model": "Treinar Modelo", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Após o término do treinamento, você pode verificar o log de treinamento do console ou train.log na pasta de experimentos", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Mude o tom aqui. Se a voz for do mesmo sexo, não é necessario alterar (12 caso seja Masculino para feminino, -12 caso seja ao contrário).", - "Unfortunately, there is no compatible GPU available to support your training.": "Infelizmente, não há GPU compatível disponível para apoiar o seu treinamento.", "Unknown": "Unknown", "Unload model to save GPU memory": "Descarregue a voz para liberar a memória da GPU:", "Version": "Versão", diff --git a/i18n/locale/ru_RU.json b/i18n/locale/ru_RU.json index 2f3eaa6..28134b3 100644 --- a/i18n/locale/ru_RU.json +++ b/i18n/locale/ru_RU.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "Частота дискретизации аудио:", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "Это программное обеспечение с открытым исходным кодом распространяется по лицензии MIT. Автор никак не контролирует это программное обеспечение. Пользователи, которые используют эту программу и распространяют аудиозаписи, полученные с помощью этой программы, несут полную ответственность за это. Если вы не согласны с этим, вы не можете использовать какие-либо коды и файлы в рамках этой программы или ссылаться на них. Подробнее в файле Agreement-LICENSE.txt в корневом каталоге программы.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Полное количество эпох (total_epoch):", "Train": "Обучение модели", "Train feature index": "Обучить индекс черт", "Train model": "Обучить модель", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Обучение модели завершено. Журнал обучения можно просмотреть в консоли или в файле 'train.log' в папке с моделью.", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Изменить высоту голоса (укажите количество полутонов; чтобы поднять голос на октаву, выберите 12, понизить на октаву — -12):", - "Unfortunately, there is no compatible GPU available to support your training.": "К сожалению, у вас нету графического процессора, который поддерживает обучение моделей.", "Unknown": "Unknown", "Unload model to save GPU memory": "Выгрузить модель из памяти GPU для освобождения ресурсов", "Version": "Версия архитектуры модели:", diff --git a/i18n/locale/tr_TR.json b/i18n/locale/tr_TR.json index 69a4fcd..776bab9 100644 --- a/i18n/locale/tr_TR.json +++ b/i18n/locale/tr_TR.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "Hedef örnekleme oranı:", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "Bu yazılım, MIT lisansı altında açık kaynaklıdır. Yazarın yazılım üzerinde herhangi bir kontrolü yoktur. Yazılımı kullanan ve yazılım tarafından dışa aktarılan sesleri dağıtan kullanıcılar sorumludur.
Eğer bu maddeyle aynı fikirde değilseniz, yazılım paketi içindeki herhangi bir kod veya dosyayı kullanamaz veya referans göremezsiniz. Detaylar için kök dizindeki Agreement-LICENSE.txt dosyasına bakınız.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Toplam eğitim turu (total_epoch):", "Train": "Eğitim", "Train feature index": "Özellik Dizinini Eğit", "Train model": "Modeli Eğit", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Eğitim tamamlandı. Eğitim günlüklerini konsolda veya deney klasörü altındaki train.log dosyasında kontrol edebilirsiniz.", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Transpoze et (tamsayı, yarıton sayısıyla; bir oktav yükseltmek için: 12, bir oktav düşürmek için: -12):", - "Unfortunately, there is no compatible GPU available to support your training.": "Maalesef, eğitiminizi desteklemek için uyumlu bir GPU bulunmamaktadır.", "Unknown": "Unknown", "Unload model to save GPU memory": "GPU bellek kullanımını azaltmak için sesi kaldır", "Version": "Sürüm", diff --git a/i18n/locale/zh_CN.json b/i18n/locale/zh_CN.json index 9bcf290..24367be 100644 --- a/i18n/locale/zh_CN.json +++ b/i18n/locale/zh_CN.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "独占 WASAPI 设备", "Target sample rate": "目标采样率", "The audio file to be processed": "待处理音频文件", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "本软件基于GNU Affero通用公共许可证3.0版本或更高版本进行许可。
作者对本软件的使用不承担任何控制权或责任。
使用本软件并分发任何内容(包括由其生成的声音或文件)的用户,需对遵守AGPL 3.0许可证条款承担全部责任。
如果您不接受这些条款,则禁止使用、引用或分发本软件包中包含的任何代码或文件。
请参阅位于根目录中的LICENSE文件以获取完整详情。", "Total training epochs (total_epoch)": "总训练轮数total_epoch", "Train": "训练", "Train feature index": "训练特征索引", "Train model": "训练模型", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "变调(整数, 半音数量, 升八度12降八度-12)", - "Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练", "Unknown": "未知", "Unload model to save GPU memory": "卸载音色省显存", "Version": "版本", diff --git a/i18n/locale/zh_HK.json b/i18n/locale/zh_HK.json index 93071fc..e6b2a66 100644 --- a/i18n/locale/zh_HK.json +++ b/i18n/locale/zh_HK.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "独占 WASAPI 设备", "Target sample rate": "目標取樣率", "The audio file to be processed": "待处理音频文件", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "本軟體以MIT協議開源,作者不對軟體具備任何控制力,使用軟體者、傳播軟體導出的聲音者自負全責。
如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄使用需遵守的協議-LICENSE.txt。", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "總訓練輪數total_epoch", "Train": "訓練", "Train feature index": "訓練特徵索引", "Train model": "訓練模型", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "變調(整數、半音數量、升八度12降八度-12)", - "Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练", "Unknown": "Unknown", "Unload model to save GPU memory": "卸載音色節省 VRAM", "Version": "版本", diff --git a/i18n/locale/zh_SG.json b/i18n/locale/zh_SG.json index 93071fc..e6b2a66 100644 --- a/i18n/locale/zh_SG.json +++ b/i18n/locale/zh_SG.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "独占 WASAPI 设备", "Target sample rate": "目標取樣率", "The audio file to be processed": "待处理音频文件", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "本軟體以MIT協議開源,作者不對軟體具備任何控制力,使用軟體者、傳播軟體導出的聲音者自負全責。
如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄使用需遵守的協議-LICENSE.txt。", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "總訓練輪數total_epoch", "Train": "訓練", "Train feature index": "訓練特徵索引", "Train model": "訓練模型", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "變調(整數、半音數量、升八度12降八度-12)", - "Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练", "Unknown": "Unknown", "Unload model to save GPU memory": "卸載音色節省 VRAM", "Version": "版本", diff --git a/i18n/locale/zh_TW.json b/i18n/locale/zh_TW.json index 93071fc..e6b2a66 100644 --- a/i18n/locale/zh_TW.json +++ b/i18n/locale/zh_TW.json @@ -133,14 +133,13 @@ "Takeover WASAPI device": "独占 WASAPI 设备", "Target sample rate": "目標取樣率", "The audio file to be processed": "待处理音频文件", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "本軟體以MIT協議開源,作者不對軟體具備任何控制力,使用軟體者、傳播軟體導出的聲音者自負全責。
如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄使用需遵守的協議-LICENSE.txt。", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "總訓練輪數total_epoch", "Train": "訓練", "Train feature index": "訓練特徵索引", "Train model": "訓練模型", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "變調(整數、半音數量、升八度12降八度-12)", - "Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练", "Unknown": "Unknown", "Unload model to save GPU memory": "卸載音色節省 VRAM", "Version": "版本", diff --git a/infer/lib/audio.py b/infer/lib/audio.py index 8c4c092..ca03c78 100644 --- a/infer/lib/audio.py +++ b/infer/lib/audio.py @@ -195,7 +195,10 @@ def get_audio_properties(input_path: str) -> Tuple[int, int]: container = av.open(input_path) audio_stream = next(s for s in container.streams if s.type == "audio") channels = 1 if audio_stream.layout == "mono" else 2 - rate = audio_stream.base_rate + try: + rate = audio_stream.base_rate + except: + rate = audio_stream.sample_rate container.close() return channels, rate diff --git a/infer/lib/rvcmd.py b/infer/lib/rvcmd.py index 32b87d3..a2a3ce5 100644 --- a/infer/lib/rvcmd.py +++ b/infer/lib/rvcmd.py @@ -162,15 +162,7 @@ def download_and_extract_zip(url: str, folder: str): logger.info(f"extracted into {folder}") -def download_dns_yaml(url: str, folder: str): - logger.info(f"downloading {url}") - response = requests.get(url, stream=True, timeout=(5, 10)) - with open(os.path.join(folder, "dns.yaml"), "wb") as out_file: - out_file.write(response.content) - logger.info(f"downloaded into {folder}") - - -def download_all_assets(tmpdir: str, version="0.2.5"): +def download_all_assets(tmpdir: str, version="0.2.11"): import subprocess import platform @@ -194,48 +186,14 @@ def download_all_assets(tmpdir: str, version="0.2.5"): if not architecture: 
logger.error(f"architecture {architecture} is not supported") exit(1) - try: - BASE_URL = "https://github.com/fumiama/RVC-Models-Downloader/releases/download/" - suffix = "zip" if is_win else "tar.gz" - RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}" - cmdfile = os.path.join(tmpdir, "rvcmd") - if is_win: - download_and_extract_zip(RVCMD_URL, tmpdir) - cmdfile += ".exe" - else: - download_and_extract_tar_gz(RVCMD_URL, tmpdir) - os.chmod(cmdfile, 0o755) - subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"]) - except Exception: - BASE_URL = "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/assets/" - suffix = { - "darwin_amd64": "555", - "darwin_arm64": "556", - "linux_386": "557", - "linux_amd64": "558", - "linux_arm64": "559", - "windows_386": "562", - "windows_amd64": "563", - }[f"{system_type}_{architecture}"] - RVCMD_URL = BASE_URL + suffix - download_dns_yaml( - "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/raw/main/dns.yaml", - tmpdir, - ) - if is_win: - download_and_extract_zip(RVCMD_URL, tmpdir) - cmdfile += ".exe" - else: - download_and_extract_tar_gz(RVCMD_URL, tmpdir) - os.chmod(cmdfile, 0o755) - subprocess.run( - [ - cmdfile, - "-notui", - "-w", - "0", - "-dns", - os.path.join(tmpdir, "dns.yaml"), - "assets/rvc", - ] - ) + BASE_URL = "https://github.com/fumiama/RVC-Models-Downloader/releases/download/" + suffix = "zip" if is_win else "tar.gz" + RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}" + cmdfile = os.path.join(tmpdir, "rvcmd") + if is_win: + download_and_extract_zip(RVCMD_URL, tmpdir) + cmdfile += ".exe" + else: + download_and_extract_tar_gz(RVCMD_URL, tmpdir) + os.chmod(cmdfile, 0o755) + subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"]) diff --git a/infer/lib/train/data_utils.py b/infer/lib/train/data_utils.py index a5e4a93..da2a923 100644 --- a/infer/lib/train/data_utils.py +++ b/infer/lib/train/data_utils.py @@ -303,7 +303,7 @@ class 
TextAudioLoader(torch.utils.data.Dataset): spec_filename = filename.replace(".wav", ".spec.pt") if os.path.exists(spec_filename): try: - spec = torch.load(spec_filename) + spec = torch.load(spec_filename, weights_only=True) except: logger.warning("%s %s", spec_filename, traceback.format_exc()) spec = spectrogram_torch( diff --git a/infer/lib/train/process_ckpt.py b/infer/lib/train/process_ckpt.py index cf9697f..8688a17 100644 --- a/infer/lib/train/process_ckpt.py +++ b/infer/lib/train/process_ckpt.py @@ -59,7 +59,7 @@ def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps): def extract_small_model(path, name, author, sr, if_f0, info, version): try: - ckpt = torch.load(path, map_location="cpu") + ckpt = torch.load(path, map_location="cpu", weights_only=True) if "model" in ckpt: ckpt = ckpt["model"] opt = OrderedDict() @@ -196,7 +196,7 @@ def extract_small_model(path, name, author, sr, if_f0, info, version): def change_info(path, info, name): try: - ckpt = torch.load(path, map_location="cpu") + ckpt = torch.load(path, map_location="cpu", weights_only=True) ckpt["info"] = info if name == "": name = os.path.basename(path) @@ -229,8 +229,8 @@ def merge(path1, path2, alpha1, sr, f0, info, name, version): a2 = "Unknown" return f"{a1} & {a2}" - ckpt1 = torch.load(path1, map_location="cpu") - ckpt2 = torch.load(path2, map_location="cpu") + ckpt1 = torch.load(path1, map_location="cpu", weights_only=True) + ckpt2 = torch.load(path2, map_location="cpu", weights_only=True) cfg = ckpt1["config"] if "model" in ckpt1: ckpt1 = extract(ckpt1) diff --git a/infer/modules/train/extract_f0_print.py b/infer/modules/train/extract_f0_print.py index 87b6b14..5058a0a 100644 --- a/infer/modules/train/extract_f0_print.py +++ b/infer/modules/train/extract_f0_print.py @@ -2,6 +2,7 @@ import os import sys import traceback from pathlib import Path +import importlib.util from dotenv import load_dotenv @@ -38,6 +39,9 @@ f0method = sys.argv[3] device = sys.argv[4] is_half = sys.argv[5] == 
"True" +if importlib.util.find_spec("torch_directml") is not None: + import torch_directml # use side effect + class FeatureInput(object): def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160): @@ -102,6 +106,12 @@ if __name__ == "__main__": Config.use_insecure_load() printt(" ".join(sys.argv)) + # GPU methods (rmvpe, fcpe, crepe, etc.) gain nothing from multiprocessing since + # all processes share one GPU. Spawning n_p processes each lazily loading + # the model onto the same CUDA device exhausts VRAM and causes deadlocks. + if "cuda" in device: + printt("WARN: use 1 thread since GPU is used.") + n_p = 1 featureInput = FeatureInput(is_half, device) paths = [] inp_root = "%s/1_16k_wavs" % (exp_dir) diff --git a/infer/modules/train/extract_feature_print.py b/infer/modules/train/extract_feature_print.py index 1c3ec9a..22755b5 100644 --- a/infer/modules/train/extract_feature_print.py +++ b/infer/modules/train/extract_feature_print.py @@ -10,19 +10,24 @@ from infer.lib.audio import load_audio os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0" +if len(sys.argv) != 8: + sys.exit(0) + device = sys.argv[1] n_part = int(sys.argv[2]) i_part = int(sys.argv[3]) -if len(sys.argv) == 7: - exp_dir = sys.argv[4] - version = sys.argv[5] - is_half = sys.argv[6].lower() == "true" -else: - i_gpu = sys.argv[4] - exp_dir = sys.argv[5] - os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu) - version = sys.argv[6] - is_half = sys.argv[7].lower() == "true" +i_gpu = sys.argv[4] +# CUDA_VISIBLE_DEVICES expects bare GPU indices (e.g. "0" or "0,1"), +# but callers may pass "cuda:0", "cuda:0-cuda:1", etc. Strip the prefix +# and normalise separators so any combination works. 
+import re + +i_gpu = re.sub(r"cuda:", "", str(i_gpu)) +i_gpu = i_gpu.replace("-", ",") +os.environ["CUDA_VISIBLE_DEVICES"] = i_gpu +exp_dir = sys.argv[5] +version = sys.argv[6] +is_half = sys.argv[7].lower() == "true" import fairseq import numpy as np diff --git a/infer/modules/train/train.py b/infer/modules/train/train.py index e8f7156..c8b4875 100644 --- a/infer/modules/train/train.py +++ b/infer/modules/train/train.py @@ -106,23 +106,30 @@ def main(): # patch to unblock people without gpus. there is probably a better way. print("NO GPU DETECTED: falling back to CPU - this may take a while") n_gpus = 1 - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = str(randint(20000, 55555)) - children = [] logger = utils.get_logger(hps.model_dir) - for i in range(n_gpus): - subproc = mp.Process( - target=run, - args=(i, n_gpus, hps, logger), - ) - children.append(subproc) - subproc.start() + if n_gpus == 1: + # Single GPU: run directly without distributed to avoid gloo issues on Windows + run(0, 1, hps, logger) + else: + master_port = str(randint(20000, 55555)) + os.environ["MASTER_ADDR"] = "127.0.0.1" + os.environ["MASTER_PORT"] = master_port + children = [] + for i in range(n_gpus): + subproc = mp.Process( + target=run, + args=(i, n_gpus, hps, logger, master_port), + ) + children.append(subproc) + subproc.start() - for i in range(n_gpus): - children[i].join() + for i in range(n_gpus): + children[i].join() -def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger): +def run( + rank, n_gpus, hps: utils.HParams, logger: logging.Logger, master_port: str = "29500" +): global global_step if rank == 0: # logger = utils.get_logger(hps.model_dir) @@ -131,24 +138,81 @@ def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger): writer = SummaryWriter(log_dir=hps.model_dir) writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval")) - try: - dist.init_process_group( - backend=( - "gloo" if os.name == "nt" or not 
torch.cuda.is_available() else "nccl" - ), - init_method="env://", - world_size=n_gpus, - rank=rank, - ) - except: - dist.init_process_group( - backend=( - "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl" - ), - init_method="env://?use_libuv=False", - world_size=n_gpus, - rank=rank, - ) + use_distributed = n_gpus > 1 + if use_distributed: + if os.name == "nt" or not torch.cuda.is_available(): + # On Windows, gloo's create_device(hostname=...) is gated to Linux only + # in the C++ layer (makeDeviceForHostname). We must use the interface- + # based path instead: create_device(interface=...) calls + # makeDeviceForInterface which is not platform-gated. + import socket as _socket + + try: + store = dist.TCPStore( + host_name="127.0.0.1", + port=int(master_port), + world_size=n_gpus, + is_master=(rank == 0), + ) + except Exception: + store = dist.TCPStore( + host_name="127.0.0.1", + port=int(master_port), + world_size=n_gpus, + is_master=(rank == 0), + use_libuv=False, + ) + + # Discover a working network interface for gloo device creation + gloo_device = None + try: + for idx, ifname in _socket.if_nameindex(): + try: + gloo_device = dist.ProcessGroupGloo.create_device( + interface=ifname + ) + print("Try device", idx, "name", ifname) + break + except RuntimeError as e: + print("Try device", idx, "name", ifname, "err:", e) + continue + except (OSError, AttributeError) as e: + print(e.with_traceback(None)) + + if gloo_device is None: + raise RuntimeError( + "Cannot create gloo device on Windows. " + "No usable network interface found. 
" + "Try adding your hostname to " + "C:\\Windows\\System32\\drivers\\etc\\hosts " + "with: 127.0.0.1 " + _socket.gethostname() + ) + + pg_options = dist.ProcessGroupGloo._Options() + pg_options._devices = [gloo_device] + dist.init_process_group( + backend="gloo", + store=store, + world_size=n_gpus, + rank=rank, + pg_options=pg_options, + ) + else: + init_url = f"tcp://127.0.0.1:{master_port}" + try: + dist.init_process_group( + backend="nccl", + init_method=init_url, + world_size=n_gpus, + rank=rank, + ) + except: + dist.init_process_group( + backend="nccl", + init_method=init_url + "?use_libuv=False", + world_size=n_gpus, + rank=rank, + ) torch.manual_seed(hps.train.seed) if torch.cuda.is_available(): torch.cuda.set_device(rank) @@ -221,14 +285,15 @@ def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger): ) # net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True) # net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True) - if hasattr(torch, "xpu") and torch.xpu.is_available(): - pass - elif torch.cuda.is_available(): - net_g = DDP(net_g, device_ids=[rank]) - net_d = DDP(net_d, device_ids=[rank]) - else: - net_g = DDP(net_g) - net_d = DDP(net_d) + if use_distributed: + if hasattr(torch, "xpu") and torch.xpu.is_available(): + pass + elif torch.cuda.is_available(): + net_g = DDP(net_g, device_ids=[rank]) + net_d = DDP(net_d, device_ids=[rank]) + else: + net_g = DDP(net_g) + net_d = DDP(net_d) try: # 如果能加载自动resume _, _, _, epoch_str = utils.load_checkpoint( diff --git a/infer/modules/uvr5/modules.py b/infer/modules/uvr5/modules.py index 7b3af7c..6053d11 100644 --- a/infer/modules/uvr5/modules.py +++ b/infer/modules/uvr5/modules.py @@ -62,10 +62,6 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format os.path.basename(inp_path), ) resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo") - try: # Remove the original file - os.remove(inp_path) - except Exception as e: - print(f"Failed to remove 
the original file: {e}") inp_path = tmp_path try: if done == 0: diff --git a/infer/modules/uvr5/vr.py b/infer/modules/uvr5/vr.py index 999bc19..6c0bf42 100644 --- a/infer/modules/uvr5/vr.py +++ b/infer/modules/uvr5/vr.py @@ -37,7 +37,7 @@ class AudioPre: else: mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json") model = Nets.CascadedASPPNet(mp.param["bins"] * 2) - cpk = torch.load(model_path, map_location="cpu") + cpk = torch.load(model_path, map_location="cpu", weights_only=True) model.load_state_dict(cpk) model.eval() if is_half: diff --git a/infer/modules/vc/hash.py b/infer/modules/vc/hash.py index 77b6ae1..6a3561b 100644 --- a/infer/modules/vc/hash.py +++ b/infer/modules/vc/hash.py @@ -152,7 +152,7 @@ def model_hash_ckpt(cpt): def model_hash_from(path): - cpt = torch.load(path, map_location="cpu") + cpt = torch.load(path, map_location="cpu", weights_only=True) h = model_hash_ckpt(cpt) del cpt return h diff --git a/infer/modules/vc/info.py b/infer/modules/vc/info.py index 14a7346..c16a8f1 100644 --- a/infer/modules/vc/info.py +++ b/infer/modules/vc/info.py @@ -75,7 +75,7 @@ def show_info(path): try: if hasattr(path, "name"): path = path.name - a = torch.load(path, map_location="cpu") + a = torch.load(path, map_location="cpu", weights_only=True) txt = show_model_info(a, show_long_id=True) del a except: diff --git a/requirements/amd.txt b/requirements/amd.txt index a051034..0f52b52 100644 --- a/requirements/amd.txt +++ b/requirements/amd.txt @@ -1,11 +1,10 @@ -tensorflow-rocm joblib>=1.1.0 numba numpy scipy librosa>=0.10.2 llvmlite -fairseq @ git+https://github.com/One-sixth/fairseq.git +fairseq @ git+https://github.com/fumiama/fairseq.git faiss-cpu gradio Cython diff --git a/requirements/cpu.txt b/requirements/cpu.txt new file mode 100644 index 0000000..6bdaefb --- /dev/null +++ b/requirements/cpu.txt @@ -0,0 +1,49 @@ +joblib>=1.1.0 +numba +numpy +scipy +librosa>=0.10.2 +llvmlite +fairseq @ git+https://github.com/fumiama/fairseq.git 
+faiss-cpu +gradio +Cython +pydub>=0.25.1 +tensorboardX +Jinja2>=3.1.2 +json5 +Markdown +matplotlib>=3.7.0 +matplotlib-inline>=0.1.3 +praat-parselmouth>=0.4.2 +Pillow>=9.1.1 +resampy>=0.4.2 +scikit-learn +tensorboard +tqdm>=4.63.1 +tornado>=6.1 +Werkzeug>=2.2.3 +uc-micro-py>=1.0.1 +sympy>=1.11.1 +tabulate>=0.8.10 +PyYAML>=6.0 +pyasn1>=0.4.8 +pyasn1-modules>=0.2.8 +fsspec>=2022.11.0 +absl-py>=1.2.0 +audioread +uvicorn>=0.21.1 +colorama>=0.4.5 +pyworld==0.3.2 +httpx +onnxruntime; sys_platform == 'darwin' +torchcrepe>=0.0.23 +fastapi +torchfcpe +python-dotenv>=1.0.0 +av +pybase16384 +--extra-index-url https://download.pytorch.org/whl/cpu +torch +torchvision +torchaudio diff --git a/requirements/dml.txt b/requirements/dml.txt index 5493ece..f54a76f 100644 --- a/requirements/dml.txt +++ b/requirements/dml.txt @@ -4,7 +4,7 @@ numpy scipy librosa>=0.10.2 llvmlite -fairseq @ git+https://github.com/One-sixth/fairseq.git +fairseq @ git+https://github.com/fumiama/fairseq.git faiss-cpu gradio Cython @@ -43,3 +43,6 @@ python-dotenv>=1.0.0 av torchfcpe pybase16384 +torch-directml +torchvision +torchaudio diff --git a/requirements/ipex.txt b/requirements/ipex.txt index 056d9fa..d166308 100644 --- a/requirements/ipex.txt +++ b/requirements/ipex.txt @@ -9,7 +9,7 @@ numpy scipy librosa>=0.10.2 llvmlite==0.39.0 -fairseq @ git+https://github.com/One-sixth/fairseq.git +fairseq @ git+https://github.com/fumiama/fairseq.git faiss-cpu gradio Cython diff --git a/requirements/main.txt b/requirements/main.txt index fef9c3c..099949d 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -4,7 +4,7 @@ numpy scipy librosa>=0.10.2 llvmlite -fairseq @ git+https://github.com/One-sixth/fairseq.git +fairseq @ git+https://github.com/fumiama/fairseq.git faiss-cpu gradio Cython diff --git a/rvc/f0/f0.py b/rvc/f0/f0.py index 0f615e6..c79c4f8 100644 --- a/rvc/f0/f0.py +++ b/rvc/f0/f0.py @@ -11,14 +11,14 @@ class F0Predictor(object): f0_min=50, f0_max=1100, sampling_rate=44100, - device: 
Optional[str] = None, + device: Optional[Union[str, torch.device]] = None, ): self.hop_length = hop_length self.f0_min = f0_min self.f0_max = f0_max self.sampling_rate = sampling_rate - if device is None: - device = "cuda:0" if torch.cuda.is_available() else "cpu" + if not device: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") self.device = device def compute_f0( diff --git a/rvc/f0/mel.py b/rvc/f0/mel.py index 2c06800..ee98b15 100644 --- a/rvc/f0/mel.py +++ b/rvc/f0/mel.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Union import torch import numpy as np @@ -17,9 +17,9 @@ class MelSpectrogram(torch.nn.Module): hop_length: int, n_fft: Optional[int] = None, mel_fmin: int = 0, - mel_fmax: int = None, + mel_fmax: Optional[int] = None, clamp: float = 1e-5, - device=torch.device("cpu"), + device: Union[str, torch.device] = torch.device("cpu"), ): super().__init__() if n_fft is None: diff --git a/rvc/f0/rmvpe.py b/rvc/f0/rmvpe.py index 33eac0a..02d4f0c 100644 --- a/rvc/f0/rmvpe.py +++ b/rvc/f0/rmvpe.py @@ -1,6 +1,6 @@ from io import BytesIO import os -from typing import Any, Optional, Union +from typing import Optional, Union import numpy as np import torch diff --git a/rvc/layers/attentions.py b/rvc/layers/attentions.py index 22b626d..3e1fd01 100644 --- a/rvc/layers/attentions.py +++ b/rvc/layers/attentions.py @@ -12,8 +12,8 @@ class MultiHeadAttention(nn.Module): channels: int, out_channels: int, n_heads: int, + window_size: int, p_dropout: float = 0.0, - window_size: Optional[int] = None, heads_share: bool = True, block_length: Optional[int] = None, proximal_bias: bool = False, diff --git a/rvc/layers/encoders.py b/rvc/layers/encoders.py index 1fc2478..eeaa3fd 100644 --- a/rvc/layers/encoders.py +++ b/rvc/layers/encoders.py @@ -42,8 +42,8 @@ class Encoder(nn.Module): hidden_channels, hidden_channels, n_heads, + window_size, p_dropout=p_dropout, - window_size=window_size, ) ) 
self.norm_layers_1.append(LayerNorm(hidden_channels)) @@ -121,7 +121,7 @@ class TextEncoder(nn.Module): def __call__( self, phone: torch.Tensor, - pitch: torch.Tensor, + pitch: Optional[torch.Tensor], lengths: torch.Tensor, skip_head: Optional[int] = None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: @@ -135,7 +135,7 @@ class TextEncoder(nn.Module): def forward( self, phone: torch.Tensor, - pitch: torch.Tensor, + pitch: Optional[torch.Tensor], lengths: torch.Tensor, skip_head: Optional[int] = None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: diff --git a/rvc/layers/generators.py b/rvc/layers/generators.py index 185f939..d78d6cd 100644 --- a/rvc/layers/generators.py +++ b/rvc/layers/generators.py @@ -46,6 +46,7 @@ class Generator(torch.nn.Module): self.resblocks = nn.ModuleList() resblock_module = ResBlock1 if resblock == "1" else ResBlock2 + ch = 0 for i in range(len(self.ups)): ch = upsample_initial_channel // (2 ** (i + 1)) for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes): diff --git a/rvc/layers/norms.py b/rvc/layers/norms.py index 9418035..4b07143 100644 --- a/rvc/layers/norms.py +++ b/rvc/layers/norms.py @@ -30,7 +30,7 @@ class WN(torch.nn.Module): dilation_rate: int, n_layers: int, gin_channels: int = 0, - p_dropout: int = 0, + p_dropout: float = 0, ): super(WN, self).__init__() assert kernel_size % 2 == 1 diff --git a/rvc/layers/nsf.py b/rvc/layers/nsf.py index 5e9e35a..22fd968 100644 --- a/rvc/layers/nsf.py +++ b/rvc/layers/nsf.py @@ -1,4 +1,4 @@ -from typing import Optional, List +from typing import Optional, List, Union import math import torch @@ -83,7 +83,7 @@ class NSFGenerator(torch.nn.Module): self.conv_pre = Conv1d( initial_channel, upsample_initial_channel, 7, 1, padding=3 ) - resblock = ResBlock1 if resblock == "1" else ResBlock2 + resblockcls = ResBlock1 if resblock == "1" else ResBlock2 self.ups = nn.ModuleList() for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): @@ -114,12 +114,13 @@ class 
NSFGenerator(torch.nn.Module): self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1)) self.resblocks = nn.ModuleList() + ch = 0 for i in range(len(self.ups)): - ch: int = upsample_initial_channel // (2 ** (i + 1)) + ch = upsample_initial_channel // (2 ** (i + 1)) for j, (k, d) in enumerate( zip(resblock_kernel_sizes, resblock_dilation_sizes) ): - self.resblocks.append(resblock(ch, k, d)) + self.resblocks.append(resblockcls(ch, k, d)) self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False) self.ups.apply(call_weight_data_normal_if_Conv) diff --git a/rvc/layers/residuals.py b/rvc/layers/residuals.py index 4a135ce..45b3d11 100644 --- a/rvc/layers/residuals.py +++ b/rvc/layers/residuals.py @@ -20,7 +20,7 @@ class ResBlock1(torch.nn.Module): self, channels: int, kernel_size: int = 3, - dilation: List[int] = (1, 3, 5), + dilation: List[int] = [1, 3, 5], ): super(ResBlock1, self).__init__() @@ -117,7 +117,7 @@ class ResBlock2(torch.nn.Module): self, channels: int, kernel_size=3, - dilation: List[int] = (1, 3), + dilation: List[int] = [1, 3], ): super(ResBlock2, self).__init__() self.convs = nn.ModuleList() @@ -182,7 +182,7 @@ class ResidualCouplingLayer(nn.Module): kernel_size: int, dilation_rate: int, n_layers: int, - p_dropout: int = 0, + p_dropout: float = 0, gin_channels: int = 0, mean_only: bool = False, ): diff --git a/rvc/layers/synthesizers.py b/rvc/layers/synthesizers.py index 474781e..c2c70ba 100644 --- a/rvc/layers/synthesizers.py +++ b/rvc/layers/synthesizers.py @@ -34,7 +34,7 @@ class SynthesizerTrnMsNSFsid(nn.Module): upsample_kernel_sizes: List[int], spk_embed_dim: int, gin_channels: int, - sr: Optional[Union[str, int]], + sr: Union[str, int], encoder_dim: int, use_f0: bool, ): @@ -143,7 +143,7 @@ class SynthesizerTrnMsNSFsid(nn.Module): torch.nn.utils.remove_weight_norm(self.enc_q) return self - @torch.jit.ignore + @torch.jit.ignore() def forward( self, phone: torch.Tensor, @@ -155,18 +155,20 @@ class SynthesizerTrnMsNSFsid(nn.Module): pitchf: 
Optional[torch.Tensor] = None, ): # 这里ds是id,[bs,1] # print(1,pitch.shape)#[bs,t] - g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t,广播的 + embg = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t,广播的 m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths) - z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g) - z_p = self.flow(z, y_mask, g=g) + z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=embg) + z_p = self.flow(z, y_mask, g=embg) z_slice, ids_slice = rand_slice_segments_on_last_dim( z, y_lengths, self.segment_size ) - if pitchf is not None: + if pitchf is not None and isinstance(self.dec, NSFGenerator): pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size) - o = self.dec(z_slice, pitchf, g=g) + o = self.dec(z_slice, pitchf, g=embg) # type: ignore + elif isinstance(self.dec, Generator): + o = self.dec(z_slice, g=embg) else: - o = self.dec(z_slice, g=g) + raise KeyError(f"unknown dec type: {type(self.dec).__name__}") return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q) @torch.jit.export @@ -201,15 +203,17 @@ class SynthesizerTrnMsNSFsid(nn.Module): z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask z = self.flow(z_p, x_mask, g=g, reverse=True) del z_p, m_p, logs_p - if pitchf is not None: + if pitchf is not None and isinstance(self.dec, NSFGenerator): o = self.dec( z * x_mask, pitchf, g=g, n_res=return_length2, ) - else: + elif isinstance(self.dec, Generator): o = self.dec(z * x_mask, g=g, n_res=return_length2) + else: + raise KeyError(f"unknown dec type: {type(self.dec).__name__}") del x_mask, z return o # , x_mask, (z, z_p, m_p, logs_p) @@ -326,7 +330,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid): upsample_kernel_sizes: List[int], spk_embed_dim: int, gin_channels: int, - sr=None, + sr: Union[str, int], ): super().__init__( spec_channels, @@ -346,6 +350,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid): upsample_kernel_sizes, spk_embed_dim, gin_channels, + sr, 
256, False, ) @@ -371,7 +376,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid): upsample_kernel_sizes: List[int], spk_embed_dim: int, gin_channels: int, - sr=None, + sr: Union[str, int], ): super().__init__( spec_channels, @@ -391,6 +396,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid): upsample_kernel_sizes, spk_embed_dim, gin_channels, + sr, 768, False, ) diff --git a/rvc/layers/utils.py b/rvc/layers/utils.py index 418578b..bf5a9ec 100644 --- a/rvc/layers/utils.py +++ b/rvc/layers/utils.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple, Iterator +from typing import List, Optional, Tuple, Iterator, Union import torch @@ -17,7 +17,7 @@ def get_padding(kernel_size: int, dilation=1) -> int: def slice_on_last_dim( x: torch.Tensor, - start_indices: List[int], + start_indices: Union[List[int], torch.Tensor], segment_size=4, ) -> torch.Tensor: new_shape = [*x.shape] @@ -32,9 +32,9 @@ def slice_on_last_dim( def rand_slice_segments_on_last_dim( x: torch.Tensor, - x_lengths: int = None, + x_lengths: Optional[Union[int, torch.Tensor]] = None, segment_size=4, -) -> Tuple[torch.Tensor, List[int]]: +) -> Tuple[torch.Tensor, Union[List[int], torch.Tensor]]: b, _, t = x.size() if x_lengths is None: x_lengths = t @@ -58,7 +58,7 @@ def activate_add_tanh_sigmoid_multiply( def sequence_mask( length: torch.Tensor, max_length: Optional[int] = None, -) -> torch.BoolTensor: +): if max_length is None: max_length = int(length.max()) x = torch.arange(max_length, dtype=length.dtype, device=length.device) diff --git a/rvc/onnx/synthesizer.py b/rvc/onnx/synthesizer.py index e8bf516..9dafce8 100644 --- a/rvc/onnx/synthesizer.py +++ b/rvc/onnx/synthesizer.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import List, Union import torch @@ -25,7 +25,7 @@ class SynthesizerTrnMsNSFsid(SynthesizerBase): upsample_kernel_sizes: List[int], spk_embed_dim: int, gin_channels: int, - sr: Optional[Union[str, int]], + sr: Union[str, int], 
encoder_dim: int, ): super().__init__( diff --git a/tools/web/infer-only.py b/tools/web/infer-only.py index 218788d..037031b 100644 --- a/tools/web/infer-only.py +++ b/tools/web/infer-only.py @@ -40,11 +40,9 @@ app = gr.Blocks() with app: with gr.Tabs(): with gr.TabItem("在线demo"): - gr.Markdown( - value=""" + gr.Markdown(value=""" RVC 在线demo - """ - ) + """) sid = gr.Dropdown(label=i18n("Inferencing voice"), choices=sorted(names)) with gr.Column(): spk_item = gr.Slider( diff --git a/web.py b/web.py index f869d6b..df7f11c 100644 --- a/web.py +++ b/web.py @@ -36,7 +36,6 @@ import threading import shutil import logging - logging.getLogger("numba").setLevel(logging.WARNING) logging.getLogger("httpx").setLevel(logging.WARNING) @@ -78,63 +77,6 @@ if config.dml == True: i18n = I18nAuto() logger.info(i18n) -# 判断是否有能用来训练和加速推理的N卡 -ngpu = torch.cuda.device_count() -gpu_infos = [] -mem = [] -if_gpu_ok = False - -if torch.cuda.is_available() or ngpu != 0: - for i in range(ngpu): - gpu_name = torch.cuda.get_device_name(i) - if any( - value in gpu_name.upper() - for value in [ - "10", - "16", - "20", - "30", - "40", - "A2", - "A3", - "A4", - "P4", - "A50", - "500", - "A60", - "70", - "80", - "90", - "M4", - "T4", - "TITAN", - "4060", - "L", - "6000", - ] - ): - # A10#A100#V100#A40#P40#M40#K80#A4500 - if_gpu_ok = True # 至少有一张能用的N卡 - gpu_infos.append("%s\t%s" % (i, gpu_name)) - mem.append( - int( - torch.cuda.get_device_properties(i).total_memory - / 1024 - / 1024 - / 1024 - + 0.4 - ) - ) -if if_gpu_ok and len(gpu_infos) > 0: - gpu_info = "\n".join(gpu_infos) - default_batch_size = min(mem) // 2 -else: - gpu_info = i18n( - "Unfortunately, there is no compatible GPU available to support your training." 
- ) - default_batch_size = 1 -gpus = "-".join([i[0] for i in gpu_infos]) - weight_root = os.getenv("weight_root") weight_uvr5_root = os.getenv("weight_uvr5_root") @@ -314,6 +256,7 @@ def extract_f0_feature(n_p, f0method, if_f0, exp_dir, version19): exp_dir=sys.argv[4] os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu) """ + gpus = [config.device] leng = len(gpus) ps = [] for idx, n_g in enumerate(gpus): @@ -751,7 +694,7 @@ with gr.Blocks(title="RVC WebUI") as app: gr.Markdown("## RVC WebUI") gr.Markdown( value=i18n( - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details." + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details." ) ) with gr.Tabs(): @@ -1201,7 +1144,7 @@ with gr.Blocks(title="RVC WebUI") as app: with gr.Column(): gpu_info9 = gr.Textbox( label=i18n("GPU Information"), - value=gpu_info, + value=config.device, ) f0method8 = gr.Radio( label=i18n( @@ -1254,7 +1197,7 @@ with gr.Blocks(title="RVC WebUI") as app: maximum=40, step=1, label=i18n("Batch size per GPU"), - value=default_batch_size, + value=config.default_batch_size, interactive=True, ) if_save_latest13 = gr.Radio( @@ -1296,7 +1239,7 @@ with gr.Blocks(title="RVC WebUI") as app: label=i18n( "Enter the GPU index(es) separated by '-', e.g., 0-1-2 to use GPU 0, 1, and 2" ), - value=gpus, + value="0", interactive=True, ) sr2.change(