From 2e622d62c7c5765d8c9111795c1e6d7f8c9f3f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Thu, 19 Jun 2025 18:11:14 +0900 Subject: [PATCH 01/19] chore: update LICENSE info --- web.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web.py b/web.py index f869d6b..c929930 100644 --- a/web.py +++ b/web.py @@ -751,7 +751,7 @@ with gr.Blocks(title="RVC WebUI") as app: gr.Markdown("## RVC WebUI") gr.Markdown( value=i18n( - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details." + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details." ) ) with gr.Tabs(): From 55a981d470b26b635b4f8acc2009e27cfe2caf03 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 19 Jun 2025 18:12:54 +0900 Subject: [PATCH 02/19] chore(i18n): sync locale on dev (#122) Co-authored-by: github-actions[bot] --- i18n/locale/en_US.json | 2 +- i18n/locale/es_ES.json | 2 +- i18n/locale/fr_FR.json | 2 +- i18n/locale/it_IT.json | 2 +- i18n/locale/ja_JP.json | 2 +- i18n/locale/ko_KR.json | 2 +- i18n/locale/pt_BR.json | 2 +- i18n/locale/ru_RU.json | 2 +- i18n/locale/tr_TR.json | 2 +- i18n/locale/zh_CN.json | 2 +- i18n/locale/zh_HK.json | 2 +- i18n/locale/zh_SG.json | 2 +- i18n/locale/zh_TW.json | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/i18n/locale/en_US.json b/i18n/locale/en_US.json index 7cc92d3..b5ccfc0 100644 --- a/i18n/locale/en_US.json +++ b/i18n/locale/en_US.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "Target sample rate", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Total training epochs (total_epoch)", "Train": "Train", "Train feature index": "Train feature index", diff --git a/i18n/locale/es_ES.json b/i18n/locale/es_ES.json index d4102fa..96bcc84 100644 --- a/i18n/locale/es_ES.json +++ b/i18n/locale/es_ES.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "Adquisición del dispositivo WASAPI", "Target sample rate": "Tasa de muestreo objetivo", "The audio file to be processed": "El archivo de audio a procesar", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "Este software es de código abierto bajo la licencia MIT, el autor no tiene ningún control sobre el software, y aquellos que usan el software y difunden los sonidos exportados por el software son los únicos responsables.
Si no está de acuerdo con esta cláusula , no puede utilizar ni citar ningún código ni archivo del paquete de software Consulte el directorio raíz Agreement-LICENSE.txt para obtener más información.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Total de épocas de entrenamiento (total_epoch)", "Train": "Entrenamiento", "Train feature index": "Índice de características", diff --git a/i18n/locale/fr_FR.json b/i18n/locale/fr_FR.json index fbbd471..557f608 100644 --- a/i18n/locale/fr_FR.json +++ b/i18n/locale/fr_FR.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "Reprise du périphérique WASAPI", "Target sample rate": "Taux d'échantillonnage cible :", "The audio file to be processed": "Le fichier audio à traiter", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "Ce logiciel est open source sous la licence MIT. L'auteur n'a aucun contrôle sur le logiciel. Les utilisateurs qui utilisent le logiciel et distribuent les sons exportés par le logiciel en sont entièrement responsables.
Si vous n'acceptez pas cette clause, vous ne pouvez pas utiliser ou faire référence à aucun code ni fichier contenu dans le package logiciel. Consultez le fichier Agreement-LICENSE.txt dans le répertoire racine pour plus de détails.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Nombre total d'époques d'entraînement (total_epoch) :", "Train": "Entraîner", "Train feature index": "Entraîner l'index des caractéristiques", diff --git a/i18n/locale/it_IT.json b/i18n/locale/it_IT.json index 821dbb4..a428c0e 100644 --- a/i18n/locale/it_IT.json +++ b/i18n/locale/it_IT.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "Frequenza di campionamento target:", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "Questo software è open source con licenza MIT.
Se non si accetta questa clausola, non è possibile utilizzare o fare riferimento a codici e file all'interno del pacchetto software. Contratto-LICENZA.txt per dettagli.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Epoch totali di addestramento (total_epoch):", "Train": "Addestramento", "Train feature index": "Addestra indice delle caratteristiche", diff --git a/i18n/locale/ja_JP.json b/i18n/locale/ja_JP.json index 1391409..01bea83 100644 --- a/i18n/locale/ja_JP.json +++ b/i18n/locale/ja_JP.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "WASAPIデバイスを独占", "Target sample rate": "目標サンプリング率", "The audio file to be processed": "処理待ち音声", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "本ソフトウェアはMITライセンスに基づくオープンソースであり、製作者は本ソフトウェアに対していかなる責任を持ちません。本ソフトウェアの利用者および本ソフトウェアから派生した音源(成果物)を配布する者は、本ソフトウェアに対して自身で責任を負うものとします。
この条項に同意しない場合、パッケージ内のコードやファイルを使用や参照を禁じます。詳しくはLICENSEをご覧ください。", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "総エポック数", "Train": "学習", "Train feature index": "特徴索引の学習", diff --git a/i18n/locale/ko_KR.json b/i18n/locale/ko_KR.json index 49596af..c7a77b6 100644 --- a/i18n/locale/ko_KR.json +++ b/i18n/locale/ko_KR.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "목표 샘플링률", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "이 소프트웨어는 MIT 라이선스로 공개되며, 저자는 소프트웨어에 대해 어떠한 통제권도 가지지 않습니다. 모든 귀책사유는 소프트웨어 사용자 및 소프트웨어에서 생성된 결과물을 사용하는 당사자에게 있습니다.
해당 조항을 인정하지 않는 경우, 소프트웨어 패키지의 어떠한 코드나 파일도 사용하거나 인용할 수 없습니다. 자세한 내용은 루트 디렉토리의 LICENSE를 참조하세요.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "총 훈련 라운드 수 total_epoch", "Train": "훈련", "Train feature index": "특징 인덱스 훈련", diff --git a/i18n/locale/pt_BR.json b/i18n/locale/pt_BR.json index 50e4ef5..54a0b36 100644 --- a/i18n/locale/pt_BR.json +++ b/i18n/locale/pt_BR.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "Taxa de amostragem:", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "
The Mangio-RVC 💻 | Tradução por Krisp e Rafael Godoy Ebert | AI HUB BRASIL
Este software é de código aberto sob a licença MIT. O autor não tem qualquer controle sobre o software. Aqueles que usam o software e divulgam os sons exportados pelo software são totalmente responsáveis.
Se você não concorda com este termo, você não pode usar ou citar nenhum código e arquivo no pacote de software. Para obter detalhes, consulte o diretório raiz O acordo a ser seguido para uso LICENSE
", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Número total de ciclos(epoch) de treino (se escolher um valor alto demais, o seu modelo parecerá terrivelmente sobretreinado):", "Train": "Treinar", "Train feature index": "Treinar Index", diff --git a/i18n/locale/ru_RU.json b/i18n/locale/ru_RU.json index 2f3eaa6..311b951 100644 --- a/i18n/locale/ru_RU.json +++ b/i18n/locale/ru_RU.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "Частота дискретизации аудио:", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "Это программное обеспечение с открытым исходным кодом распространяется по лицензии MIT. Автор никак не контролирует это программное обеспечение. Пользователи, которые используют эту программу и распространяют аудиозаписи, полученные с помощью этой программы, несут полную ответственность за это. Если вы не согласны с этим, вы не можете использовать какие-либо коды и файлы в рамках этой программы или ссылаться на них. Подробнее в файле Agreement-LICENSE.txt в корневом каталоге программы.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Полное количество эпох (total_epoch):", "Train": "Обучение модели", "Train feature index": "Обучить индекс черт", diff --git a/i18n/locale/tr_TR.json b/i18n/locale/tr_TR.json index 69a4fcd..f9bc25c 100644 --- a/i18n/locale/tr_TR.json +++ b/i18n/locale/tr_TR.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "Takeover WASAPI device", "Target sample rate": "Hedef örnekleme oranı:", "The audio file to be processed": "The audio file to be processed", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "Bu yazılım, MIT lisansı altında açık kaynaklıdır. Yazarın yazılım üzerinde herhangi bir kontrolü yoktur. Yazılımı kullanan ve yazılım tarafından dışa aktarılan sesleri dağıtan kullanıcılar sorumludur.
Eğer bu maddeyle aynı fikirde değilseniz, yazılım paketi içindeki herhangi bir kod veya dosyayı kullanamaz veya referans göremezsiniz. Detaylar için kök dizindeki Agreement-LICENSE.txt dosyasına bakınız.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "Toplam eğitim turu (total_epoch):", "Train": "Eğitim", "Train feature index": "Özellik Dizinini Eğit", diff --git a/i18n/locale/zh_CN.json b/i18n/locale/zh_CN.json index 9bcf290..ea2a252 100644 --- a/i18n/locale/zh_CN.json +++ b/i18n/locale/zh_CN.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "独占 WASAPI 设备", "Target sample rate": "目标采样率", "The audio file to be processed": "待处理音频文件", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "总训练轮数total_epoch", "Train": "训练", "Train feature index": "训练特征索引", diff --git a/i18n/locale/zh_HK.json b/i18n/locale/zh_HK.json index 93071fc..854cd53 100644 --- a/i18n/locale/zh_HK.json +++ b/i18n/locale/zh_HK.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "独占 WASAPI 设备", "Target sample rate": "目標取樣率", "The audio file to be processed": "待处理音频文件", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "本軟體以MIT協議開源,作者不對軟體具備任何控制力,使用軟體者、傳播軟體導出的聲音者自負全責。
如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄使用需遵守的協議-LICENSE.txt。", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "總訓練輪數total_epoch", "Train": "訓練", "Train feature index": "訓練特徵索引", diff --git a/i18n/locale/zh_SG.json b/i18n/locale/zh_SG.json index 93071fc..854cd53 100644 --- a/i18n/locale/zh_SG.json +++ b/i18n/locale/zh_SG.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "独占 WASAPI 设备", "Target sample rate": "目標取樣率", "The audio file to be processed": "待处理音频文件", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "本軟體以MIT協議開源,作者不對軟體具備任何控制力,使用軟體者、傳播軟體導出的聲音者自負全責。
如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄使用需遵守的協議-LICENSE.txt。", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "總訓練輪數total_epoch", "Train": "訓練", "Train feature index": "訓練特徵索引", diff --git a/i18n/locale/zh_TW.json b/i18n/locale/zh_TW.json index 93071fc..854cd53 100644 --- a/i18n/locale/zh_TW.json +++ b/i18n/locale/zh_TW.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "独占 WASAPI 设备", "Target sample rate": "目標取樣率", "The audio file to be processed": "待处理音频文件", - "This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible.
If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory Agreement-LICENSE.txt for details.": "本軟體以MIT協議開源,作者不對軟體具備任何控制力,使用軟體者、傳播軟體導出的聲音者自負全責。
如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄使用需遵守的協議-LICENSE.txt。", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", "Total training epochs (total_epoch)": "總訓練輪數total_epoch", "Train": "訓練", "Train feature index": "訓練特徵索引", From 033306439c7fec3eac99917c9ac43b2d37c30037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Tue, 8 Jul 2025 16:40:38 +0900 Subject: [PATCH 03/19] fix(train): cannot extract feature on non-cuda devices (fix #123) --- configs/config.py | 27 +++++++++ infer/lib/rvcmd.py | 8 +-- infer/modules/train/extract_feature_print.py | 18 +++--- web.py | 64 ++------------------ 4 files changed, 43 insertions(+), 74 deletions(-) diff --git a/configs/config.py b/configs/config.py index 3fb3410..9db1126 100644 --- a/configs/config.py +++ b/configs/config.py @@ -53,6 +53,7 @@ class Config(metaclass=Singleton): self.instead = "" self.preprocess_per = 3.7 self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() + self.default_batch_size = self.get_default_batch_size() @staticmethod def load_config_json() -> dict: @@ -136,6 +137,32 @@ class Config(metaclass=Singleton): logging.warning("Using insecure weight loading for fairseq dictionary") except AttributeError: pass + + @staticmethod + def get_default_batch_size() -> int: + if not torch.cuda.is_available(): + #TODO: add non-cuda multicards + return 1 + # 判断是否有能用来训练和加速推理的N卡 + ngpu = torch.cuda.device_count() + if not ngpu: + return 1 + mem = [] + if_gpu_ok = False + + for i in range(ngpu): + if_gpu_ok = True # 至少有一张能用的N卡 + mem.append( + int( + torch.cuda.get_device_properties(i).total_memory + / 1024 / 1024 / 1024 + 0.4 + ) + ) + if if_gpu_ok: + default_batch_size = min(mem) // 2 + else: + default_batch_size = 1 + return default_batch_size def use_fp32_config(self): for config_file in version_config_list: diff --git a/infer/lib/rvcmd.py b/infer/lib/rvcmd.py index 32b87d3..b49ffd4 100644 --- a/infer/lib/rvcmd.py +++ b/infer/lib/rvcmd.py @@ -194,11 +194,11 @@ def download_all_assets(tmpdir: str, version="0.2.5"): if not architecture: logger.error(f"architecture {architecture} is not supported") exit(1) + BASE_URL = "https://github.com/fumiama/RVC-Models-Downloader/releases/download/" + suffix = "zip" if is_win else "tar.gz" + RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}" + cmdfile = os.path.join(tmpdir, "rvcmd") try: - BASE_URL = "https://github.com/fumiama/RVC-Models-Downloader/releases/download/" - suffix = "zip" if is_win else "tar.gz" - RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}" - cmdfile = os.path.join(tmpdir, "rvcmd") if is_win: download_and_extract_zip(RVCMD_URL, tmpdir) cmdfile += ".exe" diff --git a/infer/modules/train/extract_feature_print.py b/infer/modules/train/extract_feature_print.py index 1c3ec9a..ef6fb6a 100644 --- a/infer/modules/train/extract_feature_print.py +++ b/infer/modules/train/extract_feature_print.py @@ -10,19 +10,17 @@ from infer.lib.audio import load_audio os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0" +if len(sys.argv) != 8: + sys.exit(0) + device = sys.argv[1] n_part = int(sys.argv[2]) i_part = int(sys.argv[3]) -if len(sys.argv) == 7: - exp_dir = sys.argv[4] - version = sys.argv[5] - is_half = sys.argv[6].lower() == "true" -else: - i_gpu = sys.argv[4] - exp_dir = sys.argv[5] - os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu) - version = sys.argv[6] - is_half = sys.argv[7].lower() == "true" +i_gpu = sys.argv[4] +os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu) +exp_dir = sys.argv[5] +version = sys.argv[6] +is_half = sys.argv[7].lower() == "true" import fairseq import numpy as np diff --git a/web.py b/web.py index c929930..c493ec4 100644 --- a/web.py +++ b/web.py @@ -78,63 +78,6 @@ if config.dml == True: i18n = I18nAuto() logger.info(i18n) -# 判断是否有能用来训练和加速推理的N卡 -ngpu = torch.cuda.device_count() -gpu_infos = [] -mem = [] -if_gpu_ok = False - -if torch.cuda.is_available() or ngpu != 0: - for i in range(ngpu): - gpu_name = torch.cuda.get_device_name(i) - if any( - value in gpu_name.upper() - for value in [ - "10", - "16", - "20", - "30", - "40", - "A2", - "A3", - "A4", - "P4", - "A50", - "500", - "A60", - "70", - "80", - "90", - "M4", - "T4", - "TITAN", - "4060", - "L", - "6000", - ] - ): - # A10#A100#V100#A40#P40#M40#K80#A4500 - if_gpu_ok = True # 至少有一张能用的N卡 - gpu_infos.append("%s\t%s" % (i, gpu_name)) - mem.append( - int( - torch.cuda.get_device_properties(i).total_memory - / 1024 - / 1024 - / 1024 - + 0.4 - ) - ) -if if_gpu_ok and len(gpu_infos) > 0: - gpu_info = "\n".join(gpu_infos) - default_batch_size = min(mem) // 2 -else: - gpu_info = i18n( - "Unfortunately, there is no compatible GPU available to support your training." - ) - default_batch_size = 1 -gpus = "-".join([i[0] for i in gpu_infos]) - weight_root = os.getenv("weight_root") weight_uvr5_root = os.getenv("weight_uvr5_root") @@ -314,6 +257,7 @@ def extract_f0_feature(n_p, f0method, if_f0, exp_dir, version19): exp_dir=sys.argv[4] os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu) """ + gpus = [config.device] leng = len(gpus) ps = [] for idx, n_g in enumerate(gpus): @@ -1201,7 +1145,7 @@ with gr.Blocks(title="RVC WebUI") as app: with gr.Column(): gpu_info9 = gr.Textbox( label=i18n("GPU Information"), - value=gpu_info, + value=config.device, ) f0method8 = gr.Radio( label=i18n( @@ -1254,7 +1198,7 @@ with gr.Blocks(title="RVC WebUI") as app: maximum=40, step=1, label=i18n("Batch size per GPU"), - value=default_batch_size, + value=config.default_batch_size, interactive=True, ) if_save_latest13 = gr.Radio( @@ -1296,7 +1240,7 @@ with gr.Blocks(title="RVC WebUI") as app: label=i18n( "Enter the GPU index(es) separated by '-', e.g., 0-1-2 to use GPU 0, 1, and 2" ), - value=gpus, + value="0", interactive=True, ) sr2.change( From 51170b24c4c7bba70db5d938a3a0b95c2eb5a7a2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 8 Jul 2025 16:46:43 +0900 Subject: [PATCH 04/19] chore(format): run black on dev (#125) Co-authored-by: github-actions[bot] --- configs/config.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/configs/config.py b/configs/config.py index 9db1126..5232048 100644 --- a/configs/config.py +++ b/configs/config.py @@ -137,11 +137,11 @@ class Config(metaclass=Singleton): logging.warning("Using insecure weight loading for fairseq dictionary") except AttributeError: pass - + @staticmethod def get_default_batch_size() -> int: if not torch.cuda.is_available(): - #TODO: add non-cuda multicards + # TODO: add non-cuda multicards return 1 # 判断是否有能用来训练和加速推理的N卡 ngpu = torch.cuda.device_count() @@ -155,7 +155,10 @@ class Config(metaclass=Singleton): mem.append( int( torch.cuda.get_device_properties(i).total_memory - / 1024 / 1024 / 1024 + 0.4 + / 1024 + / 1024 + / 1024 + + 0.4 ) ) if if_gpu_ok: From 34f28d97e3d3b892fa27c3065b5604a2a59e6a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Sun, 9 Nov 2025 18:30:11 +0800 Subject: [PATCH 05/19] deps: fix fairseq install in .venv --- requirements/amd.txt | 2 +- requirements/dml.txt | 2 +- requirements/ipex.txt | 2 +- requirements/main.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/amd.txt b/requirements/amd.txt index a051034..2821e94 100644 --- a/requirements/amd.txt +++ b/requirements/amd.txt @@ -5,7 +5,7 @@ numpy scipy librosa>=0.10.2 llvmlite -fairseq @ git+https://github.com/One-sixth/fairseq.git +fairseq @ git+https://github.com/fumiama/fairseq.git faiss-cpu gradio Cython diff --git a/requirements/dml.txt b/requirements/dml.txt index 5493ece..5e0bd35 100644 --- a/requirements/dml.txt +++ b/requirements/dml.txt @@ -4,7 +4,7 @@ numpy scipy librosa>=0.10.2 llvmlite -fairseq @ git+https://github.com/One-sixth/fairseq.git +fairseq @ git+https://github.com/fumiama/fairseq.git faiss-cpu gradio Cython diff --git a/requirements/ipex.txt b/requirements/ipex.txt index 056d9fa..d166308 100644 --- a/requirements/ipex.txt +++ b/requirements/ipex.txt @@ -9,7 +9,7 @@ numpy scipy librosa>=0.10.2 llvmlite==0.39.0 -fairseq @ git+https://github.com/One-sixth/fairseq.git +fairseq @ git+https://github.com/fumiama/fairseq.git faiss-cpu gradio Cython diff --git a/requirements/main.txt b/requirements/main.txt index fef9c3c..099949d 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -4,7 +4,7 @@ numpy scipy librosa>=0.10.2 llvmlite -fairseq @ git+https://github.com/One-sixth/fairseq.git +fairseq @ git+https://github.com/fumiama/fairseq.git faiss-cpu gradio Cython From 215a3edcefb8b0e175b8362ed817b9b34a06853d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Tue, 18 Nov 2025 13:57:03 +0800 Subject: [PATCH 06/19] fix: remove outdated codes & add weights_only=True --- infer/lib/rvcmd.py | 48 +++++---------------------------- infer/lib/train/data_utils.py | 2 +- infer/lib/train/process_ckpt.py | 8 +++--- infer/modules/uvr5/vr.py | 2 +- infer/modules/vc/hash.py | 2 +- infer/modules/vc/info.py | 2 +- 6 files changed, 15 insertions(+), 49 deletions(-) diff --git a/infer/lib/rvcmd.py b/infer/lib/rvcmd.py index b49ffd4..439e346 100644 --- a/infer/lib/rvcmd.py +++ b/infer/lib/rvcmd.py @@ -198,44 +198,10 @@ def download_all_assets(tmpdir: str, version="0.2.5"): suffix = "zip" if is_win else "tar.gz" RVCMD_URL = BASE_URL + f"v{version}/rvcmd_{system_type}_{architecture}.{suffix}" cmdfile = os.path.join(tmpdir, "rvcmd") - try: - if is_win: - download_and_extract_zip(RVCMD_URL, tmpdir) - cmdfile += ".exe" - else: - download_and_extract_tar_gz(RVCMD_URL, tmpdir) - os.chmod(cmdfile, 0o755) - subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"]) - except Exception: - BASE_URL = "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/assets/" - suffix = { - "darwin_amd64": "555", - "darwin_arm64": "556", - "linux_386": "557", - "linux_amd64": "558", - "linux_arm64": "559", - "windows_386": "562", - "windows_amd64": "563", - }[f"{system_type}_{architecture}"] - RVCMD_URL = BASE_URL + suffix - download_dns_yaml( - "https://raw.gitcode.com/u011570312/RVC-Models-Downloader/raw/main/dns.yaml", - tmpdir, - ) - if is_win: - download_and_extract_zip(RVCMD_URL, tmpdir) - cmdfile += ".exe" - else: - download_and_extract_tar_gz(RVCMD_URL, tmpdir) - os.chmod(cmdfile, 0o755) - subprocess.run( - [ - cmdfile, - "-notui", - "-w", - "0", - "-dns", - os.path.join(tmpdir, "dns.yaml"), - "assets/rvc", - ] - ) + if is_win: + download_and_extract_zip(RVCMD_URL, tmpdir) + cmdfile += ".exe" + else: + download_and_extract_tar_gz(RVCMD_URL, tmpdir) + os.chmod(cmdfile, 0o755) + subprocess.run([cmdfile, "-notui", "-w", "0", "assets/rvc"]) diff --git a/infer/lib/train/data_utils.py b/infer/lib/train/data_utils.py index a5e4a93..da2a923 100644 --- a/infer/lib/train/data_utils.py +++ b/infer/lib/train/data_utils.py @@ -303,7 +303,7 @@ class TextAudioLoader(torch.utils.data.Dataset): spec_filename = filename.replace(".wav", ".spec.pt") if os.path.exists(spec_filename): try: - spec = torch.load(spec_filename) + spec = torch.load(spec_filename, weights_only=True) except: logger.warning("%s %s", spec_filename, traceback.format_exc()) spec = spectrogram_torch( diff --git a/infer/lib/train/process_ckpt.py b/infer/lib/train/process_ckpt.py index cf9697f..8688a17 100644 --- a/infer/lib/train/process_ckpt.py +++ b/infer/lib/train/process_ckpt.py @@ -59,7 +59,7 @@ def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps): def extract_small_model(path, name, author, sr, if_f0, info, version): try: - ckpt = torch.load(path, map_location="cpu") + ckpt = torch.load(path, map_location="cpu", weights_only=True) if "model" in ckpt: ckpt = ckpt["model"] opt = OrderedDict() @@ -196,7 +196,7 @@ def extract_small_model(path, name, author, sr, if_f0, info, version): def change_info(path, info, name): try: - ckpt = torch.load(path, map_location="cpu") + ckpt = torch.load(path, map_location="cpu", weights_only=True) ckpt["info"] = info if name == "": name = os.path.basename(path) @@ -229,8 +229,8 @@ def merge(path1, path2, alpha1, sr, f0, info, name, version): a2 = "Unknown" return f"{a1} & {a2}" - ckpt1 = torch.load(path1, map_location="cpu") - ckpt2 = torch.load(path2, map_location="cpu") + ckpt1 = torch.load(path1, map_location="cpu", weights_only=True) + ckpt2 = torch.load(path2, map_location="cpu", weights_only=True) cfg = ckpt1["config"] if "model" in ckpt1: ckpt1 = extract(ckpt1) diff --git a/infer/modules/uvr5/vr.py b/infer/modules/uvr5/vr.py index 999bc19..6c0bf42 100644 --- a/infer/modules/uvr5/vr.py +++ b/infer/modules/uvr5/vr.py @@ -37,7 +37,7 @@ class AudioPre: else: mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json") model = Nets.CascadedASPPNet(mp.param["bins"] * 2) - cpk = torch.load(model_path, map_location="cpu") + cpk = torch.load(model_path, map_location="cpu", weights_only=True) model.load_state_dict(cpk) model.eval() if is_half: diff --git a/infer/modules/vc/hash.py b/infer/modules/vc/hash.py index 77b6ae1..6a3561b 100644 --- a/infer/modules/vc/hash.py +++ b/infer/modules/vc/hash.py @@ -152,7 +152,7 @@ def model_hash_ckpt(cpt): def model_hash_from(path): - cpt = torch.load(path, map_location="cpu") + cpt = torch.load(path, map_location="cpu", weights_only=True) h = model_hash_ckpt(cpt) del cpt return h diff --git a/infer/modules/vc/info.py b/infer/modules/vc/info.py index 14a7346..c16a8f1 100644 --- a/infer/modules/vc/info.py +++ b/infer/modules/vc/info.py @@ -75,7 +75,7 @@ def show_info(path): try: if hasattr(path, "name"): path = path.name - a = torch.load(path, map_location="cpu") + a = torch.load(path, map_location="cpu", weights_only=True) txt = show_model_info(a, show_long_id=True) del a except: From 8ab9fe3dee1b74c37b511aebd845e60762e7c9d8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 21 Nov 2025 14:32:55 +0800 Subject: [PATCH 07/19] chore(i18n): sync locale on dev (#124) Co-authored-by: github-actions[bot] --- i18n/locale/en_US.json | 1 - i18n/locale/es_ES.json | 1 - i18n/locale/fr_FR.json | 1 - i18n/locale/it_IT.json | 1 - i18n/locale/ja_JP.json | 1 - i18n/locale/ko_KR.json | 1 - i18n/locale/pt_BR.json | 1 - i18n/locale/ru_RU.json | 1 - i18n/locale/tr_TR.json | 1 - i18n/locale/zh_CN.json | 1 - i18n/locale/zh_HK.json | 1 - i18n/locale/zh_SG.json | 1 - i18n/locale/zh_TW.json | 1 - 13 files changed, 13 deletions(-) diff --git a/i18n/locale/en_US.json b/i18n/locale/en_US.json index b5ccfc0..44c8da0 100644 --- a/i18n/locale/en_US.json +++ b/i18n/locale/en_US.json @@ -140,7 +140,6 @@ "Train model": "Train model", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)", - "Unfortunately, there is no compatible GPU available to support your training.": "Unfortunately, there is no compatible GPU available to support your training.", "Unknown": "Unknown", "Unload model to save GPU memory": "Unload model to save GPU memory", "Version": "Version", diff --git a/i18n/locale/es_ES.json b/i18n/locale/es_ES.json index 96bcc84..f21209e 100644 --- a/i18n/locale/es_ES.json +++ b/i18n/locale/es_ES.json @@ -140,7 +140,6 @@ "Train model": "Entrenar Modelo", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Entrenamiento finalizado, puede ver el registro de entrenamiento en la consola o en el archivo train.log en la carpeta del experimento", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Cambio de tono (entero, número de semitonos, subir una octava +12 o bajar una octava -12)", - "Unfortunately, there is no compatible GPU available to support your training.": "Lamentablemente, no tiene una tarjeta gráfica adecuada para soportar su entrenamiento", "Unknown": "Desconocido", "Unload model to save GPU memory": "Descargue la voz para ahorrar memoria GPU", "Version": "Versión", diff --git a/i18n/locale/fr_FR.json b/i18n/locale/fr_FR.json index 557f608..d55454f 100644 --- a/i18n/locale/fr_FR.json +++ b/i18n/locale/fr_FR.json @@ -140,7 +140,6 @@ "Train model": "Entraîner le modèle", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Entraînement terminé. Vous pouvez consulter les rapports d'entraînement dans la console ou dans le fichier 'train.log' situé dans le dossier de l'expérience.", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Transposer (entier, nombre de demi-tons, monter d'une octave : 12, descendre d'une octave : -12) :", - "Unfortunately, there is no compatible GPU available to support your training.": "Malheureusement, il n'y a pas de GPU compatible disponible pour prendre en charge votre entrainement.", "Unknown": "Inconnu", "Unload model to save GPU memory": "Décharger la voix pour économiser la mémoire GPU.", "Version": "Version", diff --git a/i18n/locale/it_IT.json b/i18n/locale/it_IT.json index a428c0e..c28b7f1 100644 --- a/i18n/locale/it_IT.json +++ b/i18n/locale/it_IT.json @@ -140,7 +140,6 @@ "Train model": "Addestra modello", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Addestramento completato. ", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Trasposizione (numero intero, numero di semitoni, alza di un'ottava: 12, abbassa di un'ottava: -12):", - "Unfortunately, there is no compatible GPU available to support your training.": "Sfortunatamente, non è disponibile alcuna GPU compatibile per supportare l'addestramento.", "Unknown": "Unknown", "Unload model to save GPU memory": "Scarica la voce per risparmiare memoria della GPU:", "Version": "Versione", diff --git a/i18n/locale/ja_JP.json b/i18n/locale/ja_JP.json index 01bea83..455446b 100644 --- a/i18n/locale/ja_JP.json +++ b/i18n/locale/ja_JP.json @@ -140,7 +140,6 @@ "Train model": "モデルの学習", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "学習終了時に、学習ログやフォルダ内のtrain.logを確認することができます", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "ピッチ変更(整数、半音数、上下オクターブ12-12)", - "Unfortunately, there is no compatible GPU available to support your training.": "学習に対応したGPUが動作しないのは残念です。", "Unknown": "未知", "Unload model to save GPU memory": "音源を削除してメモリを節約", "Version": "バージョン", diff --git a/i18n/locale/ko_KR.json b/i18n/locale/ko_KR.json index c7a77b6..8207271 100644 --- a/i18n/locale/ko_KR.json +++ b/i18n/locale/ko_KR.json @@ -140,7 +140,6 @@ "Train model": "모델 훈련", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "훈련 완료, 콘솔 훈련 로그 또는 실험 폴더 내의 train.log 확인 가능", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "키 변경(정수, 반음 수, 옥타브 상승 12, 옥타브 하강 -12)", - "Unfortunately, there is no compatible GPU available to support your training.": "사용 가능한 그래픽 카드가 없어 훈련을 지원할 수 없습니다", "Unknown": "Unknown", "Unload model to save GPU memory": "음색 언로드로 디스플레이 메모리 절약", "Version": "버전", diff --git a/i18n/locale/pt_BR.json b/i18n/locale/pt_BR.json index 54a0b36..3a9c6f7 100644 --- a/i18n/locale/pt_BR.json +++ b/i18n/locale/pt_BR.json @@ -140,7 +140,6 @@ "Train model": "Treinar Modelo", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Após o término do treinamento, você pode verificar o log de treinamento do console ou train.log na pasta de experimentos", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Mude o tom aqui. Se a voz for do mesmo sexo, não é necessario alterar (12 caso seja Masculino para feminino, -12 caso seja ao contrário).", - "Unfortunately, there is no compatible GPU available to support your training.": "Infelizmente, não há GPU compatível disponível para apoiar o seu treinamento.", "Unknown": "Unknown", "Unload model to save GPU memory": "Descarregue a voz para liberar a memória da GPU:", "Version": "Versão", diff --git a/i18n/locale/ru_RU.json b/i18n/locale/ru_RU.json index 311b951..28134b3 100644 --- a/i18n/locale/ru_RU.json +++ b/i18n/locale/ru_RU.json @@ -140,7 +140,6 @@ "Train model": "Обучить модель", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Обучение модели завершено. Журнал обучения можно просмотреть в консоли или в файле 'train.log' в папке с моделью.", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Изменить высоту голоса (укажите количество полутонов; чтобы поднять голос на октаву, выберите 12, понизить на октаву — -12):", - "Unfortunately, there is no compatible GPU available to support your training.": "К сожалению, у вас нету графического процессора, который поддерживает обучение моделей.", "Unknown": "Unknown", "Unload model to save GPU memory": "Выгрузить модель из памяти GPU для освобождения ресурсов", "Version": "Версия архитектуры модели:", diff --git a/i18n/locale/tr_TR.json b/i18n/locale/tr_TR.json index f9bc25c..776bab9 100644 --- a/i18n/locale/tr_TR.json +++ b/i18n/locale/tr_TR.json @@ -140,7 +140,6 @@ "Train model": "Modeli Eğit", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "Eğitim tamamlandı. Eğitim günlüklerini konsolda veya deney klasörü altındaki train.log dosyasında kontrol edebilirsiniz.", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "Transpoze et (tamsayı, yarıton sayısıyla; bir oktav yükseltmek için: 12, bir oktav düşürmek için: -12):", - "Unfortunately, there is no compatible GPU available to support your training.": "Maalesef, eğitiminizi desteklemek için uyumlu bir GPU bulunmamaktadır.", "Unknown": "Unknown", "Unload model to save GPU memory": "GPU bellek kullanımını azaltmak için sesi kaldır", "Version": "Sürüm", diff --git a/i18n/locale/zh_CN.json b/i18n/locale/zh_CN.json index ea2a252..aaa75ae 100644 --- a/i18n/locale/zh_CN.json +++ b/i18n/locale/zh_CN.json @@ -140,7 +140,6 @@ "Train model": "训练模型", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "变调(整数, 半音数量, 升八度12降八度-12)", - "Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练", "Unknown": "未知", "Unload model to save GPU memory": "卸载音色省显存", "Version": "版本", diff --git a/i18n/locale/zh_HK.json b/i18n/locale/zh_HK.json index 854cd53..e6b2a66 100644 --- a/i18n/locale/zh_HK.json +++ b/i18n/locale/zh_HK.json @@ -140,7 +140,6 @@ "Train model": "訓練模型", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "變調(整數、半音數量、升八度12降八度-12)", - "Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练", "Unknown": "Unknown", "Unload model to save GPU memory": "卸載音色節省 VRAM", "Version": "版本", diff --git a/i18n/locale/zh_SG.json b/i18n/locale/zh_SG.json index 854cd53..e6b2a66 100644 --- a/i18n/locale/zh_SG.json +++ b/i18n/locale/zh_SG.json @@ -140,7 +140,6 @@ "Train model": "訓練模型", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "變調(整數、半音數量、升八度12降八度-12)", - "Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练", "Unknown": "Unknown", "Unload model to save GPU memory": "卸載音色節省 VRAM", "Version": "版本", diff --git a/i18n/locale/zh_TW.json b/i18n/locale/zh_TW.json index 854cd53..e6b2a66 100644 --- a/i18n/locale/zh_TW.json +++ b/i18n/locale/zh_TW.json @@ -140,7 +140,6 @@ "Train model": "訓練模型", "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log", "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)": "變調(整數、半音數量、升八度12降八度-12)", - "Unfortunately, there is no compatible GPU available to support your training.": "很遗憾您这没有能用的显卡来支持您训练", "Unknown": "Unknown", "Unload model to save GPU memory": "卸載音色節省 VRAM", "Version": "版本", From 53d54681cf1f5fd9e27f96e96ba0a3f90610f4df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 21 Nov 2025 15:06:04 +0800 Subject: [PATCH 08/19] fix: i18n & dl & tests --- .github/workflows/checksum.yml | 2 +- .github/workflows/unitest.yml | 4 ++-- configs/config.py | 2 +- i18n/locale/ja_JP.json | 2 +- i18n/locale/zh_CN.json | 2 +- infer/lib/rvcmd.py | 10 +--------- infer/modules/uvr5/modules.py | 4 ---- 7 files changed, 7 insertions(+), 19 deletions(-) diff --git a/.github/workflows/checksum.yml b/.github/workflows/checksum.yml index 1bf904b..1fa0fbc 100644 --- a/.github/workflows/checksum.yml +++ b/.github/workflows/checksum.yml @@ -15,7 +15,7 @@ jobs: - name: Run RVC-Models-Downloader run: | - wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb + wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb sudo apt -y install ./rvcmd_linux_amd64.deb rm -f ./rvcmd_linux_amd64.deb rvcmd -notrs -w 1 -notui assets/rvc diff --git a/.github/workflows/unitest.yml b/.github/workflows/unitest.yml index 7a4987b..c02696d 100644 --- a/.github/workflows/unitest.yml +++ b/.github/workflows/unitest.yml @@ -18,13 +18,13 @@ jobs: - name: Install dependencies run: | sudo apt update - wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.5/rvcmd_linux_amd64.deb + wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb sudo apt -y install ./rvcmd_linux_amd64.deb pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552 python -m pip install --upgrade setuptools python -m pip install --upgrade wheel pip install torch torchvision torchaudio - pip install -r requirements/main.txt + pip install -r requirements/dml.txt rvcmd -notrs -w 1 -notui assets/rvc - name: Test step 1 & 2 run: | diff --git a/configs/config.py b/configs/config.py index 5232048..8a92f37 100644 --- a/configs/config.py +++ b/configs/config.py @@ -214,7 +214,7 @@ class Config(metaclass=Singleton): self.use_fp32_config() else: logger.info("No supported Nvidia GPU found") - self.device = self.instead = "cpu" + self.device = self.instead = torch.get_default_device() self.is_half = False self.use_fp32_config() diff --git a/i18n/locale/ja_JP.json b/i18n/locale/ja_JP.json index 455446b..c5c2a62 100644 --- a/i18n/locale/ja_JP.json +++ b/i18n/locale/ja_JP.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "WASAPIデバイスを独占", "Target sample rate": "目標サンプリング率", "The audio file to be processed": "処理待ち音声", - "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "本ソフトウェアは、GNU Affero General Public License バージョン3.0以降の下でライセンスされています。
作者は、このソフトウェアの使用に関して一切の管理権限や責任を負いません。
このソフトウェアを使用し、生成された音声やファイルを含むあらゆるコンテンツを配布するユーザーは、AGPL 3.0ライセンスの条項への準拠について全責任を負います。
これらの条項に同意しない場合、このソフトウェアパッケージに含まれるコードやファイルの使用、参照、配布は禁止されています。
詳細については、ルートディレクトリにあるLICENSEファイルを参照してください。", "Total training epochs (total_epoch)": "総エポック数", "Train": "学習", "Train feature index": "特徴索引の学習", diff --git a/i18n/locale/zh_CN.json b/i18n/locale/zh_CN.json index aaa75ae..24367be 100644 --- a/i18n/locale/zh_CN.json +++ b/i18n/locale/zh_CN.json @@ -133,7 +133,7 @@ "Takeover WASAPI device": "独占 WASAPI 设备", "Target sample rate": "目标采样率", "The audio file to be processed": "待处理音频文件", - "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.", + "This software is licensed under the GNU Affero General Public License, version 3.0 or later.
The author has no control or responsibility regarding the use of this software.
Users who use the software and distribute any content, including sounds or files generated by it, bear full responsibility for compliance with the terms of the AGPL 3.0 license.
If you do not accept these terms, you are prohibited from using, referencing, or distributing any code or files contained within this software package.
Refer to the LICENSE file located in the root directory for full details.": "本软件基于GNU Affero通用公共许可证3.0版本或更高版本进行许可。
作者对本软件的使用不承担任何控制权或责任。
使用本软件并分发任何内容(包括由其生成的声音或文件)的用户,需对遵守AGPL 3.0许可证条款承担全部责任。
如果您不接受这些条款,则禁止使用、引用或分发本软件包中包含的任何代码或文件。
请参阅位于根目录中的LICENSE文件以获取完整详情。", "Total training epochs (total_epoch)": "总训练轮数total_epoch", "Train": "训练", "Train feature index": "训练特征索引", diff --git a/infer/lib/rvcmd.py b/infer/lib/rvcmd.py index 439e346..a2a3ce5 100644 --- a/infer/lib/rvcmd.py +++ b/infer/lib/rvcmd.py @@ -162,15 +162,7 @@ def download_and_extract_zip(url: str, folder: str): logger.info(f"extracted into {folder}") -def download_dns_yaml(url: str, folder: str): - logger.info(f"downloading {url}") - response = requests.get(url, stream=True, timeout=(5, 10)) - with open(os.path.join(folder, "dns.yaml"), "wb") as out_file: - out_file.write(response.content) - logger.info(f"downloaded into {folder}") - - -def download_all_assets(tmpdir: str, version="0.2.5"): +def download_all_assets(tmpdir: str, version="0.2.11"): import subprocess import platform diff --git a/infer/modules/uvr5/modules.py b/infer/modules/uvr5/modules.py index 7b3af7c..6053d11 100644 --- a/infer/modules/uvr5/modules.py +++ b/infer/modules/uvr5/modules.py @@ -62,10 +62,6 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format os.path.basename(inp_path), ) resample_audio(inp_path, tmp_path, "pcm_s16le", "s16", 44100, "stereo") - try: # Remove the original file - os.remove(inp_path) - except Exception as e: - print(f"Failed to remove the original file: {e}") inp_path = tmp_path try: if done == 0: From 57bee42fc9a432e3e0c07c6a9a3e4a84de910b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 21 Nov 2025 15:10:40 +0800 Subject: [PATCH 09/19] fix: make ci happy --- .github/workflows/unitest.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/unitest.yml b/.github/workflows/unitest.yml index c02696d..1ea0ae2 100644 --- a/.github/workflows/unitest.yml +++ b/.github/workflows/unitest.yml @@ -11,12 +11,27 @@ jobs: steps: - uses: actions/checkout@master + - name: Space cleanup + env: + DEBIAN_FRONTEND: noninteractive + run: | + df -h + + docker rmi `docker images -q` + sudo rm -rf /usr/share/dotnet /etc/mysql /etc/php /etc/apt/sources.list.d + sudo -E apt-get -y purge azure-cli ghc* zulu* hhvm llvm* firefox google* dotnet* powershell openjdk* mysql* php* android* + sudo -E apt-get update + sudo -E apt-get -y autoremove --purge + sudo -E apt-get clean + + df -h - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | + nvidia-smi sudo apt update wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb sudo apt -y install ./rvcmd_linux_amd64.deb @@ -24,7 +39,7 @@ jobs: python -m pip install --upgrade setuptools python -m pip install --upgrade wheel pip install torch torchvision torchaudio - pip install -r requirements/dml.txt + pip install -r requirements/main.txt rvcmd -notrs -w 1 -notui assets/rvc - name: Test step 1 & 2 run: | From 7096797eaff3e7021314846389bd663486fe6f95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 21 Nov 2025 15:12:26 +0800 Subject: [PATCH 10/19] fix: make ci happy --- .github/workflows/unitest.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/unitest.yml b/.github/workflows/unitest.yml index 1ea0ae2..97e56a2 100644 --- a/.github/workflows/unitest.yml +++ b/.github/workflows/unitest.yml @@ -17,7 +17,6 @@ jobs: run: | df -h - docker rmi `docker images -q` sudo rm -rf /usr/share/dotnet /etc/mysql /etc/php /etc/apt/sources.list.d sudo -E apt-get -y purge azure-cli ghc* zulu* hhvm llvm* firefox google* dotnet* powershell openjdk* mysql* php* android* sudo -E apt-get update From 5f198f31c374478843e3646456abedcd56262687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 21 Nov 2025 15:14:22 +0800 Subject: [PATCH 11/19] fix: make ci happy --- .github/workflows/unitest.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/unitest.yml b/.github/workflows/unitest.yml index 97e56a2..57c5ebc 100644 --- a/.github/workflows/unitest.yml +++ b/.github/workflows/unitest.yml @@ -6,7 +6,7 @@ jobs: strategy: matrix: python-version: ["3.8", "3.9", "3.10"] - os: [ubuntu-latest] + os: [ubuntu-20.04] fail-fast: true steps: @@ -17,6 +17,7 @@ jobs: run: | df -h + docker rmi `docker images -q` sudo rm -rf /usr/share/dotnet /etc/mysql /etc/php /etc/apt/sources.list.d sudo -E apt-get -y purge azure-cli ghc* zulu* hhvm llvm* firefox google* dotnet* powershell openjdk* mysql* php* android* sudo -E apt-get update From e71b6c440835b177585786beede9da62eb5b7128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 21 Nov 2025 15:24:43 +0800 Subject: [PATCH 12/19] fix: make ci happy --- .github/workflows/unitest.yml | 47 ++++++++++++++++++++++++++++++++--- requirements/dml.txt | 3 +++ 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/.github/workflows/unitest.yml b/.github/workflows/unitest.yml index 57c5ebc..e693cfc 100644 --- a/.github/workflows/unitest.yml +++ b/.github/workflows/unitest.yml @@ -6,7 +6,7 @@ jobs: strategy: matrix: python-version: ["3.8", "3.9", "3.10"] - os: [ubuntu-20.04] + os: [ubuntu-latest] fail-fast: true steps: @@ -17,9 +17,48 @@ jobs: run: | df -h - docker rmi `docker images -q` - sudo rm -rf /usr/share/dotnet /etc/mysql /etc/php /etc/apt/sources.list.d - sudo -E apt-get -y purge azure-cli ghc* zulu* hhvm llvm* firefox google* dotnet* powershell openjdk* mysql* php* android* + # Source - https://stackoverflow.com/a + # Posted by Cosmin Bodnariuc + # Retrieved 2025-11-21, License - CC BY-SA 4.0 + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/share/boost + sudo rm -rf /usr/local/share/chromium + sudo rm -rf /usr/local/share/powershell + sudo rm -rf /usr/local/share/vcpkg + sudo rm -rf /usr/local/share/miniconda + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo rm -rf /opt/hostedtoolcache/go + sudo rm -rf /opt/hostedtoolcache/Python + sudo rm -rf /opt/hostedtoolcache/node + sudo rm -rf /opt/hostedtoolcache/R + sudo rm -rf /opt/hostedtoolcache/Java + sudo rm -rf /opt/hostedtoolcache/LLVM + sudo rm -rf /opt/hostedtoolcache/Swift + sudo rm -rf /opt/hostedtoolcache/Php + sudo rm -rf /opt/hostedtoolcache/Perl + sudo rm -rf /opt/hostedtoolcache/Scala + sudo rm -rf /opt/hostedtoolcache/Julia + sudo rm -rf /opt/hostedtoolcache/Mono + sudo rm -rf /opt/hostedtoolcache/PowerShell + sudo rm -rf /opt/hostedtoolcache/Crystal + sudo rm -rf /opt/hostedtoolcache/Elixir + sudo rm -rf /opt/hostedtoolcache/Erlang + sudo rm -rf /opt/hostedtoolcache/FSharp + sudo rm -rf /opt/hostedtoolcache/Haskell + sudo rm -rf /opt/hostedtoolcache/OCaml + sudo rm -rf /opt/hostedtoolcache/Rust + sudo rm -rf /opt/hostedtoolcache/Sbt + sudo rm -rf /opt/hostedtoolcache/Solidity + sudo rm -rf /opt/hostedtoolcache/VisualStudio + sudo rm -rf /opt/hostedtoolcache/WinAppDriver + sudo rm -rf /opt/hostedtoolcache/Xamarin + sudo rm -rf /opt/hostedtoolcache/Yarn + sudo rm -rf /opt/hostedtoolcache/Zephyr + sudo rm -rf /opt/hostedtoolcache/zig + sudo rm -rf /opt/hostedtoolcache/zulu + sudo rm -rf /opt/hostedtoolcache/azcopy + sudo -E apt-get update sudo -E apt-get -y autoremove --purge sudo -E apt-get clean diff --git a/requirements/dml.txt b/requirements/dml.txt index 5e0bd35..f54a76f 100644 --- a/requirements/dml.txt +++ b/requirements/dml.txt @@ -43,3 +43,6 @@ python-dotenv>=1.0.0 av torchfcpe pybase16384 +torch-directml +torchvision +torchaudio From 71cc31a96cdbc756feda78b4dffea4ab420ba85b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 21 Nov 2025 15:36:50 +0800 Subject: [PATCH 13/19] fix: make ci happy --- .github/workflows/unitest.yml | 4 +-- configs/config.py | 12 +++------ requirements/amd.txt | 1 - requirements/cpu.txt | 49 +++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 13 deletions(-) create mode 100644 requirements/cpu.txt diff --git a/.github/workflows/unitest.yml b/.github/workflows/unitest.yml index e693cfc..db6273e 100644 --- a/.github/workflows/unitest.yml +++ b/.github/workflows/unitest.yml @@ -70,15 +70,13 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - nvidia-smi - sudo apt update wget https://github.com/fumiama/RVC-Models-Downloader/releases/download/v0.2.11/rvcmd_linux_amd64.deb sudo apt -y install ./rvcmd_linux_amd64.deb pip install --force pip==24.0 # fix fairseq installing issue https://github.com/facebookresearch/fairseq/issues/5552 python -m pip install --upgrade setuptools python -m pip install --upgrade wheel pip install torch torchvision torchaudio - pip install -r requirements/main.txt + pip install -r requirements/cpu.txt rvcmd -notrs -w 1 -notui assets/rvc - name: Test step 1 & 2 run: | diff --git a/configs/config.py b/configs/config.py index 8a92f37..ef26272 100644 --- a/configs/config.py +++ b/configs/config.py @@ -4,6 +4,7 @@ import sys import json import shutil from multiprocessing import cpu_count +import importlib.util import torch @@ -46,7 +47,6 @@ class Config(metaclass=Singleton): self.global_link, self.noparallel, self.noautoopen, - self.dml, self.nocheck, self.update, ) = self.arg_parse() @@ -83,11 +83,6 @@ class Config(metaclass=Singleton): action="store_true", help="Do not open in browser automatically", ) - parser.add_argument( - "--dml", - action="store_true", - help="torch_dml", - ) parser.add_argument( "--nocheck", action="store_true", help="Run without checking assets" ) @@ -104,7 +99,6 @@ class Config(metaclass=Singleton): cmd_opts.global_link, cmd_opts.noparallel, cmd_opts.noautoopen, - cmd_opts.dml, cmd_opts.nocheck, cmd_opts.update, ) @@ -183,7 +177,7 @@ class Config(metaclass=Singleton): if self.has_xpu(): self.device = self.instead = "xpu:0" self.is_half = True - i_device = int(self.device.split(":")[-1]) + i_device = int(str(self.device).split(":")[-1]) self.gpu_name = torch.cuda.get_device_name(i_device) if ( ("16" in self.gpu_name and "V100" not in self.gpu_name.upper()) @@ -239,7 +233,7 @@ class Config(metaclass=Singleton): x_query = 5 x_center = 30 x_max = 32 - if self.dml: + if importlib.util.find_spec("torch_directml") is not None: logger.info("Use DirectML instead") import torch_directml diff --git a/requirements/amd.txt b/requirements/amd.txt index 2821e94..0f52b52 100644 --- a/requirements/amd.txt +++ b/requirements/amd.txt @@ -1,4 +1,3 @@ -tensorflow-rocm joblib>=1.1.0 numba numpy diff --git a/requirements/cpu.txt b/requirements/cpu.txt new file mode 100644 index 0000000..6bdaefb --- /dev/null +++ b/requirements/cpu.txt @@ -0,0 +1,49 @@ +joblib>=1.1.0 +numba +numpy +scipy +librosa>=0.10.2 +llvmlite +fairseq @ git+https://github.com/fumiama/fairseq.git +faiss-cpu +gradio +Cython +pydub>=0.25.1 +tensorboardX +Jinja2>=3.1.2 +json5 +Markdown +matplotlib>=3.7.0 +matplotlib-inline>=0.1.3 +praat-parselmouth>=0.4.2 +Pillow>=9.1.1 +resampy>=0.4.2 +scikit-learn +tensorboard +tqdm>=4.63.1 +tornado>=6.1 +Werkzeug>=2.2.3 +uc-micro-py>=1.0.1 +sympy>=1.11.1 +tabulate>=0.8.10 +PyYAML>=6.0 +pyasn1>=0.4.8 +pyasn1-modules>=0.2.8 +fsspec>=2022.11.0 +absl-py>=1.2.0 +audioread +uvicorn>=0.21.1 +colorama>=0.4.5 +pyworld==0.3.2 +httpx +onnxruntime; sys_platform == 'darwin' +torchcrepe>=0.0.23 +fastapi +torchfcpe +python-dotenv>=1.0.0 +av +pybase16384 +--extra-index-url https://download.pytorch.org/whl/cpu +torch +torchvision +torchaudio From 7fa122045f6af3ed913e12bea83c6393dfb00ae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 21 Nov 2025 15:48:03 +0800 Subject: [PATCH 14/19] fix(config): dml load & av codec ctx has no base_rate attr --- configs/config.py | 2 ++ infer/lib/audio.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/configs/config.py b/configs/config.py index ef26272..2476efb 100644 --- a/configs/config.py +++ b/configs/config.py @@ -50,6 +50,7 @@ class Config(metaclass=Singleton): self.nocheck, self.update, ) = self.arg_parse() + self.dml = False self.instead = "" self.preprocess_per = 3.7 self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() @@ -239,6 +240,7 @@ class Config(metaclass=Singleton): self.device = torch_directml.device(torch_directml.default_device()) self.is_half = False + self.dml = True else: if self.instead: logger.info(f"Use {self.instead} instead") diff --git a/infer/lib/audio.py b/infer/lib/audio.py index 8c4c092..ca03c78 100644 --- a/infer/lib/audio.py +++ b/infer/lib/audio.py @@ -195,7 +195,10 @@ def get_audio_properties(input_path: str) -> Tuple[int, int]: container = av.open(input_path) audio_stream = next(s for s in container.streams if s.type == "audio") channels = 1 if audio_stream.layout == "mono" else 2 - rate = audio_stream.base_rate + try: + rate = audio_stream.base_rate + except: + rate = audio_stream.sample_rate container.close() return channels, rate From 43d19eb00eed6175e2c348fce82bd395e80df62b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Fri, 21 Nov 2025 16:52:17 +0800 Subject: [PATCH 15/19] fix(dml): train extract_f0_print error ModuleNotFoundError: No module named 'torch.privateuseone' due to new prosess --- infer/modules/train/extract_f0_print.py | 3 +++ rvc/f0/f0.py | 6 ++--- rvc/f0/mel.py | 6 ++--- rvc/f0/rmvpe.py | 2 +- rvc/layers/attentions.py | 2 +- rvc/layers/encoders.py | 6 ++--- rvc/layers/generators.py | 1 + rvc/layers/norms.py | 2 +- rvc/layers/nsf.py | 9 +++---- rvc/layers/residuals.py | 6 ++--- rvc/layers/synthesizers.py | 32 +++++++++++++++---------- rvc/layers/utils.py | 10 ++++---- rvc/onnx/synthesizer.py | 4 ++-- 13 files changed, 50 insertions(+), 39 deletions(-) diff --git a/infer/modules/train/extract_f0_print.py b/infer/modules/train/extract_f0_print.py index 87b6b14..d0c3c03 100644 --- a/infer/modules/train/extract_f0_print.py +++ b/infer/modules/train/extract_f0_print.py @@ -2,6 +2,7 @@ import os import sys import traceback from pathlib import Path +import importlib.util from dotenv import load_dotenv @@ -38,6 +39,8 @@ f0method = sys.argv[3] device = sys.argv[4] is_half = sys.argv[5] == "True" +if importlib.util.find_spec("torch_directml") is not None: + import torch_directml # use side effect class FeatureInput(object): def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160): diff --git a/rvc/f0/f0.py b/rvc/f0/f0.py index 0f615e6..c79c4f8 100644 --- a/rvc/f0/f0.py +++ b/rvc/f0/f0.py @@ -11,14 +11,14 @@ class F0Predictor(object): f0_min=50, f0_max=1100, sampling_rate=44100, - device: Optional[str] = None, + device: Optional[Union[str, torch.device]] = None, ): self.hop_length = hop_length self.f0_min = f0_min self.f0_max = f0_max self.sampling_rate = sampling_rate - if device is None: - device = "cuda:0" if torch.cuda.is_available() else "cpu" + if not device: + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") self.device = device def compute_f0( diff --git a/rvc/f0/mel.py b/rvc/f0/mel.py index 2c06800..ee98b15 100644 --- a/rvc/f0/mel.py +++ b/rvc/f0/mel.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Union import torch import numpy as np @@ -17,9 +17,9 @@ class MelSpectrogram(torch.nn.Module): hop_length: int, n_fft: Optional[int] = None, mel_fmin: int = 0, - mel_fmax: int = None, + mel_fmax: Optional[int] = None, clamp: float = 1e-5, - device=torch.device("cpu"), + device: Union[str, torch.device] = torch.device("cpu"), ): super().__init__() if n_fft is None: diff --git a/rvc/f0/rmvpe.py b/rvc/f0/rmvpe.py index 33eac0a..02d4f0c 100644 --- a/rvc/f0/rmvpe.py +++ b/rvc/f0/rmvpe.py @@ -1,6 +1,6 @@ from io import BytesIO import os -from typing import Any, Optional, Union +from typing import Optional, Union import numpy as np import torch diff --git a/rvc/layers/attentions.py b/rvc/layers/attentions.py index 22b626d..3e1fd01 100644 --- a/rvc/layers/attentions.py +++ b/rvc/layers/attentions.py @@ -12,8 +12,8 @@ class MultiHeadAttention(nn.Module): channels: int, out_channels: int, n_heads: int, + window_size: int, p_dropout: float = 0.0, - window_size: Optional[int] = None, heads_share: bool = True, block_length: Optional[int] = None, proximal_bias: bool = False, diff --git a/rvc/layers/encoders.py b/rvc/layers/encoders.py index 1fc2478..eeaa3fd 100644 --- a/rvc/layers/encoders.py +++ b/rvc/layers/encoders.py @@ -42,8 +42,8 @@ class Encoder(nn.Module): hidden_channels, hidden_channels, n_heads, + window_size, p_dropout=p_dropout, - window_size=window_size, ) ) self.norm_layers_1.append(LayerNorm(hidden_channels)) @@ -121,7 +121,7 @@ class TextEncoder(nn.Module): def __call__( self, phone: torch.Tensor, - pitch: torch.Tensor, + pitch: Optional[torch.Tensor], lengths: torch.Tensor, skip_head: Optional[int] = None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: @@ -135,7 +135,7 @@ class TextEncoder(nn.Module): def forward( self, phone: torch.Tensor, - pitch: torch.Tensor, + pitch: Optional[torch.Tensor], lengths: torch.Tensor, skip_head: Optional[int] = None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: diff --git a/rvc/layers/generators.py b/rvc/layers/generators.py index 185f939..d78d6cd 100644 --- a/rvc/layers/generators.py +++ b/rvc/layers/generators.py @@ -46,6 +46,7 @@ class Generator(torch.nn.Module): self.resblocks = nn.ModuleList() resblock_module = ResBlock1 if resblock == "1" else ResBlock2 + ch = 0 for i in range(len(self.ups)): ch = upsample_initial_channel // (2 ** (i + 1)) for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes): diff --git a/rvc/layers/norms.py b/rvc/layers/norms.py index 9418035..4b07143 100644 --- a/rvc/layers/norms.py +++ b/rvc/layers/norms.py @@ -30,7 +30,7 @@ class WN(torch.nn.Module): dilation_rate: int, n_layers: int, gin_channels: int = 0, - p_dropout: int = 0, + p_dropout: float = 0, ): super(WN, self).__init__() assert kernel_size % 2 == 1 diff --git a/rvc/layers/nsf.py b/rvc/layers/nsf.py index 5e9e35a..22fd968 100644 --- a/rvc/layers/nsf.py +++ b/rvc/layers/nsf.py @@ -1,4 +1,4 @@ -from typing import Optional, List +from typing import Optional, List, Union import math import torch @@ -83,7 +83,7 @@ class NSFGenerator(torch.nn.Module): self.conv_pre = Conv1d( initial_channel, upsample_initial_channel, 7, 1, padding=3 ) - resblock = ResBlock1 if resblock == "1" else ResBlock2 + resblockcls = ResBlock1 if resblock == "1" else ResBlock2 self.ups = nn.ModuleList() for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): @@ -114,12 +114,13 @@ class NSFGenerator(torch.nn.Module): self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1)) self.resblocks = nn.ModuleList() + ch = 0 for i in range(len(self.ups)): - ch: int = upsample_initial_channel // (2 ** (i + 1)) + ch = upsample_initial_channel // (2 ** (i + 1)) for j, (k, d) in enumerate( zip(resblock_kernel_sizes, resblock_dilation_sizes) ): - self.resblocks.append(resblock(ch, k, d)) + self.resblocks.append(resblockcls(ch, k, d)) self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False) self.ups.apply(call_weight_data_normal_if_Conv) diff --git a/rvc/layers/residuals.py b/rvc/layers/residuals.py index 4a135ce..45b3d11 100644 --- a/rvc/layers/residuals.py +++ b/rvc/layers/residuals.py @@ -20,7 +20,7 @@ class ResBlock1(torch.nn.Module): self, channels: int, kernel_size: int = 3, - dilation: List[int] = (1, 3, 5), + dilation: List[int] = [1, 3, 5], ): super(ResBlock1, self).__init__() @@ -117,7 +117,7 @@ class ResBlock2(torch.nn.Module): self, channels: int, kernel_size=3, - dilation: List[int] = (1, 3), + dilation: List[int] = [1, 3], ): super(ResBlock2, self).__init__() self.convs = nn.ModuleList() @@ -182,7 +182,7 @@ class ResidualCouplingLayer(nn.Module): kernel_size: int, dilation_rate: int, n_layers: int, - p_dropout: int = 0, + p_dropout: float = 0, gin_channels: int = 0, mean_only: bool = False, ): diff --git a/rvc/layers/synthesizers.py b/rvc/layers/synthesizers.py index 474781e..2bb7ea4 100644 --- a/rvc/layers/synthesizers.py +++ b/rvc/layers/synthesizers.py @@ -34,7 +34,7 @@ class SynthesizerTrnMsNSFsid(nn.Module): upsample_kernel_sizes: List[int], spk_embed_dim: int, gin_channels: int, - sr: Optional[Union[str, int]], + sr: Union[str, int], encoder_dim: int, use_f0: bool, ): @@ -143,7 +143,7 @@ class SynthesizerTrnMsNSFsid(nn.Module): torch.nn.utils.remove_weight_norm(self.enc_q) return self - @torch.jit.ignore + @torch.jit.ignore() def forward( self, phone: torch.Tensor, @@ -155,18 +155,20 @@ class SynthesizerTrnMsNSFsid(nn.Module): pitchf: Optional[torch.Tensor] = None, ): # 这里ds是id,[bs,1] # print(1,pitch.shape)#[bs,t] - g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t,广播的 + embg = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t,广播的 m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths) - z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g) - z_p = self.flow(z, y_mask, g=g) + z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=embg) + z_p = self.flow(z, y_mask, g=embg) z_slice, ids_slice = rand_slice_segments_on_last_dim( z, y_lengths, self.segment_size ) - if pitchf is not None: + if pitchf is not None and isinstance(self.dec, NSFGenerator): pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size) - o = self.dec(z_slice, pitchf, g=g) + o = self.dec(z_slice, pitchf, g=embg) # type: ignore + elif isinstance(self.dec, Generator): + o = self.dec(z_slice, g=embg) else: - o = self.dec(z_slice, g=g) + raise KeyError(f"unknown dec type: {type(self.dec).__name__}") return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q) @torch.jit.export @@ -201,15 +203,17 @@ class SynthesizerTrnMsNSFsid(nn.Module): z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask z = self.flow(z_p, x_mask, g=g, reverse=True) del z_p, m_p, logs_p - if pitchf is not None: - o = self.dec( + if pitchf is not None and isinstance(self.dec, NSFGenerator): + o = self.dec( z * x_mask, pitchf, g=g, n_res=return_length2, ) - else: + elif isinstance(self.dec, Generator): o = self.dec(z * x_mask, g=g, n_res=return_length2) + else: + raise KeyError(f"unknown dec type: {type(self.dec).__name__}") del x_mask, z return o # , x_mask, (z, z_p, m_p, logs_p) @@ -326,7 +330,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid): upsample_kernel_sizes: List[int], spk_embed_dim: int, gin_channels: int, - sr=None, + sr: Union[str, int], ): super().__init__( spec_channels, @@ -346,6 +350,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid): upsample_kernel_sizes, spk_embed_dim, gin_channels, + sr, 256, False, ) @@ -371,7 +376,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid): upsample_kernel_sizes: List[int], spk_embed_dim: int, gin_channels: int, - sr=None, + sr: Union[str, int], ): super().__init__( spec_channels, @@ -391,6 +396,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid): upsample_kernel_sizes, spk_embed_dim, gin_channels, + sr, 768, False, ) diff --git a/rvc/layers/utils.py b/rvc/layers/utils.py index 418578b..bf5a9ec 100644 --- a/rvc/layers/utils.py +++ b/rvc/layers/utils.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple, Iterator +from typing import List, Optional, Tuple, Iterator, Union import torch @@ -17,7 +17,7 @@ def get_padding(kernel_size: int, dilation=1) -> int: def slice_on_last_dim( x: torch.Tensor, - start_indices: List[int], + start_indices: Union[List[int], torch.Tensor], segment_size=4, ) -> torch.Tensor: new_shape = [*x.shape] @@ -32,9 +32,9 @@ def slice_on_last_dim( def rand_slice_segments_on_last_dim( x: torch.Tensor, - x_lengths: int = None, + x_lengths: Optional[Union[int, torch.Tensor]] = None, segment_size=4, -) -> Tuple[torch.Tensor, List[int]]: +) -> Tuple[torch.Tensor, Union[List[int], torch.Tensor]]: b, _, t = x.size() if x_lengths is None: x_lengths = t @@ -58,7 +58,7 @@ def activate_add_tanh_sigmoid_multiply( def sequence_mask( length: torch.Tensor, max_length: Optional[int] = None, -) -> torch.BoolTensor: +): if max_length is None: max_length = int(length.max()) x = torch.arange(max_length, dtype=length.dtype, device=length.device) diff --git a/rvc/onnx/synthesizer.py b/rvc/onnx/synthesizer.py index e8bf516..9dafce8 100644 --- a/rvc/onnx/synthesizer.py +++ b/rvc/onnx/synthesizer.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import List, Union import torch @@ -25,7 +25,7 @@ class SynthesizerTrnMsNSFsid(SynthesizerBase): upsample_kernel_sizes: List[int], spk_embed_dim: int, gin_channels: int, - sr: Optional[Union[str, int]], + sr: Union[str, int], encoder_dim: int, ): super().__init__( From 96f7bccb12e5c53b0bdf97ea81a0ec8b1a8a81eb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 26 Nov 2025 13:06:55 +0800 Subject: [PATCH 16/19] chore(format): run black on dev (#136) Co-authored-by: github-actions[bot] --- infer/modules/train/extract_f0_print.py | 3 ++- rvc/layers/synthesizers.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/infer/modules/train/extract_f0_print.py b/infer/modules/train/extract_f0_print.py index d0c3c03..243606c 100644 --- a/infer/modules/train/extract_f0_print.py +++ b/infer/modules/train/extract_f0_print.py @@ -40,7 +40,8 @@ device = sys.argv[4] is_half = sys.argv[5] == "True" if importlib.util.find_spec("torch_directml") is not None: - import torch_directml # use side effect + import torch_directml # use side effect + class FeatureInput(object): def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160): diff --git a/rvc/layers/synthesizers.py b/rvc/layers/synthesizers.py index 2bb7ea4..c2c70ba 100644 --- a/rvc/layers/synthesizers.py +++ b/rvc/layers/synthesizers.py @@ -164,7 +164,7 @@ class SynthesizerTrnMsNSFsid(nn.Module): ) if pitchf is not None and isinstance(self.dec, NSFGenerator): pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size) - o = self.dec(z_slice, pitchf, g=embg) # type: ignore + o = self.dec(z_slice, pitchf, g=embg) # type: ignore elif isinstance(self.dec, Generator): o = self.dec(z_slice, g=embg) else: @@ -204,7 +204,7 @@ class SynthesizerTrnMsNSFsid(nn.Module): z = self.flow(z_p, x_mask, g=g, reverse=True) del z_p, m_p, logs_p if pitchf is not None and isinstance(self.dec, NSFGenerator): - o = self.dec( + o = self.dec( z * x_mask, pitchf, g=g, From cc50ede4fbbee1f923a47726e6260c9776cbce8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Sat, 18 Apr 2026 17:03:52 +0800 Subject: [PATCH 17/19] fix(train): extract f0 & feature hang --- infer/modules/train/extract_f0_print.py | 6 ++++++ infer/modules/train/extract_feature_print.py | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/infer/modules/train/extract_f0_print.py b/infer/modules/train/extract_f0_print.py index 243606c..5058a0a 100644 --- a/infer/modules/train/extract_f0_print.py +++ b/infer/modules/train/extract_f0_print.py @@ -106,6 +106,12 @@ if __name__ == "__main__": Config.use_insecure_load() printt(" ".join(sys.argv)) + # GPU methods (rmvpe, fcpe, crepe, etc.) gain nothing from multiprocessing since + # all processes share one GPU. Spawning n_p processes each lazily loading + # the model onto the same CUDA device exhausts VRAM and causes deadlocks. + if "cuda" in device: + printt("WARN: use 1 thread since GPU is used.") + n_p = 1 featureInput = FeatureInput(is_half, device) paths = [] inp_root = "%s/1_16k_wavs" % (exp_dir) diff --git a/infer/modules/train/extract_feature_print.py b/infer/modules/train/extract_feature_print.py index ef6fb6a..b9eef8a 100644 --- a/infer/modules/train/extract_feature_print.py +++ b/infer/modules/train/extract_feature_print.py @@ -17,7 +17,13 @@ device = sys.argv[1] n_part = int(sys.argv[2]) i_part = int(sys.argv[3]) i_gpu = sys.argv[4] -os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu) +# CUDA_VISIBLE_DEVICES expects bare GPU indices (e.g. "0" or "0,1"), +# but callers may pass "cuda:0", "cuda:0-cuda:1", etc. Strip the prefix +# and normalise separators so any combination works. +import re +i_gpu = re.sub(r'cuda:', '', str(i_gpu)) +i_gpu = i_gpu.replace('-', ',') +os.environ["CUDA_VISIBLE_DEVICES"] = i_gpu exp_dir = sys.argv[5] version = sys.argv[6] is_half = sys.argv[7].lower() == "true" From 3affc9415da3e9e0cf62d4a90e770d410cc3ff2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Sat, 18 Apr 2026 17:30:48 +0800 Subject: [PATCH 18/19] fix(train): unsupported gloo device on win --- infer/modules/train/train.py | 141 +++++++++++++++++++++++++---------- 1 file changed, 102 insertions(+), 39 deletions(-) diff --git a/infer/modules/train/train.py b/infer/modules/train/train.py index e8f7156..8c32671 100644 --- a/infer/modules/train/train.py +++ b/infer/modules/train/train.py @@ -106,23 +106,28 @@ def main(): # patch to unblock people without gpus. there is probably a better way. print("NO GPU DETECTED: falling back to CPU - this may take a while") n_gpus = 1 - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = str(randint(20000, 55555)) - children = [] logger = utils.get_logger(hps.model_dir) - for i in range(n_gpus): - subproc = mp.Process( - target=run, - args=(i, n_gpus, hps, logger), - ) - children.append(subproc) - subproc.start() + if n_gpus == 1: + # Single GPU: run directly without distributed to avoid gloo issues on Windows + run(0, 1, hps, logger) + else: + master_port = str(randint(20000, 55555)) + os.environ["MASTER_ADDR"] = "127.0.0.1" + os.environ["MASTER_PORT"] = master_port + children = [] + for i in range(n_gpus): + subproc = mp.Process( + target=run, + args=(i, n_gpus, hps, logger, master_port), + ) + children.append(subproc) + subproc.start() - for i in range(n_gpus): - children[i].join() + for i in range(n_gpus): + children[i].join() -def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger): +def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger, master_port: str = "29500"): global global_step if rank == 0: # logger = utils.get_logger(hps.model_dir) @@ -131,24 +136,81 @@ def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger): writer = SummaryWriter(log_dir=hps.model_dir) writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval")) - try: - dist.init_process_group( - backend=( - "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl" - ), - init_method="env://", - world_size=n_gpus, - rank=rank, - ) - except: - dist.init_process_group( - backend=( - "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl" - ), - init_method="env://?use_libuv=False", - world_size=n_gpus, - rank=rank, - ) + use_distributed = n_gpus > 1 + if use_distributed: + if os.name == "nt" or not torch.cuda.is_available(): + # On Windows, gloo's create_device(hostname=...) is gated to Linux only + # in the C++ layer (makeDeviceForHostname). We must use the interface- + # based path instead: create_device(interface=...) calls + # makeDeviceForInterface which is not platform-gated. + import socket as _socket + + try: + store = dist.TCPStore( + host_name="127.0.0.1", + port=int(master_port), + world_size=n_gpus, + is_master=(rank == 0), + ) + except Exception: + store = dist.TCPStore( + host_name="127.0.0.1", + port=int(master_port), + world_size=n_gpus, + is_master=(rank == 0), + use_libuv=False, + ) + + # Discover a working network interface for gloo device creation + gloo_device = None + try: + for idx, ifname in _socket.if_nameindex(): + try: + gloo_device = dist.ProcessGroupGloo.create_device( + interface=ifname + ) + print("Try device", idx, "name", ifname) + break + except RuntimeError as e: + print("Try device", idx, "name", ifname, "err:", e) + continue + except (OSError, AttributeError) as e: + print(e.with_traceback(None)) + + if gloo_device is None: + raise RuntimeError( + "Cannot create gloo device on Windows. " + "No usable network interface found. " + "Try adding your hostname to " + "C:\\Windows\\System32\\drivers\\etc\\hosts " + "with: 127.0.0.1 " + _socket.gethostname() + ) + + pg_options = dist.ProcessGroupGloo._Options() + pg_options._devices = [gloo_device] + dist.init_process_group( + backend="gloo", + store=store, + world_size=n_gpus, + rank=rank, + pg_options=pg_options, + ) + else: + init_url = f"tcp://127.0.0.1:{master_port}" + try: + dist.init_process_group( + backend="nccl", + init_method=init_url, + world_size=n_gpus, + rank=rank, + ) + except: + dist.init_process_group( + backend="nccl", + init_method=init_url + "?use_libuv=False", + world_size=n_gpus, + rank=rank, + ) torch.manual_seed(hps.train.seed) if torch.cuda.is_available(): torch.cuda.set_device(rank) @@ -221,14 +283,15 @@ def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger): ) # net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True) # net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True) - if hasattr(torch, "xpu") and torch.xpu.is_available(): - pass - elif torch.cuda.is_available(): - net_g = DDP(net_g, device_ids=[rank]) - net_d = DDP(net_d, device_ids=[rank]) - else: - net_g = DDP(net_g) - net_d = DDP(net_d) + if use_distributed: + if hasattr(torch, "xpu") and torch.xpu.is_available(): + pass + elif torch.cuda.is_available(): + net_g = DDP(net_g, device_ids=[rank]) + net_d = DDP(net_d, device_ids=[rank]) + else: + net_g = DDP(net_g) + net_d = DDP(net_d) try: # 如果能加载自动resume _, _, _, epoch_str = utils.load_checkpoint( From 645ce27dcc7e6159ba67a4b50a748b2be370af3c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 18 Apr 2026 17:36:44 +0800 Subject: [PATCH 19/19] chore(format): run black on dev (#143) Co-authored-by: github-actions[bot] --- infer/modules/train/extract_feature_print.py | 5 +++-- infer/modules/train/train.py | 4 +++- tools/web/infer-only.py | 6 ++---- web.py | 1 - 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/infer/modules/train/extract_feature_print.py b/infer/modules/train/extract_feature_print.py index b9eef8a..22755b5 100644 --- a/infer/modules/train/extract_feature_print.py +++ b/infer/modules/train/extract_feature_print.py @@ -21,8 +21,9 @@ i_gpu = sys.argv[4] # but callers may pass "cuda:0", "cuda:0-cuda:1", etc. Strip the prefix # and normalise separators so any combination works. import re -i_gpu = re.sub(r'cuda:', '', str(i_gpu)) -i_gpu = i_gpu.replace('-', ',') + +i_gpu = re.sub(r"cuda:", "", str(i_gpu)) +i_gpu = i_gpu.replace("-", ",") os.environ["CUDA_VISIBLE_DEVICES"] = i_gpu exp_dir = sys.argv[5] version = sys.argv[6] diff --git a/infer/modules/train/train.py b/infer/modules/train/train.py index 8c32671..c8b4875 100644 --- a/infer/modules/train/train.py +++ b/infer/modules/train/train.py @@ -127,7 +127,9 @@ def main(): children[i].join() -def run(rank, n_gpus, hps: utils.HParams, logger: logging.Logger, master_port: str = "29500"): +def run( + rank, n_gpus, hps: utils.HParams, logger: logging.Logger, master_port: str = "29500" +): global global_step if rank == 0: # logger = utils.get_logger(hps.model_dir) diff --git a/tools/web/infer-only.py b/tools/web/infer-only.py index 218788d..037031b 100644 --- a/tools/web/infer-only.py +++ b/tools/web/infer-only.py @@ -40,11 +40,9 @@ app = gr.Blocks() with app: with gr.Tabs(): with gr.TabItem("在线demo"): - gr.Markdown( - value=""" + gr.Markdown(value=""" RVC 在线demo - """ - ) + """) sid = gr.Dropdown(label=i18n("Inferencing voice"), choices=sorted(names)) with gr.Column(): spk_item = gr.Slider( diff --git a/web.py b/web.py index c493ec4..df7f11c 100644 --- a/web.py +++ b/web.py @@ -36,7 +36,6 @@ import threading import shutil import logging - logging.getLogger("numba").setLevel(logging.WARNING) logging.getLogger("httpx").setLevel(logging.WARNING)