diff --git a/infer/modules/train/extract_f0_print.py b/infer/modules/train/extract_f0_print.py
index 243606c..5058a0a 100644
--- a/infer/modules/train/extract_f0_print.py
+++ b/infer/modules/train/extract_f0_print.py
@@ -106,6 +106,12 @@ if __name__ == "__main__":
     Config.use_insecure_load()
 
     printt(" ".join(sys.argv))
+    # Force n_p to 1 when the device string contains "cuda". Rationale from the change
+    # author: GPU methods (rmvpe, fcpe, crepe, etc.) all share one GPU, so n_p processes
+    # each lazily loading the model onto it exhaust VRAM and cause deadlocks.
+    if "cuda" in device:
+        printt("WARN: use 1 thread since GPU is used.")
+        n_p = 1
     featureInput = FeatureInput(is_half, device)
     paths = []
     inp_root = "%s/1_16k_wavs" % (exp_dir)
diff --git a/infer/modules/train/extract_feature_print.py b/infer/modules/train/extract_feature_print.py
index ef6fb6a..b9eef8a 100644
--- a/infer/modules/train/extract_feature_print.py
+++ b/infer/modules/train/extract_feature_print.py
@@ -17,7 +17,13 @@ device = sys.argv[1]
 n_part = int(sys.argv[2])
 i_part = int(sys.argv[3])
 i_gpu = sys.argv[4]
-os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
+# CUDA_VISIBLE_DEVICES takes comma-separated device ids (e.g. "0" or "0,1"), but
+# callers may pass "cuda:0", "cuda:0-cuda:1", etc. Strip every "cuda:" prefix and
+# turn each "-" into "," (NOTE(review): this would also split UUID-style ids).
+import re
+i_gpu = re.sub(r'cuda:', '', str(i_gpu))
+i_gpu = i_gpu.replace('-', ',')
+os.environ["CUDA_VISIBLE_DEVICES"] = i_gpu
 exp_dir = sys.argv[5]
 version = sys.argv[6]
 is_half = sys.argv[7].lower() == "true"