1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-09 04:29:50 +08:00

fix(dml): train extract_f0_print error

ModuleNotFoundError: No module named 'torch.privateuseone' due to the new process
This commit is contained in:
源文雨
2025-11-21 16:52:17 +08:00
parent 7fa122045f
commit 43d19eb00e
13 changed files with 50 additions and 39 deletions

View File

@@ -2,6 +2,7 @@ import os
import sys import sys
import traceback import traceback
from pathlib import Path from pathlib import Path
import importlib.util
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -38,6 +39,8 @@ f0method = sys.argv[3]
device = sys.argv[4] device = sys.argv[4]
is_half = sys.argv[5] == "True" is_half = sys.argv[5] == "True"
if importlib.util.find_spec("torch_directml") is not None:
import torch_directml # use side effect
class FeatureInput(object): class FeatureInput(object):
def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160): def __init__(self, is_half: bool, device="cpu", samplerate=16000, hop_size=160):

View File

@@ -11,14 +11,14 @@ class F0Predictor(object):
f0_min=50, f0_min=50,
f0_max=1100, f0_max=1100,
sampling_rate=44100, sampling_rate=44100,
device: Optional[str] = None, device: Optional[Union[str, torch.device]] = None,
): ):
self.hop_length = hop_length self.hop_length = hop_length
self.f0_min = f0_min self.f0_min = f0_min
self.f0_max = f0_max self.f0_max = f0_max
self.sampling_rate = sampling_rate self.sampling_rate = sampling_rate
if device is None: if not device:
device = "cuda:0" if torch.cuda.is_available() else "cpu" device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
self.device = device self.device = device
def compute_f0( def compute_f0(

View File

@@ -1,4 +1,4 @@
from typing import Optional from typing import Optional, Union
import torch import torch
import numpy as np import numpy as np
@@ -17,9 +17,9 @@ class MelSpectrogram(torch.nn.Module):
hop_length: int, hop_length: int,
n_fft: Optional[int] = None, n_fft: Optional[int] = None,
mel_fmin: int = 0, mel_fmin: int = 0,
mel_fmax: int = None, mel_fmax: Optional[int] = None,
clamp: float = 1e-5, clamp: float = 1e-5,
device=torch.device("cpu"), device: Union[str, torch.device] = torch.device("cpu"),
): ):
super().__init__() super().__init__()
if n_fft is None: if n_fft is None:

View File

@@ -1,6 +1,6 @@
from io import BytesIO from io import BytesIO
import os import os
from typing import Any, Optional, Union from typing import Optional, Union
import numpy as np import numpy as np
import torch import torch

View File

@@ -12,8 +12,8 @@ class MultiHeadAttention(nn.Module):
channels: int, channels: int,
out_channels: int, out_channels: int,
n_heads: int, n_heads: int,
window_size: int,
p_dropout: float = 0.0, p_dropout: float = 0.0,
window_size: Optional[int] = None,
heads_share: bool = True, heads_share: bool = True,
block_length: Optional[int] = None, block_length: Optional[int] = None,
proximal_bias: bool = False, proximal_bias: bool = False,

View File

@@ -42,8 +42,8 @@ class Encoder(nn.Module):
hidden_channels, hidden_channels,
hidden_channels, hidden_channels,
n_heads, n_heads,
window_size,
p_dropout=p_dropout, p_dropout=p_dropout,
window_size=window_size,
) )
) )
self.norm_layers_1.append(LayerNorm(hidden_channels)) self.norm_layers_1.append(LayerNorm(hidden_channels))
@@ -121,7 +121,7 @@ class TextEncoder(nn.Module):
def __call__( def __call__(
self, self,
phone: torch.Tensor, phone: torch.Tensor,
pitch: torch.Tensor, pitch: Optional[torch.Tensor],
lengths: torch.Tensor, lengths: torch.Tensor,
skip_head: Optional[int] = None, skip_head: Optional[int] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
@@ -135,7 +135,7 @@ class TextEncoder(nn.Module):
def forward( def forward(
self, self,
phone: torch.Tensor, phone: torch.Tensor,
pitch: torch.Tensor, pitch: Optional[torch.Tensor],
lengths: torch.Tensor, lengths: torch.Tensor,
skip_head: Optional[int] = None, skip_head: Optional[int] = None,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:

View File

@@ -46,6 +46,7 @@ class Generator(torch.nn.Module):
self.resblocks = nn.ModuleList() self.resblocks = nn.ModuleList()
resblock_module = ResBlock1 if resblock == "1" else ResBlock2 resblock_module = ResBlock1 if resblock == "1" else ResBlock2
ch = 0
for i in range(len(self.ups)): for i in range(len(self.ups)):
ch = upsample_initial_channel // (2 ** (i + 1)) ch = upsample_initial_channel // (2 ** (i + 1))
for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes): for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):

View File

@@ -30,7 +30,7 @@ class WN(torch.nn.Module):
dilation_rate: int, dilation_rate: int,
n_layers: int, n_layers: int,
gin_channels: int = 0, gin_channels: int = 0,
p_dropout: int = 0, p_dropout: float = 0,
): ):
super(WN, self).__init__() super(WN, self).__init__()
assert kernel_size % 2 == 1 assert kernel_size % 2 == 1

View File

@@ -1,4 +1,4 @@
from typing import Optional, List from typing import Optional, List, Union
import math import math
import torch import torch
@@ -83,7 +83,7 @@ class NSFGenerator(torch.nn.Module):
self.conv_pre = Conv1d( self.conv_pre = Conv1d(
initial_channel, upsample_initial_channel, 7, 1, padding=3 initial_channel, upsample_initial_channel, 7, 1, padding=3
) )
resblock = ResBlock1 if resblock == "1" else ResBlock2 resblockcls = ResBlock1 if resblock == "1" else ResBlock2
self.ups = nn.ModuleList() self.ups = nn.ModuleList()
for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
@@ -114,12 +114,13 @@ class NSFGenerator(torch.nn.Module):
self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1)) self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
self.resblocks = nn.ModuleList() self.resblocks = nn.ModuleList()
ch = 0
for i in range(len(self.ups)): for i in range(len(self.ups)):
ch: int = upsample_initial_channel // (2 ** (i + 1)) ch = upsample_initial_channel // (2 ** (i + 1))
for j, (k, d) in enumerate( for j, (k, d) in enumerate(
zip(resblock_kernel_sizes, resblock_dilation_sizes) zip(resblock_kernel_sizes, resblock_dilation_sizes)
): ):
self.resblocks.append(resblock(ch, k, d)) self.resblocks.append(resblockcls(ch, k, d))
self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False) self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False)
self.ups.apply(call_weight_data_normal_if_Conv) self.ups.apply(call_weight_data_normal_if_Conv)

View File

@@ -20,7 +20,7 @@ class ResBlock1(torch.nn.Module):
self, self,
channels: int, channels: int,
kernel_size: int = 3, kernel_size: int = 3,
dilation: List[int] = (1, 3, 5), dilation: List[int] = [1, 3, 5],
): ):
super(ResBlock1, self).__init__() super(ResBlock1, self).__init__()
@@ -117,7 +117,7 @@ class ResBlock2(torch.nn.Module):
self, self,
channels: int, channels: int,
kernel_size=3, kernel_size=3,
dilation: List[int] = (1, 3), dilation: List[int] = [1, 3],
): ):
super(ResBlock2, self).__init__() super(ResBlock2, self).__init__()
self.convs = nn.ModuleList() self.convs = nn.ModuleList()
@@ -182,7 +182,7 @@ class ResidualCouplingLayer(nn.Module):
kernel_size: int, kernel_size: int,
dilation_rate: int, dilation_rate: int,
n_layers: int, n_layers: int,
p_dropout: int = 0, p_dropout: float = 0,
gin_channels: int = 0, gin_channels: int = 0,
mean_only: bool = False, mean_only: bool = False,
): ):

View File

@@ -34,7 +34,7 @@ class SynthesizerTrnMsNSFsid(nn.Module):
upsample_kernel_sizes: List[int], upsample_kernel_sizes: List[int],
spk_embed_dim: int, spk_embed_dim: int,
gin_channels: int, gin_channels: int,
sr: Optional[Union[str, int]], sr: Union[str, int],
encoder_dim: int, encoder_dim: int,
use_f0: bool, use_f0: bool,
): ):
@@ -143,7 +143,7 @@ class SynthesizerTrnMsNSFsid(nn.Module):
torch.nn.utils.remove_weight_norm(self.enc_q) torch.nn.utils.remove_weight_norm(self.enc_q)
return self return self
@torch.jit.ignore @torch.jit.ignore()
def forward( def forward(
self, self,
phone: torch.Tensor, phone: torch.Tensor,
@@ -155,18 +155,20 @@ class SynthesizerTrnMsNSFsid(nn.Module):
pitchf: Optional[torch.Tensor] = None, pitchf: Optional[torch.Tensor] = None,
): # 这里ds是id[bs,1] ): # 这里ds是id[bs,1]
# print(1,pitch.shape)#[bs,t] # print(1,pitch.shape)#[bs,t]
g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t广播的 embg = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t广播的
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths) m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g) z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=embg)
z_p = self.flow(z, y_mask, g=g) z_p = self.flow(z, y_mask, g=embg)
z_slice, ids_slice = rand_slice_segments_on_last_dim( z_slice, ids_slice = rand_slice_segments_on_last_dim(
z, y_lengths, self.segment_size z, y_lengths, self.segment_size
) )
if pitchf is not None: if pitchf is not None and isinstance(self.dec, NSFGenerator):
pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size) pitchf = slice_on_last_dim(pitchf, ids_slice, self.segment_size)
o = self.dec(z_slice, pitchf, g=g) o = self.dec(z_slice, pitchf, g=embg) # type: ignore
elif isinstance(self.dec, Generator):
o = self.dec(z_slice, g=embg)
else: else:
o = self.dec(z_slice, g=g) raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q) return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
@torch.jit.export @torch.jit.export
@@ -201,15 +203,17 @@ class SynthesizerTrnMsNSFsid(nn.Module):
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
z = self.flow(z_p, x_mask, g=g, reverse=True) z = self.flow(z_p, x_mask, g=g, reverse=True)
del z_p, m_p, logs_p del z_p, m_p, logs_p
if pitchf is not None: if pitchf is not None and isinstance(self.dec, NSFGenerator):
o = self.dec( o = self.dec(
z * x_mask, z * x_mask,
pitchf, pitchf,
g=g, g=g,
n_res=return_length2, n_res=return_length2,
) )
else: elif isinstance(self.dec, Generator):
o = self.dec(z * x_mask, g=g, n_res=return_length2) o = self.dec(z * x_mask, g=g, n_res=return_length2)
else:
raise KeyError(f"unknown dec type: {type(self.dec).__name__}")
del x_mask, z del x_mask, z
return o # , x_mask, (z, z_p, m_p, logs_p) return o # , x_mask, (z, z_p, m_p, logs_p)
@@ -326,7 +330,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes: List[int], upsample_kernel_sizes: List[int],
spk_embed_dim: int, spk_embed_dim: int,
gin_channels: int, gin_channels: int,
sr=None, sr: Union[str, int],
): ):
super().__init__( super().__init__(
spec_channels, spec_channels,
@@ -346,6 +350,7 @@ class SynthesizerTrnMs256NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes, upsample_kernel_sizes,
spk_embed_dim, spk_embed_dim,
gin_channels, gin_channels,
sr,
256, 256,
False, False,
) )
@@ -371,7 +376,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes: List[int], upsample_kernel_sizes: List[int],
spk_embed_dim: int, spk_embed_dim: int,
gin_channels: int, gin_channels: int,
sr=None, sr: Union[str, int],
): ):
super().__init__( super().__init__(
spec_channels, spec_channels,
@@ -391,6 +396,7 @@ class SynthesizerTrnMs768NSFsid_nono(SynthesizerTrnMsNSFsid):
upsample_kernel_sizes, upsample_kernel_sizes,
spk_embed_dim, spk_embed_dim,
gin_channels, gin_channels,
sr,
768, 768,
False, False,
) )

View File

@@ -1,4 +1,4 @@
from typing import List, Optional, Tuple, Iterator from typing import List, Optional, Tuple, Iterator, Union
import torch import torch
@@ -17,7 +17,7 @@ def get_padding(kernel_size: int, dilation=1) -> int:
def slice_on_last_dim( def slice_on_last_dim(
x: torch.Tensor, x: torch.Tensor,
start_indices: List[int], start_indices: Union[List[int], torch.Tensor],
segment_size=4, segment_size=4,
) -> torch.Tensor: ) -> torch.Tensor:
new_shape = [*x.shape] new_shape = [*x.shape]
@@ -32,9 +32,9 @@ def slice_on_last_dim(
def rand_slice_segments_on_last_dim( def rand_slice_segments_on_last_dim(
x: torch.Tensor, x: torch.Tensor,
x_lengths: int = None, x_lengths: Optional[Union[int, torch.Tensor]] = None,
segment_size=4, segment_size=4,
) -> Tuple[torch.Tensor, List[int]]: ) -> Tuple[torch.Tensor, Union[List[int], torch.Tensor]]:
b, _, t = x.size() b, _, t = x.size()
if x_lengths is None: if x_lengths is None:
x_lengths = t x_lengths = t
@@ -58,7 +58,7 @@ def activate_add_tanh_sigmoid_multiply(
def sequence_mask( def sequence_mask(
length: torch.Tensor, length: torch.Tensor,
max_length: Optional[int] = None, max_length: Optional[int] = None,
) -> torch.BoolTensor: ):
if max_length is None: if max_length is None:
max_length = int(length.max()) max_length = int(length.max())
x = torch.arange(max_length, dtype=length.dtype, device=length.device) x = torch.arange(max_length, dtype=length.dtype, device=length.device)

View File

@@ -1,4 +1,4 @@
from typing import List, Optional, Union from typing import List, Union
import torch import torch
@@ -25,7 +25,7 @@ class SynthesizerTrnMsNSFsid(SynthesizerBase):
upsample_kernel_sizes: List[int], upsample_kernel_sizes: List[int],
spk_embed_dim: int, spk_embed_dim: int,
gin_channels: int, gin_channels: int,
sr: Optional[Union[str, int]], sr: Union[str, int],
encoder_dim: int, encoder_dim: int,
): ):
super().__init__( super().__init__(