mirror of
https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git
synced 2026-06-07 19:20:28 +08:00
chore(format): run black on dev (#5)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
5eed789fe7
commit
ea66e6d28c
@@ -73,6 +73,7 @@ class Encoder(nn.Module):
|
|||||||
x = x * x_mask
|
x = x * x_mask
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
class Decoder(nn.Module):
|
class Decoder(nn.Module):
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -158,6 +159,7 @@ class Decoder(nn.Module):
|
|||||||
return x
|
return x
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class MultiHeadAttention(nn.Module):
|
class MultiHeadAttention(nn.Module):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -60,7 +60,9 @@ class TextEncoder(nn.Module):
|
|||||||
# skip_head: Optional[torch.Tensor] = None,
|
# skip_head: Optional[torch.Tensor] = None,
|
||||||
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
||||||
return super().__call__(
|
return super().__call__(
|
||||||
phone, pitch, lengths,
|
phone,
|
||||||
|
pitch,
|
||||||
|
lengths,
|
||||||
# skip_head=skip_head,
|
# skip_head=skip_head,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -80,8 +82,10 @@ class TextEncoder(nn.Module):
|
|||||||
x = torch.transpose(x, 1, -1) # [b, h, t]
|
x = torch.transpose(x, 1, -1) # [b, h, t]
|
||||||
x_mask = torch.unsqueeze(
|
x_mask = torch.unsqueeze(
|
||||||
utils.sequence_mask(
|
utils.sequence_mask(
|
||||||
lengths, x.size(2),
|
lengths,
|
||||||
), 1,
|
x.size(2),
|
||||||
|
),
|
||||||
|
1,
|
||||||
).to(x.dtype)
|
).to(x.dtype)
|
||||||
x = self.encoder(x * x_mask, x_mask)
|
x = self.encoder(x * x_mask, x_mask)
|
||||||
"""
|
"""
|
||||||
@@ -193,17 +197,19 @@ class PosteriorEncoder(nn.Module):
|
|||||||
self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)
|
self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1)
|
||||||
|
|
||||||
def __call__(
|
def __call__(
|
||||||
self, x: torch.Tensor, x_lengths: torch.Tensor, g: Optional[torch.Tensor] = None
|
self, x: torch.Tensor, x_lengths: torch.Tensor, g: Optional[torch.Tensor] = None
|
||||||
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
|
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
|
||||||
super().__call__(x, x_lengths, g = g)
|
super().__call__(x, x_lengths, g=g)
|
||||||
|
|
||||||
def forward(
|
def forward(
|
||||||
self, x: torch.Tensor, x_lengths: torch.Tensor, g: Optional[torch.Tensor] = None
|
self, x: torch.Tensor, x_lengths: torch.Tensor, g: Optional[torch.Tensor] = None
|
||||||
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
|
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
|
||||||
x_mask = torch.unsqueeze(
|
x_mask = torch.unsqueeze(
|
||||||
utils.sequence_mask(
|
utils.sequence_mask(
|
||||||
x_lengths, x.size(2),
|
x_lengths,
|
||||||
), 1,
|
x.size(2),
|
||||||
|
),
|
||||||
|
1,
|
||||||
).to(x.dtype)
|
).to(x.dtype)
|
||||||
x = self.pre(x) * x_mask
|
x = self.pre(x) * x_mask
|
||||||
x = self.enc(x, x_mask, g=g)
|
x = self.enc(x, x_mask, g=g)
|
||||||
@@ -663,11 +669,12 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
|
|||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
super(SynthesizerTrnMs256NSFsid, self).__init__()
|
super(SynthesizerTrnMs256NSFsid, self).__init__()
|
||||||
if isinstance(sr, str): sr = {
|
if isinstance(sr, str):
|
||||||
"32k": 32000,
|
sr = {
|
||||||
"40k": 40000,
|
"32k": 32000,
|
||||||
"48k": 48000,
|
"40k": 40000,
|
||||||
}[sr]
|
"48k": 48000,
|
||||||
|
}[sr]
|
||||||
self.spec_channels = spec_channels
|
self.spec_channels = spec_channels
|
||||||
self.inter_channels = inter_channels
|
self.inter_channels = inter_channels
|
||||||
self.hidden_channels = hidden_channels
|
self.hidden_channels = hidden_channels
|
||||||
@@ -776,9 +783,7 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
|
|||||||
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
|
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
|
||||||
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
|
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
|
||||||
z_p = self.flow(z, y_mask, g=g)
|
z_p = self.flow(z, y_mask, g=g)
|
||||||
z_slice, ids_slice = utils.rand_slice_segments(
|
z_slice, ids_slice = utils.rand_slice_segments(z, y_lengths, self.segment_size)
|
||||||
z, y_lengths, self.segment_size
|
|
||||||
)
|
|
||||||
# print(-1,pitchf.shape,ids_slice,self.segment_size,self.hop_length,self.segment_size//self.hop_length)
|
# print(-1,pitchf.shape,ids_slice,self.segment_size,self.hop_length,self.segment_size//self.hop_length)
|
||||||
pitchf = utils.slice_on_last_dim(pitchf, ids_slice, self.segment_size)
|
pitchf = utils.slice_on_last_dim(pitchf, ids_slice, self.segment_size)
|
||||||
# print(-2,pitchf.shape,z_slice.shape)
|
# print(-2,pitchf.shape,z_slice.shape)
|
||||||
@@ -816,7 +821,9 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
|
|||||||
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
|
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
|
||||||
z = self.flow(z_p, x_mask, g=g, reverse=True)
|
z = self.flow(z_p, x_mask, g=g, reverse=True)
|
||||||
o = self.dec(
|
o = self.dec(
|
||||||
z * x_mask, nsff0, g=g,
|
z * x_mask,
|
||||||
|
nsff0,
|
||||||
|
g=g,
|
||||||
# n_res=return_length2,
|
# n_res=return_length2,
|
||||||
)
|
)
|
||||||
return o, x_mask, (z, z_p, m_p, logs_p)
|
return o, x_mask, (z, z_p, m_p, logs_p)
|
||||||
@@ -1000,9 +1007,7 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
|
|||||||
m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
|
m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
|
||||||
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
|
z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
|
||||||
z_p = self.flow(z, y_mask, g=g)
|
z_p = self.flow(z, y_mask, g=g)
|
||||||
z_slice, ids_slice = utils.rand_slice_segments(
|
z_slice, ids_slice = utils.rand_slice_segments(z, y_lengths, self.segment_size)
|
||||||
z, y_lengths, self.segment_size
|
|
||||||
)
|
|
||||||
o = self.dec(z_slice, g=g)
|
o = self.dec(z_slice, g=g)
|
||||||
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
|
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
|
||||||
|
|
||||||
@@ -1014,7 +1019,7 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
|
|||||||
sid: torch.Tensor,
|
sid: torch.Tensor,
|
||||||
skip_head: Optional[torch.Tensor] = None,
|
skip_head: Optional[torch.Tensor] = None,
|
||||||
return_length: Optional[torch.Tensor] = None,
|
return_length: Optional[torch.Tensor] = None,
|
||||||
#return_length2: Optional[torch.Tensor] = None,
|
# return_length2: Optional[torch.Tensor] = None,
|
||||||
):
|
):
|
||||||
g = self.emb_g(sid).unsqueeze(-1)
|
g = self.emb_g(sid).unsqueeze(-1)
|
||||||
if skip_head is not None and return_length is not None:
|
if skip_head is not None and return_length is not None:
|
||||||
@@ -1034,7 +1039,8 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
|
|||||||
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
|
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
|
||||||
z = self.flow(z_p, x_mask, g=g, reverse=True)
|
z = self.flow(z_p, x_mask, g=g, reverse=True)
|
||||||
o = self.dec(
|
o = self.dec(
|
||||||
z * x_mask, g=g,
|
z * x_mask,
|
||||||
|
g=g,
|
||||||
# n_res=return_length2
|
# n_res=return_length2
|
||||||
)
|
)
|
||||||
return o, x_mask, (z, z_p, m_p, logs_p)
|
return o, x_mask, (z, z_p, m_p, logs_p)
|
||||||
|
|||||||
@@ -1,7 +1,15 @@
|
|||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from .attentions import TextEncoder, ResidualCouplingBlock, PosteriorEncoder, Generator, SineGen, SourceModuleHnNSF, GeneratorNSF
|
from .attentions import (
|
||||||
|
TextEncoder,
|
||||||
|
ResidualCouplingBlock,
|
||||||
|
PosteriorEncoder,
|
||||||
|
Generator,
|
||||||
|
SineGen,
|
||||||
|
SourceModuleHnNSF,
|
||||||
|
GeneratorNSF,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SynthesizerTrnMsNSFsidM(nn.Module):
|
class SynthesizerTrnMsNSFsidM(nn.Module):
|
||||||
@@ -29,11 +37,12 @@ class SynthesizerTrnMsNSFsidM(nn.Module):
|
|||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
super(SynthesizerTrnMsNSFsidM, self).__init__()
|
super(SynthesizerTrnMsNSFsidM, self).__init__()
|
||||||
if isinstance(sr, str): sr = {
|
if isinstance(sr, str):
|
||||||
"32k": 32000,
|
sr = {
|
||||||
"40k": 40000,
|
"32k": 32000,
|
||||||
"48k": 48000,
|
"40k": 40000,
|
||||||
}[sr]
|
"48k": 48000,
|
||||||
|
}[sr]
|
||||||
self.spec_channels = spec_channels
|
self.spec_channels = spec_channels
|
||||||
self.inter_channels = inter_channels
|
self.inter_channels = inter_channels
|
||||||
self.hidden_channels = hidden_channels
|
self.hidden_channels = hidden_channels
|
||||||
|
|||||||
25
rvc/utils.py
25
rvc/utils.py
@@ -2,11 +2,12 @@ from typing import List, Optional, Tuple
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
def call_weight_data_normal_if_Conv(m: torch.nn.Module):
|
def call_weight_data_normal_if_Conv(m: torch.nn.Module):
|
||||||
classname = m.__class__.__name__
|
classname = m.__class__.__name__
|
||||||
if classname.find("Conv") != -1:
|
if classname.find("Conv") != -1:
|
||||||
mean=0.0
|
mean = 0.0
|
||||||
std=0.01
|
std = 0.01
|
||||||
m.weight.data.normal_(mean, std)
|
m.weight.data.normal_(mean, std)
|
||||||
|
|
||||||
|
|
||||||
@@ -15,8 +16,10 @@ def get_padding(kernel_size: int, dilation=1):
|
|||||||
|
|
||||||
|
|
||||||
def slice_on_last_dim(
|
def slice_on_last_dim(
|
||||||
x: torch.Tensor, start_indices: List[int], segment_size=4,
|
x: torch.Tensor,
|
||||||
) -> torch.Tensor:
|
start_indices: List[int],
|
||||||
|
segment_size=4,
|
||||||
|
) -> torch.Tensor:
|
||||||
new_shape = x.shape
|
new_shape = x.shape
|
||||||
new_shape[-1] = segment_size
|
new_shape[-1] = segment_size
|
||||||
ret = torch.empty(new_shape)
|
ret = torch.empty(new_shape)
|
||||||
@@ -28,10 +31,13 @@ def slice_on_last_dim(
|
|||||||
|
|
||||||
|
|
||||||
def rand_slice_segments(
|
def rand_slice_segments(
|
||||||
x: torch.Tensor, x_lengths: int = None, segment_size=4,
|
x: torch.Tensor,
|
||||||
) -> Tuple[torch.Tensor, List[int]]:
|
x_lengths: int = None,
|
||||||
|
segment_size=4,
|
||||||
|
) -> Tuple[torch.Tensor, List[int]]:
|
||||||
b, _, t = x.size()
|
b, _, t = x.size()
|
||||||
if x_lengths is None: x_lengths = t
|
if x_lengths is None:
|
||||||
|
x_lengths = t
|
||||||
ids_str_max = x_lengths - segment_size + 1
|
ids_str_max = x_lengths - segment_size + 1
|
||||||
ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long)
|
ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long)
|
||||||
ret = slice_on_last_dim(x, ids_str, segment_size)
|
ret = slice_on_last_dim(x, ids_str, segment_size)
|
||||||
@@ -53,8 +59,9 @@ def convert_pad_shape(pad_shape: List[List[int]]) -> List[int]:
|
|||||||
|
|
||||||
|
|
||||||
def sequence_mask(
|
def sequence_mask(
|
||||||
length: torch.Tensor, max_length: Optional[int] = None,
|
length: torch.Tensor,
|
||||||
) -> torch.BoolTensor:
|
max_length: Optional[int] = None,
|
||||||
|
) -> torch.BoolTensor:
|
||||||
if max_length is None:
|
if max_length is None:
|
||||||
max_length = int(length.max())
|
max_length = int(length.max())
|
||||||
x = torch.arange(max_length, dtype=length.dtype, device=length.device)
|
x = torch.arange(max_length, dtype=length.dtype, device=length.device)
|
||||||
|
|||||||
Reference in New Issue
Block a user