Mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git (last synced 2026-06-07 19:40:44 +08:00)
optimize(rvc): move the modules under rvc/ into the rvc/layers subpackage
This commit is contained in:
@@ -1,9 +1,9 @@
|
|||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from rvc.nsf import NSFGenerator
|
from rvc.layers.nsf import NSFGenerator
|
||||||
from rvc.encoders import TextEncoder, PosteriorEncoder
|
from rvc.layers.encoders import TextEncoder, PosteriorEncoder
|
||||||
from rvc.residuals import ResidualCouplingBlock
|
from rvc.layers.residuals import ResidualCouplingBlock
|
||||||
|
|
||||||
|
|
||||||
class SynthesizerTrnMsNSFsidM(nn.Module):
|
class SynthesizerTrnMsNSFsidM(nn.Module):
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
from rvc.synthesizers import SynthesizerTrnMsNSFsid
|
from rvc.layers.synthesizers import SynthesizerTrnMsNSFsid
|
||||||
|
|
||||||
|
|
||||||
def get_synthesizer_ckpt(cpt, device=torch.device("cpu")):
|
def get_synthesizer_ckpt(cpt, device=torch.device("cpu")):
|
||||||
|
|||||||
@@ -56,15 +56,15 @@ from infer.lib.train.data_utils import (
|
|||||||
TextAudioLoaderMultiNSFsid,
|
TextAudioLoaderMultiNSFsid,
|
||||||
)
|
)
|
||||||
|
|
||||||
from rvc.discriminators import MultiPeriodDiscriminator
|
from rvc.layers.discriminators import MultiPeriodDiscriminator
|
||||||
|
|
||||||
if hps.version == "v1":
|
if hps.version == "v1":
|
||||||
from rvc.synthesizers import SynthesizerTrnMs256NSFsid as RVC_Model_f0
|
from rvc.layers.synthesizers import SynthesizerTrnMs256NSFsid as RVC_Model_f0
|
||||||
from rvc.synthesizers import (
|
from rvc.layers.synthesizers import (
|
||||||
SynthesizerTrnMs256NSFsid_nono as RVC_Model_nof0,
|
SynthesizerTrnMs256NSFsid_nono as RVC_Model_nof0,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
from rvc.synthesizers import (
|
from rvc.layers.synthesizers import (
|
||||||
SynthesizerTrnMs768NSFsid as RVC_Model_f0,
|
SynthesizerTrnMs768NSFsid as RVC_Model_f0,
|
||||||
SynthesizerTrnMs768NSFsid_nono as RVC_Model_nof0,
|
SynthesizerTrnMs768NSFsid_nono as RVC_Model_nof0,
|
||||||
)
|
)
|
||||||
|
|||||||
0
rvc/layers/__init__.py
Normal file
0
rvc/layers/__init__.py
Normal file
@@ -3,14 +3,15 @@ from typing import Optional, List, Union
|
|||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from rvc.residuals import ResidualCouplingBlock
|
|
||||||
from rvc.utils import (
|
from .encoders import TextEncoder, PosteriorEncoder
|
||||||
|
from .generators import Generator
|
||||||
|
from .nsf import NSFGenerator
|
||||||
|
from .residuals import ResidualCouplingBlock
|
||||||
|
from .utils import (
|
||||||
slice_on_last_dim,
|
slice_on_last_dim,
|
||||||
rand_slice_segments_on_last_dim,
|
rand_slice_segments_on_last_dim,
|
||||||
)
|
)
|
||||||
from rvc.encoders import TextEncoder, PosteriorEncoder
|
|
||||||
from rvc.generators import Generator
|
|
||||||
from rvc.nsf import NSFGenerator
|
|
||||||
|
|
||||||
|
|
||||||
class SynthesizerTrnMsNSFsid(nn.Module):
|
class SynthesizerTrnMsNSFsid(nn.Module):
|
||||||
@@ -75,7 +75,7 @@ class RVC(Model):
|
|||||||
self.vec_model = ContentVec(vec_path, device)
|
self.vec_model = ContentVec(vec_path, device)
|
||||||
self.hop_len = hop_len
|
self.hop_len = hop_len
|
||||||
|
|
||||||
def inference(
|
def infer(
|
||||||
self,
|
self,
|
||||||
wav: np.ndarray[typing.Any, np.dtype],
|
wav: np.ndarray[typing.Any, np.dtype],
|
||||||
wav_sr: int,
|
wav_sr: int,
|
||||||
@@ -119,7 +119,7 @@ class RVC(Model):
|
|||||||
rnd = np.random.randn(1, 192, hubert_length).astype(np.float32)
|
rnd = np.random.randn(1, 192, hubert_length).astype(np.float32)
|
||||||
hubert_length = np.array([hubert_length]).astype(np.int64)
|
hubert_length = np.array([hubert_length]).astype(np.int64)
|
||||||
|
|
||||||
out_wav = self.__forward(
|
out_wav = self.forward(
|
||||||
hubert, hubert_length, pitch, pitchf, ds, rnd
|
hubert, hubert_length, pitch, pitchf, ds, rnd
|
||||||
).squeeze()
|
).squeeze()
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ class RVC(Model):
|
|||||||
|
|
||||||
return out_wav[0:org_length]
|
return out_wav[0:org_length]
|
||||||
|
|
||||||
def __forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
hubert: np.ndarray[typing.Any, np.dtype[np.float32]],
|
hubert: np.ndarray[typing.Any, np.dtype[np.float32]],
|
||||||
hubert_length: int,
|
hubert_length: int,
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ from fairseq import checkpoint_utils
|
|||||||
|
|
||||||
# from models import SynthesizerTrn256#hifigan_nonsf
|
# from models import SynthesizerTrn256#hifigan_nonsf
|
||||||
# from lib.infer_pack.models import SynthesizerTrn256NSF as SynthesizerTrn256#hifigan_nsf
|
# from lib.infer_pack.models import SynthesizerTrn256NSF as SynthesizerTrn256#hifigan_nsf
|
||||||
from rvc.synthesizers import (
|
from rvc.layers.synthesizers import (
|
||||||
SynthesizerTrnMs256NSFsid as SynthesizerTrn256,
|
SynthesizerTrnMs256NSFsid as SynthesizerTrn256,
|
||||||
) # hifigan_nsf
|
) # hifigan_nsf
|
||||||
from scipy.io import wavfile
|
from scipy.io import wavfile
|
||||||
|
|||||||
@@ -19,6 +19,6 @@ model = RVC(
|
|||||||
|
|
||||||
wav, sr = librosa.load(wav_path, sr=sampling_rate)
|
wav, sr = librosa.load(wav_path, sr=sampling_rate)
|
||||||
|
|
||||||
audio = model.inference(wav, sr, sid, f0_method=f0_method, f0_up_key=f0_up_key)
|
audio = model.infer(wav, sr, sid, f0_method=f0_method, f0_up_key=f0_up_key)
|
||||||
|
|
||||||
soundfile.write(out_path, audio, sampling_rate)
|
soundfile.write(out_path, audio, sampling_rate)
|
||||||
|
|||||||
Reference in New Issue
Block a user