mirror of
https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git
synced 2026-06-07 19:40:44 +08:00
optimize: some training optimizations (#95)
* optimize(train&uvr5): rm sf & simp. AudioPre * fix(audio): too many mallocs * feat(audio): load_audio support stereo * fix(audio): float32 wav saving * fix(train): missing ckpt var
This commit is contained in:
@@ -2,6 +2,11 @@ import os
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
now_dir = os.getcwd()
|
||||
sys.path.append(now_dir)
|
||||
|
||||
from infer.lib.audio import load_audio
|
||||
|
||||
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
|
||||
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
|
||||
|
||||
@@ -20,7 +25,6 @@ else:
|
||||
is_half = sys.argv[7].lower() == "true"
|
||||
import fairseq
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
@@ -64,11 +68,9 @@ os.makedirs(outPath, exist_ok=True)
|
||||
|
||||
# wave must be 16k, hop_size=320
|
||||
def readwave(wav_path, normalize=False):
|
||||
wav, sr = sf.read(wav_path)
|
||||
wav, sr = load_audio(wav_path)
|
||||
assert sr == 16000
|
||||
feats = torch.from_numpy(wav).float()
|
||||
if feats.dim() == 2: # double channels
|
||||
feats = feats.mean(-1)
|
||||
assert feats.dim() == 1, feats.dim()
|
||||
if normalize:
|
||||
with torch.no_grad():
|
||||
|
||||
Reference in New Issue
Block a user