1
0
mirror of https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI.git synced 2026-06-05 01:10:22 +08:00

optimize(uvr5): apply jit to spec_utils & fix flac save

also fix #85
This commit is contained in:
源文雨
2024-11-28 23:19:05 +09:00
parent 4582d4b49a
commit 5969314e8d
11 changed files with 104 additions and 581 deletions

View File

@@ -43,9 +43,14 @@ def float_np_array_to_wav_buf(wav: np.ndarray, sr: int, f32=False) -> BytesIO:
return buf
def save_audio(path: str, audio: np.ndarray, sr: int, f32=False):
def save_audio(path: str, audio: np.ndarray, sr: int, f32=False, format="wav"):
buf = float_np_array_to_wav_buf(audio, sr, f32)
if format != "wav":
transbuf = BytesIO()
wav2(buf, transbuf, format)
buf = transbuf
with open(path, "wb") as f:
f.write(float_np_array_to_wav_buf(audio, sr, f32).getbuffer())
f.write(buf.getbuffer())
def wav2(i: BytesIO, o: BufferedWriter, format: str):
@@ -109,7 +114,7 @@ def load_audio(
frames_data = []
rate = 0
for frame in packet:
frame.pts = None # 清除时间戳,避免重新采样问题
# frame.pts = None # 清除时间戳,避免重新采样问题
resampled_frames = (
resampler.resample(frame) if resampler is not None else [frame]
)
@@ -137,6 +142,8 @@ def load_audio(
np.copyto(decoded_audio[..., offset:end_index], frame_data)
offset += len(frame_data[0])
container.close()
# Truncate the array to the actual size
decoded_audio = decoded_audio[..., :offset]
@@ -149,43 +156,6 @@ def load_audio(
return decoded_audio, rate
def downsample_audio(
input_path: str, output_path: str, format: str, br=128_000
) -> None:
"""
default to 128kb/s (equivalent to -q:a 2)
"""
if not os.path.exists(input_path):
return
input_container = av.open(input_path)
output_container = av.open(output_path, "w")
# Create a stream in the output container
input_stream = input_container.streams.audio[0]
output_stream = output_container.add_stream(format)
output_stream.bit_rate = br
# Copy packets from the input file to the output file
for packet in input_container.demux(input_stream):
for frame in packet.decode():
for out_packet in output_stream.encode(frame):
output_container.mux(out_packet)
for packet in output_stream.encode():
output_container.mux(packet)
# Close the containers
input_container.close()
output_container.close()
try: # Remove the original file
os.remove(input_path)
except Exception as e:
print(f"Failed to remove the original file: {e}")
def resample_audio(
input_path: str, output_path: str, codec: str, format: str, sr: int, layout: str
) -> None:
@@ -204,7 +174,7 @@ def resample_audio(
# Copy packets from the input file to the output file
for packet in input_container.demux(input_stream):
for frame in packet.decode():
frame.pts = None # Clear presentation timestamp to avoid resampling issues
# frame.pts = None # Clear presentation timestamp to avoid resampling issues
out_frames = resampler.resample(frame)
for out_frame in out_frames:
for out_packet in output_stream.encode(out_frame):
@@ -217,10 +187,6 @@ def resample_audio(
input_container.close()
output_container.close()
try: # Remove the original file
os.remove(input_path)
except Exception as e:
print(f"Failed to remove the original file: {e}")
def get_audio_properties(input_path: str) -> Tuple[int, int]: