From 09285d5f5b73a0de55beb35c00da27f014bc8deb Mon Sep 17 00:00:00 2001
From: Alex Murkoff <413x1nkp@gmail.com>
Date: Wed, 12 Jun 2024 19:53:56 +0700
Subject: [PATCH] perf: improve codec handling in load_filepaths_and_text function in `infer.lib.train.utils` (#44)

---
 infer/lib/train/utils.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/infer/lib/train/utils.py b/infer/lib/train/utils.py
index bfffcd7..b8f7e37 100644
--- a/infer/lib/train/utils.py
+++ b/infer/lib/train/utils.py
@@ -5,6 +5,7 @@
 import logging
 import os
 import sys
+import codecs
 import numpy as np
 import torch
 from scipy.io.wavfile import read
@@ -251,13 +252,11 @@ def load_wav_to_torch(full_path):
 
 def load_filepaths_and_text(filename, split="|"):
     try:
-        with open(filename, encoding="utf-8") as f:
-            filepaths_and_text = [line.strip().split(split) for line in f]
-    except UnicodeDecodeError:
-        with open(filename) as f:
-            filepaths_and_text = [line.strip().split(split) for line in f]
+        return [line.strip().split(split) for line in codecs.open(filename, encoding="utf-8")]
+    except UnicodeDecodeError as e:
+        logger.error("Error loading file %s: %s", filename, e)
 
-    return filepaths_and_text
+    return []
 
 
 def get_hparams(init=True):