diff --git a/.gitignore b/.gitignore index 6ac2d0f..0ae6381 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ output/ +input/ temp/ *.pck *.hdiff diff --git a/README.md b/README.md index 5d06a10..54b6ddd 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,44 @@ # AnimeWwise +An easy to use tool to extract audio from some anime games, with the original filenames and paths. -# ⚠️ as of right now, this branch won't be updated beyond genshin 4.7, refer to the v2 branch for newer version, more games and other stuff until merged -Extract audio from `.pck` and `.hdiff` to `.mp3` or `.ogg` including original filenames with this tool. It can in theory extract any `.pck` or `.hdiff` file from any game even though it was made for Genshin Impact. There are others tools that do the same but none of them were working so I just made my own. - -⚠️ Only audio from genshin will be exported with original filenames, and the coverage is very low, don't except every file to have a name +![image](https://github.com/user-attachments/assets/e66048df-4d71-4bda-8201-1c2c67f44de7) # Usage 1. Get the repo by [downloading it](https://github.com/Escartem/WwiseExtract/archive/refs/heads/master.zip) or cloning it (`git clone https://github.com/Escartem/WwiseExtract`) -> ℹ️ This project uses ffmpeg version *3.4.2* which is the latest under 50MB. But it is also slower, if you want to slightly improve extraction speed, consider updating the ffmpeg binary to a [newer version](https://github.com/BtbN/FFmpeg-Builds/releases) +> [!NOTE] +> This project uses ffmpeg version *3.4.2* which is the latest under 50MB. But it is also slower, if you want to slightly improve extraction speed, consider updating the ffmpeg binary to a [newer version](https://github.com/BtbN/FFmpeg-Builds/releases) 2. Install dependencies -> `pip install -r requirements.txt` -3. 
Place all of your `.pck` files in the *audio* folder and `.pck.hdiff` in the *patch* folder -> ⚠️ If you want to extract an hdiff content, you must place the pck file with the *same name before patch* in the *audio* folder, pck's that do not have a corresponding hdiff file will be extracted normally, when they do have a corresponding hdiff file, *only the hdiff file content is extracted* and not the full pck -4. Start the program -> `python extract.py`. Pass in `--format [ogg|mp3]` to change the audio output format - if not specified, it will default to mp3. -5. After finishing, everything will be in the *output* folder in your chosen format +3. Run the app with `python app.py` +4. Select your input folder containing your `.pck` files, it can be your game audio folder directly (if you decide to use this one, make sure the game is not running) +![image](https://github.com/user-attachments/assets/72cf7983-00d0-4e98-b0d0-8b5547057a56) +> [!TIP] +> The audio folder can be found in the following locations +> - `GenshinImpact_Data\StreamingAssets\AudioAsset\...` +> - `StarRail_Data\Persistent\Audio\AudioPackage\Windows\... ` +> - `ZenlessZoneZero_Data\StreamingAssets\Audio\Windows\Full\...` +5. Select your hdiff folder if needed +> [!NOTE] +> Diff files are `.hdiff` present in the update patches of the games. If you want to extract an hdiff content, you must have the pck file with the *same name before patch* in the input folder, pck's that do not have a corresponding hdiff file will be extracted normally, when they do have a corresponding hdiff file, *only the hdiff file content is extracted* and not the full pck +6. Select a mapping +> [!WARNING] +> By default, the files extracted from the game don't have names, the mappings are here to help restore the original filenames and paths so it's easier to search, but not all games are supported, not at every version and the mapping does not guarantee to have every file named +7. 
After that, you can browse the files you loaded, if you messed up and wanna go back, you can select File > Reset to unload everything and go back to the starting screen. +![image](https://github.com/user-attachments/assets/9714b6ab-527a-49d9-ae98-354d1979a2b9) +8. In the `Extract` tab, you will be able to select what audio you want, choosing the output folder and audio format. You can extract everything or extract the files you selected in the `Browse` tab +> [!NOTE] +> The program does not check for existing files in the output folder, it will overwrite them, make sure to check your folder before starting the extraction +9. Extract your files, and enjoy ! ---- +# Why was this made -### And that's pretty much it, if you have any issue, suggestion or anything just open an issue or create a pr :) +I know there are already dozens of tools that have the exact same purpose, being to extract audio from games or hoyo games, however, I made this anyway because of one functionality that others don't possess, which is file name recovery using mappings, because extracting is cool but browsing thousands of files with no names is just a pain, every single voiceline is a unique file. And I'm also planning a second unique functionality being a lookup tool, giving the user the ability to see every file inside the game, search the ones he needs and then extract them automatically, instead of having to load files and see what's in them. Stay tuned for that one :3 + +# Performance + +The program has been tested and proved to be very efficient with extraction (not conversion), I've loaded the entire english package from genshin at 4.8 (around 17gb) and it took around 15 seconds to load and map every single one of the ~100k files inside. And upon extracting them to .wem, it took around 10 seconds as well and during the entire process the program did not exceed 500mb or so of ram usage. 
class Allocator:
    """Keep read-only memory maps of files so byte ranges can be read cheaply.

    Mappings are stored in ``self.files`` keyed by the file's base name, so two
    files with the same name in different folders share one slot (the most
    recently loaded one wins).
    """

    def __init__(self):
        # base name -> open mmap object
        self.files = {}

    def load_file(self, path):
        """Map ``path`` read-only and register it under its base name.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if the file is empty (``mmap`` refuses length-0 maps).
        """
        filename = os.path.basename(path)

        # Open read-only: the mapping is ACCESS_READ, so requiring write
        # permission ("r+b") would only make read-only inputs fail.
        with open(path, "rb") as f:
            mmap_object = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

        # Close a mapping previously registered under the same name instead of
        # silently dropping the reference to it.
        previous = self.files.pop(filename, None)
        if previous is not None:
            previous.close()

        self.files[filename] = mmap_object

    def unload_file(self, name):
        """Close the mapping registered as ``name`` and forget it.

        Raises:
            KeyError: if no file is registered under ``name``.
        """
        # pop() so that no closed mapping is left behind in the registry
        self.files.pop(name).close()

    def read_at(self, file, offset, size):
        """Return up to ``size`` bytes of ``file`` starting at ``offset``.

        Raises:
            KeyError: if ``file`` is not loaded.
        """
        mmap_object = self.files[file]
        mmap_object.seek(offset)
        return mmap_object.read(size)

    def free_mem(self):
        """Close every mapping and empty the registry."""
        for mmap_object in self.files.values():
            mmap_object.close()
        self.files.clear()
class BackgroundWorker(QObject):
    """Qt worker that performs one "load" or "extract" job.

    ``finished`` carries a dict describing the outcome (``action`` is one of
    "load", "extract" or "error"); ``progress`` carries the two-item lists
    produced by the extractor's progress callback.
    """

    finished = pyqtSignal(dict)
    progress = pyqtSignal(list)

    # payload keys copied onto the worker as attributes, per action
    _PAYLOAD_KEYS = {
        "load": ("input", "map", "diff"),
        "extract": ("input", "files", "format", "output"),
    }

    def __init__(self, action, extract, data):
        """Store the job description.

        Args:
            action: "load" or "extract"; any other value stores no payload.
            extract: object providing ``load_folder`` and ``extract_files``.
            data: dict holding the payload keys required by ``action``.
        """
        super().__init__()
        self.action = action
        self.extract = extract

        for key in self._PAYLOAD_KEYS.get(action, ()):
            setattr(self, key, data[key])

    def run(self):
        """Execute the configured job; does nothing for an unknown action."""
        jobs = {"load": self._run_load, "extract": self._run_extract}
        job = jobs.get(self.action)
        if job is not None:
            job()

    def _run_load(self):
        """Load the input folder and emit the resulting structure (or an error)."""
        print("Loading files and mapping if necessary...")
        structure = self.extract.load_folder(
            self.map, self.input, self.diff, progress=self.progress.emit
        )

        if structure is None:
            # state 1 tells the window to give the configuration tab back
            self.finished.emit({"action": "error", "content": {"msg": "Nothing found !", "state": 1}})
            print("Nothing found !")
            return

        print("Building file structure...")
        self.finished.emit({"action": "load", "content": structure})

    def _run_extract(self):
        """Extract the selected files, or report that nothing was selected."""
        if not self.files:
            # state 2 tells the window to re-enable the browse/extract tabs
            self.finished.emit({"action": "error", "content": {"msg": "Nothing selected !", "state": 2}})
            return

        print(f"Extracting {len(self.files)} files...")
        self.extract.extract_files(
            self.input, self.files, self.output, self.format, progress=self.progress.emit
        )
        self.finished.emit({"action": "extract"})
class AnimeWwise(QMainWindow):
    """Main window: configure folders, browse loaded files, run extractions.

    Widgets (``tabs``, ``treeWidget``, ``console``, ...) come from ``gui.ui``.
    Long-running work is delegated to a ``BackgroundWorker`` on a ``QThread``;
    results come back through ``handleFinished`` and ``progressBarSlot``.
    """

    def __init__(self):
        super(AnimeWwise, self).__init__()
        uic.loadUi("gui.ui", self)
        self.maps = self.getMaps()
        self.folders = {
            "input": "",
            "output": "",
            "diff": ""
        }
        # Structure of the last successful load. Starts empty so filterAsset
        # never touches an undefined attribute.
        self.fileStructure = {"folders": {}, "files": []}
        self.setupActions()
        # everything printed by the app is shown in the console widget
        sys.stdout = TextEditStream(self.console)
        self.extract = extract.WwiseExtract()

    # utils
    def selectFolder(self):
        """Open a folder picker and return the chosen path."""
        return QFileDialog.getExistingDirectory(self, "Select Folder")

    def getMaps(self):
        """Return the parsed content of ``maps/index.json``."""
        with open("maps/index.json", "r") as f:
            return json.load(f)

    def setFolder(self, elem, folder):
        """Ask for a folder, store it under ``folder`` and show it in ``elem``."""
        path = self.selectFolder()
        self.folders[folder] = path
        elem.setText(path)

    def setupActions(self):
        """Fill the combo boxes and connect every widget signal."""
        self.changeInput.clicked.connect(lambda: self.setFolder(self.inputPath, "input"))
        self.changeAltInput.clicked.connect(lambda: self.setFolder(self.altInputPath, "diff"))
        self.changeOutput.clicked.connect(lambda: self.setFolder(self.outputPath, "output"))

        # extractItems uses the first three characters of the chosen entry as
        # the format name, so every label must start with it
        self.outputFormat.addItems(["wem (fastest)", "wav (fast)", "mp3 (slow)", "ogg (slow)"])
        self.assetMap.addItems(["No map", *[f'{e["game"]} - v{e["version"]}' for e in self.maps["maps"]]])

        self.tabs.setTabEnabled(1, False)
        self.tabs.setTabEnabled(2, False)

        self.loadFilesButton.clicked.connect(lambda: self.loadFiles())

        self.actionReset.triggered.connect(lambda: self.resetApp())
        self.actionExit.triggered.connect(lambda: self.close())

        self.extractSelected.clicked.connect(lambda: self.extractItems(False))
        self.extractAll.clicked.connect(lambda: self.extractItems(True))

        self.searchAsset.textChanged.connect(lambda: self.filterAsset())

    # workers
    def _startWorker(self, action, data):
        """Run a ``BackgroundWorker`` job on a fresh ``QThread``.

        The references are kept on ``self`` so the thread and worker stay
        alive while running; both are scheduled for deletion when done.
        """
        self.backgroundThread = QThread()
        self.backgroundWorker = BackgroundWorker(action, self.extract, data)
        self.backgroundWorker.moveToThread(self.backgroundThread)
        self.backgroundThread.started.connect(self.backgroundWorker.run)
        self.backgroundWorker.finished.connect(self.handleFinished)
        self.backgroundWorker.finished.connect(self.backgroundThread.quit)
        self.backgroundWorker.finished.connect(self.backgroundWorker.deleteLater)
        self.backgroundThread.finished.connect(self.backgroundThread.deleteLater)

        self.backgroundWorker.progress.connect(self.progressBarSlot)
        self.backgroundThread.start()

    @pyqtSlot(list)
    def progressBarSlot(self, progress):
        """Update a progress bar from a ``[kind, value]`` pair."""
        kind, value = progress[0], progress[1]
        if kind == "load":
            self.loadProgress.setValue(math.ceil(value))
        elif kind == "total":
            self.totalProgress.setValue(math.ceil(value))
        elif kind == "file":
            self.fileProgress.setValue(math.ceil(value))

    @pyqtSlot(dict)
    def handleFinished(self, data):
        """React to the end of a background job (load, extract or error)."""
        action = data["action"]

        if action == "load":
            self.fileStructure = data["content"]
            self.updateTreeWidget(self.fileStructure)
            self.tabs.setTabEnabled(0, False)
            self.tabs.setTabEnabled(1, True)
            self.tabs.setTabEnabled(2, True)
            self.tabs.setCurrentIndex(1)
            print("Done !")
        elif action == "error":
            QMessageBox.warning(None, "Warning", data["content"]["msg"], QMessageBox.Ok)
            state = data["content"]["state"]
            # state 1: the load failed, state 2: the extraction was refused;
            # re-enable the tabs that were disabled before the job started
            if state == 1:
                self.tabs.setTabEnabled(0, True)
            elif state == 2:
                self.tabs.setTabEnabled(1, True)
                self.tabs.setTabEnabled(2, True)
        elif action == "extract":
            self.tabs.setTabEnabled(1, True)
            self.tabs.setTabEnabled(2, True)
            self.tabs.setCurrentIndex(2)
            print("Finished extracting everything !")
            # NOTE: os.startfile only exists on Windows
            os.startfile(self.folders["output"])

    # page 1 - config
    def loadFiles(self):
        """Start loading the input folder with the selected mapping."""
        if self.folders["input"] == "":
            QMessageBox.warning(None, "Warning", "Missing input folder !", QMessageBox.Ok)
            return

        # index 0 is the "No map" entry, the others follow self.maps["maps"]
        map_index = self.assetMap.currentIndex()
        _map = self.maps["maps"][map_index - 1]["name"] if map_index != 0 else None

        self.tabs.setTabEnabled(0, False)
        self.resetTreeWidget()

        self._startWorker("load", {
            "input": self.folders["input"],
            "map": _map,
            "diff": self.folders["diff"]
        })

    # page 2 - browsing
    def filterAsset(self):
        """Show only the files whose name contains the search text."""
        search = self.searchAsset.text()
        if search == "":
            self.updateTreeWidget(self.fileStructure)
            return
        self.updateTreeWidget(self.searchFiles(self.fileStructure, search))

    def searchFiles(self, data, substring, current_path=""):
        """Return a copy of ``data`` restricted to matching files.

        Folders without any match below them are dropped. ``current_path`` is
        unused and only kept so existing callers keep working.
        """
        result = {"folders": {}, "files": []}

        result["files"] = [file for file in data.get("files", []) if substring in file[0]]

        for folder_name, folder_data in data.get("folders", {}).items():
            subfolder_result = self.searchFiles(folder_data, substring)
            if subfolder_result["files"] or subfolder_result["folders"]:
                result["folders"][folder_name] = subfolder_result

        return result

    def resetTreeWidget(self):
        """Empty the tree and disable the browsing tab."""
        self.treeWidget.clear()
        self.tabs.setTabEnabled(1, False)

    def updateTreeWidget(self, structure):
        """Rebuild the tree from a ``{"folders": ..., "files": ...}`` structure."""
        self.treeWidget.clear()
        # four columns: four labels are set below and getFileMeta reads the
        # source from column 3
        self.treeWidget.setColumnCount(4)
        self.treeWidget.setHeaderLabels(["Name", "Offset", "Size", "Source"])

        self.addItems(None, structure)

        self.treeWidget.expandAll()
        self.treeWidget.header().setSectionResizeMode(0, QHeaderView.Stretch)
        self.treeWidget.header().setSectionResizeMode(1, QHeaderView.ResizeToContents)
        self.treeWidget.header().setSectionResizeMode(2, QHeaderView.ResizeToContents)
        self.treeWidget.setHeaderHidden(False)

        self.treeWidget.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.treeWidget.setDragDropMode(QAbstractItemView.NoDragDrop)

    def addItems(self, parent, element):
        """Add the folders, then the files, of ``element`` below ``parent``.

        ``parent`` is ``None`` for top-level items.
        """
        for folder_name in sorted(element.get("folders", {}).keys()):
            folder_content = element["folders"][folder_name]
            # folders leave the offset column empty; that is how extractItems
            # tells them apart from files
            folder_item = QTreeWidgetItem([folder_name, "", "", ""])
            folder_item.setFlags(folder_item.flags() | Qt.ItemIsTristate | Qt.ItemIsUserCheckable)
            folder_item.setCheckState(0, Qt.Unchecked)
            if parent is None:
                self.treeWidget.addTopLevelItem(folder_item)
            else:
                parent.addChild(folder_item)
            self.addItems(folder_item, folder_content)

        for file in sorted(element.get("files", [])):
            file_item = QTreeWidgetItem([str(file[0]), str(hex(file[1])), str(file[2]), str(file[3])])
            file_item.setFlags(file_item.flags() | Qt.ItemIsUserCheckable)
            file_item.setCheckState(0, Qt.Unchecked)
            if parent is None:
                self.treeWidget.addTopLevelItem(file_item)
            else:
                parent.addChild(file_item)

    # page 3 - extraction
    def extractItems(self, _all):
        """Extract the checked files, or every displayed file when ``_all``."""
        if self.folders["output"] == "":
            QMessageBox.warning(None, "Warning", "Missing output folder !", QMessageBox.Ok)
            return

        checked_items = []

        def check_items(item, _all):
            if item.checkState(0) == Qt.Checked or _all:
                # only file rows carry an offset
                if item.text(1) != "":
                    checked_items.append(self.getFileMeta(item))
            for i in range(item.childCount()):
                check_items(item.child(i), _all)

        for i in range(self.treeWidget.topLevelItemCount()):
            check_items(self.treeWidget.topLevelItem(i), _all)

        self.tabs.setTabEnabled(1, False)
        self.tabs.setTabEnabled(2, False)
        self.tabs.setCurrentIndex(2)

        self._startWorker("extract", {
            "input": self.folders["input"],
            "files": checked_items,
            "format": self.outputFormat.currentText()[:3],
            "output": self.folders["output"]
        })

    def getFileMeta(self, item):
        """Build the extraction record of a file row from its tree columns."""
        path = []
        current_item = item

        # collect the names from the top-level ancestor down to the item
        while current_item is not None:
            path.insert(0, current_item.text(0))
            current_item = current_item.parent()

        return {
            "name": item.text(0),
            # drops the top-level entry and the file name itself
            "path": path[1:-1],
            "source": item.text(3),
            "offset": int(item.text(1), 16),
            "size": int(item.text(2))
        }

    # misc
    def resetApp(self):
        """Unload everything and go back to the configuration tab."""
        self.resetTreeWidget()
        self.extract.reset()
        self.tabs.setTabEnabled(0, True)
        self.tabs.setTabEnabled(1, False)
        self.tabs.setTabEnabled(2, False)
        print("Reset !")

    def _appendText(self, text):
        """Append ``text`` at the end of the console and keep it visible."""
        cursor = self.console.textCursor()
        cursor.movePosition(cursor.End)
        cursor.insertText(text)
        self.console.setTextCursor(cursor)
        self.console.ensureCursorVisible()
def bnk2wem(data):
    """List the wem entries indexed by a bnk blob.

    Args:
        data: raw bytes of the bnk.

    Returns:
        A list of ``[wem_id, offset, size]`` lists, where ``offset`` has been
        rebased on the position right after the DATA header. Empty when the
        bnk ends after its BKHD section, or when the DIDX / DATA signature is
        not found where expected.

    Raises:
        Exception: if the blob does not start with the BKHD signature.
    """
    reader = FileReader(io.BytesIO(data), "little")

    # "BKHD" section: signature, size, then that many bytes to skip
    if reader.ReadBytes(4) != b"\x42\x4B\x48\x44":
        raise Exception("not a valid bnk")
    reader.ReadBytes(reader.ReadUInt32())

    if reader.GetBufferPos() == reader.GetStreamLength():
        return [] # empty bnk

    # "DIDX" section: the index of embedded wems
    if reader.ReadBytes(4) != b"\x44\x49\x44\x58":
        return [] # invalid index signature (hirc block instead ?)

    # each index entry is three UInt32 values (12 bytes, assuming 4 per value)
    entry_count = reader.ReadUInt32() // 12
    entries = [
        [reader.ReadUInt32(), reader.ReadUInt32(), reader.ReadUInt32()]
        for _ in range(entry_count)
    ]

    # "DATA" section: signature, size, then the payload the offsets refer to
    if reader.ReadBytes(4) != b"\x44\x41\x54\x41":
        return [] # invalid data signature (missing sector ?)

    reader.ReadUInt32() # section size, only read to move past it
    payload_start = reader.GetBufferPos()

    return [[wem_id, offset + payload_start, size] for wem_id, offset, size in entries]
def compare_diff(self, old, new):
    """Split the records of a patched package into new and changed ones.

    Records are indexable; index 0 is the file name and index 2 the size.

    Args:
        old: records of the package before the patch.
        new: records of the package after the patch.

    Returns:
        ``[new_files, changed_files]``: records of ``new`` whose name is
        absent from ``old``, and records whose name exists in ``old`` with a
        different size. Unchanged records are dropped.
    """
    # Look names up in the dict itself. Testing the whole record against the
    # list of names never matched, which reported every file as new.
    old_sizes = {record[0]: record[2] for record in old}

    new_files = [record for record in new if record[0] not in old_sizes]
    changed_files = [
        record for record in new
        if record[0] in old_sizes and record[2] != old_sizes[record[0]]
    ]

    return [new_files, changed_files]
def map_names(self, files, filename, hdiff=False, skip_source=True):
    """Insert the records of one package into ``self.file_structure``.

    Records are ``[name, offset, size, source]``. When ``self.mapper`` knows a
    record, it is filed under the mapped path through ``add_to_structure``;
    otherwise the record goes to an ``unmapped`` folder.

    Args:
        files: list of records, or ``[new_records, changed_records]`` when
            ``hdiff`` is true.
        filename: name of the package the records come from.
        hdiff: whether ``files`` is the result of a diff; records are then
            grouped under ``new_files`` / ``changed_files``.
        skip_source: when false, everything is nested under a folder named
            after the package.
    """
    # disable skip source if required
    mapper = self.mapper
    root = self.file_structure

    if hdiff:
        # Tag each record with its group once, instead of searching the two
        # lists again for every record.
        tagged = [(record, "new_files") for record in files[0]]
        tagged += [(record, "changed_files") for record in files[1]]
        filename = f"{filename} (hdiff)"
    else:
        tagged = [(record, None) for record in files]

    for record, group in tagged:
        key = None
        if mapper is not None:
            # the mapper is queried with the name stripped of its extension
            key = mapper.get_key(record[0].split(".")[0])

        if key is not None:
            # Work on a local copy of the path: the list returned by the
            # mapper is left untouched.
            mapped_path = key[0]
            if group is not None:
                mapped_path = f"{group}\\{mapped_path}"

            parts = f"{filename}\\{mapped_path}.wem".split("\\")
            if skip_source:
                parts = parts[1:]

            self.add_to_structure(parts, [record[1], record[2], record[3]])
            continue

        # unmapped record: walk (and create) the folders by hand
        level = root["folders"]

        if not skip_source:
            level = level.setdefault(filename, {"folders": {}, "files": []})["folders"]

        if group is not None:
            level = level.setdefault(group, {"folders": {}, "files": []})["folders"]

        level.setdefault("unmapped", {"folders": {}, "files": []})["files"].append(record)
def extract_files(self, _input, files, output, _format, progress):
    """Extract ``files`` to ``output`` in the requested format.

    The work is a chain of stages, wem -> wav -> mp3/ogg. Only the last stage
    needed for ``_format`` writes to ``output``; earlier stages write to a
    temporary directory that is removed when the method returns or raises.

    Args:
        _input: folder containing the source packages.
        files: dicts with at least ``path`` (list of folder names) and
            ``name`` (file name), as consumed by ``extract_wem``.
        output: destination folder.
        _format: "wem", "wav", "mp3" or "ogg".
        progress: callable receiving ``[kind, value]`` progress updates.

    Raises:
        KeyError: if ``_format`` is not one of the supported formats.
    """
    self.progress = progress
    # number of stages, used by update_progress to scale the total bar
    self.steps = {
        "wem": 1,
        "wav": 2,
        "mp3": 3,
        "ogg": 3
    }[_format]

    # The context manager removes the intermediate files even when a stage
    # fails part-way through.
    with tempfile.TemporaryDirectory() as temp_root:
        # wem
        wem_folder = output if _format == "wem" else os.path.join(temp_root, "wem")
        self.extract_wem(_input, files, wem_folder)
        if _format == "wem":
            return

        # wav
        wem_files = [os.path.join("/".join(file["path"]), file["name"]) for file in files]
        wav_folder = output if _format == "wav" else os.path.join(temp_root, "wav")
        self.extract_wav(wem_folder, wem_files, wav_folder)
        if _format == "wav":
            return

        # mp3 & ogg
        wav_files = [
            os.path.join(os.path.dirname(file), f'{os.path.basename(file).split(".")[0]}.wav')
            for file in wem_files
        ]
        self.extract_ffmpeg(wav_folder, wav_files, output, _format)
def extract_wem(self, _input, files, output):
    """Write the raw bytes of every requested file below ``output``.

    Each source package is mapped once through ``self.allocator``, its files
    are copied out, then it is unmapped again. A package is read from
    ``self.hdiff_dir`` instead of ``_input`` when a file of the same name
    exists there.

    Args:
        _input: folder containing the source packages.
        files: dicts with ``source``, ``offset``, ``size``, ``path`` (list of
            folder names) and ``name``. ``source`` is normalised in place.
        output: folder receiving ``<path>/<name>`` for every file.
    """
    print(": Extracting audio as wem")

    # Group the files per source in one pass rather than filtering the whole
    # list again for each source.
    by_source = {}
    for file in files:
        # map_names appends " (hdiff)" to the displayed package name; strip
        # it in case it ever reaches a record's source
        file["source"] = file["source"].split(" (hdiff)")[0]
        by_source.setdefault(file["source"], []).append(file)

    total = len(files)
    pos = 0

    try:
        for source, entries in by_source.items():
            # load source
            load_path = os.path.join(_input, source)
            if self.hdiff_dir is not None:
                hdiff_path = os.path.join(self.hdiff_dir.name, source)
                if os.path.isfile(hdiff_path):
                    load_path = hdiff_path

            self.allocator.load_file(load_path)

            try:
                # extract every file from this one
                for file in entries:
                    pos += 1
                    self.update_progress(pos, total, 1)

                    data = self.allocator.read_at(source, file["offset"], file["size"])

                    fullpath = os.path.join(output, "/".join(file["path"]), file["name"])
                    os.makedirs(os.path.dirname(fullpath), exist_ok=True)

                    with open(fullpath, "wb") as f:
                        f.write(data)
            finally:
                # unload source, even when a read or write failed
                self.allocator.unload_file(source)
    finally:
        # security
        self.allocator.free_mem()
def extract_ffmpeg(self, _input, files, output, _format):
    """Convert wav files to mp3 or ogg with the bundled ffmpeg.

    Args:
        _input: folder containing the wav files.
        files: wav paths relative to ``_input``; the same relative layout is
            recreated below ``output``.
        output: destination folder.
        _format: "mp3" or "ogg".

    Raises:
        KeyError: if ``_format`` has no known encoder.
    """
    print(f": Converting audio to {_format}")

    encoders = {
        "mp3": "libmp3lame",
        "ogg": "libvorbis"
    }

    encoder = encoders[_format]
    ffmpeg = path(cwd, "tools/ffmpeg/ffmpeg.exe")
    total = len(files)

    for pos, file in enumerate(files, start=1):
        self.update_progress(pos, total, 3)

        # keeps the part of the name before its first dot
        filename = f'{os.path.basename(file).split(".")[0]}.{_format}'
        filepath = path(output, os.path.dirname(file), filename)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)

        args = [
            ffmpeg,
            # Overwrite existing outputs without asking. ffmpeg's output is
            # discarded by call(), so its confirmation prompt would never be
            # seen by the user.
            "-y",
            "-i",
            path(_input, file),
            "-acodec",
            encoder,
            "-b:a",
            "192k", # 192|4
            filepath
        ]

        call(args)
def update_progress(self, current, total, step):
    """Report progress for item ``current`` of ``total`` in stage ``step``.

    Sends two updates through ``self.progress``: "total", where every stage
    owns an equal share of 100 (``self.steps`` stages, ``step`` counted from
    1), and "file", the percentage reached inside the current stage.
    """
    share = 100 / self.steps
    report = self.progress

    overall = current * share // total + share * (step - 1)
    report(["total", overall])

    within_stage = current * 100 // total
    report(["file", within_stage])
> 0: - print(f": Failed to extract {diff_length} files out of {wem_length} (probably no extractable content)") - - ############################################# - ### 6 - Convert .wav files to .mp3 or ogg ### - ############################################# - - if skips[5] != "1": - curr += 1 - - # updates folders and progress bar - os.makedirs(path(f"temp/{audio_format}"), exist_ok=True) - bar = PixelBar( - f"[{curr}/{steps}] Converting to {audio_format} ", - max=len(all_files), - suffix="%(percent).1f%% - %(eta)ds left", - ) - - # update file list - all_files = [f"{f.split('.')[0]}.wav" for f in all_files] - - # convert each file one by one - for file in all_files: - args = [ - path("tools/ffmpeg/ffmpeg.exe"), - "-i", - path(f"temp/wav/{file}"), - "-acodec", - "libvorbis" if audio_format == "ogg" else "libmp3lame", - "-b:a", - "192k", - path(f"temp/{audio_format}/{file.split('.')[0]}.{audio_format}"), - ] - - call(args) - bar.next() - bar.finish() - - # cleanup - shutil.rmtree("temp/wav") - - # update files list - all_files = [f"{f.split('.')[0]}.{audio_format}" for f in all_files] - - if not alone: - new_files = [f"{f.split('.')[0]}.{audio_format}" for f in new_files] - changed_files = [f"{f.split('.')[0]}.{audio_format}" for f in changed_files] - - ######################### - ### 7 - Map filenames ### - ######################### - - if skips[6] != "1": - curr += 1 - - # update spinner - spinner.text = f"[{curr}/{steps}] Mapping names" - spinner.start() - - os.makedirs(path(f"temp/map/unmapped"), exist_ok=True) - if not alone: - os.makedirs(path(f"temp/map/new_files/unmapped"), exist_ok=True) - os.makedirs(path(f"temp/map/changed_files/unmapped"), exist_ok=True) - - lang = None - - for file in all_files: - file_name = file.split(".")[0] - base_path = "temp/map" - if not alone: - if file in new_files: - base_path = "temp/map/new_files" - elif file in changed_files: - base_path = "temp/map/changed_files" - - key_data = mapper.get_key(file_name, lang is None) - - if 
key_data is not None: - if lang is None: - lang = key_data[1] - # TODO: use language for output path - print(f"\n: {lang} detected") - - dir_path = path(f"{base_path}/{key_data[0]}.{audio_format}") - os.makedirs(os.path.dirname(dir_path), exist_ok=True) - shutil.copy(path(f"temp/{audio_format}/{file}"), dir_path) - else: - shutil.copy(path(f"temp/{audio_format}/{file}"), path(f"{base_path}/unmapped/{file}")) - - # stop spinner - spinner.stop() - print(f"[{curr}/{steps}] Mapping names") - - ###################################################### - ### 8 - Clean everything and move result to output ### - ###################################################### - - if skips[7] != "1": - curr += 1 - - # update spinner - spinner.text = f"[{curr}/{steps}] Cleaning up" - spinner.start() - - filename = filename.split('.')[0] - - shutil.move(f"temp/map", f"output/{filename}") - - spinner.stop() - print(f"[{curr}/{steps}] Cleaning up") - - except Exception as e: - print("") - print("An error occured while processing this file ! 
Skipping to the next one, details of the error bellow :") - print(f"Line {sys.exc_info()[-1].tb_lineno}, {e}") - - # all files processed - if os.path.exists("temp") and skips[8] != "1": - shutil.rmtree("temp") - print("-"*30) - print("Done extracting everything !") - -if __name__ == "__main__": - main() + def reset(self): + if self.mapper is not None: + self.mapper.reset() + self.allocator.free_mem() + if self.hdiff_dir is not None: + self.hdiff_dir.cleanup() + self.hdiff_dir = None diff --git a/filereader.py b/filereader.py index ea1691b..86c5a9b 100644 --- a/filereader.py +++ b/filereader.py @@ -1,10 +1,11 @@ +import io +import os import struct class FileReader: """ - Simplified byte file reader with buffer, it's not particularly optimised but good enough - In the scope of this project, not everything will be used in here + File reader for files, not much too say """ def __init__(self, file, endianness:str): @@ -60,3 +61,18 @@ class FileReader: def SetBufferPos(self, pos:int): self.stream.seek(pos) + + def GetStreamLength(self) -> int: + if isinstance(self.stream, io.BytesIO): + return self.stream.getbuffer().nbytes + elif isinstance(self.stream, io.BufferedReader): + pos = self.GetBufferPos() + self.stream.seek(0, os.SEEK_END) + length = self.GetBufferPos() + self.SetBufferPos(pos) + return length + else: + raise Exception("unknown buffer type") + + def GetRemainingLength(self) -> int: + return self.GetStreamLength() - self.GetBufferPos() diff --git a/gui.ui b/gui.ui new file mode 100644 index 0000000..5b633ec --- /dev/null +++ b/gui.ui @@ -0,0 +1,452 @@ + + + AnimeWwise + + + Qt::NonModal + + + + 0 + 0 + 1100 + 800 + + + + + 1100 + 800 + + + + + 1100 + 800 + + + + AnimeWwise + + + + + + 4 + -1 + 1091 + 641 + + + + 1 + + + true + + + false + + + false + + + false + + + false + + + + true + + + Config + + + + + 9 + 9 + 1071 + 601 + + + + + + + + + true + + + Select + + + + + + + Select + + + false + + + false + + + + + + + true + + + true + + + + + + + Diff 
folder (optional) + + + + + + + Input folder + + + + + + + true + + + + + + true + + + + + + + + + Qt::Horizontal + + + + + + + + + + + + Asset map + + + + + + + + + Qt::Horizontal + + + + + + + + + Load file(s) + + + + + + + + + Progress + + + + + + + 0 + + + false + + + + + + + + + + + + + Browse + + + + + 0 + 20 + 1081 + 591 + + + + 1 + + + + 1 + + + + + + + 2 + 1 + 1081 + 21 + + + + Search something... + + + + + + Extract + + + + + 9 + 9 + 1061 + 601 + + + + + + + + + Output folder + + + + + + + true + + + + + + + Select + + + + + + + + + + + Output format + + + + + + + + + + + + + + + + Qt::Horizontal + + + + + + + + + + + Total progress + + + + + + + 0 + + + + + + + + + + + Per file progress + + + + + + + 0 + + + + + + + + + + + Qt::Horizontal + + + + + + + + + Extract All + + + + + + + Extract Selected + + + + + + + + + + + + + 10 + 640 + 1081 + 131 + + + + + 16777215 + 220 + + + + false + + + 0 + + + true + + + + + + + 0 + 0 + 1100 + 26 + + + + + File + + + + + + + + + + not working here yet + + + + + All files + + + + + Selected files + + + + + Reset + + + + + Exit + + + + + inputPath + changeInput + altInputPath + changeAltInput + tabs + + + + diff --git a/mapper.py b/mapper.py index 6f3eb1c..c2ff03e 100644 --- a/mapper.py +++ b/mapper.py @@ -1,7 +1,5 @@ # reader for the .map format i've made to improve reading speed and mapping size from filereader import FileReader -import os -import json class Mapper: @@ -31,21 +29,39 @@ class Mapper: reader = self.reader # utils - val = lambda length: int.from_bytes(reader.ReadBytes(length), "little") - raw = lambda length: reader.ReadBytes(length).rstrip(b"\x00").decode("utf-8") + val = lambda length: vl2(reader.ReadBytes(length)) + vl2 = lambda data: int.from_bytes(data, "little") + raw = lambda length: rw2(reader.ReadBytes(length)) + rw2 = lambda data: data.rstrip(b"\x00").decode("utf-8") + n2p = lambda val: [e[0] for e in enumerate(list(bin(val)[2:][::-1])) if e[1] == "1"] # get map meta reader.ReadBytes(2) games 
= { - b"ys": "Genshin" + "ys": "Genshin", + "sr": "Star Rail", + "zzz": "Zenless Zone Zero" # more later } + coverages = [ + "english voicelines", + "chinese voicelines", + "japanese voicelines", + "korean voicelines", + "music", + "sfx" + ] + + header_size = val(1) # header size + block_size = 4 + header_blocks = [reader.ReadBytes(block_size) for _ in range(header_size // block_size)] + infos = { - "game": games[reader.ReadBytes(2)], - "version": list(raw(2)), - "null": reader.ReadBytes(4) + "game": games[rw2(header_blocks[0])], + "version": list(rw2(header_blocks[1])), + "coverage": int(rw2(header_blocks[2])), # more later } @@ -54,19 +70,23 @@ class Mapper: # read prefixes prefixes = {} n_prefixes = reader.ReadUInt8() + l_prefixes = reader.ReadUInt8() for i in range(n_prefixes): - prefix = raw(4) + prefix = raw(l_prefixes) marker = reader.ReadBytes(1) prefixes[marker] = prefix # read languages langs_offsets = {} n_langs = reader.ReadUInt8() + l_langs = reader.ReadUInt8() for i in range(n_langs): offset = reader.GetBufferPos() - langs_offsets[offset] = raw(11) + langs_offsets[offset] = raw(l_langs) + + self.langs_offsets = langs_offsets # read folders folder_offsets = {} @@ -92,7 +112,11 @@ class Mapper: path.append(folder_offsets[reader.ReadUInt16()]) name_length = reader.ReadUInt8() - prefix = prefixes[reader.ReadBytes(1)] + prefix = reader.ReadBytes(1) + if prefix != b"\x00": + prefix = prefixes[prefix] + else: + prefix = "" name = raw(name_length) name = f"{prefix}{name}" @@ -101,17 +125,16 @@ class Mapper: files_offsets[offset] = path + self.files_offsets = files_offsets + # read keys + # GI 3649050 keys_data = {} n_keys = val(3) - for i in range(n_keys): - key = raw(16) - - lang_offset = reader.ReadUInt8() - file_offset = val(3) - - keys_data[key] = [files_offsets[file_offset], langs_offsets[lang_offset]] + left = reader.GetRemainingLength() + data = bytearray(reader.ReadBytes(left)) + keys_data = {rw2(data[i:i+16]): bytes(data[i+16:i+21]) for i in range(0, 
len(data), 21)} self.keys_data = keys_data @@ -120,7 +143,14 @@ class Mapper: print(f": {n_langs} supported languages") print(f": {n_files} mapped files") print(f": {n_keys} available keys") - + print(f"") + print(f"> Mapping coverage") + coverage = n2p(infos["coverage"]) + for val in coverage: + if val%2 == 0: + print(f": partial {coverages[val//2-1]}") + else: + print(f": {coverages[(val-1)//2]}") def get_key(self, key, lang=False): keys_data = self.keys_data @@ -128,9 +158,15 @@ class Mapper: return None key_data = keys_data[key] - data = [key_data[0]] + data = [self.files_offsets[int.from_bytes(key_data[2:], "little")]] if lang: - data.append(key_data[1]) + data.append(self.langs_offsets[int.from_bytes(key_data[:1], "little")]) return data + + def reset(self): + self.reader = None + self.langs_offsets.clear() + self.files_offsets.clear() + self.keys_data.clear() diff --git a/maps/hk4e.map b/maps/hk4e.map new file mode 100644 index 0000000..e798e40 Binary files /dev/null and b/maps/hk4e.map differ diff --git a/mapping/latest.map b/maps/hkrpg.map similarity index 59% rename from mapping/latest.map rename to maps/hkrpg.map index 2fc626e..4b37c66 100644 Binary files a/mapping/latest.map and b/maps/hkrpg.map differ diff --git a/maps/index.json b/maps/index.json new file mode 100644 index 0000000..1c30f6d --- /dev/null +++ b/maps/index.json @@ -0,0 +1,17 @@ +{"maps": [ + { + "name": "hk4e.map", + "game": "Genshin Impact", + "version": "4.8" + }, + { + "name": "hkrpg.map", + "game": "Star Rail", + "version": "2.2" + }, + { + "name": "nap.map", + "game": "Zenless Zone Zero", + "version": "1.0" + } +]} \ No newline at end of file diff --git a/maps/nap.map b/maps/nap.map new file mode 100644 index 0000000..53667e0 Binary files /dev/null and b/maps/nap.map differ diff --git a/patch/.keep b/patch/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/requirements.txt b/requirements.txt index b186656..c6cfe1b 100644 --- a/requirements.txt +++ b/requirements.txt 
@@ -1,2 +1,2 @@ -halo==0.0.31 -progress==1.6 +PyQt5==5.15.11 +PyQt5_sip==12.15.0 diff --git a/tools/hpatchz/hdiffz.exe b/tools/hpatchz/hdiffz.exe deleted file mode 100644 index ed25cbe..0000000 Binary files a/tools/hpatchz/hdiffz.exe and /dev/null differ diff --git a/wavescan.py b/wavescan.py index da643e9..55e1fa8 100644 --- a/wavescan.py +++ b/wavescan.py @@ -1,23 +1,28 @@ # Custom rewrite of the Wwise AKPK packages extractor, original by Nicknine and bnnm -from filereader import FileReader -import traceback import os +import traceback +from bnk import bnk2wem reader = None bank_version = 0 +wwise_data = [] +filename = "" -def extract(input_file, output_folder): +def get_data(_reader, _filename): + global wwise_data global bank_version global reader + global filename - file = open(input_file, "rb") - reader = FileReader(file, "little") # defaults to little endian + filename = _filename + wwise_data = [] + reader = _reader # check file if reader.ReadBytes(4) != b"AKPK": - file.close() + # file.close() raise Exception("not a valid audio file") # check endianness @@ -29,7 +34,6 @@ def extract(input_file, output_folder): elif endian_check == 0x1000000: endianness = 1 # big else: - file.close() raise Exception("couldn't detect endianness") # retrieve sectors in header @@ -52,7 +56,6 @@ def extract(input_file, output_folder): try: lang_array = get_langs(languages_sector_size) except Exception as e: - file.close() raise Exception(f"failed to read languages, {e}, {traceback.format_exc()}") # extract each sector @@ -60,18 +63,16 @@ def extract(input_file, output_folder): try: for sector in sectors: curr_sector = sector - extract_sector(*sector[1:], endianness, lang_array, bank_version, output_folder) + extract_sector(*sector[1:], endianness, lang_array, bank_version) if sector[0] and bank_version == 0: if externals_sector_size == 0: print("can't detect bank version") bank_version = 62 except Exception as e: - file.close() raise Exception(f"failed to extract sector 
{curr_sector}, {e}, {traceback.format_exc()}") - # close - file.close() + return wwise_data def get_langs(langs_sector_size): string_offset = reader.GetBufferPos() @@ -125,7 +126,9 @@ def detect_bank_version(offset): reader.SetBufferPos(current) -def extract_sector(section_size, is_sounds, is_externals, ext, endianness, lang_array, bank_version, output_folder, filter_bnk_only=0, filter_wem_only=0, include_name=False): +def extract_sector(section_size, is_sounds, is_externals, ext, endianness, lang_array, bank_version, filter_bnk_only=0, filter_wem_only=0, include_name=False): + global wwise_data + # check sector validity if section_size == 0: return @@ -210,23 +213,16 @@ def extract_sector(section_size, is_sounds, is_externals, ext, endianness, lang_ continue # file infos - # print(f"NAME - {name} | OFFSET - {offset} | SIZE - {size}") + if ext == "bnk": + # get data from bnk + pos = reader.GetBufferPos() + reader.SetBufferPos(offset) + bnk_data = reader.ReadBytes(size) + reader.SetBufferPos(pos) - # save file into disk - current = reader.GetBufferPos() - reader.SetBufferPos(offset) - file_data = reader.ReadBytes(size) + wems = bnk2wem(bnk_data) - if include_name: - file_path = os.path.join(output_folder, os.path.dirname(name)) + for wem in wems: + wwise_data.append([f"{os.path.basename(name).split('.')[0]}_{wem[0]}.wem", offset+wem[1], wem[2], filename]) else: - file_path = output_folder - name = os.path.basename(name) - - os.makedirs(file_path, exist_ok=True) - - with open(os.path.join(file_path, name), "wb+") as f: - f.write(file_data) - f.close() - - reader.SetBufferPos(current) + wwise_data.append([os.path.basename(name), offset, size, filename])