From 7a025ff5d30322e14d164d76f19d95ec00c81a31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?=
<41315874+fumiama@users.noreply.github.com>
Date: Thu, 4 Apr 2024 03:11:53 +0900
Subject: [PATCH] feat: add param -n -t & optimize param parsing & tidy
documents
---
CMakeLists.txt | 3 +-
base1432.c | 152 ++++++++++++++++++++---------
base1464.c | 145 +++++++++++++++++++---------
base16384.1 | 24 ++++-
base16384.c | 135 +++++++++++++++-----------
base16384.h | 176 ++++++++++++++++++++++++++-------
binary.h | 90 +++++++++++++++++
file.c | 257 +++++++++++++++++++++++++++++++++++++------------
8 files changed, 736 insertions(+), 246 deletions(-)
create mode 100644 binary.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f75817f..edafedc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 2.8.12)
if (POLICY CMP0048)
cmake_policy(SET CMP0048 NEW)
endif (POLICY CMP0048)
-project(base16384 VERSION 2.2.5)
+project(base16384 VERSION 2.3.0)
add_executable(base16384_b base16384.c)
@@ -13,6 +13,7 @@ if (${isBigEndian})
endif()
IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
+ add_definitions(-DIS_64BIT_PROCESSOR)
add_library(base16384 SHARED file.c base1464.c)
add_library(base16384_s STATIC file.c base1464.c)
ELSE()
diff --git a/base1432.c b/base1432.c
index ac09987..df723e2 100644
--- a/base1432.c
+++ b/base1432.c
@@ -1,6 +1,6 @@
/* base1432.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
- * Copyright (c) 2022-2023 Fumiama Minamoto.
+ * Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,51 +16,7 @@
* along with this program. If not, see .
*/
-#ifdef __cosmopolitan // always le
-# define be16toh(x) bswap_16(x)
-# define be32toh(x) bswap_32(x)
-# define htobe16(x) bswap_16(x)
-# define htobe32(x) bswap_32(x)
-#else
-#include
-#include
-#include
-#ifdef __linux__
-# include
-#endif
-#ifdef __FreeBSD__
-# include
-#endif
-#ifdef __NetBSD__
-# include
-#endif
-#ifdef __OpenBSD__
-# include
-# define be16toh(x) betoh16(x)
-# define be32toh(x) betoh32(x)
-#endif
-#ifdef __APPLE__
-# define be16toh(x) ntohs(x)
-# define be32toh(x) ntohl(x)
-# define htobe16(x) ntohs(x)
-# define htobe32(x) htonl(x)
-#endif
-#ifdef _WIN32
- #ifdef WORDS_BIGENDIAN
- # define be16toh(x) (x)
- # define be32toh(x) (x)
- # define htobe16(x) (x)
- # define htobe32(x) (x)
- #else
- # define be16toh(x) _byteswap_ushort(x)
- # define be32toh(x) _byteswap_ulong(x)
- # define htobe16(x) _byteswap_ushort(x)
- # define htobe32(x) _byteswap_ulong(x)
- #endif
-#endif
-#endif
-
-// #define DEBUG
+#include "binary.h"
int base16384_encode(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
@@ -140,6 +96,49 @@ int base16384_encode(const char* data, int dlen, char* buf) {
return outlen;
}
+int base16384_encode_unsafe(const char* data, int dlen, char* buf) { // encoder with no special-casing of the tail: every pass loads 8 bytes at data+i and stores 8 bytes, so it can touch memory past data+dlen and past the returned length
+ int outlen = dlen / 7 * 8;
+ int offset = dlen % 7;
+ switch(offset) { // add the size of the partial group, counting the 2 bytes taken by the offset marker
+ case 0: break;
+ case 1: outlen += 4; break;
+ case 2:
+ case 3: outlen += 6; break;
+ case 4:
+ case 5: outlen += 8; break;
+ case 6: outlen += 10; break;
+ default: break;
+ }
+ #ifdef DEBUG
+ printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8); // NOTE(review): %llu is paired with int arguments here
+ #endif
+ uint32_t* vals = (uint32_t*)buf; // output is stored as 32-bit words
+ uint32_t n = 0;
+ int32_t i = 0;
+ for(; i < dlen; i += 7) { // one 7-byte input group -> two 32-bit output words
+ register uint32_t sum = 0;
+ register uint32_t shift = htobe32(*(uint32_t*)(data+i)); // bytes i..i+3 with byte i in the most significant position
+ sum |= (shift>>2) & 0x3fff0000; // bits 31..18 -> upper 14-bit field
+ sum |= (shift>>4) & 0x00003fff; // bits 17..4 -> lower 14-bit field
+ sum += 0x4e004e00; // bias both 14-bit fields by 0x4e00
+ vals[n++] = be32toh(sum);
+ shift <<= 26; // move the 4 leftover low bits up to positions 29..26
+ shift &= 0x3c000000;
+ sum = 0;
+ shift |= (htobe32(*(uint32_t*)(data+i+4))>>6)&0x03fffffc; // append bytes i+4..i+6; byte i+7 is loaded but masked away
+ sum |= shift & 0x3fff0000; // bits 29..16 -> upper 14-bit field
+ shift >>= 2;
+ sum |= shift & 0x00003fff; // remaining 14 bits -> lower 14-bit field
+ sum += 0x4e004e00;
+ vals[n++] = be32toh(sum);
+ }
+ if(offset) { // partial last group: finish with '=' followed by the remainder length
+ buf[outlen - 2] = '=';
+ buf[outlen - 1] = offset;
+ }
+ return outlen; // encoded length, including the 2 marker bytes when present
+}
+
int base16384_decode(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
@@ -214,3 +213,64 @@ int base16384_decode(const char* data, int dlen, char* buf) {
}
return outlen;
}
+
+int base16384_decode_unsafe(const char* data, int dlen, char* buf) { // decoder with no bounds handling: each group is stored as 8 bytes (the 8th is zero) and the last group is always processed; reads data[dlen-2], so dlen >= 2 is assumed
+ int outlen = dlen;
+ int offset = 0;
+ if(data[dlen-2] == '=') { // trailing '=' marker: the next byte is the length of the partial group
+ offset = data[dlen-1];
+ switch(offset) { // remove the size of the partial group, counting the 2 bytes taken by the offset marker
+ case 0: break;
+ case 1: outlen -= 4; break;
+ case 2:
+ case 3: outlen -= 6; break;
+ case 4:
+ case 5: outlen -= 8; break;
+ case 6: outlen -= 10; break;
+ default: break;
+ }
+ }
+ outlen = outlen / 8 * 7 + offset;
+ uint32_t* vals = (uint32_t*)data; // input is consumed as 32-bit words
+ uint32_t n = 0;
+ int32_t i = 0;
+ for(; i < outlen-7; i+=7) { // n effectively advances by 2 per pass
+ register uint32_t sum = 0;
+ register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00; // remove the 0x4e00 bias from both 14-bit fields
+ shift <<= 2;
+ sum |= shift & 0xfffc0000; // upper field -> bits 31..18
+ shift <<= 2;
+ sum |= shift & 0x0003fff0; // lower field -> bits 17..4
+ shift = htobe32(vals[n++]) - 0x4e004e00;
+ sum |= shift >> 26; // top 4 bits of the next upper field fill bits 3..0 (assumes bits 31..30 are clear after the bias is removed)
+ *(uint32_t*)(buf+i) = be32toh(sum);
+ sum = 0;
+ shift <<= 6;
+ sum |= shift & 0xffc00000; // remaining 10 bits of the upper field -> bits 31..22
+ shift <<= 2;
+ sum |= shift & 0x003fff00; // lower field -> bits 21..8; the low byte stays zero
+ *(uint32_t*)(buf+i+4) = be32toh(sum);
+ }
+ register uint32_t sum = 0; // final group, processed unconditionally
+ register uint32_t shift = htobe32(vals[n++]);
+ if(((shift>>24)&0xff) < 0x4e) shift |= 0xff000000; // a 16-bit unit whose high byte is below 0x4e gets that byte forced to 0xff, presumably so the subtraction below cannot borrow from the neighbouring field
+ if(((shift>> 8)&0xff) < 0x4e) shift |= 0x0000ff00;
+ shift -= 0x4e004e00;
+ shift <<= 2;
+ sum |= shift & 0xfffc0000;
+ shift <<= 2;
+ sum |= shift & 0x0003fff0;
+ shift = htobe32(vals[n++]);
+ if(((shift>>24)&0xff) < 0x4e) shift |= 0xff000000;
+ if(((shift>> 8)&0xff) < 0x4e) shift |= 0x0000ff00;
+ shift -= 0x4e004e00;
+ sum |= shift >> 26;
+ *(uint32_t*)(buf+i) = be32toh(sum);
+ sum = 0;
+ shift <<= 6;
+ sum |= shift & 0xffc00000;
+ shift <<= 2;
+ sum |= shift & 0x003fff00;
+ *(uint32_t*)(buf+i+4) = be32toh(sum); // stores 8 bytes at buf+i regardless of how many belong to the result
+ return outlen; // decoded length
+}
diff --git a/base1464.c b/base1464.c
index 73b0432..77d1ff3 100644
--- a/base1464.c
+++ b/base1464.c
@@ -1,6 +1,6 @@
/* base1464.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
- * Copyright (c) 2022-2023 Fumiama Minamoto.
+ * Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,51 +16,7 @@
* along with this program. If not, see .
*/
-#include
-#include
-#include
-#ifdef __linux__
-# include
-#endif
-#ifdef __FreeBSD__
-# include
-#endif
-#ifdef __NetBSD__
-# include
-#endif
-#ifdef __OpenBSD__
-# include
-# define be16toh(x) betoh16(x)
-# define be32toh(x) betoh32(x)
-# define be64toh(x) betoh64(x)
-#endif
-#ifdef __APPLE__
-# define be16toh(x) ntohs(x)
-# define be32toh(x) ntohl(x)
-# define be64toh(x) ntohll(x)
-# define htobe16(x) ntohs(x)
-# define htobe32(x) htonl(x)
-# define htobe64(x) htonll(x)
-#endif
-#ifdef _WIN64
- #ifdef WORDS_BIGENDIAN
- # define be16toh(x) (x)
- # define be32toh(x) (x)
- # define be64toh(x) (x)
- # define htobe16(x) (x)
- # define htobe32(x) (x)
- # define htobe64(x) (x)
- #else
- # define be16toh(x) _byteswap_ushort(x)
- # define be32toh(x) _byteswap_ulong(x)
- # define be64toh(x) _byteswap_uint64(x)
- # define htobe16(x) _byteswap_ushort(x)
- # define htobe32(x) _byteswap_ulong(x)
- # define htobe64(x) _byteswap_uint64(x)
- #endif
-#endif
-
-// #define DEBUG
+#include "binary.h"
int base16384_encode(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
@@ -136,6 +92,48 @@ int base16384_encode(const char* data, int dlen, char* buf) {
return outlen;
}
+int base16384_encode_unsafe(const char* data, int dlen, char* buf) { // encoder with no special-casing of the tail: every pass loads 8 bytes at data+i and stores 8 bytes, so it can touch memory past data+dlen and past the returned length
+ int outlen = dlen / 7 * 8;
+ int offset = dlen % 7;
+ switch(offset) { // add the size of the partial group, counting the 2 bytes taken by the offset marker
+ case 0: break;
+ case 1: outlen += 4; break;
+ case 2:
+ case 3: outlen += 6; break;
+ case 4:
+ case 5: outlen += 8; break;
+ case 6: outlen += 10; break;
+ default: break;
+ }
+ #ifdef DEBUG
+ printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8); // NOTE(review): %llu is paired with int arguments here
+ #endif
+ uint64_t* vals = (uint64_t*)buf; // output is stored as 64-bit words
+ uint64_t n = 0;
+ int64_t i = 0;
+ for(; i < dlen; i += 7) { // one 7-byte input group -> one 64-bit output word
+ register uint64_t sum = 0;
+ register uint64_t shift = htobe64(*(uint64_t*)(data+i))>>2; // this load reads out of bounds: 8 bytes are fetched for a 7-byte group
+ sum |= shift & 0x3fff000000000000; // 1st 14-bit field
+ shift >>= 2;
+ sum |= shift & 0x00003fff00000000; // 2nd 14-bit field
+ shift >>= 2;
+ sum |= shift & 0x000000003fff0000; // 3rd 14-bit field
+ shift >>= 2;
+ sum |= shift & 0x0000000000003fff; // 4th 14-bit field; the 8th loaded byte has been shifted out
+ sum += 0x4e004e004e004e00; // bias all four 14-bit fields by 0x4e00
+ vals[n++] = be64toh(sum);
+ #ifdef DEBUG
+ printf("i: %llu, add sum: %016llx\n", i, sum);
+ #endif
+ }
+ if(offset) { // partial last group: finish with '=' followed by the remainder length
+ buf[outlen - 2] = '=';
+ buf[outlen - 1] = offset;
+ }
+ return outlen; // encoded length, including the 2 marker bytes when present
+}
+
int base16384_decode(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
@@ -201,3 +199,58 @@ int base16384_decode(const char* data, int dlen, char* buf) {
}
return outlen;
}
+
+int base16384_decode_unsafe(const char* data, int dlen, char* buf) { // decoder with no bounds handling: each group is stored as 8 bytes (the 8th is zero) and the last group is always processed; reads data[dlen-2], so dlen >= 2 is assumed
+ int outlen = dlen;
+ int offset = 0;
+ if(data[dlen-2] == '=') { // trailing '=' marker: the next byte is the length of the partial group
+ offset = data[dlen-1];
+ switch(offset) { // remove the size of the partial group, counting the 2 bytes taken by the offset marker
+ case 0: break;
+ case 1: outlen -= 4; break;
+ case 2:
+ case 3: outlen -= 6; break;
+ case 4:
+ case 5: outlen -= 8; break;
+ case 6: outlen -= 10; break;
+ default: break;
+ }
+ }
+ outlen = outlen / 8 * 7 + offset;
+ uint64_t* vals = (uint64_t*)data; // input is consumed as 64-bit words
+ uint64_t n = 0;
+ int64_t i = 0;
+ for(; i < outlen-7; n++, i+=7) { // one 64-bit input word -> one 7-byte output group
+ register uint64_t sum = 0;
+ register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00; // remove the 0x4e00 bias from all four 14-bit fields
+ shift <<= 2;
+ sum |= shift & 0xfffc000000000000; // 1st field -> bits 63..50
+ shift <<= 2;
+ sum |= shift & 0x0003fff000000000; // 2nd field -> bits 49..36
+ shift <<= 2;
+ sum |= shift & 0x0000000fffc00000; // 3rd field -> bits 35..22
+ shift <<= 2;
+ sum |= shift & 0x00000000003fff00; // 4th field -> bits 21..8; the low byte stays zero
+ *(uint64_t*)(buf+i) = be64toh(sum); // stores 8 bytes; the next group overwrites the 8th
+ #ifdef DEBUG
+ printf("i: %llu, add sum: %016llx\n", i, sum);
+ #endif
+ }
+ register uint64_t sum = 0; // final group, processed unconditionally
+ register uint64_t shift = htobe64(vals[n]);
+ if(((shift>>56)&0xff) < 0x4e) shift |= 0xff00000000000000; // a 16-bit unit whose high byte is below 0x4e gets that byte forced to 0xff, presumably so the subtraction below cannot borrow from the neighbouring field
+ if(((shift>>40)&0xff) < 0x4e) shift |= 0x0000ff0000000000;
+ if(((shift>>24)&0xff) < 0x4e) shift |= 0x00000000ff000000;
+ if(((shift>> 8)&0xff) < 0x4e) shift |= 0x000000000000ff00;
+ shift -= 0x4e004e004e004e00;
+ shift <<= 2;
+ sum |= shift & 0xfffc000000000000;
+ shift <<= 2;
+ sum |= shift & 0x0003fff000000000;
+ shift <<= 2;
+ sum |= shift & 0x0000000fffc00000;
+ shift <<= 2;
+ sum |= shift & 0x00000000003fff00;
+ *(uint64_t*)(buf+i) = be64toh(sum); // stores 8 bytes at buf+i regardless of how many belong to the result
+ return outlen; // decoded length
+}
diff --git a/base16384.1 b/base16384.1
index 279534d..47c869a 100644
--- a/base16384.1
+++ b/base16384.1
@@ -1,9 +1,9 @@
-.TH BASE16384 1 "26 August 2023" "GNU" "User Commands"
+.TH BASE16384 1 "4 April 2024" "GNU" "User Commands"
.SH NAME
base16384 \- Encode binary files to printable utf16be
.SH SYNOPSIS
.B base16384
--[e|d|t] <\fIinputfile\fR> <\fIoutputfile\fR>
+-[edtnc] <\fIinputfile\fR> <\fIoutputfile\fR>
.SH DESCRIPTION
.LP
There are
@@ -27,7 +27,11 @@ to
.sp 1
.TP 0.5i
\fB\-e\fR
-Read data from \fIinputfile\fR and encode them into \fIoutputfile\fR.
+Read data from \fIinputfile\fR and encode them into \fIoutputfile\fR. It's the default option when neither
+.B -e
+nor
+.B -d
+is specified.
.TP 0.5i
\fB\-d\fR
Read data from \fIinputfile\fR and decode them into \fIoutputfile\fR.
@@ -35,6 +39,12 @@ Read data from \fIinputfile\fR and decode them into \fIoutputfile\fR.
\fB\-t\fR
Show spend time.
.TP 0.5i
+\fB\-n\fR
+Do not write utf16be file header (0xFEFF) to the output.
+.TP 0.5i
+\fB\-c\fR
+Embed or validate checksum in remainder when using stdin/stdout or inputsize > _BASE16384_ENCBUFSZ.
+.TP 0.5i
\fBinputfile\fR
An absolute or relative file path. Specially, pass - to read from stdin.
.TP 0.5i
@@ -68,6 +78,12 @@ Write file error in mmap.
.TP 0.5i
\fB8\fR
Invalid input/output filename.
+.TP 0.5i
+\fB9\fR
+Invalid commandline parameter.
+.TP 0.5i
+\fB10\fR
+Invalid decoding checksum.
.SH "SEE ALSO"
https://github.com/fumiama/base16384
.SH BUGS
@@ -77,7 +93,7 @@ on github.
.SH AUTHOR
This manual page contributed by Fumiama Minamoto.
.SH "COPYRIGHT"
-Copyright \(co 2022-2023, Fumiama Minamoto
+Copyright \(co 2022-2024, Fumiama Minamoto
This file is part of
.IR "base16384" .
.LP
diff --git a/base16384.c b/base16384.c
index 53d1891..dffafaa 100644
--- a/base16384.c
+++ b/base16384.c
@@ -1,6 +1,6 @@
/* base16384.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
- * Copyright (c) 2022-2023 Fumiama Minamoto.
+ * Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -38,77 +38,98 @@ unsigned long get_start_ms() {
}
#endif
-static void print_usage() {
- puts("Copyright (c) 2022-2023 Fumiama Minamoto.\nBase16384 2.2.5 (August 26th 2023). Usage:");
- puts("base16384 [-edt] [inputfile] [outputfile]");
- puts(" -e\t\tencode");
- puts(" -d\t\tdecode");
- puts(" -t\t\tshow spend time");
- puts(" inputfile\tpass - to read from stdin");
- puts(" outputfile\tpass - to write to stdout");
+static base16384_err_t print_usage() { // write the usage text to stderr and return the "invalid commandline parameter" code so callers can `return print_usage();`
+ fputs("Copyright (c) 2022-2024 Fumiama Minamoto.\nBase16384 2.3.0 (April 4th 2024). Usage:\n", stderr);
+ fputs("base16384 [-edtn] [inputfile] [outputfile]\n", stderr); // NOTE(review): this synopsis omits -c, which is listed a few lines below
+ fputs(" -e\t\tencode (default)\n", stderr);
+ fputs(" -d\t\tdecode\n", stderr);
+ fputs(" -t\t\tshow spend time\n", stderr);
+ fputs(" -n\t\tdon't write utf16be file header (0xFEFF)\n", stderr);
+ fputs(" -c\t\tembed or validate checksum in remainder\n", stderr);
+ fputs(" inputfile\tpass - to read from stdin\n", stderr);
+ fputs(" outputfile\tpass - to write to stdout\n", stderr);
+ return base16384_err_invalid_commandline_parameter;
+}
int main(int argc, char** argv) {
- if(argc != 4 || argv[1][0] != '-') {
- print_usage();
- return -1;
- }
- int flaglen = strlen(argv[1]);
- if(flaglen <= 1 || flaglen > 3) {
- print_usage();
- return -2;
- }
+ // Entry point: expects exactly "-<flags> <input> <output>", runs the requested coding and returns a base16384_err_t as the exit status.
+ const char* cmd = argv[1];
+ if(argc != 4 || cmd[0] != '-') return print_usage(); // cmd is only dereferenced when argc == 4
+ // The flag string must be '-' followed by 1 to 4 option letters.
+ int flaglen = strlen(cmd);
+ if(flaglen <= 1 || flaglen > 5) return print_usage();
+
#ifdef _WIN32
clock_t t = 0;
#else
unsigned long t = 0;
#endif
+ // Each option is a uint16_t: the low byte holds its value and the high byte counts assignments, so a repeated option (or -e together with -d, which share is_encode) is rejected.
+ uint16_t is_encode = 1, use_timer = 0, no_header = 0, use_checksum = 0;
+ #define set_flag(f, v) ((f) = (((((f)>>8)+1) << 8)&0xff00) | (v&0x00ff))
+ #define flag_has_been_set(f) ((f)>>8)
+ #define set_or_test_flag(f, v) (flag_has_been_set(f)?1:(set_flag(f, v), 0))
+ while(--flaglen) switch(cmd[flaglen]) { // scan option letters from last to first; the loop ends before reaching cmd[0] ('-')
+ case 'e':
+ if(set_or_test_flag(is_encode, 1)) return print_usage();
+ break;
+ case 'd':
+ if(set_or_test_flag(is_encode, 0)) return print_usage();
+ break;
+ case 't':
+ if(set_or_test_flag(use_timer, 1)) return print_usage();
+ break;
+ case 'n':
+ if(set_or_test_flag(no_header, 1)) return print_usage();
+ break;
+ case 'c':
+ if(set_or_test_flag(use_checksum, 1)) return print_usage();
+ break;
+ default:
+ return print_usage();
+ break;
+ }
+ #define clear_high_byte(x) ((x) &= 0x00ff)
+ clear_high_byte(is_encode); clear_high_byte(use_timer); // drop the assignment counters, leaving plain 0/1 values
+ clear_high_byte(no_header); clear_high_byte(use_checksum);
+ // Take the start timestamp only when -t was given.
+ if(use_timer) {
+ #ifdef _WIN32
+ t = clock();
+ #else
+ t = get_start_ms();
+ #endif
+ }
+ // do_coding forwards -n and -c to the *_file_detailed call as BASE16384_FLAG_* bits.
base16384_err_t exitstat = base16384_err_ok;
- char cmd = argv[1][1];
- if(cmd == 't') {
- if(flaglen == 2) {
- print_usage(); return -3;
- }
+ #define do_coding(method) base16384_##method##_file_detailed( \
+ argv[2], argv[3], encbuf, decbuf, \
+ (no_header?BASE16384_FLAG_NOHEADER:0) | (use_checksum?BASE16384_FLAG_SUM_CHECK_ON_REMAIN:0) \
+ )
+ exitstat = is_encode?do_coding(encode):do_coding(decode);
+ // NOTE(review): the check below tests the start timestamp t rather than use_timer, so a start value of 0 would suppress the report.
+ if(t) {
#ifdef _WIN32
- t = clock();
+ fprintf(stderr, "spend time: %lums\n", clock() - t);
#else
- t = get_start_ms();
- #endif
- cmd = argv[1][2];
- } else if(flaglen == 3) {
- if(argv[1][2] != 't') {
- print_usage(); return -4;
- }
- #ifdef _WIN32
- t = clock();
- #else
- t = get_start_ms();
+ fprintf(stderr, "spend time: %lums\n", get_start_ms() - t);
#endif
}
- switch(cmd) {
- case 'e': exitstat = base16384_encode_file(argv[2], argv[3], encbuf, decbuf); break;
- case 'd': exitstat = base16384_decode_file(argv[2], argv[3], encbuf, decbuf); break;
- default: print_usage(); return -5;
- }
- if(t && !exitstat && *(uint16_t*)(argv[3]) != *(uint16_t*)"-") {
- #ifdef _WIN32
- printf("spend time: %lums\n", clock() - t);
- #else
- printf("spend time: %lums\n", get_start_ms() - t);
- #endif
- }
- #define print_base16384_err(n) case base16384_err_##n: perror("base16384_err_"#n); break
+ // The macro expands to a case label plus perror(); the `break` is written at each use site.
+ #define print_base16384_err(n) case base16384_err_##n: perror("base16384_err_"#n)
if(exitstat) switch(exitstat) {
- print_base16384_err(get_file_size);
- print_base16384_err(fopen_output_file);
- print_base16384_err(fopen_input_file);
- print_base16384_err(write_file);
- print_base16384_err(open_input_file);
- print_base16384_err(map_input_file);
- print_base16384_err(read_file);
- print_base16384_err(invalid_file_name);
+ print_base16384_err(get_file_size); break;
+ print_base16384_err(fopen_output_file); break;
+ print_base16384_err(fopen_input_file); break;
+ print_base16384_err(write_file); break;
+ print_base16384_err(open_input_file); break;
+ print_base16384_err(map_input_file); break;
+ print_base16384_err(read_file); break;
+ print_base16384_err(invalid_file_name); break;
+ print_base16384_err(invalid_commandline_parameter); break;
+ print_base16384_err(invalid_decoding_checksum); break;
default: perror("base16384"); break;
}
- #undef print_base16384_err
return exitstat;
+
+}
diff --git a/base16384.h b/base16384.h
index fd19063..6bf9e74 100644
--- a/base16384.h
+++ b/base16384.h
@@ -3,7 +3,7 @@
/* base16384.h
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
- * Copyright (c) 2022-2023 Fumiama Minamoto.
+ * Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -26,31 +26,49 @@
#define define_base16384_err_t(n) base16384_err_##n
-// base16384_err_t is the return value of base16384_en/decode_file
-enum base16384_err_t {
- define_base16384_err_t(ok),
- define_base16384_err_t(get_file_size),
- define_base16384_err_t(fopen_output_file),
- define_base16384_err_t(fopen_input_file),
- define_base16384_err_t(write_file),
- define_base16384_err_t(open_input_file),
- define_base16384_err_t(map_input_file),
- define_base16384_err_t(read_file),
- define_base16384_err_t(invalid_file_name),
-};
-// base16384_err_t is the return value of base16384_en/decode_file
-typedef enum base16384_err_t base16384_err_t;
+ enum base16384_err_t { // enumerators take implicit sequential values starting at 0; each name expands to base16384_err_<name>
+ define_base16384_err_t(ok), // 0
+ define_base16384_err_t(get_file_size), // 1
+ define_base16384_err_t(fopen_output_file), // 2
+ define_base16384_err_t(fopen_input_file), // 3
+ define_base16384_err_t(write_file), // 4
+ define_base16384_err_t(open_input_file), // 5
+ define_base16384_err_t(map_input_file), // 6
+ define_base16384_err_t(read_file), // 7
+ define_base16384_err_t(invalid_file_name), // 8
+ define_base16384_err_t(invalid_commandline_parameter), // 9
+ define_base16384_err_t(invalid_decoding_checksum), // 10
+ };
+
+ /**
+ * @brief status code returned by the base16384 encode/decode file, fp and fd functions
+ */
+ typedef enum base16384_err_t base16384_err_t;
#undef define_base16384_err_t
-#define BASE16384_ENCBUFSZ (BUFSIZ*1024/7*7+7)
-#define BASE16384_DECBUFSZ (BUFSIZ*1024/8*8+16)
+#define _BASE16384_ENCBUFSZ (BUFSIZ*1024/7*7)
+#define _BASE16384_DECBUFSZ (BUFSIZ*1024/8*8)
-// base16384_encode_len calc min buf size to fill encode result
-static inline int base16384_encode_len(int dlen) {
+#define BASE16384_ENCBUFSZ (_BASE16384_ENCBUFSZ+16)
+#define BASE16384_DECBUFSZ (_BASE16384_DECBUFSZ+16)
+
+// disable 0xFEFF file header in encode
+#define BASE16384_FLAG_NOHEADER (1<<0)
+// enable sum check when using stdin or inputsize > _BASE16384_ENCBUFSZ
+#define BASE16384_FLAG_SUM_CHECK_ON_REMAIN (1<<1)
+// initial sum value used in BASE16384_FLAG_SUM_CHECK_ON_REMAIN
+#define BASE16384_SIMPLE_SUM_INIT_VALUE (0x8e29c213)
+
+/**
+ * @brief calculate the exact encoded size
+ * @param dlen the data length to encode
+ * @return the size
+*/
+static inline int _base16384_encode_len(int dlen) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
- switch(offset) { // 算上偏移标志字符占用的2字节
+ switch(offset) { // 算上偏移标志字符占用的 2 字节
case 0: break;
case 1: outlen += 4; break;
case 2:
@@ -60,13 +78,27 @@ static inline int base16384_encode_len(int dlen) {
case 6: outlen += 10; break;
default: break;
}
- return outlen + 8; // 冗余的8B用于可能的结尾的覆盖
+ return outlen;
}
-// base16384_decode_len calc min buf size to fill decode result
-static inline int base16384_decode_len(int dlen, int offset) {
+/**
+ * @brief calculate minimum encoding buffer size (16 bytes larger than the exact encoded size)
+ * @param dlen the data length to encode
+ * @return the minimum encoding buffer size
+*/
+static inline int base16384_encode_len(int dlen) {
+ return _base16384_encode_len(dlen) + 16; // 16 spare bytes for the unsafe encoder, which may overwrite past the end
+}
+
+/**
+ * @brief calculate the exact decoded size
+ * @param dlen the data length to decode
+ * @param offset the last char `xx` of the underfilled coding (0x3Dxx) or 0 for the full coding
+ * @return the size
+*/
+static inline int _base16384_decode_len(int dlen, int offset) {
int outlen = dlen;
- switch(offset) { // 算上偏移标志字符占用的2字节
+ switch(offset) { // 算上偏移标志字符占用的 2 字节
case 0: break;
case 1: outlen -= 4; break;
case 2:
@@ -76,39 +108,117 @@ static inline int base16384_decode_len(int dlen, int offset) {
case 6: outlen -= 10; break;
default: break;
}
- return outlen / 8 * 7 + offset + 1; // 多出1字节用于循环覆盖
+ return outlen / 8 * 7 + offset;
}
-// base16384_encode encodes data and write result into buf
+/**
+ * @brief calculate minimum decoding buffer size (16 bytes larger than the exact decoded size)
+ * @param dlen the data length to decode
+ * @param offset the last char `xx` of the underfilled coding (0x3Dxx) or 0 for the full coding
+ * @return the minimum decoding buffer size
+*/
+static inline int base16384_decode_len(int dlen, int offset) {
+ return _base16384_decode_len(dlen, offset) + 16; // 16 extra bytes for the unsafe decoder loop, which may overwrite past the end
+}
+
+/**
+ * @brief encode data and write result into buf
+ * @param data data to encode
+ * @param dlen the data length
+ * @param buf the output buffer, whose size must be at least the value returned by `base16384_encode_len`
+ * @return the total length written
+*/
int base16384_encode(const char* data, int dlen, char* buf);
-// base16384_decode decodes data and write result into buf
+/**
+ * @brief encode data and write result into buf without considering border condition
+ * @param data data to encode
+ * @param dlen the data length
+ * @param buf the output buffer, whose size must be at least the value returned by `base16384_encode_len`
+ * @return the total length written
+*/
+int base16384_encode_unsafe(const char* data, int dlen, char* buf);
+
+/**
+ * @brief decode data and write result into buf
+ * @param data data to decode
+ * @param dlen the data length
+ * @param buf the output buffer, whose size must be at least the value returned by `base16384_decode_len`
+ * @return the total length written
+*/
int base16384_decode(const char* data, int dlen, char* buf);
+/**
+ * @brief decode data and write result into buf without considering border condition
+ * @param data data to decode
+ * @param dlen the data length
+ * @param buf the output buffer, whose size must be at least the value returned by `base16384_decode_len`
+ * @return the total length written
+*/
+int base16384_decode_unsafe(const char* data, int dlen, char* buf);
+
+// base16384_encode_file_detailed encodes input file to output file.
+// use `-` to specify stdin/stdout
+// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
+base16384_err_t base16384_encode_file_detailed(const char* input, const char* output, char* encbuf, char* decbuf, int flag);
+
+// base16384_encode_fp_detailed encodes input file to output file.
+// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
+base16384_err_t base16384_encode_fp_detailed(FILE* input, FILE* output, char* encbuf, char* decbuf, int flag);
+
+// base16384_encode_fd_detailed encodes input fd to output fd.
+// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
+base16384_err_t base16384_encode_fd_detailed(int input, int output, char* encbuf, char* decbuf, int flag);
+
+// base16384_decode_file_detailed decodes input file to output file.
+// use `-` to specify stdin/stdout
+// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
+base16384_err_t base16384_decode_file_detailed(const char* input, const char* output, char* encbuf, char* decbuf, int flag);
+
+// base16384_decode_fp_detailed decodes input file to output file.
+// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
+base16384_err_t base16384_decode_fp_detailed(FILE* input, FILE* output, char* encbuf, char* decbuf, int flag);
+
+// base16384_decode_fd_detailed decodes input fd to output fd.
+// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
+base16384_err_t base16384_decode_fd_detailed(int input, int output, char* encbuf, char* decbuf, int flag);
+
// base16384_encode_file encodes input file to output file.
// use `-` to specify stdin/stdout
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
-base16384_err_t base16384_encode_file(const char* input, const char* output, char* encbuf, char* decbuf);
+static inline base16384_err_t base16384_encode_file(const char* input, const char* output, char* encbuf, char* decbuf) {
+ return base16384_encode_file_detailed(input, output, encbuf, decbuf, 0);
+}
// base16384_encode_fp encodes input file to output file.
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
-base16384_err_t base16384_encode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf);
+static inline base16384_err_t base16384_encode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf) {
+ return base16384_encode_fp_detailed(input, output, encbuf, decbuf, 0);
+}
// base16384_encode_fd encodes input fd to output fd.
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
-base16384_err_t base16384_encode_fd(int input, int output, char* encbuf, char* decbuf);
+static inline base16384_err_t base16384_encode_fd(int input, int output, char* encbuf, char* decbuf) {
+ return base16384_encode_fd_detailed(input, output, encbuf, decbuf, 0);
+}
// base16384_decode_file decodes input file to output file.
// use `-` to specify stdin/stdout
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
-base16384_err_t base16384_decode_file(const char* input, const char* output, char* encbuf, char* decbuf);
+static inline base16384_err_t base16384_decode_file(const char* input, const char* output, char* encbuf, char* decbuf) {
+ return base16384_decode_file_detailed(input, output, encbuf, decbuf, 0);
+}
// base16384_decode_fp decodes input file to output file.
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
-base16384_err_t base16384_decode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf);
+static inline base16384_err_t base16384_decode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf) {
+ return base16384_decode_fp_detailed(input, output, encbuf, decbuf, 0);
+}
// base16384_decode_fd decodes input fd to output fd.
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
-base16384_err_t base16384_decode_fd(int input, int output, char* encbuf, char* decbuf);
+static inline base16384_err_t base16384_decode_fd(int input, int output, char* encbuf, char* decbuf) {
+ return base16384_decode_fd_detailed(input, output, encbuf, decbuf, 0);
+}
#endif
diff --git a/binary.h b/binary.h
new file mode 100644
index 0000000..7f180d7
--- /dev/null
+++ b/binary.h
@@ -0,0 +1,90 @@
+#ifndef _BINARY_H_
+#define _BINARY_H_
+
+/* binary.h
+ * This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
+ * Copyright (c) 2022-2024 Fumiama Minamoto.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#ifdef __cosmopolitan // always le
+ #define be16toh(x) bswap_16(x)
+ #define be32toh(x) bswap_32(x)
+ #define htobe16(x) bswap_16(x)
+ #define htobe32(x) bswap_32(x)
+#else
+ #include
+ #include
+ #include
+ #ifdef __linux__
+ #include
+ #endif
+ #ifdef __FreeBSD__
+ #include
+ #endif
+ #ifdef __NetBSD__
+ #include
+ #endif
+ #ifdef __OpenBSD__
+ #include
+ #define be16toh(x) betoh16(x)
+ #define be32toh(x) betoh32(x)
+ #ifdef IS_64BIT_PROCESSOR
+ #define be64toh(x) betoh64(x)
+ #endif
+ #endif
+ #ifdef __APPLE__
+ #define be16toh(x) ntohs(x)
+ #define be32toh(x) ntohl(x)
+ #ifdef IS_64BIT_PROCESSOR
+ #define be64toh(x) ntohll(x)
+ #endif
+ #define htobe16(x) htons(x)
+ #define htobe32(x) htonl(x)
+ #ifdef IS_64BIT_PROCESSOR
+ #define htobe64(x) htonll(x)
+ #endif
+ #endif
+ #ifdef _MSC_VER
+ #ifdef WORDS_BIGENDIAN
+ #define be16toh(x) (x)
+ #define be32toh(x) (x)
+ #ifdef IS_64BIT_PROCESSOR
+ #define be64toh(x) (x)
+ #endif
+ #define htobe16(x) (x)
+ #define htobe32(x) (x)
+ #ifdef IS_64BIT_PROCESSOR
+ #define htobe64(x) (x)
+ #endif
+ #else
+ #define be16toh(x) _byteswap_ushort(x)
+ #define be32toh(x) _byteswap_ulong(x)
+ #ifdef IS_64BIT_PROCESSOR
+ #define be64toh(x) _byteswap_uint64(x)
+ #endif
+ #define htobe16(x) _byteswap_ushort(x)
+ #define htobe32(x) _byteswap_ulong(x)
+ #ifdef IS_64BIT_PROCESSOR
+ #define htobe64(x) _byteswap_uint64(x)
+ #endif
+ #endif
+ #endif
+#endif
+
+// Rotate x left by c bits within the width of x (x is evaluated twice). NOTE(review): with c == 0 the right shift equals the full bit width of x, which C leaves undefined; calc_sum in file.c passes (byte % 32), which can be 0.
+#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (sizeof(x)*8 - (c))))
+
+#endif
diff --git a/file.c b/file.c
index 0d07623..35ff956 100644
--- a/file.c
+++ b/file.c
@@ -1,6 +1,6 @@
/* file.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
- * Copyright (c) 2022-2023 Fumiama Minamoto.
+ * Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@
#include
#include
#include
+#include
#ifdef _WIN32
#include
#include
@@ -33,6 +34,7 @@
#endif
#endif
#include "base16384.h"
+#include "binary.h"
#ifdef __cosmopolitan
#define get_file_size(filepath) ((off_t)GetFileSize(filepath))
@@ -45,11 +47,77 @@ static inline off_t get_file_size(const char* filepath) {
#define is_standard_io(filename) (*(uint16_t*)(filename) == *(uint16_t*)"-")
-base16384_err_t base16384_encode_file(const char* input, const char* output, char* encbuf, char* decbuf) {
+// Fold cnt bytes of encbuf into the running checksum: each big-endian 32-bit word is rotated left by (its first byte % 32) and added to sum; returns the new sum.
+static inline uint32_t calc_sum(uint32_t sum, size_t cnt, char* encbuf) {
+    const unsigned char* p = (const unsigned char*)encbuf; // byte-wise loads: no alignment or aliasing assumptions on encbuf
+    #ifdef DEBUG
+        fprintf(stderr, "cnt: %zu, roundin: %08x, ", cnt, sum);
+    #endif
+    for(size_t i = 0; i < cnt/sizeof(sum); i++, p += sizeof(sum)) {
+        uint32_t w = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) | ((uint32_t)p[2] << 8) | (uint32_t)p[3];
+        #ifdef DEBUG
+            if (!i) fprintf(stderr, "firstval: %08x, ", w);
+        #endif
+        sum += LEFTROTATE(w, p[0]%(8*sizeof(sum)));
+    }
+    #ifdef DEBUG
+        fprintf(stderr, "roundmid: %08x", sum);
+    #endif
+    if(cnt % sizeof(sum)) { // trailing partial word: the missing low-order bytes count as zero and are never read
+        uint32_t x = 0;
+        for(size_t k = 0; k < cnt % sizeof(sum); k++) x |= (uint32_t)p[k] << (24 - 8*k);
+        sum += LEFTROTATE(x, p[0]%(8*sizeof(sum)));
+        #ifdef DEBUG
+            fprintf(stderr, ", roundrem:%08x\n", sum);
+        #endif
+    }
+    #ifdef DEBUG
+        else fprintf(stderr, "\n");
+    #endif
+    return sum;
+}
+
+static inline uint32_t calc_and_embed_sum(uint32_t sum, size_t cnt, char* encbuf) { // returns sum updated with encbuf[0..cnt)
+    sum = calc_sum(sum, cnt, encbuf);
+    if(cnt%7) { // last encode: append the checksum, big-endian, right after the data (needs 4 writable bytes at encbuf+cnt)
+        for(int k = 0; k < 4; k++) ((unsigned char*)encbuf)[cnt+k] = (unsigned char)(sum >> (24 - 8*k)); // byte-wise: encbuf+cnt is generally not 4-byte aligned
+    }
+    return sum;
+}
+
+static inline int calc_and_check_sum(uint32_t* s, size_t cnt, char* encbuf) { // returns nonzero when the stored checksum does not match
+    uint32_t sum = calc_sum(*s, cnt, encbuf);
+    if(cnt%7) { // is last decode block: compare with the checksum stored after the data; *s is left untouched
+        int shift = (int[]){0, 26, 20, 28, 22, 30, 24}[cnt%7]; // number of low bits excluded from the comparison, chosen by cnt%7
+        const unsigned char* p = (const unsigned char*)encbuf + cnt; // stored checksum, read byte-wise (generally unaligned)
+        uint32_t sum_read = (((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) | ((uint32_t)p[2] << 8) | (uint32_t)p[3]) >> shift;
+        #ifdef DEBUG
+            fprintf(stderr, "cntrm: %zu, mysum: %08x, sumrd: %08x\n", cnt%7, sum >> shift, sum_read);
+        #endif
+        return (sum >> shift) != sum_read;
+    }
+    *s = sum;
+    return 0;
+}
+
+#define goto_base16384_file_detailed_cleanup(method, reason, dobeforereturn) do { /* record the error, run the extra cleanup, jump to the shared exit label */ \
+    errnobak = errno; /* saved first so the cleanup statements cannot clobber it */ \
+    retval = (reason); \
+    dobeforereturn; \
+    goto base16384_##method##_file_detailed_cleanup; \
+} while(0)
+
+base16384_err_t base16384_encode_file_detailed(const char* input, const char* output, char* encbuf, char* decbuf, int flag) { // Encode the file named by input into the file named by output, honoring the BASE16384_FLAG_* bits in flag; returns a base16384_err_t code.
off_t inputsize;
FILE* fp = NULL;
FILE* fpo;
- if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) return base16384_err_invalid_file_name;
+ uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE; // running checksum; only updated when BASE16384_FLAG_SUM_CHECK_ON_REMAIN is set
+ int errnobak = 0; // errno captured by goto_base16384_file_detailed_cleanup, restored before returning
+ base16384_err_t retval = base16384_err_ok; // value returned from the cleanup label
+ if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) {
+ errno = EINVAL; // report the bad file name through errno as well
+ return base16384_err_invalid_file_name;
+ }
if(is_standard_io(input)) { // read from stdin
inputsize = 0;
fp = stdin;
@@ -61,64 +129,81 @@ base16384_err_t base16384_encode_file(const char* input, const char* output, cha
if(!fpo) {
return base16384_err_fopen_output_file;
}
- if(!inputsize || inputsize > BASE16384_ENCBUFSZ) { // stdin or big file, use encbuf & fread
- inputsize = BASE16384_ENCBUFSZ-7;
+ if(!inputsize || inputsize > _BASE16384_ENCBUFSZ) { // stdin or big file, use encbuf & fread
+ inputsize = _BASE16384_ENCBUFSZ; // from here on inputsize is the chunk size passed to fread
#if defined _WIN32 || defined __cosmopolitan
}
#endif
if(!fp) fp = fopen(input, "rb");
if(!fp) {
- return base16384_err_fopen_input_file;
+ goto_base16384_file_detailed_cleanup(encode, base16384_err_fopen_input_file, {}); // the cleanup label closes fpo
}
- size_t cnt = 0;
- fputc(0xFE, fpo);
- fputc(0xFF, fpo);
+ if(!(flag&BASE16384_FLAG_NOHEADER)) { // emit the 0xFE 0xFF header unless the caller disabled it
+ fputc(0xFE, fpo);
+ fputc(0xFF, fpo);
+ }
+ #ifdef DEBUG
+ inputsize = 917504; // NOTE(review): DEBUG builds override the chunk size with a fixed value — confirm encbuf is large enough
+ fprintf(stderr, "inputsize: %lld\n", inputsize); // NOTE(review): inputsize is off_t but is printed with %lld — confirm or cast
+ #endif
+ size_t cnt; // bytes returned by the current fread
while((cnt = fread(encbuf, sizeof(char), inputsize, fp)) > 0) {
- int n = base16384_encode(encbuf, cnt, decbuf);
+ if(flag&BASE16384_FLAG_SUM_CHECK_ON_REMAIN) sum = calc_and_embed_sum(sum, cnt, encbuf); // update the checksum before this chunk is encoded
+ int n = base16384_encode_unsafe(encbuf, cnt, decbuf); // n is the number of bytes handed to fwrite below
if(fwrite(decbuf, n, 1, fpo) <= 0) {
- return base16384_err_write_file;
+ goto_base16384_file_detailed_cleanup(encode, base16384_err_write_file, {});
}
}
- if(!is_standard_io(output)) fclose(fpo);
- if(!is_standard_io(input)) fclose(fp);
#if !defined _WIN32 && !defined __cosmopolitan
} else { // small file, use mmap & fwrite
int fd = open(input, O_RDONLY);
if(fd < 0) {
- return base16384_err_open_input_file;
+ goto_base16384_file_detailed_cleanup(encode, base16384_err_open_input_file, {});
}
char *input_file = mmap(NULL, (size_t)inputsize+16, PROT_READ, MAP_PRIVATE, fd, 0);
if(input_file == MAP_FAILED) {
- return base16384_err_map_input_file;
+ goto_base16384_file_detailed_cleanup(encode, base16384_err_map_input_file, close(fd)); // fd is closed after errno has been saved
+ }
+ if(!(flag&BASE16384_FLAG_NOHEADER)) { // NOTE(review): this mmap branch never consults BASE16384_FLAG_SUM_CHECK_ON_REMAIN — confirm that is intended
+ fputc(0xFE, fpo);
+ fputc(0xFF, fpo);
}
- fputc(0xFE, fpo);
- fputc(0xFF, fpo);
int n = base16384_encode(input_file, (int)inputsize, decbuf);
if(fwrite(decbuf, n, 1, fpo) <= 0) {
- return base16384_err_write_file;
+ goto_base16384_file_detailed_cleanup(encode, base16384_err_write_file, {
+ munmap(input_file, (size_t)inputsize); // NOTE(review): the mapping was created with inputsize+16 — confirm the shorter length here
+ close(fd);
+ });
}
munmap(input_file, (size_t)inputsize);
- if(!is_standard_io(output)) fclose(fpo);
close(fd);
}
#endif
- return base16384_err_ok;
+base16384_encode_file_detailed_cleanup: // shared exit for the success path and every goto above
+ if(fpo && !is_standard_io(output)) fclose(fpo); // streams for "-" are left open
+ if(fp && !is_standard_io(input)) fclose(fp);
+ if(errnobak) errno = errnobak; // restore the errno seen at the failure point; fclose may have changed it
+ return retval;
}
-base16384_err_t base16384_encode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf) {
+base16384_err_t base16384_encode_fp_detailed(FILE* input, FILE* output, char* encbuf, char* decbuf, int flag) { // Encode everything readable from input onto output, honoring the BASE16384_FLAG_* bits in flag; returns a base16384_err_t code.
if(!input) {
return base16384_err_fopen_input_file;
}
if(!output) {
return base16384_err_fopen_output_file;
}
- off_t inputsize = BASE16384_ENCBUFSZ-7;
+ off_t inputsize = _BASE16384_ENCBUFSZ; // chunk size passed to each fread
size_t cnt = 0;
- fputc(0xFE, output);
- fputc(0xFF, output);
+ uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE; // running checksum; only updated when BASE16384_FLAG_SUM_CHECK_ON_REMAIN is set
+ if(!(flag&BASE16384_FLAG_NOHEADER)) { // emit the 0xFE 0xFF header unless the caller disabled it
+ fputc(0xFE, output);
+ fputc(0xFF, output);
+ }
while((cnt = fread(encbuf, sizeof(char), inputsize, input)) > 0) {
- int n = base16384_encode(encbuf, cnt, decbuf);
+ if(flag&BASE16384_FLAG_SUM_CHECK_ON_REMAIN) sum = calc_and_embed_sum(sum, cnt, encbuf); // update the checksum before this chunk is encoded
+ int n = base16384_encode_unsafe(encbuf, cnt, decbuf); // n is the number of bytes handed to fwrite below
if(fwrite(decbuf, n, 1, output) <= 0) {
return base16384_err_write_file;
}
@@ -126,18 +211,20 @@ base16384_err_t base16384_encode_fp(FILE* input, FILE* output, char* encbuf, cha
return base16384_err_ok;
}
-base16384_err_t base16384_encode_fd(int input, int output, char* encbuf, char* decbuf) {
+base16384_err_t base16384_encode_fd_detailed(int input, int output, char* encbuf, char* decbuf, int flag) { // Encode everything readable from descriptor input onto descriptor output, honoring the BASE16384_FLAG_* bits in flag; returns a base16384_err_t code.
if(input < 0) {
return base16384_err_fopen_input_file;
}
if(output < 0) {
return base16384_err_fopen_output_file;
}
- off_t inputsize = BASE16384_ENCBUFSZ-7;
+ off_t inputsize = _BASE16384_ENCBUFSZ; // chunk size passed to each read
size_t cnt = 0;
- write(output, "\xfe\xff", 2);
+ uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE; // running checksum; only updated when BASE16384_FLAG_SUM_CHECK_ON_REMAIN is set
+ if(!(flag&BASE16384_FLAG_NOHEADER)) write(output, "\xfe\xff", 2); // emit the header unless the caller disabled it
- while((cnt = read(input, encbuf, inputsize)) > 0) {
- int n = base16384_encode(encbuf, cnt, decbuf);
+ while((cnt = read(input, encbuf, inputsize)) > 0 && cnt != (size_t)-1) { // a failed read() returns -1, which becomes (size_t)-1 in cnt: stop instead of using it as a byte count
+ if(flag&BASE16384_FLAG_SUM_CHECK_ON_REMAIN) sum = calc_and_embed_sum(sum, cnt, encbuf); // update the checksum before this chunk is encoded
+ int n = base16384_encode_unsafe(encbuf, cnt, decbuf); // n is the number of bytes handed to write below
if(write(output, decbuf, n) < n) {
return base16384_err_write_file;
}
@@ -161,11 +248,17 @@ static inline int is_next_end(FILE* fp) {
return 0;
}
-base16384_err_t base16384_decode_file(const char* input, const char* output, char* encbuf, char* decbuf) {
+base16384_err_t base16384_decode_file_detailed(const char* input, const char* output, char* encbuf, char* decbuf, int flag) { // Decode the file named by input into the file named by output, honoring the BASE16384_FLAG_* bits in flag; returns a base16384_err_t code.
off_t inputsize;
FILE* fp = NULL;
FILE* fpo;
- if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) return base16384_err_invalid_file_name;
+ uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE; // running checksum; only used when BASE16384_FLAG_SUM_CHECK_ON_REMAIN is set
+ base16384_err_t retval = base16384_err_ok; // value returned from the cleanup label
+ int errnobak = 0; // errno captured by goto_base16384_file_detailed_cleanup, restored before returning
+ if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) {
+ errno = EINVAL; // report the bad file name through errno as well
+ return base16384_err_invalid_file_name;
+ }
if(is_standard_io(input)) { // read from stdin
inputsize = 0;
fp = stdin;
@@ -177,93 +270,130 @@ base16384_err_t base16384_decode_file(const char* input, const char* output, cha
if(!fpo) {
return base16384_err_fopen_output_file;
}
- if(!inputsize || inputsize > BASE16384_DECBUFSZ) { // stdin or big file, use decbuf & fread
- inputsize = BASE16384_DECBUFSZ/8*8;
+ if(!inputsize || inputsize > _BASE16384_DECBUFSZ) { // stdin or big file, use decbuf & fread
+ inputsize = _BASE16384_DECBUFSZ; // from here on inputsize is the chunk size passed to fread
#if defined _WIN32 || defined __cosmopolitan
}
#endif
if(!fp) fp = fopen(input, "rb");
if(!fp) {
- return base16384_err_fopen_input_file;
+ goto_base16384_file_detailed_cleanup(decode, base16384_err_fopen_input_file, {}); // the cleanup label closes fpo
}
int cnt = 0;
int end = 0;
+ // errno is used below as the read-failure signal, so clear any stale value left by earlier calls
+ errno = 0;
rm_head(fp);
+ if(errno) goto_base16384_file_detailed_cleanup(decode, base16384_err_read_file, {});
+ #ifdef DEBUG
+ fprintf(stderr, "inputsize: %lld\n", (long long)inputsize); // off_t is cast to match %lld
+ #endif
while((cnt = fread(decbuf, sizeof(char), inputsize, fp)) > 0) {
if((end = is_next_end(fp))) {
decbuf[cnt++] = '=';
decbuf[cnt++] = end;
}
- if(fwrite(encbuf, base16384_decode(decbuf, cnt, encbuf), 1, fpo) <= 0) {
- return base16384_err_write_file;
+ if(errno) goto_base16384_file_detailed_cleanup(decode, base16384_err_read_file, {}); // set by the reads above
+ cnt = base16384_decode_unsafe(decbuf, cnt, encbuf); // cnt now holds the decoded length
+ if(fwrite(encbuf, cnt, 1, fpo) <= 0) {
+ goto_base16384_file_detailed_cleanup(decode, base16384_err_write_file, {});
+ }
+ // verify the checksum over the decoded bytes when requested
+ if((flag&BASE16384_FLAG_SUM_CHECK_ON_REMAIN) && calc_and_check_sum(&sum, cnt, encbuf)) {
+ errno = EINVAL;
+ goto_base16384_file_detailed_cleanup(decode, base16384_err_invalid_decoding_checksum, {});
}
+ errno = 0; // the write path may have touched errno on success; start the next read phase clean
}
- if(!is_standard_io(output)) fclose(fpo);
- if(!is_standard_io(input)) fclose(fp);
#if !defined _WIN32 && !defined __cosmopolitan
} else { // small file, use mmap & fwrite
int fd = open(input, O_RDONLY);
if(fd < 0) {
- return base16384_err_open_input_file;
+ goto_base16384_file_detailed_cleanup(decode, base16384_err_open_input_file, {});
}
char *input_file = mmap(NULL, (size_t)inputsize+16, PROT_READ, MAP_PRIVATE, fd, 0);
if(input_file == MAP_FAILED) {
- return base16384_err_map_input_file;
+ goto_base16384_file_detailed_cleanup(decode, base16384_err_map_input_file, close(fd)); // fd is closed after errno has been saved
}
int off = skip_offset(input_file);
if(fwrite(encbuf, base16384_decode(input_file+off, inputsize-off, encbuf), 1, fpo) <= 0) {
- return base16384_err_write_file;
+ goto_base16384_file_detailed_cleanup(decode, base16384_err_write_file, {
+ munmap(input_file, (size_t)inputsize); // NOTE(review): the mapping was created with inputsize+16 — confirm the shorter length here
+ close(fd);
+ });
}
munmap(input_file, (size_t)inputsize);
- if(!is_standard_io(output)) fclose(fpo);
close(fd);
}
#endif
- return base16384_err_ok;
+base16384_decode_file_detailed_cleanup: // shared exit for the success path and every goto above; NOTE(review): the mmap branch never checks the checksum flag — confirm intended
+ if(fpo && !is_standard_io(output)) fclose(fpo); // streams for "-" are left open
+ if(fp && !is_standard_io(input)) fclose(fp);
+ if(errnobak) errno = errnobak; // restore the errno seen at the failure point; fclose may have changed it
+ return retval;
}
-base16384_err_t base16384_decode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf) {
+base16384_err_t base16384_decode_fp_detailed(FILE* input, FILE* output, char* encbuf, char* decbuf, int flag) { // Decode everything readable from input onto output, honoring the BASE16384_FLAG_* bits in flag; returns a base16384_err_t code.
if(!input) {
+ errno = EINVAL; // report the missing stream through errno as well
return base16384_err_fopen_input_file;
}
if(!output) {
+ errno = EINVAL;
return base16384_err_fopen_output_file;
}
- off_t inputsize = BASE16384_DECBUFSZ/8*8;
+ off_t inputsize = _BASE16384_DECBUFSZ; // chunk size passed to each fread
int cnt = 0;
int end = 0;
+ uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE; // running checksum; only used when BASE16384_FLAG_SUM_CHECK_ON_REMAIN is set
+ // errno is used below as the read-failure signal, so clear any stale value left by earlier calls
+ errno = 0;
rm_head(input);
+ if(errno) return base16384_err_read_file;
while((cnt = fread(decbuf, sizeof(char), inputsize, input)) > 0) {
if((end = is_next_end(input))) {
decbuf[cnt++] = '=';
decbuf[cnt++] = end;
}
- if(fwrite(encbuf, base16384_decode(decbuf, cnt, encbuf), 1, output) <= 0) {
+ cnt = base16384_decode_unsafe(decbuf, cnt, encbuf); // cnt now holds the decoded length
+ if(fwrite(encbuf, cnt, 1, output) <= 0) {
return base16384_err_write_file;
}
+ if(flag&BASE16384_FLAG_SUM_CHECK_ON_REMAIN) { // verify the checksum over the decoded bytes when requested
+ if (calc_and_check_sum(&sum, cnt, encbuf)) {
+ errno = EINVAL;
+ return base16384_err_invalid_decoding_checksum;
+ }
+ }
}
return base16384_err_ok;
}
-static inline int is_next_end_fd(int fd) {
- char ch = 0;
+static inline uint16_t is_next_end_fd(int fd) { // Read the next byte of fd; if it is '=', read one more. Returns the byte alone, or '=' in the high byte with the following byte in the low byte.
+ uint8_t ch = 0; // NOTE(review): neither read() result is checked; on EOF or failure ch keeps its previous value
read(fd, &ch, 1);
+ uint16_t ret = (uint16_t)ch & 0x00ff; // low byte: the byte just read
if(ch == '=') {
read(fd, &ch, 1);
+ ret <<= 8; // move '=' into the high byte
+ ret |= (uint16_t)ch & 0x00ff; // low byte: the byte that followed '='
}
- return (int)ch;
+ return ret;
}
-base16384_err_t base16384_decode_fd(int input, int output, char* encbuf, char* decbuf) {
+base16384_err_t base16384_decode_fd_detailed(int input, int output, char* encbuf, char* decbuf, int flag) { // Decode everything readable from descriptor input onto descriptor output, honoring the BASE16384_FLAG_* bits in flag; returns a base16384_err_t code.
if(input < 0) {
+ errno = EINVAL; // report the bad descriptor through errno as well
return base16384_err_fopen_input_file;
}
if(output < 0) {
+ errno = EINVAL;
return base16384_err_fopen_output_file;
}
- off_t inputsize = BASE16384_DECBUFSZ/8*8;
+ off_t inputsize = _BASE16384_DECBUFSZ; // upper bound on the bytes gathered per read
int cnt = 0;
int end = 0;
+ uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE; // running checksum; only used when BASE16384_FLAG_SUM_CHECK_ON_REMAIN is set
decbuf[0] = 0;
if(read(input, decbuf, 2) < 2) {
return base16384_err_read_file;
@@ -272,17 +402,26 @@ base16384_err_t base16384_decode_fd(int input, int output, char* encbuf, char* d
while((end = read(input, decbuf+cnt, inputsize-cnt)) > 0 || cnt > 0) {
if(end > 0) {
cnt += end;
- if((end = is_next_end_fd(input))) {
+ // errno is the only failure signal from is_next_end_fd, so clear any stale value first
+ errno = 0;
+ uint16_t next = is_next_end_fd(input);
+ if(errno) return base16384_err_read_file;
+ if(next&0xff00) { // high byte set: the lookahead saw '=' followed by another byte
decbuf[cnt++] = '=';
- decbuf[cnt++] = end;
- end = 0;
- } else end = 1;
- } else end = 0;
- cnt = base16384_decode(decbuf, cnt, encbuf);
- if(write(output, encbuf, cnt) < cnt) {
+ }
+ decbuf[cnt++] = (char)(next&0x00ff); // NOTE(review): also appended when the lookahead read nothing (next == 0) — confirm
+ }
+ end = base16384_decode_unsafe(decbuf, cnt, encbuf); // end now holds the decoded length
+ if(write(output, encbuf, end) < end) {
return base16384_err_write_file;
}
- cnt = end;
+ if(flag&BASE16384_FLAG_SUM_CHECK_ON_REMAIN) {
+ if (calc_and_check_sum(&sum, end, encbuf)) { // checksum covers the decoded bytes (end), matching the file/fp variants
+ errno = EINVAL;
+ return base16384_err_invalid_decoding_checksum;
+ }
+ }
+ cnt = 0; // the whole buffer was consumed; start the next read at decbuf[0]
}
return base16384_err_ok;
}