1
0
mirror of https://github.com/fumiama/base16384.git synced 2026-06-20 05:40:24 +08:00

Merge branch 'master' of https://github.com/fumiama/base16384 into debian

This commit is contained in:
源文雨
2024-04-05 22:09:49 +09:00
15 changed files with 1913 additions and 351 deletions

View File

@@ -2,30 +2,49 @@ cmake_minimum_required(VERSION 2.8.12)
if (POLICY CMP0048) if (POLICY CMP0048)
cmake_policy(SET CMP0048 NEW) cmake_policy(SET CMP0048 NEW)
endif (POLICY CMP0048) endif (POLICY CMP0048)
project(base16384 VERSION 2.2.5) project(base16384 VERSION 2.3.0)
add_executable(base16384_b base16384.c) add_definitions(-DBASE16384_VERSION="${PROJECT_VERSION}")
add_definitions(-DBASE16384_VERSION_DATE="April 5th 2024")
message(STATUS "Testing endian...")
include(TestBigEndian) include(TestBigEndian)
test_big_endian(isBigEndian) test_big_endian(isBigEndian)
message(STATUS "Is big endian: ${isBigEndian}.")
if (${isBigEndian}) if (${isBigEndian})
add_definitions(-DWORDS_BIGENDIAN) add_definitions(-DWORDS_BIGENDIAN)
endif () endif ()
IF(CMAKE_SIZEOF_VOID_P EQUAL 8) if (BUILD STREQUAL "test")
add_library(base16384 SHARED file.c base1464.c) add_definitions(-DBASE16384_BUFSZ_FACTOR=1)
add_library(base16384_s STATIC file.c base1464.c) endif ()
add_executable(base16384_b base16384.c)
IF ((NOT FORCE_32BIT) AND CMAKE_SIZEOF_VOID_P EQUAL 8)
message(STATUS "Adding 64bit libraries...")
add_definitions(-DIS_64BIT_PROCESSOR)
add_library(base16384 SHARED wrap.c file.c base1464.c)
add_library(base16384_s STATIC wrap.c file.c base1464.c)
ELSE () ELSE ()
add_library(base16384 SHARED file.c base1432.c) message(STATUS "Adding 32bit libraries...")
add_library(base16384_s STATIC file.c base1432.c) add_library(base16384 SHARED wrap.c file.c base1432.c)
add_library(base16384_s STATIC wrap.c file.c base1432.c)
ENDIF () ENDIF ()
set_target_properties(base16384_b PROPERTIES OUTPUT_NAME base16384) set_target_properties(base16384_b PROPERTIES OUTPUT_NAME base16384)
set_target_properties(base16384_s PROPERTIES OUTPUT_NAME base16384) set_target_properties(base16384_s PROPERTIES OUTPUT_NAME base16384)
set_target_properties(base16384 PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) set_target_properties(base16384 PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
message(STATUS "Linking libraries...")
target_link_libraries(base16384_b base16384_s) target_link_libraries(base16384_b base16384_s)
if (BUILD STREQUAL "test")
message(STATUS "Building test...")
enable_testing()
add_subdirectory(test)
endif ()
INSTALL(TARGETS base16384_b RUNTIME DESTINATION bin) INSTALL(TARGETS base16384_b RUNTIME DESTINATION bin)
INSTALL(TARGETS base16384 LIBRARY DESTINATION lib) INSTALL(TARGETS base16384 LIBRARY DESTINATION lib)
INSTALL(TARGETS base16384_s ARCHIVE DESTINATION lib) INSTALL(TARGETS base16384_s ARCHIVE DESTINATION lib)

View File

@@ -5,7 +5,9 @@
Encode binary file to printable utf16be, and vice versa. Encode binary file to printable utf16be, and vice versa.
## Description 说明 ## Description
> 说明
Use 16384 Chinese characters (from \u4E00 to \u8DFF) as the "alphabet", just like what base64 did. Use 16384 Chinese characters (from \u4E00 to \u8DFF) as the "alphabet", just like what base64 did.
使用16384个汉字(从`\u4E00``\u8DFF`)作为字符表就像base64用64个字符作为字符表一样。 使用16384个汉字(从`\u4E00``\u8DFF`)作为字符表就像base64用64个字符作为字符表一样。
@@ -14,31 +16,38 @@ If length of the data has a remainder after moduled by 7, we will use \u3Dxx to
使用`\u3Dxx`附加在末尾以表示编码时数据不满7位的个数其范围在01~06。 使用`\u3Dxx`附加在末尾以表示编码时数据不满7位的个数其范围在01~06。
## Benefits 优点 ## Benefits
> 优点
Saves more space, and since the code 0x0000 is encoded as "一", runs of zero bytes are easier to spot. Saves more space, and since the code 0x0000 is encoded as "一", runs of zero bytes are easier to spot.
相较base64节省更多空间更容易发现二进制文件的规律。 相较base64节省更多空间更容易发现二进制文件的规律。
## Usage 使用说明 ## Usage
> 用法
### Install from Debian Bookworm or higher 从 Debian Bookworm 或更高版本安装 ### Install from Debian Bookworm or higher
> 从 Debian Bookworm 或更高版本安装
```bash ```bash
sudo apt install base16384 sudo apt install base16384
``` ```
### Install from Homebrew 从 Homebrew 安装 ### Install from Homebrew
> 从 Homebrew 安装
```bash ```bash
brew install base16384 brew install base16384
``` ```
### Install from my PPA in Ubuntu 乌班图下从我的 PPA 安装 ### Install from my PPA in Ubuntu
> 乌班图下从我的 PPA 安装
```bash ```bash
sudo add-apt-repository ppa:fumiama/ppa sudo add-apt-repository ppa:fumiama/ppa
sudo apt-get update sudo apt-get update
sudo apt-get install base16384 sudo apt-get install base16384
``` ```
### Build from source code 编译 ### Build from source code
> 编译
Clone this repo first. Clone this repo first.
@@ -67,30 +76,36 @@ Now you can encode/decode a file by commands below.
```kotlin ```kotlin
Usage: Usage:
base16384 [-edt] [inputfile] [outputfile] base16384 [-edtn] [inputfile] [outputfile]
-e encode -e encode (default)
-d decode -d decode
-t show spend time -t show spend time
-n don't write utf16be file header (0xFEFF)
-c embed or validate checksum in remainder
inputfile pass - to read from stdin inputfile pass - to read from stdin
outputfile pass - to write to stdout outputfile pass - to write to stdout
``` ```
## Examples 用例 ## Examples
1. Encode simple text 简单文本编码 > 用例
1. Encode simple text
> 简单文本编码
```bash ```bash
echo -n "1234567" | base16384 -e - - | iconv -f utf-16be -t utf-8 echo -n "1234567" | base16384 -e - - | iconv -f utf-16be -t utf-8
婌焳廔萷 婌焳廔萷
``` ```
3. Decode simple text 简单文本解码 3. Decode simple text
> 简单文本解码
```bash ```bash
echo -n "婌焳廔萷" | iconv -f utf-8 -t utf-16be | base16384 -d - - echo -n "婌焳廔萷" | iconv -f utf-8 -t utf-16be | base16384 -d - -
1234567 1234567
``` ```
3. Encode file 编码文件 3. Encode file
> 编码文件
The text below is the encoding of the base16384 itself on MacOS 12.6 arm64. It is clear to see the structure of the binary file. The text below is the encoding of the base16384 itself on MacOS 12.6 arm64. It is clear to see the structure of the binary file.

View File

@@ -1,6 +1,6 @@
/* base1432.c /* base1432.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384). * This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2023 Fumiama Minamoto. * Copyright (c) 2022-2024 Fumiama Minamoto.
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,51 +16,113 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifdef __cosmopolitan // always le #ifndef __cosmopolitan
# define be16toh(x) bswap_16(x) #include <string.h>
# define be32toh(x) bswap_32(x)
# define htobe16(x) bswap_16(x)
# define htobe32(x) bswap_32(x)
#else
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#ifdef __linux__
# include <endian.h>
#endif
#ifdef __FreeBSD__
# include <sys/endian.h>
#endif
#ifdef __NetBSD__
# include <sys/endian.h>
#endif
#ifdef __OpenBSD__
# include <sys/types.h>
# define be16toh(x) betoh16(x)
# define be32toh(x) betoh32(x)
#endif
#ifdef __APPLE__
# define be16toh(x) ntohs(x)
# define be32toh(x) ntohl(x)
# define htobe16(x) ntohs(x)
# define htobe32(x) htonl(x)
#endif
#ifdef _WIN32
#ifdef WORDS_BIGENDIAN
# define be16toh(x) (x)
# define be32toh(x) (x)
# define htobe16(x) (x)
# define htobe32(x) (x)
#else
# define be16toh(x) _byteswap_ushort(x)
# define be32toh(x) _byteswap_ulong(x)
# define htobe16(x) _byteswap_ushort(x)
# define htobe32(x) _byteswap_ulong(x)
#endif
#endif
#endif #endif
// #define DEBUG #include "binary.h"
typedef union {
uint8_t buf[4];
uint32_t val;
} base16384_union_remainder;
/*
 * base16384_encode_safe (32-bit word implementation)
 *
 * Encodes dlen bytes from data into buf and returns the number of output
 * bytes (outlen). Each full group of 7 input bytes becomes 8 output bytes:
 * two 32-bit words, each holding two 14-bit fields biased by 0x4e00.
 * A trailing group of 1..6 bytes (offset = dlen % 7) is emitted as a shorter
 * tail followed by the two-byte marker '=' <offset>.
 *
 * The word-at-a-time loop stops one group early (i < dlen - 7); the last full
 * group and the tail are read with memcpy / single-byte accesses, so every
 * read stays inside data[0..dlen).
 */
int base16384_encode_safe(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
switch(offset) { // add the encoded tail length, including the 2 bytes of the offset marker
case 0: break;
case 1: outlen += 4; break;
case 2:
case 3: outlen += 6; break;
case 4:
case 5: outlen += 8; break;
case 6: outlen += 10; break;
default: break;
}
uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0;
int32_t i = 0;
// Full groups that are followed by at least one more input byte: the second
// 32-bit load covers data[i+4..i+7], which is in range because i + 7 < dlen.
for(; i < dlen - 7; i += 7) {
register uint32_t sum = 0;
register uint32_t shift = htobe32(*(uint32_t*)(data+i));
sum |= (shift>>2) & 0x3fff0000;
sum |= (shift>>4) & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
// keep the 4 low bits of the first word that were not emitted yet
shift <<= 26;
shift &= 0x3c000000;
sum = 0;
shift |= (htobe32(*(uint32_t*)(data+i+4))>>6)&0x03fffffc;
sum |= shift & 0x3fff0000;
shift >>= 2;
sum |= shift & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
}
base16384_union_remainder valbuf;
// Exactly one full group left: same packing as the loop body, but the last
// 3 bytes are copied through valbuf instead of a 4-byte load.
if(dlen - i == 7) {
register uint32_t sum = 0;
register uint32_t shift = htobe32(*(uint32_t*)(data+i));
sum |= (shift>>2) & 0x3fff0000;
sum |= (shift>>4) & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
shift <<= 26;
shift &= 0x3c000000;
sum = 0;
// valbuf.buf[3] is never set; its bits are removed by the 0x03fffffc mask below
memcpy(valbuf.buf, data+i+4, 3);
shift |= (htobe32(valbuf.val)>>6)&0x03fffffc;
sum |= shift & 0x3fff0000;
shift >>= 2;
sum |= shift & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
return outlen;
}
// Tail of 1..6 bytes: o counts the bytes still to be consumed, one nesting
// level per byte. The word is assembled in little-endian byte layout (bias
// 0x004e004e) and byte-swapped on big-endian builds.
uint8_t o = offset;
if(o--) {
register uint32_t sum = 0x0000003f & (data[i] >> 2);
sum |= ((uint32_t)data[i] << 14) & 0x0000c000;
if(o--) {
sum |= ((uint32_t)data[i + 1] << 6) & 0x00003f00;
sum |= ((uint32_t)data[i + 1] << 20) & 0x00300000;
if(o--) {
sum |= ((uint32_t)data[i + 2] << 12) & 0x000f0000;
sum |= ((uint32_t)data[i + 2] << 28) & 0xf0000000;
if(o--) {
sum |= ((uint32_t)data[i + 3] << 20) & 0x0f000000;
sum += 0x004e004e;
// safe, because it will never go over 0x3dxx
#ifdef WORDS_BIGENDIAN
vals[n++] = __builtin_bswap32(sum);
#else
vals[n++] = sum;
#endif
// start the second tail word with the 4 low bits of byte 3
sum = (((uint32_t)data[i + 3] << 2)) & 0x0000003c;
if(o--) {
sum |= (((uint32_t)data[i + 4] >> 6)) & 0x00000003;
sum |= ((uint32_t)data[i + 4] << 10) & 0x0000fc00;
if(o--) {
sum |= ((uint32_t)data[i + 5] << 2) & 0x00000300;
sum |= ((uint32_t)data[i + 5] << 16) & 0x003f0000;
}
}
}
}
}
sum += 0x004e004e;
// safe, because it will never go over 0x3dxx
#ifdef WORDS_BIGENDIAN
vals[n] = __builtin_bswap32(sum);
#else
vals[n] = sum;
#endif
// The marker is written last, so it replaces whatever part of the tail word
// landed on the final two output bytes.
buf[outlen - 2] = '=';
buf[outlen - 1] = offset;
}
return outlen;
}
int base16384_encode(const char* data, int dlen, char* buf) { int base16384_encode(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8; int outlen = dlen / 7 * 8;
@@ -75,9 +137,6 @@ int base16384_encode(const char* data, int dlen, char* buf) {
case 6: outlen += 10; break; case 6: outlen += 10; break;
default: break; default: break;
} }
#ifdef DEBUG
printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8);
#endif
uint32_t* vals = (uint32_t*)buf; uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0; uint32_t n = 0;
int32_t i = 0; int32_t i = 0;
@@ -140,6 +199,143 @@ int base16384_encode(const char* data, int dlen, char* buf) {
return outlen; return outlen;
} }
/*
 * base16384_encode_unsafe (32-bit word implementation)
 *
 * Encodes dlen bytes from data into buf and returns the output length
 * (outlen), using only the word-at-a-time loop for every group, including a
 * trailing partial one.
 *
 * Each iteration loads data[i..i+7] and stores 8 bytes into buf, so the last
 * iteration reads past data[dlen-1] and, for short tails, stores past
 * buf[outlen-1].
 * NOTE(review): callers presumably must provide slack after both buffers;
 * the required amount is not visible in this block, so confirm against the
 * header.
 */
int base16384_encode_unsafe(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
switch(offset) { // add the encoded tail length, including the 2 bytes of the offset marker
case 0: break;
case 1: outlen += 4; break;
case 2:
case 3: outlen += 6; break;
case 4:
case 5: outlen += 8; break;
case 6: outlen += 10; break;
default: break;
}
uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0;
int32_t i = 0;
// Runs for every group that has at least one input byte (i < dlen).
for(; i < dlen; i += 7) {
register uint32_t sum = 0;
register uint32_t shift = htobe32(*(uint32_t*)(data+i));
sum |= (shift>>2) & 0x3fff0000;
sum |= (shift>>4) & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
shift <<= 26;
shift &= 0x3c000000;
sum = 0;
shift |= (htobe32(*(uint32_t*)(data+i+4))>>6)&0x03fffffc;
sum |= shift & 0x3fff0000;
shift >>= 2;
sum |= shift & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
}
// Overwrite the last two output bytes with the '=' <offset> marker.
if(offset) {
buf[outlen - 2] = '=';
buf[outlen - 1] = offset;
}
return outlen;
}
/*
 * base16384_decode_safe (32-bit word implementation)
 *
 * Decodes dlen bytes of base16384 text from data into buf and returns the
 * number of decoded bytes. Every 8 input bytes yield 7 output bytes; a
 * trailing '=' <offset> marker (offset 1..6) announces a shorter tail that
 * decodes to offset bytes.
 *
 * The last full group and the tail are moved through a small union with
 * memcpy so that no access goes beyond data[0..dlen) or buf[0..outlen).
 * Malformed input is handled defensively:
 *   - dlen < 2 returns 0 instead of reading data[dlen-2] out of range;
 *   - a marker byte outside 0..6 is ignored rather than added to outlen;
 *   - every tail memcpy length is clamped to the size of the union.
 * Output for well-formed input is unchanged.
 */
int base16384_decode_safe(const char* data, int dlen, char* buf) {
	// Too short to contain a marker; also keeps data[dlen-2] in bounds.
	if(dlen < 2) return 0;
	int outlen = dlen;
	int offset = 0;
	if(data[dlen-2] == '=') {
		offset = data[dlen-1];
		switch(offset) { // drop the encoded tail, including the 2 bytes of the offset marker
			case 0: break;
			case 1: outlen -= 4; break;
			case 2:
			case 3: outlen -= 6; break;
			case 4:
			case 5: outlen -= 8; break;
			case 6: outlen -= 10; break;
			// Not a valid remainder length: treat the input as having no tail
			// so the bogus value cannot inflate outlen.
			default: offset = 0; break;
		}
	}
	outlen = outlen / 8 * 7 + offset;
	const uint32_t* vals = (const uint32_t*)data;
	uint32_t n = 0;
	int32_t i = 0;
	// Full groups that are followed by more output; n advances by 2 per pass.
	for(; i < outlen - 7; i+=7) {
		register uint32_t sum = 0;
		register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00;
		shift <<= 2;
		sum |= shift & 0xfffc0000;
		shift <<= 2;
		sum |= shift & 0x0003fff0;
		shift = htobe32(vals[n++]) - 0x4e004e00;
		sum |= shift >> 26;
		*(uint32_t*)(buf+i) = be32toh(sum);
		sum = 0;
		shift <<= 6;
		sum |= shift & 0xffc00000;
		shift <<= 2;
		sum |= shift & 0x003fff00;
		*(uint32_t*)(buf+i+4) = be32toh(sum);
	}
	base16384_union_remainder valbuf;
	valbuf.val = 0; // bytes not covered by a short memcpy below are well defined
	if(outlen - i == 7) {
		// Exactly one full group left: store only 3 bytes of the second word
		// so the write ends at buf[outlen-1].
		register uint32_t sum = 0;
		register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00;
		shift <<= 2;
		sum |= shift & 0xfffc0000;
		shift <<= 2;
		sum |= shift & 0x0003fff0;
		shift = htobe32(vals[n]) - 0x4e004e00;
		sum |= shift >> 26;
		*(uint32_t*)(buf+i) = be32toh(sum);
		sum = 0;
		shift <<= 6;
		sum |= shift & 0xffc00000;
		shift <<= 2;
		sum |= shift & 0x003fff00;
		valbuf.val = be32toh(sum);
		memcpy(buf+i+4, valbuf.buf, 3);
	} else if((*(uint8_t*)(&vals[n]) != '=') && offset--) {
		// Tail: copy at most one word of the remaining payload (the bytes
		// before the 2-byte marker).
		int cnt = dlen-2-(int)n*(int)sizeof(uint32_t);
		if(cnt > (int)sizeof(valbuf.buf)) cnt = (int)sizeof(valbuf.buf);
		if(cnt < 0) cnt = 0;
		memcpy(valbuf.buf, &vals[n], cnt);
		n++;
		#ifdef WORDS_BIGENDIAN
			register uint32_t sum = __builtin_bswap32(valbuf.val);
		#else
			register uint32_t sum = valbuf.val;
		#endif
		sum -= 0x0000004e;
		buf[i++] = ((sum & 0x0000003f) << 2) | ((sum & 0x0000c000) >> 14);
		if(offset--) {
			sum -= 0x004e0000;
			buf[i++] = ((sum & 0x00003f00) >> 6) | ((sum & 0x00300000) >> 20);
			if(offset--) {
				buf[i++] = ((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28);
				if(offset--) {
					buf[i] = (sum & 0x0f000000) >> 20;
					// Second tail word. The original copy here was unclamped:
					// a length above 4 overflowed valbuf and a negative one
					// became a huge size_t. Stop when no payload is left.
					cnt = dlen-2-(int)n*(int)sizeof(uint32_t);
					if(cnt <= 0 || *(uint8_t*)(&vals[n]) == '=') return outlen;
					if(cnt > (int)sizeof(valbuf.buf)) cnt = (int)sizeof(valbuf.buf);
					valbuf.val = 0;
					memcpy(valbuf.buf, &vals[n], cnt);
					#ifdef WORDS_BIGENDIAN
						sum = __builtin_bswap32(valbuf.val);
					#else
						sum = valbuf.val;
					#endif
					sum -= 0x0000004e;
					buf[i++] |= (sum & 0x0000003c) >> 2;
					if(offset--) {
						buf[i++] = ((sum & 0x00000003) << 6) | ((sum & 0x0000fc00) >> 10);
						if(offset--) {
							sum -= 0x004e0000;
							buf[i] = ((sum & 0x00000300) >> 2) | ((sum & 0x003f0000) >> 16);
						}
					}
				}
			}
		}
	}
	return outlen;
}
int base16384_decode(const char* data, int dlen, char* buf) { int base16384_decode(const char* data, int dlen, char* buf) {
int outlen = dlen; int outlen = dlen;
int offset = 0; int offset = 0;
@@ -157,7 +353,7 @@ int base16384_decode(const char* data, int dlen, char* buf) {
} }
} }
outlen = outlen / 8 * 7 + offset; outlen = outlen / 8 * 7 + offset;
uint32_t* vals = (uint32_t*)data; const uint32_t* vals = (const uint32_t*)data;
uint32_t n = 0; uint32_t n = 0;
int32_t i = 0; int32_t i = 0;
for(; i <= outlen - 7; i+=7) { // n实际每次自增2 for(; i <= outlen - 7; i+=7) { // n实际每次自增2
@@ -177,6 +373,7 @@ int base16384_decode(const char* data, int dlen, char* buf) {
sum |= shift & 0x003fff00; sum |= shift & 0x003fff00;
*(uint32_t*)(buf+i+4) = be32toh(sum); *(uint32_t*)(buf+i+4) = be32toh(sum);
} }
if(*(uint8_t*)(&vals[n]) == '=') return outlen;
if(offset--) { if(offset--) {
// 这里有读取越界 // 这里有读取越界
#ifdef WORDS_BIGENDIAN #ifdef WORDS_BIGENDIAN
@@ -193,6 +390,7 @@ int base16384_decode(const char* data, int dlen, char* buf) {
buf[i++] = ((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28); buf[i++] = ((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28);
if(offset--) { if(offset--) {
buf[i] = (sum & 0x0f000000) >> 20; buf[i] = (sum & 0x0f000000) >> 20;
if(*(uint8_t*)(&vals[n]) == '=') return outlen;
// 这里有读取越界 // 这里有读取越界
#ifdef WORDS_BIGENDIAN #ifdef WORDS_BIGENDIAN
sum = __builtin_bswap32(vals[n]); sum = __builtin_bswap32(vals[n]);
@@ -214,3 +412,69 @@ int base16384_decode(const char* data, int dlen, char* buf) {
} }
return outlen; return outlen;
} }
/*
 * base16384_decode_unsafe (32-bit word implementation)
 *
 * Decodes dlen bytes of base16384 text from data into buf and returns the
 * number of decoded bytes. Complete groups are decoded in the loop; whatever
 * follows (last full group or tail) is decoded by one more word-at-a-time
 * pass that loads two 32-bit words and, unless it returns early on a 0x3d
 * ('=') high byte, stores 8 bytes at buf+i.
 *
 * That final pass can load words extending past data[dlen-1] and store past
 * buf[outlen-1]. data[dlen-2] is also read without checking dlen.
 * NOTE(review): callers presumably must provide slack after both buffers;
 * the required amount is not visible in this block.
 */
int base16384_decode_unsafe(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
if(data[dlen-2] == '=') {
offset = data[dlen-1];
switch(offset) { // drop the encoded tail, including the 2 bytes of the offset marker
case 0: break;
case 1: outlen -= 4; break;
case 2:
case 3: outlen -= 6; break;
case 4:
case 5: outlen -= 8; break;
case 6: outlen -= 10; break;
default: break;
}
}
outlen = outlen / 8 * 7 + offset;
const uint32_t* vals = (const uint32_t*)data;
uint32_t n = 0;
int32_t i = 0;
for(; i < outlen-7; i+=7) { // n actually advances by 2 on every iteration
register uint32_t sum = 0;
register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00;
shift <<= 2;
sum |= shift & 0xfffc0000;
shift <<= 2;
sum |= shift & 0x0003fff0;
shift = htobe32(vals[n++]) - 0x4e004e00;
sum |= shift >> 26;
*(uint32_t*)(buf+i) = be32toh(sum);
sum = 0;
shift <<= 6;
sum |= shift & 0xffc00000;
shift <<= 2;
sum |= shift & 0x003fff00;
*(uint32_t*)(buf+i+4) = be32toh(sum);
}
// Final pass over the remaining input.
register uint32_t sum = 0;
register uint32_t shift = htobe32(vals[n++]);
// A first byte of 0x3d ('=') means the marker has been reached: nothing left.
if(((shift>>24)&0xff) == 0x3d) return outlen;
// Any 16-bit character whose high byte is below the 0x4e bias gets that byte
// forced to 0xff before the bias is subtracted.
// NOTE(review): presumably this neutralises marker/padding bytes in a short
// tail; confirm the intent with the author.
if(((shift>>24)&0xff) < 0x4e) shift |= 0xff000000;
if(((shift>> 8)&0xff) < 0x4e) shift |= 0x0000ff00;
shift -= 0x4e004e00;
shift <<= 2;
sum |= shift & 0xfffc0000;
shift <<= 2;
sum |= shift & 0x0003fff0;
shift = htobe32(vals[n]);
// Marker starts the second word: flush what was decoded so far and stop.
if(((shift>>24)&0xff) == 0x3d) {
*(uint32_t*)(buf+i) = be32toh(sum);
return outlen;
}
if(((shift>>24)&0xff) < 0x4e) shift |= 0xff000000;
if(((shift>> 8)&0xff) < 0x4e) shift |= 0x0000ff00;
shift -= 0x4e004e00;
sum |= shift >> 26;
*(uint32_t*)(buf+i) = be32toh(sum);
sum = 0;
shift <<= 6;
sum |= shift & 0xffc00000;
shift <<= 2;
sum |= shift & 0x003fff00;
*(uint32_t*)(buf+i+4) = be32toh(sum);
return outlen;
}

View File

@@ -1,6 +1,6 @@
/* base1464.c /* base1464.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384). * This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2023 Fumiama Minamoto. * Copyright (c) 2022-2024 Fumiama Minamoto.
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -16,51 +16,98 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <stdio.h> #ifndef __cosmopolitan
#include <stdint.h> #include <string.h>
#include <stdlib.h>
#ifdef __linux__
# include <endian.h>
#endif
#ifdef __FreeBSD__
# include <sys/endian.h>
#endif
#ifdef __NetBSD__
# include <sys/endian.h>
#endif
#ifdef __OpenBSD__
# include <sys/types.h>
# define be16toh(x) betoh16(x)
# define be32toh(x) betoh32(x)
# define be64toh(x) betoh64(x)
#endif
#ifdef __APPLE__
# define be16toh(x) ntohs(x)
# define be32toh(x) ntohl(x)
# define be64toh(x) ntohll(x)
# define htobe16(x) ntohs(x)
# define htobe32(x) htonl(x)
# define htobe64(x) htonll(x)
#endif
#ifdef _WIN64
#ifdef WORDS_BIGENDIAN
# define be16toh(x) (x)
# define be32toh(x) (x)
# define be64toh(x) (x)
# define htobe16(x) (x)
# define htobe32(x) (x)
# define htobe64(x) (x)
#else
# define be16toh(x) _byteswap_ushort(x)
# define be32toh(x) _byteswap_ulong(x)
# define be64toh(x) _byteswap_uint64(x)
# define htobe16(x) _byteswap_ushort(x)
# define htobe32(x) _byteswap_ulong(x)
# define htobe64(x) _byteswap_uint64(x)
#endif
#endif #endif
// #define DEBUG #include "binary.h"
typedef union {
uint8_t buf[8];
uint64_t val;
} base16384_union_remainder;
/*
 * base16384_encode_safe (64-bit word implementation)
 *
 * Encodes dlen bytes from data into buf and returns the number of output
 * bytes (outlen). Each full group of 7 input bytes becomes one 64-bit word
 * holding four 14-bit fields biased by 0x4e00. A trailing group of 1..6
 * bytes (offset = dlen % 7) is emitted as a shorter tail followed by the
 * two-byte marker '=' <offset>.
 *
 * The 8-byte loads in the loop are limited to groups with i + 7 < dlen; the
 * last full group is copied with memcpy and the tail is read byte by byte,
 * so every read stays inside data[0..dlen). The tail is stored with a memcpy
 * of exactly the tail's payload length, so nothing is written past outlen.
 */
int base16384_encode_safe(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
switch(offset) { // add the encoded tail length, including the 2 bytes of the offset marker
case 0: break;
case 1: outlen += 4; break;
case 2:
case 3: outlen += 6; break;
case 4:
case 5: outlen += 8; break;
case 6: outlen += 10; break;
default: break;
}
uint64_t* vals = (uint64_t*)buf;
uint64_t n = 0;
int64_t i = 0;
// Full groups followed by at least one more input byte (8-byte load is in range).
for(; i < dlen - 7; i += 7) {
register uint64_t sum = 0;
register uint64_t shift = htobe64(*(uint64_t*)(data+i))>>2;
sum |= shift & 0x3fff000000000000;
shift >>= 2;
sum |= shift & 0x00003fff00000000;
shift >>= 2;
sum |= shift & 0x000000003fff0000;
shift >>= 2;
sum |= shift & 0x0000000000003fff;
sum += 0x4e004e004e004e00;
vals[n++] = be64toh(sum);
}
base16384_union_remainder valbuf;
// Exactly one full group left: copy its 7 bytes instead of loading 8.
// valbuf.buf[7] is never set; its bits fall below every mask applied below.
if(dlen - i == 7) {
memcpy(valbuf.buf, data+i, 7);
register uint64_t sum = 0;
register uint64_t shift = htobe64(valbuf.val)>>2;
sum |= shift & 0x3fff000000000000;
shift >>= 2;
sum |= shift & 0x00003fff00000000;
shift >>= 2;
sum |= shift & 0x000000003fff0000;
shift >>= 2;
sum |= shift & 0x0000000000003fff;
sum += 0x4e004e004e004e00;
vals[n++] = be64toh(sum);
return outlen;
}
// Tail of 1..6 bytes: o counts the bytes still to be consumed, one nesting
// level per byte. The word is assembled in little-endian byte layout (bias
// 0x004e004e004e004e) and byte-swapped on big-endian builds.
int o = offset;
if(o--) {
register uint64_t sum = 0x000000000000003f & (data[i] >> 2);
sum |= ((uint64_t)data[i] << 14) & 0x000000000000c000;
if(o--) {
sum |= ((uint64_t)data[i + 1] << 6) & 0x0000000000003f00;
sum |= ((uint64_t)data[i + 1] << 20) & 0x0000000000300000;
if(o--) {
sum |= ((uint64_t)data[i + 2] << 12) & 0x00000000000f0000;
sum |= ((uint64_t)data[i + 2] << 28) & 0x00000000f0000000;
if(o--) {
sum |= ((uint64_t)data[i + 3] << 20) & 0x000000000f000000;
sum |= ((uint64_t)data[i + 3] << 34) & 0x0000003c00000000;
if(o--) {
sum |= ((uint64_t)data[i + 4] << 26) & 0x0000000300000000;
sum |= ((uint64_t)data[i + 4] << 42) & 0x0000fc0000000000;
if(o--) {
sum |= ((uint64_t)data[i + 5] << 34) & 0x0000030000000000;
sum |= ((uint64_t)data[i + 5] << 48) & 0x003f000000000000;
}
}
}
}
}
sum += 0x004e004e004e004e;
#ifdef WORDS_BIGENDIAN
valbuf.val = __builtin_bswap64(sum);
#else
valbuf.val = sum;
#endif
// Copy only the tail payload: the bytes between the words already written
// and the 2-byte marker.
memcpy(&vals[n], valbuf.buf, outlen-2-(int)n*(int)sizeof(uint64_t));
buf[outlen - 2] = '=';
buf[outlen - 1] = offset;
}
return outlen;
}
int base16384_encode(const char* data, int dlen, char* buf) { int base16384_encode(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8; int outlen = dlen / 7 * 8;
@@ -75,9 +122,6 @@ int base16384_encode(const char* data, int dlen, char* buf) {
case 6: outlen += 10; break; case 6: outlen += 10; break;
default: break; default: break;
} }
#ifdef DEBUG
printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8);
#endif
uint64_t* vals = (uint64_t*)buf; uint64_t* vals = (uint64_t*)buf;
uint64_t n = 0; uint64_t n = 0;
int64_t i = 0; int64_t i = 0;
@@ -93,9 +137,6 @@ int base16384_encode(const char* data, int dlen, char* buf) {
sum |= shift & 0x0000000000003fff; sum |= shift & 0x0000000000003fff;
sum += 0x4e004e004e004e00; sum += 0x4e004e004e004e00;
vals[n++] = be64toh(sum); vals[n++] = be64toh(sum);
#ifdef DEBUG
printf("i: %llu, add sum: %016llx\n", i, sum);
#endif
} }
int o = offset; int o = offset;
if(o--) { if(o--) {
@@ -127,15 +168,125 @@ int base16384_encode(const char* data, int dlen, char* buf) {
#else #else
vals[n] = sum; vals[n] = sum;
#endif #endif
#ifdef DEBUG
printf("i: %llu, add sum: %016llx\n", i, sum);
#endif
buf[outlen - 2] = '='; buf[outlen - 2] = '=';
buf[outlen - 1] = offset; buf[outlen - 1] = offset;
} }
return outlen; return outlen;
} }
/*
 * base16384_encode_unsafe (64-bit word implementation)
 *
 * Encodes dlen bytes from data into buf and returns the output length
 * (outlen), using only the word-at-a-time loop for every group, including a
 * trailing partial one.
 *
 * Each iteration loads data[i..i+7] and stores 8 bytes into buf, so the last
 * iteration reads past data[dlen-1] and, for short tails, stores past
 * buf[outlen-1].
 * NOTE(review): callers presumably must provide slack after both buffers;
 * the required amount is not visible in this block.
 */
int base16384_encode_unsafe(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
switch(offset) { // add the encoded tail length, including the 2 bytes of the offset marker
case 0: break;
case 1: outlen += 4; break;
case 2:
case 3: outlen += 6; break;
case 4:
case 5: outlen += 8; break;
case 6: outlen += 10; break;
default: break;
}
uint64_t* vals = (uint64_t*)buf;
uint64_t n = 0;
int64_t i = 0;
// Runs for every group that has at least one input byte (i < dlen).
for(; i < dlen; i += 7) {
register uint64_t sum = 0;
register uint64_t shift = htobe64(*(uint64_t*)(data+i))>>2; // out-of-bounds read happens here
sum |= shift & 0x3fff000000000000;
shift >>= 2;
sum |= shift & 0x00003fff00000000;
shift >>= 2;
sum |= shift & 0x000000003fff0000;
shift >>= 2;
sum |= shift & 0x0000000000003fff;
sum += 0x4e004e004e004e00;
vals[n++] = be64toh(sum);
}
// Overwrite the last two output bytes with the '=' <offset> marker.
if(offset) {
buf[outlen - 2] = '=';
buf[outlen - 1] = offset;
}
return outlen;
}
/*
 * base16384_decode_safe (64-bit word implementation)
 *
 * Decodes dlen bytes of base16384 text from data into buf and returns the
 * number of decoded bytes. Every 8 input bytes (one 64-bit word) yield 7
 * output bytes; a trailing '=' <offset> marker (offset 1..6) announces a
 * shorter tail that decodes to offset bytes.
 *
 * The last full group and the tail are moved through a small union with
 * memcpy so that no access goes beyond data[0..dlen) or buf[0..outlen).
 * Malformed input is handled defensively:
 *   - dlen < 2 returns 0 instead of reading data[dlen-2] out of range;
 *   - a marker byte outside 0..6 is ignored rather than added to outlen;
 *   - the tail memcpy length is clamped to the size of the union.
 * Output for well-formed input is unchanged.
 */
int base16384_decode_safe(const char* data, int dlen, char* buf) {
	// Too short to contain a marker; also keeps data[dlen-2] in bounds.
	if(dlen < 2) return 0;
	int outlen = dlen;
	int offset = 0;
	if(data[dlen-2] == '=') {
		offset = data[dlen-1];
		switch(offset) { // drop the encoded tail, including the 2 bytes of the offset marker
			case 0: break;
			case 1: outlen -= 4; break;
			case 2:
			case 3: outlen -= 6; break;
			case 4:
			case 5: outlen -= 8; break;
			case 6: outlen -= 10; break;
			// Not a valid remainder length: treat the input as having no tail
			// so the bogus value cannot inflate outlen.
			default: offset = 0; break;
		}
	}
	outlen = outlen / 8 * 7 + offset;
	const uint64_t* vals = (const uint64_t*)data;
	uint64_t n = 0;
	int64_t i = 0;
	// Full groups that are followed by more output (8-byte store is in range).
	for(; i < outlen - 7; n++, i+=7) {
		register uint64_t sum = 0;
		register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00;
		shift <<= 2;
		sum |= shift & 0xfffc000000000000;
		shift <<= 2;
		sum |= shift & 0x0003fff000000000;
		shift <<= 2;
		sum |= shift & 0x0000000fffc00000;
		shift <<= 2;
		sum |= shift & 0x00000000003fff00;
		*(uint64_t*)(buf+i) = be64toh(sum);
	}
	base16384_union_remainder valbuf;
	valbuf.val = 0; // bytes not covered by a short memcpy below are well defined
	if(outlen - i == 7) {
		// Exactly one full group left: store only 7 bytes so the write ends
		// at buf[outlen-1].
		register uint64_t sum = 0;
		register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00;
		shift <<= 2;
		sum |= shift & 0xfffc000000000000;
		shift <<= 2;
		sum |= shift & 0x0003fff000000000;
		shift <<= 2;
		sum |= shift & 0x0000000fffc00000;
		shift <<= 2;
		sum |= shift & 0x00000000003fff00;
		valbuf.val = be64toh(sum);
		memcpy(buf+i, valbuf.buf, 7);
	} else if((*(uint8_t*)(&vals[n]) != '=') && offset--) {
		// Tail: copy the payload that precedes the 2-byte marker. The
		// original copy was unclamped: a length above 8 overflowed valbuf and
		// a negative one became a huge size_t.
		int cnt = dlen-2-(int)n*(int)sizeof(uint64_t);
		if(cnt > (int)sizeof(valbuf.buf)) cnt = (int)sizeof(valbuf.buf);
		if(cnt < 0) cnt = 0;
		memcpy(valbuf.buf, &vals[n], cnt);
		#ifdef WORDS_BIGENDIAN
			register uint64_t sum = __builtin_bswap64(valbuf.val) - 0x000000000000004e;
		#else
			register uint64_t sum = valbuf.val - 0x000000000000004e;
		#endif
		buf[i++] = ((sum & 0x000000000000003f) << 2) | ((sum & 0x000000000000c000) >> 14);
		if(offset--) {
			sum -= 0x00000000004e0000;
			buf[i++] = ((sum & 0x0000000000003f00) >> 6) | ((sum & 0x0000000000300000) >> 20);
			if(offset--) {
				buf[i++] = ((sum & 0x00000000000f0000) >> 12) | ((sum & 0x00000000f0000000) >> 28);
				if(offset--) {
					sum -= 0x0000004e00000000;
					buf[i++] = ((sum & 0x000000000f000000) >> 20) | ((sum & 0x0000003c00000000) >> 34);
					if(offset--) {
						buf[i++] = ((sum & 0x0000000300000000) >> 26) | ((sum & 0x0000fc0000000000) >> 42);
						if(offset--) {
							sum -= 0x004e000000000000;
							buf[i] = ((sum & 0x0000030000000000) >> 34) | ((sum & 0x003f000000000000) >> 48);
						}
					}
				}
			}
		}
	}
	return outlen;
}
int base16384_decode(const char* data, int dlen, char* buf) { int base16384_decode(const char* data, int dlen, char* buf) {
int outlen = dlen; int outlen = dlen;
int offset = 0; int offset = 0;
@@ -153,7 +304,7 @@ int base16384_decode(const char* data, int dlen, char* buf) {
} }
} }
outlen = outlen / 8 * 7 + offset; outlen = outlen / 8 * 7 + offset;
uint64_t* vals = (uint64_t*)data; const uint64_t* vals = (const uint64_t*)data;
uint64_t n = 0; uint64_t n = 0;
int64_t i = 0; int64_t i = 0;
for(; i <= outlen - 7; n++, i+=7) { for(; i <= outlen - 7; n++, i+=7) {
@@ -168,10 +319,8 @@ int base16384_decode(const char* data, int dlen, char* buf) {
shift <<= 2; shift <<= 2;
sum |= shift & 0x00000000003fff00; sum |= shift & 0x00000000003fff00;
*(uint64_t*)(buf+i) = be64toh(sum); *(uint64_t*)(buf+i) = be64toh(sum);
#ifdef DEBUG
printf("i: %llu, add sum: %016llx\n", i, sum);
#endif
} }
if(*(uint8_t*)(&vals[n]) == '=') return outlen;
if(offset--) { if(offset--) {
// 这里有读取越界 // 这里有读取越界
#ifdef WORDS_BIGENDIAN #ifdef WORDS_BIGENDIAN
@@ -201,3 +350,56 @@ int base16384_decode(const char* data, int dlen, char* buf) {
} }
return outlen; return outlen;
} }
/*
 * base16384_decode_unsafe (64-bit word implementation)
 *
 * Decodes dlen bytes of base16384 text from data into buf and returns the
 * number of decoded bytes. Complete groups are decoded in the loop; whatever
 * follows (last full group or tail) is decoded by one more word-at-a-time
 * pass that loads a 64-bit word and, unless it returns early on a 0x3d ('=')
 * first byte, stores 8 bytes at buf+i.
 *
 * That final pass can load a word extending past data[dlen-1] and store past
 * buf[outlen-1]. data[dlen-2] is also read without checking dlen.
 * NOTE(review): callers presumably must provide slack after both buffers;
 * the required amount is not visible in this block.
 */
int base16384_decode_unsafe(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
if(data[dlen-2] == '=') {
offset = data[dlen-1];
switch(offset) { // drop the encoded tail, including the 2 bytes of the offset marker
case 0: break;
case 1: outlen -= 4; break;
case 2:
case 3: outlen -= 6; break;
case 4:
case 5: outlen -= 8; break;
case 6: outlen -= 10; break;
default: break;
}
}
outlen = outlen / 8 * 7 + offset;
const uint64_t* vals = (const uint64_t*)data;
uint64_t n = 0;
int64_t i = 0;
for(; i < outlen-7; n++, i+=7) {
register uint64_t sum = 0;
register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00;
shift <<= 2;
sum |= shift & 0xfffc000000000000;
shift <<= 2;
sum |= shift & 0x0003fff000000000;
shift <<= 2;
sum |= shift & 0x0000000fffc00000;
shift <<= 2;
sum |= shift & 0x00000000003fff00;
*(uint64_t*)(buf+i) = be64toh(sum);
}
// Final pass over the remaining input.
register uint64_t sum = 0;
register uint64_t shift = htobe64(vals[n]);
// A first byte of 0x3d ('=') means the marker has been reached: nothing left.
if(((shift>>56)&0xff) == 0x3d) return outlen;
// Any 16-bit character whose high byte is below the 0x4e bias gets that byte
// forced to 0xff before the bias is subtracted.
// NOTE(review): presumably this neutralises marker/padding bytes in a short
// tail; confirm the intent with the author.
if(((shift>>56)&0xff) < 0x4e) shift |= 0xff00000000000000;
if(((shift>>40)&0xff) < 0x4e) shift |= 0x0000ff0000000000;
if(((shift>>24)&0xff) < 0x4e) shift |= 0x00000000ff000000;
if(((shift>> 8)&0xff) < 0x4e) shift |= 0x000000000000ff00;
shift -= 0x4e004e004e004e00;
shift <<= 2;
sum |= shift & 0xfffc000000000000;
shift <<= 2;
sum |= shift & 0x0003fff000000000;
shift <<= 2;
sum |= shift & 0x0000000fffc00000;
shift <<= 2;
sum |= shift & 0x00000000003fff00;
*(uint64_t*)(buf+i) = be64toh(sum);
return outlen;
}

View File

@@ -1,9 +1,9 @@
.TH BASE16384 1 "26 August 2023" "GNU" "User Commands" .TH BASE16384 1 "5 April 2024" "GNU" "User Commands"
.SH NAME .SH NAME
base16384 \- Encode binary files to printable utf16be base16384 \- Encode binary files to printable utf16be
.SH SYNOPSIS .SH SYNOPSIS
.B base16384 .B base16384
-[e|d|t] <\fIinputfile\fR> <\fIoutputfile\fR> -[edtn] <\fIinputfile\fR> <\fIoutputfile\fR>
.SH DESCRIPTION .SH DESCRIPTION
.LP .LP
There are There are
@@ -27,7 +27,30 @@ to
.sp 1 .sp 1
.TP 0.5i .TP 0.5i
\fB\-e\fR \fB\-e\fR
Read data from \fIinputfile\fR and encode them into \fIoutputfile\fR. Read data from \fIinputfile\fR and encode them into \fIoutputfile\fR. It's the default option when neither
.B -e
nor
.B -d
is specified.
.TP 0.5i
\fB\-d\fR
Read data from \fIinputfile\fR and decode them into \fIoutputfile\fR.
.TP 0.5i
\fB\-t\fR
Show the time spent.
.TP 0.5i
\fB\-n\fR
Do not write utf16be file header
.B 0xFEFF
to the output.
.TP 0.5i
\fB\-c\fR
Embed or validate checksum in remainder when using \fIstdin\fR or \fIstdout\fR or inputsize > _BASE16384_ENCBUFSZ.
.TP 0.5i
\fB\-C\fR
Do
.B -c
forcibly.
.TP 0.5i .TP 0.5i
\fB\-d\fR \fB\-d\fR
Read data from \fIinputfile\fR and decode them into \fIoutputfile\fR. Read data from \fIinputfile\fR and decode them into \fIoutputfile\fR.
@@ -36,10 +59,14 @@ Read data from \fIinputfile\fR and decode them into \fIoutputfile\fR.
Show spend time. Show spend time.
.TP 0.5i .TP 0.5i
\fBinputfile\fR \fBinputfile\fR
An absolute or relative file path. Specially, pass - to read from stdin. An absolute or relative file path. Specially, pass
.B -
to read from \fIstdin\fR.
.TP 0.5i .TP 0.5i
\fBoutputfile\fR \fBoutputfile\fR
An absolute or relative file path. Specially, pass - to write to stdout. An absolute or relative file path. Specially, pass
.B -
to write to \fIstdout\fR.
.SH "EXIT STATUS" .SH "EXIT STATUS"
.TP 0.5i .TP 0.5i
\fB0\fR \fB0\fR
@@ -68,6 +95,12 @@ Write file error in mmap.
.TP 0.5i .TP 0.5i
\fB8\fR \fB8\fR
Invalid input/output filename. Invalid input/output filename.
.TP 0.5i
\fB9\fR
Invalid commandline parameter.
.TP 0.5i
\fB10\fR
Invalid decoding checksum.
.SH "SEE ALSO" .SH "SEE ALSO"
https://github.com/fumiama/base16384 https://github.com/fumiama/base16384
.SH BUGS .SH BUGS
@@ -77,7 +110,7 @@ on github.
.SH AUTHOR .SH AUTHOR
This manual page contributed by Fumiama Minamoto. This manual page contributed by Fumiama Minamoto.
.SH "COPYRIGHT" .SH "COPYRIGHT"
Copyright \(co 2022-2023, Fumiama Minamoto Copyright \(co 2022-2024, Fumiama Minamoto
This file is part of This file is part of
.IR "base16384" . .IR "base16384" .
.LP .LP

View File

@@ -1,6 +1,6 @@
/* base16384.c /* base16384.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384). * This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2023 Fumiama Minamoto. * Copyright (c) 2022-2024 Fumiama Minamoto.
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -38,77 +38,103 @@ unsigned long get_start_ms() {
} }
#endif #endif
static void print_usage() { static base16384_err_t print_usage() {
puts("Copyright (c) 2022-2023 Fumiama Minamoto.\nBase16384 2.2.5 (August 26th 2023). Usage:"); #ifndef BASE16384_VERSION
puts("base16384 [-edt] [inputfile] [outputfile]"); #define BASE16384_VERSION "dev"
puts(" -e\t\tencode"); #endif
puts(" -d\t\tdecode"); #ifndef BASE16384_VERSION_DATE
puts(" -t\t\tshow spend time"); #define BASE16384_VERSION_DATE "unknown date"
puts(" inputfile\tpass - to read from stdin"); #endif
puts(" outputfile\tpass - to write to stdout"); fputs(
"Copyright (c) 2022-2024 Fumiama Minamoto.\nBase16384 "
BASE16384_VERSION
" ("
BASE16384_VERSION_DATE
"). Usage:\n", stderr
);
fputs("base16384 [-edtn] [inputfile] [outputfile]\n", stderr);
fputs(" -e\t\tencode (default)\n", stderr);
fputs(" -d\t\tdecode\n", stderr);
fputs(" -t\t\tshow spend time\n", stderr);
fputs(" -n\t\tdon't write utf16be file header (0xFEFF)\n", stderr);
fputs(" -c\t\tembed or validate checksum in remainder\n", stderr);
fputs(" -C\t\tdo -c forcely\n", stderr);
fputs(" inputfile\tpass - to read from stdin\n", stderr);
fputs(" outputfile\tpass - to write to stdout\n", stderr);
return base16384_err_invalid_commandline_parameter;
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
if(argc != 4 || argv[1][0] != '-') {
print_usage(); const char* cmd = argv[1];
return -1; if(argc != 4 || cmd[0] != '-') return print_usage();
}
int flaglen = strlen(argv[1]); int flaglen = strlen(cmd);
if(flaglen <= 1 || flaglen > 3) { if(flaglen <= 1 || flaglen > 5) return print_usage();
print_usage();
return -2;
}
#ifdef _WIN32 #ifdef _WIN32
clock_t t = 0; clock_t t = 0;
#else #else
unsigned long t = 0; unsigned long t = 0;
#endif #endif
uint16_t is_encode = 1, use_timer = 0, no_header = 0, use_checksum = 0;
#define set_flag(f, v) ((f) = (((((f)>>8)+1) << 8)&0xff00) | (v&0x00ff))
#define flag_has_been_set(f) ((f)>>8)
#define set_or_test_flag(f, v) (flag_has_been_set(f)?1:(set_flag(f, v), 0))
while(--flaglen) switch(cmd[flaglen]) { // skip cmd[0] = '-'
case 'e':
if(set_or_test_flag(is_encode, 1)) return print_usage();
break;
case 'd':
if(set_or_test_flag(is_encode, 0)) return print_usage();
break;
case 't':
if(set_or_test_flag(use_timer, 1)) return print_usage();
break;
case 'n':
if(set_or_test_flag(no_header, 1)) return print_usage();
break;
case 'c':
if(set_or_test_flag(use_checksum, 1)) return print_usage();
break;
case 'C':
if(set_or_test_flag(use_checksum, 2)) return print_usage();
break;
default:
return print_usage();
break;
}
#define clear_high_byte(x) ((x) &= 0x00ff)
clear_high_byte(is_encode); clear_high_byte(use_timer);
clear_high_byte(no_header); clear_high_byte(use_checksum);
if(use_timer) {
#ifdef _WIN32
t = clock();
#else
t = get_start_ms();
#endif
}
base16384_err_t exitstat = base16384_err_ok; base16384_err_t exitstat = base16384_err_ok;
char cmd = argv[1][1];
if(cmd == 't') { #define do_coding(method) base16384_##method##_file_detailed( \
if(flaglen == 2) { argv[2], argv[3], encbuf, decbuf, \
print_usage(); return -3; (no_header?BASE16384_FLAG_NOHEADER:0) \
} | ((use_checksum&1)?BASE16384_FLAG_SUM_CHECK_ON_REMAIN:0) \
| ((use_checksum&2)?BASE16384_FLAG_DO_SUM_CHECK_FORCELY:0) \
)
exitstat = is_encode?do_coding(encode):do_coding(decode);
#undef do_coding
if(t) {
#ifdef _WIN32 #ifdef _WIN32
t = clock(); fprintf(stderr, "spend time: %lums\n", clock() - t);
#else #else
t = get_start_ms(); fprintf(stderr, "spend time: %lums\n", get_start_ms() - t);
#endif
cmd = argv[1][2];
} else if(flaglen == 3) {
if(argv[1][2] != 't') {
print_usage(); return -4;
}
#ifdef _WIN32
t = clock();
#else
t = get_start_ms();
#endif #endif
} }
switch(cmd) {
case 'e': exitstat = base16384_encode_file(argv[2], argv[3], encbuf, decbuf); break; return base16384_perror(exitstat);
case 'd': exitstat = base16384_decode_file(argv[2], argv[3], encbuf, decbuf); break;
default: print_usage(); return -5;
}
if(t && !exitstat && *(uint16_t*)(argv[3]) != *(uint16_t*)"-") {
#ifdef _WIN32
printf("spend time: %lums\n", clock() - t);
#else
printf("spend time: %lums\n", get_start_ms() - t);
#endif
}
#define print_base16384_err(n) case base16384_err_##n: perror("base16384_err_"#n); break
if(exitstat) switch(exitstat) {
print_base16384_err(get_file_size);
print_base16384_err(fopen_output_file);
print_base16384_err(fopen_input_file);
print_base16384_err(write_file);
print_base16384_err(open_input_file);
print_base16384_err(map_input_file);
print_base16384_err(read_file);
print_base16384_err(invalid_file_name);
default: perror("base16384"); break;
}
#undef print_base16384_err
return exitstat;
} }

View File

@@ -3,7 +3,7 @@
/* base16384.h /* base16384.h
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384). * This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2023 Fumiama Minamoto. * Copyright (c) 2022-2024 Fumiama Minamoto.
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -24,30 +24,47 @@
#include <stdio.h> #include <stdio.h>
#endif #endif
#define define_base16384_err_t(n) base16384_err_##n
// base16384_err_t is the return value of base16384_en/decode_file
enum base16384_err_t { enum base16384_err_t {
define_base16384_err_t(ok), base16384_err_ok,
define_base16384_err_t(get_file_size), base16384_err_get_file_size,
define_base16384_err_t(fopen_output_file), base16384_err_fopen_output_file,
define_base16384_err_t(fopen_input_file), base16384_err_fopen_input_file,
define_base16384_err_t(write_file), base16384_err_write_file,
define_base16384_err_t(open_input_file), base16384_err_open_input_file,
define_base16384_err_t(map_input_file), base16384_err_map_input_file,
define_base16384_err_t(read_file), base16384_err_read_file,
define_base16384_err_t(invalid_file_name), base16384_err_invalid_file_name,
base16384_err_invalid_commandline_parameter,
base16384_err_invalid_decoding_checksum,
}; };
// base16384_err_t is the return value of base16384_en/decode_file /**
* @brief return value of base16384_en/decode_file
*/
typedef enum base16384_err_t base16384_err_t; typedef enum base16384_err_t base16384_err_t;
#undef define_base16384_err_t #ifndef BASE16384_BUFSZ_FACTOR
#define BASE16384_BUFSZ_FACTOR (8)
#endif
#define BASE16384_ENCBUFSZ (BUFSIZ*1024/7*7+7) #define _BASE16384_ENCBUFSZ ((BUFSIZ*BASE16384_BUFSZ_FACTOR)/7*7)
#define BASE16384_DECBUFSZ (BUFSIZ*1024/8*8+16) #define _BASE16384_DECBUFSZ ((BUFSIZ*BASE16384_BUFSZ_FACTOR)/8*8)
// base16384_encode_len calc min buf size to fill encode result #define BASE16384_ENCBUFSZ (_BASE16384_ENCBUFSZ+16)
static inline int base16384_encode_len(int dlen) { #define BASE16384_DECBUFSZ (_BASE16384_DECBUFSZ+16)
// disable 0xFEFF file header in encode
#define BASE16384_FLAG_NOHEADER (1<<0)
// enable sum check when using stdin or stdout or inputsize > _BASE16384_ENCBUFSZ
#define BASE16384_FLAG_SUM_CHECK_ON_REMAIN (1<<1)
// always do the sum check, without checking data length
#define BASE16384_FLAG_DO_SUM_CHECK_FORCELY (1<<2)
/**
* @brief calculate the exact encoded size
* @param dlen the data length to encode
* @return the size
*/
static inline int _base16384_encode_len(int dlen) {
int outlen = dlen / 7 * 8; int outlen = dlen / 7 * 8;
int offset = dlen % 7; int offset = dlen % 7;
switch(offset) { // 算上偏移标志字符占用的 2 字节 switch(offset) { // 算上偏移标志字符占用的 2 字节
@@ -60,11 +77,25 @@ static inline int base16384_encode_len(int dlen) {
case 6: outlen += 10; break; case 6: outlen += 10; break;
default: break; default: break;
} }
return outlen + 8; // 冗余的8B用于可能的结尾的覆盖 return outlen;
} }
// base16384_decode_len calc min buf size to fill decode result /**
static inline int base16384_decode_len(int dlen, int offset) { * @brief calculate minimum encoding buffer size (16 bits larger than the real encoded size)
* @param dlen the data length to encode
* @return the minimum encoding buffer size
*/
static inline int base16384_encode_len(int dlen) {
	// exact encoded size for dlen input bytes
	const int exact_len = _base16384_encode_len(dlen);
	// 16 redundant bytes are reserved for a possible unsafe overwrite at the end
	return exact_len + 16;
}
/**
* @brief calculate the exact decoded size
* @param dlen the data length to decode
* @param offset the last char `xx` of the underfilled coding (0x3Dxx) or 0 for the full coding
* @return the size
*/
static inline int _base16384_decode_len(int dlen, int offset) {
int outlen = dlen; int outlen = dlen;
switch(offset) { // 算上偏移标志字符占用的 2 字节 switch(offset) { // 算上偏移标志字符占用的 2 字节
case 0: break; case 0: break;
@@ -76,39 +107,180 @@ static inline int base16384_decode_len(int dlen, int offset) {
case 6: outlen -= 10; break; case 6: outlen -= 10; break;
default: break; default: break;
} }
return outlen / 8 * 7 + offset + 1; // 多出1字节用于循环覆盖 return outlen / 8 * 7 + offset;
} }
// base16384_encode encodes data and write result into buf /**
* @brief calculate minimum decoding buffer size (16 bytes larger than the real decoded size)
* @param dlen the data length to decode
* @param offset the last char `xx` of the underfilled coding (0x3Dxx) or 0 for the full coding
* @return the minimum decoding buffer size
*/
static inline int base16384_decode_len(int dlen, int offset) {
	// exact decoded size for dlen encoded bytes with the given offset
	const int exact_len = _base16384_decode_len(dlen, offset);
	// 16 extra bytes are reserved for the unsafe loop overwrite
	return exact_len + 16;
}
/**
* @brief safely encode data and write result into buf
* @param data data to encode, no data overread
* @param dlen the data length
* @param buf the output buffer, whose size can be exactly `_base16384_encode_len`
* @return the total length written
*/
int base16384_encode_safe(const char* data, int dlen, char* buf);
/**
* @brief encode data and write result into buf
* @param data data to encode
* @param dlen the data length
* @param buf the output buffer, whose size must be at least `base16384_encode_len`
* @return the total length written
*/
int base16384_encode(const char* data, int dlen, char* buf); int base16384_encode(const char* data, int dlen, char* buf);
// base16384_decode decodes data and write result into buf /**
* @brief encode data and write result into buf without considering border condition
* @param data data to encode
* @param dlen the data length
* @param buf the output buffer, whose size must be at least `base16384_encode_len`
* @return the total length written
*/
int base16384_encode_unsafe(const char* data, int dlen, char* buf);
/**
* @brief safely decode data and write result into buf
* @param data data to decode, no data overread
* @param dlen the data length
* @param buf the output buffer, whose size can be exactly `_base16384_decode_len`
* @return the total length written
*/
int base16384_decode_safe(const char* data, int dlen, char* buf);
/**
* @brief decode data and write result into buf
* @param data data to decode
* @param dlen the data length
* @param buf the output buffer, whose size must be at least `base16384_decode_len`
* @return the total length written
*/
int base16384_decode(const char* data, int dlen, char* buf); int base16384_decode(const char* data, int dlen, char* buf);
// base16384_encode_file encodes input file to output file. /**
// use `-` to specify stdin/stdout * @brief decode data and write result into buf without considering border condition
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ * @param data data to decode
base16384_err_t base16384_encode_file(const char* input, const char* output, char* encbuf, char* decbuf); * @param dlen the data length
* @param buf the output buffer, whose size must be at least `base16384_decode_len`
* @return the total length written
*/
int base16384_decode_unsafe(const char* data, int dlen, char* buf);
// base16384_encode_fp encodes input file to output file. #define base16384_typed_params(type) type input, type output, char* encbuf, char* decbuf
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ #define base16384_typed_flag_params(type) base16384_typed_params(type), int flag
base16384_err_t base16384_encode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf);
// base16384_encode_fd encodes input fd to output fd. /**
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ * @brief encode input file to output file
base16384_err_t base16384_encode_fd(int input, int output, char* encbuf, char* decbuf); * @param input filename or `-` to specify stdin
* @param output filename or `-` to specify stdout
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_encode_file_detailed(base16384_typed_flag_params(const char*));
// base16384_decode_file decodes input file to output file. /**
// use `-` to specify stdin/stdout * @brief encode input `FILE*` to output `FILE*`
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ * @param input `FILE*` pointer
base16384_err_t base16384_decode_file(const char* input, const char* output, char* encbuf, char* decbuf); * @param output `FILE*` pointer
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_encode_fp_detailed(base16384_typed_flag_params(FILE*));
// base16384_decode_fp decodes input file to output file. /**
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ * @brief encode input stream to output stream
base16384_err_t base16384_decode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf); * @param input file descripter
* @param output file descripter
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_encode_fd_detailed(base16384_typed_flag_params(int));
// base16384_decode_fd decodes input fd to output fd. /**
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ * @brief decode input file to output file
base16384_err_t base16384_decode_fd(int input, int output, char* encbuf, char* decbuf); * @param input filename or `-` to specify stdin
* @param output filename or `-` to specify stdout
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_decode_file_detailed(base16384_typed_flag_params(const char*));
/**
* @brief decode input `FILE*` to output `FILE*`
* @param input `FILE*` pointer
* @param output `FILE*` pointer
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_decode_fp_detailed(base16384_typed_flag_params(FILE*));
/**
* @brief decode input stream to output stream
* @param input file descripter
* @param output file descripter
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_decode_fd_detailed(base16384_typed_flag_params(int));
// Declare `base16384_<method>_<name>`: it takes the (input, output, encbuf, decbuf)
// parameter list produced by base16384_typed_params(type), i.e. the `_detailed`
// parameter list without the trailing `flag`.
// The macro body deliberately has no trailing `;` because every use below supplies
// its own; a `;` in both places would leave a stray empty declaration at file scope.
#define BASE16384_WRAP_DECL(method, name, type) \
	base16384_err_t base16384_##method##_##name(base16384_typed_params(type))
BASE16384_WRAP_DECL(encode, file, const char*);
BASE16384_WRAP_DECL(encode, fp, FILE*);
BASE16384_WRAP_DECL(encode, fd, int);
BASE16384_WRAP_DECL(decode, file, const char*);
BASE16384_WRAP_DECL(decode, fp, FILE*);
BASE16384_WRAP_DECL(decode, fd, int);
// the helper macros are only needed for the declarations above; do not leak them to includers
#undef BASE16384_WRAP_DECL
#undef base16384_typed_flag_params
#undef base16384_typed_params
/**
 * @brief report a non-ok error code through perror
 * @param err the error code to report
 * @return the input parameter `err`, unchanged
 */
static inline base16384_err_t base16384_perror(base16384_err_t err) {
	const char* tag;
	if(!err) return err; // base16384_err_ok: nothing to print
	// pick the message prefix: the enumerator's own name, or a generic prefix for unknown codes
	switch(err) {
		case base16384_err_get_file_size: tag = "base16384_err_get_file_size"; break;
		case base16384_err_fopen_output_file: tag = "base16384_err_fopen_output_file"; break;
		case base16384_err_fopen_input_file: tag = "base16384_err_fopen_input_file"; break;
		case base16384_err_write_file: tag = "base16384_err_write_file"; break;
		case base16384_err_open_input_file: tag = "base16384_err_open_input_file"; break;
		case base16384_err_map_input_file: tag = "base16384_err_map_input_file"; break;
		case base16384_err_read_file: tag = "base16384_err_read_file"; break;
		case base16384_err_invalid_file_name: tag = "base16384_err_invalid_file_name"; break;
		case base16384_err_invalid_commandline_parameter: tag = "base16384_err_invalid_commandline_parameter"; break;
		case base16384_err_invalid_decoding_checksum: tag = "base16384_err_invalid_decoding_checksum"; break;
		default: tag = "base16384"; break;
	}
	perror(tag); // perror appends the description of the current errno
	return err;
}
#endif #endif

124
binary.h Normal file
View File

@@ -0,0 +1,124 @@
#ifndef _BINARY_H_
#define _BINARY_H_
/* binary.h
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Per-platform selection of the big-endian <-> host conversion names
// (be16toh/be32toh/htobe16/htobe32, plus the 64-bit variants where
// IS_64BIT_PROCESSOR is defined) used by the rest of this header.
#ifdef __cosmopolitan // always le
// host is little-endian here, so every conversion is a byte swap;
// no 64-bit variants are defined in this branch
#define be16toh(x) bswap_16(x)
#define be32toh(x) bswap_32(x)
#define htobe16(x) bswap_16(x)
#define htobe32(x) bswap_32(x)
#else
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
// Linux / FreeBSD / NetBSD: no macros are defined here; the conversion names
// are presumably supplied by the system endian header included below
#ifdef __linux__
#include <endian.h>
#endif
#ifdef __FreeBSD__
#include <sys/endian.h>
#endif
#ifdef __NetBSD__
#include <sys/endian.h>
#endif
// OpenBSD: map the be*toh names onto the betoh* spelling;
// the htobe* names are not defined here (presumably provided by the system header — confirm)
#ifdef __OpenBSD__
#include <sys/types.h>
#define be16toh(x) betoh16(x)
#define be32toh(x) betoh32(x)
#ifdef IS_64BIT_PROCESSOR
#define be64toh(x) betoh64(x)
#endif
#endif
// Apple: map onto the network byte order helpers (ntoh*/hton*)
// NOTE(review): no header declaring ntohs/ntohl is included in this branch — confirm it arrives transitively
#ifdef __APPLE__
#define be16toh(x) ntohs(x)
#define be32toh(x) ntohl(x)
#ifdef IS_64BIT_PROCESSOR
#define be64toh(x) ntohll(x)
#endif
#define htobe16(x) htons(x)
#define htobe32(x) htonl(x)
#ifdef IS_64BIT_PROCESSOR
#define htobe64(x) htonll(x)
#endif
#endif
// Windows: identity when the build defines WORDS_BIGENDIAN, otherwise swap with the _byteswap_* functions
#ifdef _WIN32
#ifdef WORDS_BIGENDIAN
#define be16toh(x) (x)
#define be32toh(x) (x)
#ifdef IS_64BIT_PROCESSOR
#define be64toh(x) (x)
#endif
#define htobe16(x) (x)
#define htobe32(x) (x)
#ifdef IS_64BIT_PROCESSOR
#define htobe64(x) (x)
#endif
#else
#define be16toh(x) _byteswap_ushort(x)
#define be32toh(x) _byteswap_ulong(x)
#ifdef IS_64BIT_PROCESSOR
#define be64toh(x) _byteswap_uint64(x)
#endif
#define htobe16(x) _byteswap_ushort(x)
#define htobe32(x) _byteswap_ulong(x)
#ifdef IS_64BIT_PROCESSOR
#define htobe64(x) _byteswap_uint64(x)
#endif
#endif
#endif
#endif
// leftrotate function definition
// rotates x left by c bits within the bit width of x's own type (sizeof(x)*8)
// NOTE(review): x and c are each evaluated more than once, and c must be strictly
// between 0 and that bit width, otherwise one of the two shifts is out of range;
// x is presumably expected to be an unsigned 32-bit value — confirm at call sites
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (sizeof(x)*8 - (c))))
// initial sum value used in BASE16384_FLAG_SUM_CHECK_ON_REMAIN
#define BASE16384_SIMPLE_SUM_INIT_VALUE (0x8e29c213)
/**
 * @brief fold `cnt` bytes of `encbuf` into the running checksum `sum`
 * @param sum the checksum accumulated so far
 * @param cnt number of bytes to read from `encbuf`
 * @param encbuf the bytes to add; not read when `cnt` is 0
 * @return the updated checksum
 */
static inline uint32_t calc_sum(uint32_t sum, size_t cnt, const char* encbuf) {
	while(cnt--) {
		// take the byte as an unsigned value 0..255 regardless of char signedness
		const uint32_t byte = (uint32_t)(*encbuf++) & 0xff;
		// place each 2-bit pair of the byte into the low 2 bits of one byte of a 32-bit word
		const uint32_t spread =
			  ((byte << (24-6)) & 0x03000000)
			| ((byte << (16-4)) & 0x00030000)
			| ((byte << (8-2))  & 0x00000300)
			| (byte & 0x03);
		sum += spread;
		// mix: rotate left by 3 bits, then invert all bits
		sum = ~LEFTROTATE(sum, 3);
	}
	return sum;
}
static inline int check_sum(uint32_t sum, uint32_t sum_read_raw, int offset) {
offset = offset%7;
if(!offset--) return 0; // no remain bits, pass
// offset 1: 0011 1111 1100 0000 remain: 3*2 bits
// offset 2: 0011 1111 1111 1111 0011 0000 0000 0000 remain: 6*2 bits
// offset 3: 0011 1111 1111 0000 remain: 2*2 bits
// offset 4: 0011 1111 1111 1111 0011 1100 0000 0000 remain: 5*2 bits
// offset 5: 0011 1111 1111 1100 remain: 1*2 bits
// offset 6: 0011 1111 1111 1111 0011 1111 0000 0000 remain: 4*2 bits
// encode: 0415263 (6-1) per 3bits, thus 0x021ab3
int shift = sizeof(uint32_t)*8 - ((0x021ab3>>(offset*3))&0x07)*2;
uint32_t sum_read = be32toh(sum_read_raw) >> shift;
sum >>= shift;
#ifdef DEBUG
fprintf(stderr, "shift: %d, offset: %d, mysum: %08x, sumrd: %08x\n", shift, offset+1, sum, sum_read);
#endif
return sum != sum_read;
}
#endif

338
file.c
View File

@@ -1,6 +1,6 @@
/* file.c /* file.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384). * This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2023 Fumiama Minamoto. * Copyright (c) 2022-2024 Fumiama Minamoto.
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@
#include <string.h> #include <string.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <fcntl.h> #include <fcntl.h>
#include <errno.h>
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
#include <io.h> #include <io.h>
@@ -33,6 +34,7 @@
#endif #endif
#endif #endif
#include "base16384.h" #include "base16384.h"
#include "binary.h"
#ifdef __cosmopolitan #ifdef __cosmopolitan
#define get_file_size(filepath) ((off_t)GetFileSize(filepath)) #define get_file_size(filepath) ((off_t)GetFileSize(filepath))
@@ -45,100 +47,166 @@ static inline off_t get_file_size(const char* filepath) {
#define is_standard_io(filename) (*(uint16_t*)(filename) == *(uint16_t*)"-") #define is_standard_io(filename) (*(uint16_t*)(filename) == *(uint16_t*)"-")
base16384_err_t base16384_encode_file(const char* input, const char* output, char* encbuf, char* decbuf) { #define goto_base16384_file_detailed_cleanup(method, reason, dobeforereturn) { \
errnobak = errno; \
retval = reason; \
dobeforereturn; \
goto base16384_##method##_file_detailed_cleanup; \
}
#define do_sum_check(flag) ((flag)&(BASE16384_FLAG_DO_SUM_CHECK_FORCELY|BASE16384_FLAG_SUM_CHECK_ON_REMAIN))
base16384_err_t base16384_encode_file_detailed(const char* input, const char* output, char* encbuf, char* decbuf, int flag) {
off_t inputsize; off_t inputsize;
FILE* fp = NULL; FILE *fp = NULL, *fpo;
FILE* fpo; int errnobak = 0, is_stdin = is_standard_io(input);
if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) return base16384_err_invalid_file_name; base16384_err_t retval = base16384_err_ok;
if(is_standard_io(input)) { // read from stdin if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) {
inputsize = 0; errno = EINVAL;
return base16384_err_invalid_file_name;
}
if(is_stdin) { // read from stdin
inputsize = _BASE16384_ENCBUFSZ;
fp = stdin; fp = stdin;
} else inputsize = get_file_size(input); } else inputsize = get_file_size(input);
if(inputsize < 0) { if(inputsize <= 0) {
if(!inputsize) errno = EINVAL;
return base16384_err_get_file_size; return base16384_err_get_file_size;
} }
fpo = is_standard_io(output)?stdout:fopen(output, "wb"); fpo = is_standard_io(output)?stdout:fopen(output, "wb");
if(!fpo) { if(!fpo) {
return base16384_err_fopen_output_file; return base16384_err_fopen_output_file;
} }
if(!inputsize || inputsize > BASE16384_ENCBUFSZ) { // stdin or big file, use encbuf & fread if(flag&BASE16384_FLAG_DO_SUM_CHECK_FORCELY || inputsize >= _BASE16384_ENCBUFSZ) { // stdin or big file, use encbuf & fread
inputsize = BASE16384_ENCBUFSZ-7; inputsize = _BASE16384_ENCBUFSZ;
#if defined _WIN32 || defined __cosmopolitan #if defined _WIN32 || defined __cosmopolitan
} }
#endif #endif
if(!fp) fp = fopen(input, "rb"); if(!fp) fp = fopen(input, "rb");
if(!fp) { if(!fp) {
return base16384_err_fopen_input_file; goto_base16384_file_detailed_cleanup(encode, base16384_err_fopen_input_file, {});
} }
size_t cnt = 0; if(!(flag&BASE16384_FLAG_NOHEADER)) {
fputc(0xFE, fpo); fputc(0xFE, fpo);
fputc(0xFF, fpo); fputc(0xFF, fpo);
}
size_t cnt;
uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
while((cnt = fread(encbuf, sizeof(char), inputsize, fp)) > 0) { while((cnt = fread(encbuf, sizeof(char), inputsize, fp)) > 0) {
int n = base16384_encode(encbuf, cnt, decbuf); int n;
if(fwrite(decbuf, n, 1, fpo) <= 0) { while(cnt%7) {
return base16384_err_write_file; n = fread(encbuf+cnt, sizeof(char), 1, fp);
if(n > 0) cnt++;
else break;
}
if(do_sum_check(flag)) {
sum = calc_sum(sum, cnt, encbuf);
if(cnt%7) { // last encode
*(uint32_t*)(&encbuf[cnt]) = htobe32(sum);
#ifdef DEBUG
fprintf(stderr, "writesum: %08x\n", sum);
#endif
}
}
n = base16384_encode_unsafe(encbuf, cnt, decbuf);
if(n && fwrite(decbuf, n, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(encode, base16384_err_write_file, {});
} }
} }
if(!is_standard_io(output)) fclose(fpo);
if(!is_standard_io(input)) fclose(fp);
#if !defined _WIN32 && !defined __cosmopolitan #if !defined _WIN32 && !defined __cosmopolitan
} else { // small file, use mmap & fwrite } else { // small file, use mmap & fwrite
int fd = open(input, O_RDONLY); int fd = open(input, O_RDONLY);
if(fd < 0) { if(fd < 0) {
return base16384_err_open_input_file; goto_base16384_file_detailed_cleanup(encode, base16384_err_open_input_file, {});
} }
char *input_file = mmap(NULL, (size_t)inputsize+16, PROT_READ, MAP_PRIVATE, fd, 0); char *input_file = mmap(NULL, (size_t)inputsize+16, PROT_READ, MAP_PRIVATE, fd, 0);
if(input_file == MAP_FAILED) { if(input_file == MAP_FAILED) {
return base16384_err_map_input_file; goto_base16384_file_detailed_cleanup(encode, base16384_err_map_input_file, close(fd));
} }
if(!(flag&BASE16384_FLAG_NOHEADER)) {
fputc(0xFE, fpo); fputc(0xFE, fpo);
fputc(0xFF, fpo); fputc(0xFF, fpo);
int n = base16384_encode(input_file, (int)inputsize, decbuf); }
if(fwrite(decbuf, n, 1, fpo) <= 0) { int n = base16384_encode_safe(input_file, (int)inputsize, decbuf);
return base16384_err_write_file; if(n && fwrite(decbuf, n, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(encode, base16384_err_write_file, {
munmap(input_file, (size_t)inputsize);
close(fd);
});
} }
munmap(input_file, (size_t)inputsize); munmap(input_file, (size_t)inputsize);
if(!is_standard_io(output)) fclose(fpo);
close(fd); close(fd);
} }
#endif #endif
return base16384_err_ok; base16384_encode_file_detailed_cleanup:
if(fpo && !is_standard_io(output)) fclose(fpo);
if(fp && !is_stdin) fclose(fp);
if(errnobak) errno = errnobak;
return retval;
} }
base16384_err_t base16384_encode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf) { base16384_err_t base16384_encode_fp_detailed(FILE* input, FILE* output, char* encbuf, char* decbuf, int flag) {
if(!input) { if(!input) {
return base16384_err_fopen_input_file; return base16384_err_fopen_input_file;
} }
if(!output) { if(!output) {
return base16384_err_fopen_output_file; return base16384_err_fopen_output_file;
} }
off_t inputsize = BASE16384_ENCBUFSZ-7; if(!(flag&BASE16384_FLAG_NOHEADER)) {
size_t cnt = 0;
fputc(0xFE, output); fputc(0xFE, output);
fputc(0xFF, output); fputc(0xFF, output);
}
off_t inputsize = _BASE16384_ENCBUFSZ;
uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
size_t cnt;
while((cnt = fread(encbuf, sizeof(char), inputsize, input)) > 0) { while((cnt = fread(encbuf, sizeof(char), inputsize, input)) > 0) {
int n = base16384_encode(encbuf, cnt, decbuf); int n;
if(fwrite(decbuf, n, 1, output) <= 0) { while(cnt%7) {
n = fread(encbuf+cnt, sizeof(char), 1, input);
if(n > 0) cnt++;
else break;
}
if(do_sum_check(flag)) {
sum = calc_sum(sum, cnt, encbuf);
if(cnt%7) { // last encode
*(uint32_t*)(&encbuf[cnt]) = htobe32(sum);
}
}
n = base16384_encode_unsafe(encbuf, cnt, decbuf);
if(n && fwrite(decbuf, n, 1, output) <= 0) {
return base16384_err_write_file; return base16384_err_write_file;
} }
} }
return base16384_err_ok; return base16384_err_ok;
} }
base16384_err_t base16384_encode_fd(int input, int output, char* encbuf, char* decbuf) { base16384_err_t base16384_encode_fd_detailed(int input, int output, char* encbuf, char* decbuf, int flag) {
if(input < 0) { if(input < 0) {
return base16384_err_fopen_input_file; return base16384_err_fopen_input_file;
} }
if(output < 0) { if(output < 0) {
return base16384_err_fopen_output_file; return base16384_err_fopen_output_file;
} }
off_t inputsize = BASE16384_ENCBUFSZ-7; off_t inputsize = _BASE16384_ENCBUFSZ;
size_t cnt = 0; size_t cnt = 0;
write(output, "\xfe\xff", 2); uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
if(!(flag&BASE16384_FLAG_NOHEADER)) write(output, "\xfe\xff", 2);
while((cnt = read(input, encbuf, inputsize)) > 0) { while((cnt = read(input, encbuf, inputsize)) > 0) {
int n = base16384_encode(encbuf, cnt, decbuf); int n;
if(write(output, decbuf, n) < n) { while(cnt%7) {
n = read(input, encbuf+cnt, sizeof(char));
if(n > 0) cnt++;
else break;
}
if(do_sum_check(flag)) {
sum = calc_sum(sum, cnt, encbuf);
if(cnt%7) { // last encode
*(uint32_t*)(&encbuf[cnt]) = htobe32(sum);
}
}
n = base16384_encode_unsafe(encbuf, cnt, decbuf);
if(n && write(output, decbuf, n) < n) {
return base16384_err_write_file; return base16384_err_write_file;
} }
} }
@@ -161,128 +229,232 @@ static inline int is_next_end(FILE* fp) {
return 0; return 0;
} }
base16384_err_t base16384_decode_file(const char* input, const char* output, char* encbuf, char* decbuf) { base16384_err_t base16384_decode_file_detailed(const char* input, const char* output, char* encbuf, char* decbuf, int flag) {
off_t inputsize; off_t inputsize;
FILE* fp = NULL; FILE* fp = NULL;
FILE* fpo; FILE* fpo;
if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) return base16384_err_invalid_file_name; uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
if(is_standard_io(input)) { // read from stdin base16384_err_t retval = base16384_err_ok;
inputsize = 0; int errnobak = 0, is_stdin = is_standard_io(input);
if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) {
errno = EINVAL;
return base16384_err_invalid_file_name;
}
if(is_stdin) { // read from stdin
inputsize = _BASE16384_DECBUFSZ;
fp = stdin; fp = stdin;
} else inputsize = get_file_size(input); } else inputsize = get_file_size(input);
if(inputsize < 0) { if(inputsize <= 0) {
if(!inputsize) errno = EINVAL;
return base16384_err_get_file_size; return base16384_err_get_file_size;
} }
fpo = is_standard_io(output)?stdout:fopen(output, "wb"); fpo = is_standard_io(output)?stdout:fopen(output, "wb");
if(!fpo) { if(!fpo) {
return base16384_err_fopen_output_file; return base16384_err_fopen_output_file;
} }
if(!inputsize || inputsize > BASE16384_DECBUFSZ) { // stdin or big file, use decbuf & fread int loop_count = 0;
inputsize = BASE16384_DECBUFSZ/8*8; if(inputsize >= _BASE16384_DECBUFSZ) { // stdin or big file, use decbuf & fread
if(!is_stdin) loop_count = inputsize/_BASE16384_DECBUFSZ;
inputsize = _BASE16384_DECBUFSZ;
#if defined _WIN32 || defined __cosmopolitan #if defined _WIN32 || defined __cosmopolitan
} }
#endif #endif
if(!fp) fp = fopen(input, "rb"); if(!fp) fp = fopen(input, "rb");
if(!fp) { if(!fp) {
return base16384_err_fopen_input_file; goto_base16384_file_detailed_cleanup(decode, base16384_err_fopen_input_file, {});
} }
int cnt = 0;
int end = 0;
rm_head(fp); rm_head(fp);
if(errno) {
goto_base16384_file_detailed_cleanup(decode, base16384_err_read_file, {});
}
int cnt, last_encbuf_cnt = 0, last_decbuf_cnt = 0, offset = 0;
size_t total_decoded_len = 0;
while((cnt = fread(decbuf, sizeof(char), inputsize, fp)) > 0) { while((cnt = fread(decbuf, sizeof(char), inputsize, fp)) > 0) {
int n;
while(cnt%8) {
n = fread(decbuf+cnt, sizeof(char), 1, fp);
if(n > 0) cnt++;
else break;
}
int end;
if((end = is_next_end(fp))) { if((end = is_next_end(fp))) {
decbuf[cnt++] = '='; decbuf[cnt++] = '=';
decbuf[cnt++] = end; decbuf[cnt++] = end;
} }
if(fwrite(encbuf, base16384_decode(decbuf, cnt, encbuf), 1, fpo) <= 0) { if(errno) goto_base16384_file_detailed_cleanup(decode, base16384_err_read_file, {});
return base16384_err_write_file; offset = decbuf[cnt-1];
last_decbuf_cnt = cnt;
cnt = base16384_decode_unsafe(decbuf, cnt, encbuf);
if(cnt && fwrite(encbuf, cnt, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(decode, base16384_err_write_file, {});
} }
total_decoded_len += cnt;
if(do_sum_check(flag)) sum = calc_sum(sum, cnt, encbuf);
last_encbuf_cnt = cnt;
}
if(do_sum_check(flag)
&& (flag&BASE16384_FLAG_DO_SUM_CHECK_FORCELY || total_decoded_len >= _BASE16384_ENCBUFSZ)
&& last_decbuf_cnt > 2
&& decbuf[last_decbuf_cnt-2] == '='
&& check_sum(sum, *(uint32_t*)(&encbuf[last_encbuf_cnt]), offset)) {
errno = EINVAL;
goto_base16384_file_detailed_cleanup(decode, base16384_err_invalid_decoding_checksum, {});
} }
if(!is_standard_io(output)) fclose(fpo);
if(!is_standard_io(input)) fclose(fp);
#if !defined _WIN32 && !defined __cosmopolitan #if !defined _WIN32 && !defined __cosmopolitan
} else { // small file, use mmap & fwrite } else { // small file, use mmap & fwrite
int fd = open(input, O_RDONLY); int fd = open(input, O_RDONLY);
if(fd < 0) { if(fd < 0) {
return base16384_err_open_input_file; goto_base16384_file_detailed_cleanup(decode, base16384_err_open_input_file, {});
} }
char *input_file = mmap(NULL, (size_t)inputsize+16, PROT_READ, MAP_PRIVATE, fd, 0); char *input_file = mmap(NULL, (size_t)inputsize+16, PROT_READ, MAP_PRIVATE, fd, 0);
if(input_file == MAP_FAILED) { if(input_file == MAP_FAILED) {
return base16384_err_map_input_file; goto_base16384_file_detailed_cleanup(decode, base16384_err_map_input_file, close(fd));
} }
int off = skip_offset(input_file); int n = skip_offset(input_file);
if(fwrite(encbuf, base16384_decode(input_file+off, inputsize-off, encbuf), 1, fpo) <= 0) { n = base16384_decode_safe(input_file+n, inputsize-n, encbuf);
return base16384_err_write_file; if(n && fwrite(encbuf, n, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(decode, base16384_err_write_file, {
munmap(input_file, (size_t)inputsize);
close(fd);
});
} }
munmap(input_file, (size_t)inputsize); munmap(input_file, (size_t)inputsize);
if(!is_standard_io(output)) fclose(fpo);
close(fd); close(fd);
} }
#endif #endif
return base16384_err_ok; base16384_decode_file_detailed_cleanup:
if(fpo && !is_standard_io(output)) fclose(fpo);
if(fp && !is_stdin) fclose(fp);
if(errnobak) errno = errnobak;
return retval;
} }
base16384_err_t base16384_decode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf) { base16384_err_t base16384_decode_fp_detailed(FILE* input, FILE* output, char* encbuf, char* decbuf, int flag) {
if(!input) { if(!input) {
errno = EINVAL;
return base16384_err_fopen_input_file; return base16384_err_fopen_input_file;
} }
if(!output) { if(!output) {
errno = EINVAL;
return base16384_err_fopen_output_file; return base16384_err_fopen_output_file;
} }
off_t inputsize = BASE16384_DECBUFSZ/8*8; off_t inputsize = _BASE16384_DECBUFSZ;
int cnt = 0; uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
int end = 0;
rm_head(input); rm_head(input);
if(errno) {
return base16384_err_read_file;
}
int cnt, last_encbuf_cnt = 0, last_decbuf_cnt = 0, offset = 0;
size_t total_decoded_len = 0;
while((cnt = fread(decbuf, sizeof(char), inputsize, input)) > 0) { while((cnt = fread(decbuf, sizeof(char), inputsize, input)) > 0) {
int n;
while(cnt%8) {
n = fread(decbuf+cnt, sizeof(char), 1, input);
if(n > 0) cnt++;
else break;
}
int end;
if((end = is_next_end(input))) { if((end = is_next_end(input))) {
decbuf[cnt++] = '='; decbuf[cnt++] = '=';
decbuf[cnt++] = end; decbuf[cnt++] = end;
} }
if(fwrite(encbuf, base16384_decode(decbuf, cnt, encbuf), 1, output) <= 0) { if(errno) return base16384_err_read_file;
offset = decbuf[cnt-1];
last_decbuf_cnt = cnt;
cnt = base16384_decode_unsafe(decbuf, cnt, encbuf);
if(cnt && fwrite(encbuf, cnt, 1, output) <= 0) {
return base16384_err_write_file; return base16384_err_write_file;
} }
total_decoded_len += cnt;
if(do_sum_check(flag)) sum = calc_sum(sum, cnt, encbuf);
last_encbuf_cnt = cnt;
}
if(do_sum_check(flag)
&& (flag&BASE16384_FLAG_DO_SUM_CHECK_FORCELY || total_decoded_len >= _BASE16384_ENCBUFSZ)
&& last_decbuf_cnt > 2
&& decbuf[last_decbuf_cnt-2] == '='
&& check_sum(sum, *(uint32_t*)(&encbuf[last_encbuf_cnt]), offset)) {
errno = EINVAL;
return base16384_err_invalid_decoding_checksum;
} }
return base16384_err_ok; return base16384_err_ok;
} }
static inline int is_next_end_fd(int fd) { static inline uint16_t is_next_end_fd(int fd) {
char ch = 0; uint8_t ch = 0;
read(fd, &ch, 1); if(read(fd, &ch, 1) != 1) return (uint16_t)EOF;
uint16_t ret = (uint16_t)ch & 0x00ff;
if(ch == '=') { if(ch == '=') {
read(fd, &ch, 1); if(read(fd, &ch, 1) != 1) return (uint16_t)EOF;
ret <<= 8;
ret |= (uint16_t)ch & 0x00ff;
} }
return (int)ch; return ret;
} }
base16384_err_t base16384_decode_fd(int input, int output, char* encbuf, char* decbuf) { base16384_err_t base16384_decode_fd_detailed(int input, int output, char* encbuf, char* decbuf, int flag) {
if(input < 0) { if(input < 0) {
errno = EINVAL;
return base16384_err_fopen_input_file; return base16384_err_fopen_input_file;
} }
if(output < 0) { if(output < 0) {
errno = EINVAL;
return base16384_err_fopen_output_file; return base16384_err_fopen_output_file;
} }
off_t inputsize = BASE16384_DECBUFSZ/8*8;
int cnt = 0; off_t inputsize = _BASE16384_DECBUFSZ;
int end = 0; uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
uint8_t remains[8];
decbuf[0] = 0; decbuf[0] = 0;
if(read(input, decbuf, 2) < 2) { if(read(input, remains, 2) != 2) {
return base16384_err_read_file; return base16384_err_read_file;
} }
if(decbuf[0] != (char)(0xfe)) cnt = 2;
while((end = read(input, decbuf+cnt, inputsize-cnt)) > 0 || cnt > 0) { int p = 0;
if(end > 0) { if(remains[0] != (uint8_t)(0xfe)) p = 2;
cnt += end;
if((end = is_next_end_fd(input))) { int n, last_encbuf_cnt = 0, last_decbuf_cnt = 0, offset = 0;
decbuf[cnt++] = '='; size_t total_decoded_len = 0;
decbuf[cnt++] = end; while((n = read(input, decbuf+p, inputsize-p)) > 0) {
end = 0; if(p) {
} else end = 1; memcpy(decbuf, remains, p);
} else end = 0; n += p;
cnt = base16384_decode(decbuf, cnt, encbuf); p = 0;
if(write(output, encbuf, cnt) < cnt) { }
int x;
while(n%8) {
x = read(input, decbuf+n, sizeof(char));
if(x > 0) n++;
else break;
}
uint16_t next = is_next_end_fd(input);
if(errno) {
return base16384_err_read_file;
}
if((uint16_t)(~next)) {
if(next&0xff00) {
decbuf[n++] = '=';
decbuf[n++] = (char)(next&0x00ff);
} else remains[p++] = (char)(next&0x00ff);
}
offset = decbuf[n-1];
last_decbuf_cnt = n;
n = base16384_decode_unsafe(decbuf, n, encbuf);
if(n && write(output, encbuf, n) != n) {
return base16384_err_write_file; return base16384_err_write_file;
} }
cnt = end; total_decoded_len += n;
if(do_sum_check(flag)) sum = calc_sum(sum, n, encbuf);
last_encbuf_cnt = n;
}
if(do_sum_check(flag)
&& (flag&BASE16384_FLAG_DO_SUM_CHECK_FORCELY || total_decoded_len >= _BASE16384_ENCBUFSZ)
&& last_decbuf_cnt > 2
&& decbuf[last_decbuf_cnt-2] == '='
&& check_sum(sum, *(uint32_t*)(&encbuf[last_encbuf_cnt]), offset)) {
errno = EINVAL;
return base16384_err_invalid_decoding_checksum;
} }
return base16384_err_ok; return base16384_err_ok;
} }

16
test/CMakeLists.txt Normal file
View File

@@ -0,0 +1,16 @@
# Builds one executable per unit-test source in this directory and registers
# each of them with CTest under the name do_<test>.
# The static library target base16384_s must already be defined by the
# enclosing project.
cmake_minimum_required(VERSION 2.8.12)
if (POLICY CMP0048)
    cmake_policy(SET CMP0048 NEW)
endif ()
project(base16384_test VERSION 1.0.0)

# Test sources are listed explicitly instead of being globbed: a recursive
# "*.c" glob also picks up stray C files below this directory (for example
# compiler-probe sources of a build tree created here) and does not notice
# added or removed files until CMake is re-run.
set(BASE16384_TEST_NAMES
    coder_test
    file_test
    wrap_test
)

foreach (TEST_NAME ${BASE16384_TEST_NAMES})
    message(STATUS "Add test ${TEST_NAME}")
    add_executable(${TEST_NAME} "${CMAKE_CURRENT_SOURCE_DIR}/${TEST_NAME}.c")
    # The library headers live one level up; scope the include path to the
    # test target rather than to the whole directory.
    target_include_directories(${TEST_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
    target_link_libraries(${TEST_NAME} PRIVATE base16384_s)
    add_test(NAME do_${TEST_NAME} COMMAND ${TEST_NAME})
endforeach ()

91
test/coder_test.c Normal file
View File

@@ -0,0 +1,91 @@
/* test/coder_test.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "base16384.h"
/* Largest input length (in bytes) exercised by the round-trip loops. */
#define TEST_SIZE (4096)
/* Random source data handed to the encoders; 16 bytes larger than TEST_SIZE. */
char encbuf[TEST_SIZE+16];
/* Receives the encoder output and is then fed to the decoders. */
char decbuf[TEST_SIZE/7*8+16];
/* Receives the decoder output, which is compared against encbuf. */
char tstbuf[TEST_SIZE+16];
/* Print to stderr the bytes of `target` at every index in [start, end) where
 * encbuf and tstbuf disagree. Runs of agreeing bytes are collapsed into a
 * single " ..." marker and each new run of differing bytes is introduced by
 * its offset (" @<index>").
 * Uses i, start, end and n from the expansion site; n is the "currently in a
 * matching run" flag and must be 1 on entry. */
#define loop_diff(target) \
    for(i = start; i < end; i++) { \
        if (encbuf[i] == tstbuf[i]) { \
            /* matching byte: emit the ellipsis once per run */ \
            if(!n) { \
                n = 1; \
                fprintf(stderr, " ..."); \
            } \
            continue; \
        } \
        /* differing byte: announce the offset when a new run starts */ \
        if(n) { \
            fprintf(stderr, " @%d", i); \
            n = 0; \
        } \
        fprintf(stderr, " %02x", (uint8_t)(target[i])); \
    }
/* Report a round-trip mismatch and leave the enclosing function with 1.
 * Locates the first index below `i` at which encbuf and tstbuf differ, prints
 * the loop index, the decoded size `n` and that index, then dumps the
 * differing bytes of both buffers through loop_diff ("expect" = encbuf,
 * "got" = tstbuf).
 * `i` and `n` must be modifiable int variables: both are overwritten here
 * (loop_diff iterates with i, and n is reused as loop_diff's run flag). */
#define return_error(i, n) { \
int end = i; \
int start; \
for(start = 0; start < end; start++) { \
if(encbuf[start] != tstbuf[start]) break; \
} \
fprintf(stderr, "result mismatch @ loop %d, decsz: %d, first diff @ %d\n", i, n, start); \
fprintf(stderr, "expect"); \
n = 1; \
loop_diff(encbuf); \
fprintf(stderr, "\ngot "); \
n = 1; \
loop_diff(tstbuf); \
fputc('\n', stderr); \
return 1; \
}
/* Round-trip every input length from 0 to TEST_SIZE through
 * base16384_<encode> and base16384_<decode>.
 * For each length i the first i bytes of encbuf are encoded into decbuf and
 * decoded into tstbuf. The round trip fails (via return_error) when the
 * decoded length differs from i or when the decoded bytes differ from the
 * input. Uses the int variables i and n of the expansion site. */
#define test_batch(encode, decode) \
    fputs("testing base16384_"#encode"/base16384_"#decode"...\n", stderr); \
    for(i = 0; i <= TEST_SIZE; i++) { \
        n = base16384_##encode(encbuf, i, decbuf); \
        n = base16384_##decode(decbuf, n, tstbuf); \
        /* compare the length too: comparing only n bytes would accept a */ \
        /* decoder that returns too few bytes (or none at all) */ \
        if (n != i || memcmp(encbuf, tstbuf, i)) return_error(i, n); \
    }
/* Fill encbuf with pseudo-random bytes and run every encoder/decoder pairing
 * over all input lengths. Returns 0 when every round trip succeeds; the
 * test_batch macro returns 1 from here on the first mismatch. */
int main() {
    srand(time(NULL));
    int i, n;
    for(i = 0; i <= TEST_SIZE; i += sizeof(int)) {
        /* copy the value bytewise: storing an int through a cast char
         * pointer relies on alignment and aliasing the C standard does not
         * guarantee for a char array */
        int r = rand();
        memcpy(&encbuf[i], &r, sizeof(r));
    }
    test_batch(encode, decode);
    test_batch(encode, decode_unsafe);
    test_batch(encode, decode_safe);
    test_batch(encode_unsafe, decode);
    test_batch(encode_unsafe, decode_unsafe);
    test_batch(encode_unsafe, decode_safe);
    test_batch(encode_safe, decode);
    test_batch(encode_safe, decode_unsafe);
    test_batch(encode_safe, decode_safe);
    return 0;
}

165
test/file_test.c Normal file
View File

@@ -0,0 +1,165 @@
/* test/file_test.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef _WIN32
#include <io.h>
#define ftruncate _chsize_s
#else
#define _POSIX1_SOURCE 2
#include <unistd.h>
#endif
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "base16384.h"
#include "binary.h"
#include "file_test.h"
/* Largest input file length tested; the loops count down from it to 1. */
#define TEST_SIZE (4096)
/* Names of the scratch files, created relative to the working directory. */
#define TEST_INPUT_FILENAME "file_test_input.bin"
#define TEST_OUTPUT_FILENAME "file_test_output.bin"
#define TEST_VALIDATE_FILENAME "file_test_validate.bin"
/* Buffers passed as encbuf/decbuf to the library calls under test. */
char encbuf[BASE16384_ENCBUFSZ];
char decbuf[BASE16384_DECBUFSZ];
/* NOTE(review): tstbuf is not referenced anywhere else in this file. */
char tstbuf[BASE16384_ENCBUFSZ];
/* Create TEST_INPUT_FILENAME holding BASE16384_ENCBUFSZ pseudo-random bytes.
 * Uses the variables i (int) and fp (FILE*) of the expansion site and returns
 * 1 from the enclosing function when the file cannot be created, written or
 * closed. */
#define init_input_file() \
    for(i = 0; i < BASE16384_ENCBUFSZ; i += sizeof(int)) { \
        /* copy bytewise and clamp the final chunk, so the fill neither */ \
        /* depends on int alignment nor writes past the end of encbuf */ \
        /* when the buffer size is not a multiple of sizeof(int) */ \
        int rnd = rand(); \
        size_t room = (size_t)BASE16384_ENCBUFSZ - (size_t)i; \
        memcpy(&encbuf[i], &rnd, room < sizeof(rnd) ? room : sizeof(rnd)); \
    } \
    fp = fopen(TEST_INPUT_FILENAME, "wb"); \
    ok(!fp, "fopen"); \
    ok(fwrite(encbuf, BASE16384_ENCBUFSZ, 1, fp) != 1, "fwrite"); \
    ok(fclose(fp), "fclose"); \
    fputs("input file created.\n", stderr);
/* Round-trip test of the path-based API with the given flag.
 * Recreates the input file, then for every length from TEST_SIZE down to 1:
 * truncates the input file to that length, encodes it to
 * TEST_OUTPUT_FILENAME, decodes that to TEST_VALIDATE_FILENAME and compares
 * input and validate files with validate_result().
 * Uses fp, fd, i and err of the expansion site; any failure returns 1 from
 * the enclosing function. */
#define test_file_detailed(flag) \
fputs("testing base16384_en/decode_file with flag "#flag"...\n", stderr); \
init_input_file(); \
for(i = TEST_SIZE; i > 0; i--) { \
reset_and_truncate(fd, i); \
loop_ok(close(fd), i, "close"); \
\
err = base16384_encode_file_detailed(TEST_INPUT_FILENAME, TEST_OUTPUT_FILENAME, encbuf, decbuf, flag); \
base16384_loop_ok(err); \
\
err = base16384_decode_file_detailed(TEST_OUTPUT_FILENAME, TEST_VALIDATE_FILENAME, encbuf, decbuf, flag); \
base16384_loop_ok(err); \
\
validate_result(); \
}
/* Round-trip test of the FILE*-based API with the given flag.
 * Recreates the input file, then for every length from TEST_SIZE down to 1:
 * truncates the input file, encodes it into TEST_OUTPUT_FILENAME (opened
 * "wb+" so it can be read back), rewinds that stream, decodes it into
 * TEST_VALIDATE_FILENAME and compares the result with validate_result().
 * Uses fp, fd, i and err of the expansion site; any failure returns 1 from
 * the enclosing function. */
#define test_fp_detailed(flag) \
fputs("testing base16384_en/decode_fp with flag "#flag"...\n", stderr); \
init_input_file(); \
for(i = TEST_SIZE; i > 0; i--) { \
reset_and_truncate(fd, i); \
loop_ok(close(fd), i, "close"); \
\
FILE* fpin = fopen(TEST_INPUT_FILENAME, "rb"); \
loop_ok(!fpin, i, "fopen"); \
\
FILE* fpout = fopen(TEST_OUTPUT_FILENAME, "wb+"); \
loop_ok(!fpout, i, "fopen"); \
\
err = base16384_encode_fp_detailed(fpin, fpout, encbuf, decbuf, flag); \
base16384_loop_ok(err); \
\
loop_ok(fclose(fpin), i, "fclose"); \
\
FILE* fpval = fopen(TEST_VALIDATE_FILENAME, "wb"); \
loop_ok(!fpval, i, "fopen"); \
\
rewind(fpout); \
\
err = base16384_decode_fp_detailed(fpout, fpval, encbuf, decbuf, flag); \
base16384_loop_ok(err); \
\
loop_ok(fclose(fpout), i, "fclose"); \
loop_ok(fclose(fpval), i, "fclose"); \
\
validate_result(); \
}
/* Round-trip test of the file-descriptor API with the given flag.
 * Recreates the input file, then for every length from TEST_SIZE down to 1:
 * truncates the input file, encodes the still-open descriptor into
 * TEST_OUTPUT_FILENAME, seeks that descriptor back to 0, decodes it into
 * TEST_VALIDATE_FILENAME and compares the result with validate_result().
 * Uses fp, fd, i and err of the expansion site; any failure returns 1 from
 * the enclosing function. */
#define test_fd_detailed(flag) \
    fputs("testing base16384_en/decode_fd with flag "#flag"...\n", stderr); \
    init_input_file(); \
    for(i = TEST_SIZE; i > 0; i--) { \
        reset_and_truncate(fd, i); \
        \
        /* O_CREAT requires the mode argument; open() reports failure as -1 */ \
        int fdout = open(TEST_OUTPUT_FILENAME, O_RDWR|O_TRUNC|O_CREAT|O_APPEND, 0644); \
        loop_ok(fdout < 0, i, "open"); \
        \
        /* pass the flag under test instead of a hard-coded 0 */ \
        err = base16384_encode_fd_detailed(fd, fdout, encbuf, decbuf, flag); \
        base16384_loop_ok(err); \
        loop_ok(close(fd), i, "close"); \
        \
        int fdval = open(TEST_VALIDATE_FILENAME, O_WRONLY|O_TRUNC|O_CREAT, 0644); \
        loop_ok(fdval < 0, i, "open"); \
        \
        loop_ok(lseek(fdout, 0, SEEK_SET), i, "lseek"); \
        \
        err = base16384_decode_fd_detailed(fdout, fdval, encbuf, decbuf, flag); \
        base16384_loop_ok(err); \
        \
        loop_ok(close(fdout), i, "close"); \
        loop_ok(close(fdval), i, "close"); \
        \
        validate_result(); \
    }
/* Run test_<name>_detailed once for each of the eight flag combinations:
 * no flag, each of BASE16384_FLAG_NOHEADER, BASE16384_FLAG_SUM_CHECK_ON_REMAIN
 * and BASE16384_FLAG_DO_SUM_CHECK_FORCELY alone, every pair of them, and all
 * three together. `name` is one of file, fp or fd. */
#define test_detailed(name) \
test_##name##_detailed(0); \
\
test_##name##_detailed(BASE16384_FLAG_NOHEADER); \
test_##name##_detailed(BASE16384_FLAG_SUM_CHECK_ON_REMAIN); \
test_##name##_detailed(BASE16384_FLAG_DO_SUM_CHECK_FORCELY); \
\
test_##name##_detailed(BASE16384_FLAG_NOHEADER|BASE16384_FLAG_SUM_CHECK_ON_REMAIN); \
test_##name##_detailed(BASE16384_FLAG_NOHEADER|BASE16384_FLAG_DO_SUM_CHECK_FORCELY); \
\
test_##name##_detailed(BASE16384_FLAG_SUM_CHECK_ON_REMAIN|BASE16384_FLAG_DO_SUM_CHECK_FORCELY); \
\
test_##name##_detailed(BASE16384_FLAG_NOHEADER|BASE16384_FLAG_SUM_CHECK_ON_REMAIN|BASE16384_FLAG_DO_SUM_CHECK_FORCELY);
/* Delete the three scratch files used by the tests; the results of remove()
 * are deliberately not checked. */
#define remove_test_files() \
    do { \
        remove(TEST_INPUT_FILENAME); \
        remove(TEST_OUTPUT_FILENAME); \
        remove(TEST_VALIDATE_FILENAME); \
    } while (0)
/* Test driver: runs the path, FILE* and file-descriptor round-trip suites and
 * removes the scratch files afterwards. Returns 0 on success; the test macros
 * return 1 from here on the first failure (the scratch files are then left
 * in place). */
int main() {
srand(time(NULL));
/* fp, fd, i and err are referenced by name inside the test macros. */
FILE* fp;
int fd, i;
base16384_err_t err;
test_detailed(file);
test_detailed(fp);
test_detailed(fd);
remove_test_files();
return 0;
}

85
test/file_test.h Normal file
View File

@@ -0,0 +1,85 @@
#ifndef _FILE_TEST_H_
#define _FILE_TEST_H_
/* test/file_test.h
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* If `has_failed` is true, print `reason` with perror() and return 1 from the
 * enclosing function.
 * Wrapped in do { } while (0) so that the macro behaves as one statement and
 * cannot capture a following `else`. Must be followed by a semicolon. */
#define ok(has_failed, reason) \
    do { \
        if (has_failed) { \
            perror(reason); \
            return 1; \
        } \
    } while (0)
/* Like ok(), but prefixes the perror() output with the loop index `i`.
 * If `has_failed` is true, prints "loop @<i>: " followed by the perror()
 * message for `reason` and returns 1 from the enclosing function.
 * Wrapped in do { } while (0) so that the macro behaves as one statement and
 * cannot capture a following `else`. Must be followed by a semicolon. */
#define loop_ok(has_failed, i, reason) \
    do { \
        if (has_failed) { \
            fprintf(stderr, "loop @%d: ", i); \
            perror(reason); \
            return 1; \
        } \
    } while (0)
/* Open TEST_INPUT_FILENAME read/write into `fd`, position it at offset 0 and
 * truncate the file to `i` bytes. The descriptor is left open for the caller.
 * Returns 1 from the enclosing function if any step fails. */
#define reset_and_truncate(fd, i) { \
    fd = open(TEST_INPUT_FILENAME, O_RDWR); \
    /* open() signals failure with -1; 0 is a valid descriptor */ \
    ok(fd < 0, "open"); \
    loop_ok(lseek(fd, 0, SEEK_SET), i, "lseek"); \
    loop_ok(ftruncate(fd, i), i, "ftruncate"); \
}
/* If `err` is non-zero, print "loop @<i>: " followed by the message produced
 * by base16384_perror(err) and return 1 from the enclosing function.
 * Reads the loop index from a variable named `i` at the expansion site.
 * `err` is evaluated twice on failure, so pass a variable, not a call.
 * Wrapped in do { } while (0) so that the macro behaves as one statement and
 * cannot capture a following `else`. Must be followed by a semicolon. */
#define base16384_loop_ok(err) \
    do { \
        if (err) { \
            fprintf(stderr, "loop @%d: ", i); \
            base16384_perror(err); \
            return 1; \
        } \
    } while (0)
/* Compare TEST_INPUT_FILENAME with TEST_VALIDATE_FILENAME by checksum.
 * Each file is read in 8-byte words (a short final word is zero-padded) and
 * the words are summed into a uint64_t; on a mismatch both sums are printed
 * and 1 is returned from the enclosing function.
 * Declares buf, sum_input and sum_validate in the current scope and uses fp
 * and i of the expansion site, so expand it at most once per scope. */
#define validate_result() \
    uint64_t buf, sum_input = 0, sum_validate = 0; \
    fp = fopen(TEST_INPUT_FILENAME, "rb"); { \
        loop_ok(!fp, i, "fopen"); \
        int cnt; \
        /* clear the word BEFORE reading into it; clearing it afterwards */ \
        /* discards the data and makes every checksum 0 */ \
        buf = 0; \
        while ((cnt = fread(&buf, 1, sizeof(sum_input), fp)) > 0) { \
            /* top up a short read one byte at a time until EOF */ \
            while(cnt < (int)sizeof(sum_input)) { \
                if (fread((uint8_t*)(&buf)+cnt, 1, 1, fp) != 1) break; \
                cnt++; \
            } \
            sum_input += buf; \
            buf = 0; \
        } \
    } fclose(fp); \
    fp = fopen(TEST_VALIDATE_FILENAME, "rb"); { \
        loop_ok(!fp, i, "fopen"); \
        int cnt; \
        buf = 0; \
        while ((cnt = fread(&buf, 1, sizeof(sum_validate), fp)) > 0) { \
            while(cnt < (int)sizeof(sum_validate)) { \
                if (fread((uint8_t*)(&buf)+cnt, 1, 1, fp) != 1) break; \
                cnt++; \
            } \
            sum_validate += buf; \
            buf = 0; \
        } \
    } fclose(fp); \
    if (sum_input != sum_validate) { \
        fprintf(stderr, "loop @%d, expect: %016llx, got: %016llx: ", i, (unsigned long long)sum_input, (unsigned long long)sum_validate); \
        fputs(TEST_INPUT_FILENAME " and " TEST_VALIDATE_FILENAME " mismatch.", stderr); \
        return 1; \
    }
#endif

140
test/wrap_test.c Normal file
View File

@@ -0,0 +1,140 @@
/* test/wrap_test.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef _WIN32
#include <io.h>
#define ftruncate _chsize_s
#else
#define _POSIX1_SOURCE 2
#include <unistd.h>
#endif
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "base16384.h"
#include "binary.h"
#include "file_test.h"
/* Largest input file length tested; the loops count down from it to 1. */
#define TEST_SIZE (4096)
/* Names of the scratch files, created relative to the working directory. */
#define TEST_INPUT_FILENAME "wrap_test_input.bin"
#define TEST_OUTPUT_FILENAME "wrap_test_output.bin"
#define TEST_VALIDATE_FILENAME "wrap_test_validate.bin"
/* Buffers passed as encbuf/decbuf to the library calls under test. */
char encbuf[BASE16384_ENCBUFSZ];
char decbuf[BASE16384_DECBUFSZ];
/* NOTE(review): tstbuf is not referenced anywhere else in this file. */
char tstbuf[BASE16384_ENCBUFSZ];
/* Create TEST_INPUT_FILENAME holding BASE16384_ENCBUFSZ pseudo-random bytes.
 * Uses the variables i (int) and fp (FILE*) of the expansion site and returns
 * 1 from the enclosing function when the file cannot be created, written or
 * closed. */
#define init_input_file() \
    for(i = 0; i < BASE16384_ENCBUFSZ; i += sizeof(int)) { \
        /* copy bytewise and clamp the final chunk, so the fill neither */ \
        /* depends on int alignment nor writes past the end of encbuf */ \
        /* when the buffer size is not a multiple of sizeof(int) */ \
        int rnd = rand(); \
        size_t room = (size_t)BASE16384_ENCBUFSZ - (size_t)i; \
        memcpy(&encbuf[i], &rnd, room < sizeof(rnd) ? room : sizeof(rnd)); \
    } \
    fp = fopen(TEST_INPUT_FILENAME, "wb"); \
    ok(!fp, "fopen"); \
    ok(fwrite(encbuf, BASE16384_ENCBUFSZ, 1, fp) != 1, "fwrite"); \
    ok(fclose(fp), "fclose"); \
    fputs("input file created.\n", stderr);
/* Test driver for the flag-less wrapper API (base16384_{en,de}code_{file,fp,fd}).
 * For each of the three API families the input file is recreated and then,
 * for every length from TEST_SIZE down to 1, truncated, encoded, decoded and
 * compared with validate_result(). Returns 0 on success and 1 on the first
 * failure (the scratch files are then left in place). */
int main() {
    srand(time(NULL));
    /* fp, fd, i and err are referenced by name inside the helper macros. */
    FILE* fp;
    int fd, i;
    base16384_err_t err;

    fputs("testing base16384_en/decode_file...\n", stderr);
    init_input_file();
    for(i = TEST_SIZE; i > 0; i--) {
        reset_and_truncate(fd, i);
        loop_ok(close(fd), i, "close");
        err = base16384_encode_file(TEST_INPUT_FILENAME, TEST_OUTPUT_FILENAME, encbuf, decbuf);
        base16384_loop_ok(err);
        err = base16384_decode_file(TEST_OUTPUT_FILENAME, TEST_VALIDATE_FILENAME, encbuf, decbuf);
        base16384_loop_ok(err);
        validate_result();
    }

    fputs("testing base16384_en/decode_fp...\n", stderr);
    init_input_file();
    for(i = TEST_SIZE; i > 0; i--) {
        reset_and_truncate(fd, i);
        loop_ok(close(fd), i, "close");
        FILE* fpin = fopen(TEST_INPUT_FILENAME, "rb");
        loop_ok(!fpin, i, "fopen");
        /* "wb+" so the encoded output can be rewound and read back */
        FILE* fpout = fopen(TEST_OUTPUT_FILENAME, "wb+");
        loop_ok(!fpout, i, "fopen");
        err = base16384_encode_fp(fpin, fpout, encbuf, decbuf);
        base16384_loop_ok(err);
        loop_ok(fclose(fpin), i, "fclose");
        FILE* fpval = fopen(TEST_VALIDATE_FILENAME, "wb");
        loop_ok(!fpval, i, "fopen");
        rewind(fpout);
        err = base16384_decode_fp(fpout, fpval, encbuf, decbuf);
        base16384_loop_ok(err);
        loop_ok(fclose(fpout), i, "fclose");
        loop_ok(fclose(fpval), i, "fclose");
        validate_result();
    }

    fputs("testing base16384_en/decode_fd...\n", stderr);
    init_input_file();
    for(i = TEST_SIZE; i > 0; i--) {
        reset_and_truncate(fd, i);
        /* O_CREAT requires the mode argument; open() reports failure as -1 */
        int fdout = open(TEST_OUTPUT_FILENAME, O_RDWR|O_TRUNC|O_CREAT|O_APPEND, 0644);
        loop_ok(fdout < 0, i, "open");
        err = base16384_encode_fd(fd, fdout, encbuf, decbuf);
        base16384_loop_ok(err);
        loop_ok(close(fd), i, "close");
        int fdval = open(TEST_VALIDATE_FILENAME, O_WRONLY|O_TRUNC|O_CREAT, 0644);
        loop_ok(fdval < 0, i, "open");
        loop_ok(lseek(fdout, 0, SEEK_SET), i, "lseek");
        err = base16384_decode_fd(fdout, fdval, encbuf, decbuf);
        base16384_loop_ok(err);
        loop_ok(close(fdout), i, "close");
        loop_ok(close(fdval), i, "close");
        validate_result();
    }

    remove(TEST_INPUT_FILENAME);
    remove(TEST_OUTPUT_FILENAME);
    remove(TEST_VALIDATE_FILENAME);
    return 0;
}

38
wrap.c Normal file
View File

@@ -0,0 +1,38 @@
/* wrap.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "base16384.h"
/* Parameter list shared by all wrappers: input and output of the given type
 * followed by the two work buffers. */
#define base16384_typed_params(type) type input, type output, char* encbuf, char* decbuf

/* Define base16384_<method>_<name>() as a thin wrapper that forwards its
 * arguments to base16384_<method>_<name>_detailed() with flag 0. */
#define BASE16384_WRAP_DECL(method, name, type) \
    base16384_err_t base16384_##method##_##name(base16384_typed_params(type)) { \
        return base16384_##method##_##name##_detailed(input, output, encbuf, decbuf, 0); \
    }

/* No semicolon after the invocations: each one expands to a complete function
 * definition, and a stray ';' at file scope is an empty declaration that ISO C
 * does not allow. */
BASE16384_WRAP_DECL(encode, file, const char*)
BASE16384_WRAP_DECL(encode, fp, FILE*)
BASE16384_WRAP_DECL(encode, fd, int)

BASE16384_WRAP_DECL(decode, file, const char*)
BASE16384_WRAP_DECL(decode, fp, FILE*)
BASE16384_WRAP_DECL(decode, fd, int)

#undef BASE16384_WRAP_DECL
#undef base16384_typed_params