1
0
mirror of https://github.com/fumiama/base16384.git synced 2026-06-06 02:30:31 +08:00

Merge branch 'master' of https://github.com/fumiama/base16384 into debian

This commit is contained in:
源文雨
2024-04-05 22:09:49 +09:00
15 changed files with 1913 additions and 351 deletions

View File

@@ -1,31 +1,50 @@
cmake_minimum_required(VERSION 2.8.12)
if (POLICY CMP0048)
cmake_policy(SET CMP0048 NEW)
cmake_policy(SET CMP0048 NEW)
endif (POLICY CMP0048)
project(base16384 VERSION 2.2.5)
project(base16384 VERSION 2.3.0)
add_definitions(-DBASE16384_VERSION="${PROJECT_VERSION}")
add_definitions(-DBASE16384_VERSION_DATE="April 5th 2024")
message(STATUS "Testing endian...")
include(TestBigEndian)
test_big_endian(isBigEndian)
message(STATUS "Is big endian: ${isBigEndian}.")
if (${isBigEndian})
add_definitions(-DWORDS_BIGENDIAN)
endif ()
if (BUILD STREQUAL "test")
add_definitions(-DBASE16384_BUFSZ_FACTOR=1)
endif ()
add_executable(base16384_b base16384.c)
include(TestBigEndian)
test_big_endian(isBigEndian)
if (${isBigEndian})
add_definitions(-DWORDS_BIGENDIAN)
endif()
IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
add_library(base16384 SHARED file.c base1464.c)
add_library(base16384_s STATIC file.c base1464.c)
ELSE()
add_library(base16384 SHARED file.c base1432.c)
add_library(base16384_s STATIC file.c base1432.c)
ENDIF()
IF ((NOT FORCE_32BIT) AND CMAKE_SIZEOF_VOID_P EQUAL 8)
message(STATUS "Adding 64bit libraries...")
add_definitions(-DIS_64BIT_PROCESSOR)
add_library(base16384 SHARED wrap.c file.c base1464.c)
add_library(base16384_s STATIC wrap.c file.c base1464.c)
ELSE ()
message(STATUS "Adding 32bit libraries...")
add_library(base16384 SHARED wrap.c file.c base1432.c)
add_library(base16384_s STATIC wrap.c file.c base1432.c)
ENDIF ()
set_target_properties(base16384_b PROPERTIES OUTPUT_NAME base16384)
set_target_properties(base16384_s PROPERTIES OUTPUT_NAME base16384)
set_target_properties(base16384 PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
message(STATUS "Linking libraries...")
target_link_libraries(base16384_b base16384_s)
if (BUILD STREQUAL "test")
message(STATUS "Building test...")
enable_testing()
add_subdirectory(test)
endif ()
INSTALL(TARGETS base16384_b RUNTIME DESTINATION bin)
INSTALL(TARGETS base16384 LIBRARY DESTINATION lib)
INSTALL(TARGETS base16384_s ARCHIVE DESTINATION lib)

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,6 @@
/* base1432.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2023 Fumiama Minamoto.
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,51 +16,113 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef __cosmopolitan // always le
# define be16toh(x) bswap_16(x)
# define be32toh(x) bswap_32(x)
# define htobe16(x) bswap_16(x)
# define htobe32(x) bswap_32(x)
#else
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#ifdef __linux__
# include <endian.h>
#endif
#ifdef __FreeBSD__
# include <sys/endian.h>
#endif
#ifdef __NetBSD__
# include <sys/endian.h>
#endif
#ifdef __OpenBSD__
# include <sys/types.h>
# define be16toh(x) betoh16(x)
# define be32toh(x) betoh32(x)
#endif
#ifdef __APPLE__
# define be16toh(x) ntohs(x)
# define be32toh(x) ntohl(x)
# define htobe16(x) ntohs(x)
# define htobe32(x) htonl(x)
#endif
#ifdef _WIN32
#ifdef WORDS_BIGENDIAN
# define be16toh(x) (x)
# define be32toh(x) (x)
# define htobe16(x) (x)
# define htobe32(x) (x)
#else
# define be16toh(x) _byteswap_ushort(x)
# define be32toh(x) _byteswap_ulong(x)
# define htobe16(x) _byteswap_ushort(x)
# define htobe32(x) _byteswap_ulong(x)
#endif
#endif
#ifndef __cosmopolitan
#include <string.h>
#endif
// #define DEBUG
#include "binary.h"
/* Scratch word used by the *_safe routines: partial groups are moved through
 * `buf` with memcpy and then read or written as one 32-bit value via `val`. */
typedef union {
uint8_t buf[4];
uint32_t val;
} base16384_union_remainder;
/*
 * Encode `dlen` bytes from `data` into `buf` using 32-bit arithmetic and
 * return the number of bytes written (the value computed in `outlen`).
 *
 * Every 7 input bytes become four 14-bit values, each offset by 0x4e00 and
 * stored as a 16-bit unit, i.e. 8 output bytes. A trailing group of 1..6
 * bytes is followed by the two-byte marker '=' <offset>.
 *
 * The main loop stops one group early (i < dlen - 7) so that its two 4-byte
 * reads stay inside `data`; an exact final 7-byte group is read with a 4-byte
 * load plus a 3-byte memcpy instead.
 */
int base16384_encode_safe(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
switch(offset) { // includes the 2 bytes taken by the offset marker
case 0: break;
case 1: outlen += 4; break;
case 2:
case 3: outlen += 6; break;
case 4:
case 5: outlen += 8; break;
case 6: outlen += 10; break;
default: break;
}
uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0;
int32_t i = 0;
// full groups that are followed by at least one more input byte
for(; i < dlen - 7; i += 7) {
register uint32_t sum = 0;
register uint32_t shift = htobe32(*(uint32_t*)(data+i));
// input bits 31..18 -> upper 14-bit unit, bits 17..4 -> lower 14-bit unit
sum |= (shift>>2) & 0x3fff0000;
sum |= (shift>>4) & 0x00003fff;
// add 0x4e00 to both 16-bit units
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
// keep the 4 bits of the first word that are still unused (bits 3..0)
shift <<= 26;
shift &= 0x3c000000;
sum = 0;
// append the next 24 input bits; the 4th byte of this load is masked away
shift |= (htobe32(*(uint32_t*)(data+i+4))>>6)&0x03fffffc;
sum |= shift & 0x3fff0000;
shift >>= 2;
sum |= shift & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
}
base16384_union_remainder valbuf;
// exactly one full group left: same packing, but the last 3 bytes are copied
// into scratch storage so nothing past data+dlen is read
if(dlen - i == 7) {
register uint32_t sum = 0;
register uint32_t shift = htobe32(*(uint32_t*)(data+i));
sum |= (shift>>2) & 0x3fff0000;
sum |= (shift>>4) & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
shift <<= 26;
shift &= 0x3c000000;
sum = 0;
// valbuf.buf[3] is left unset; the 0x03fffffc mask below discards its bits
memcpy(valbuf.buf, data+i+4, 3);
shift |= (htobe32(valbuf.val)>>6)&0x03fffffc;
sum |= shift & 0x3fff0000;
shift >>= 2;
sum |= shift & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
return outlen;
}
// trailing 1..6 bytes: the word is assembled byte by byte in the layout that
// a little-endian store writes unchanged (big-endian builds byte-swap it),
// which is why the bias here is 0x004e004e rather than 0x4e004e00
uint8_t o = offset;
if(o--) {
register uint32_t sum = 0x0000003f & (data[i] >> 2);
sum |= ((uint32_t)data[i] << 14) & 0x0000c000;
if(o--) {
sum |= ((uint32_t)data[i + 1] << 6) & 0x00003f00;
sum |= ((uint32_t)data[i + 1] << 20) & 0x00300000;
if(o--) {
sum |= ((uint32_t)data[i + 2] << 12) & 0x000f0000;
sum |= ((uint32_t)data[i + 2] << 28) & 0xf0000000;
if(o--) {
sum |= ((uint32_t)data[i + 3] << 20) & 0x0f000000;
sum += 0x004e004e;
// safe, because it will never go over 0x3dxx
#ifdef WORDS_BIGENDIAN
vals[n++] = __builtin_bswap32(sum);
#else
vals[n++] = sum;
#endif
// first output word is complete; start the second with the low 4 bits of byte 3
sum = (((uint32_t)data[i + 3] << 2)) & 0x0000003c;
if(o--) {
sum |= (((uint32_t)data[i + 4] >> 6)) & 0x00000003;
sum |= ((uint32_t)data[i + 4] << 10) & 0x0000fc00;
if(o--) {
sum |= ((uint32_t)data[i + 5] << 2) & 0x00000300;
sum |= ((uint32_t)data[i + 5] << 16) & 0x003f0000;
}
}
}
}
}
sum += 0x004e004e;
// safe, because it will never go over 0x3dxx
#ifdef WORDS_BIGENDIAN
vals[n] = __builtin_bswap32(sum);
#else
vals[n] = sum;
#endif
// the marker overwrites the last two bytes of the word stored just above
buf[outlen - 2] = '=';
buf[outlen - 1] = offset;
}
return outlen;
}
int base16384_encode(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
@@ -75,9 +137,6 @@ int base16384_encode(const char* data, int dlen, char* buf) {
case 6: outlen += 10; break;
default: break;
}
#ifdef DEBUG
printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8);
#endif
uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0;
int32_t i = 0;
@@ -140,6 +199,143 @@ int base16384_encode(const char* data, int dlen, char* buf) {
return outlen;
}
int base16384_encode_unsafe(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
switch(offset) { // 算上偏移标志字符占用的2字节
case 0: break;
case 1: outlen += 4; break;
case 2:
case 3: outlen += 6; break;
case 4:
case 5: outlen += 8; break;
case 6: outlen += 10; break;
default: break;
}
uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0;
int32_t i = 0;
for(; i < dlen; i += 7) {
register uint32_t sum = 0;
register uint32_t shift = htobe32(*(uint32_t*)(data+i));
sum |= (shift>>2) & 0x3fff0000;
sum |= (shift>>4) & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
shift <<= 26;
shift &= 0x3c000000;
sum = 0;
shift |= (htobe32(*(uint32_t*)(data+i+4))>>6)&0x03fffffc;
sum |= shift & 0x3fff0000;
shift >>= 2;
sum |= shift & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
}
if(offset) {
buf[outlen - 2] = '=';
buf[outlen - 1] = offset;
}
return outlen;
}
int base16384_decode_safe(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
if(data[dlen-2] == '=') {
offset = data[dlen-1];
switch(offset) { // 算上偏移标志字符占用的2字节
case 0: break;
case 1: outlen -= 4; break;
case 2:
case 3: outlen -= 6; break;
case 4:
case 5: outlen -= 8; break;
case 6: outlen -= 10; break;
default: break;
}
}
outlen = outlen / 8 * 7 + offset;
const uint32_t* vals = (const uint32_t*)data;
uint32_t n = 0;
int32_t i = 0;
for(; i < outlen - 7; i+=7) { // n实际每次自增2
register uint32_t sum = 0;
register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00;
shift <<= 2;
sum |= shift & 0xfffc0000;
shift <<= 2;
sum |= shift & 0x0003fff0;
shift = htobe32(vals[n++]) - 0x4e004e00;
sum |= shift >> 26;
*(uint32_t*)(buf+i) = be32toh(sum);
sum = 0;
shift <<= 6;
sum |= shift & 0xffc00000;
shift <<= 2;
sum |= shift & 0x003fff00;
*(uint32_t*)(buf+i+4) = be32toh(sum);
}
base16384_union_remainder valbuf;
if(outlen - i == 7) {
register uint32_t sum = 0;
register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00;
shift <<= 2;
sum |= shift & 0xfffc0000;
shift <<= 2;
sum |= shift & 0x0003fff0;
shift = htobe32(vals[n]) - 0x4e004e00;
sum |= shift >> 26;
*(uint32_t*)(buf+i) = be32toh(sum);
sum = 0;
shift <<= 6;
sum |= shift & 0xffc00000;
shift <<= 2;
sum |= shift & 0x003fff00;
valbuf.val = be32toh(sum);
memcpy(buf+i+4, valbuf.buf, 3);
} else if((*(uint8_t*)(&vals[n]) != '=') && offset--) {
int cnt = dlen-2-(int)n*(int)sizeof(uint32_t);
if (cnt > 4) cnt = 4;
memcpy(valbuf.buf, &vals[n], cnt);
n++;
#ifdef WORDS_BIGENDIAN
register uint32_t sum = __builtin_bswap32(valbuf.val);
#else
register uint32_t sum = valbuf.val;
#endif
sum -= 0x0000004e;
buf[i++] = ((sum & 0x0000003f) << 2) | ((sum & 0x0000c000) >> 14);
if(offset--) {
sum -= 0x004e0000;
buf[i++] = ((sum & 0x00003f00) >> 6) | ((sum & 0x00300000) >> 20);
if(offset--) {
buf[i++] = ((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28);
if(offset--) {
buf[i] = (sum & 0x0f000000) >> 20;
if(*(uint8_t*)(&vals[n]) == '=') return outlen;
memcpy(valbuf.buf, &vals[n], dlen-2-(int)n*(int)sizeof(uint32_t));
#ifdef WORDS_BIGENDIAN
sum = __builtin_bswap32(valbuf.val);
#else
sum = valbuf.val;
#endif
sum -= 0x0000004e;
buf[i++] |= (sum & 0x0000003c) >> 2;
if(offset--) {
buf[i++] = ((sum & 0x00000003) << 6) | ((sum & 0x0000fc00) >> 10);
if(offset--) {
sum -= 0x004e0000;
buf[i] = ((sum & 0x00000300) >> 2) | ((sum & 0x003f0000) >> 16);
}
}
}
}
}
}
return outlen;
}
int base16384_decode(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
@@ -157,7 +353,7 @@ int base16384_decode(const char* data, int dlen, char* buf) {
}
}
outlen = outlen / 8 * 7 + offset;
uint32_t* vals = (uint32_t*)data;
const uint32_t* vals = (const uint32_t*)data;
uint32_t n = 0;
int32_t i = 0;
for(; i <= outlen - 7; i+=7) { // n实际每次自增2
@@ -177,6 +373,7 @@ int base16384_decode(const char* data, int dlen, char* buf) {
sum |= shift & 0x003fff00;
*(uint32_t*)(buf+i+4) = be32toh(sum);
}
if(*(uint8_t*)(&vals[n]) == '=') return outlen;
if(offset--) {
// 这里有读取越界
#ifdef WORDS_BIGENDIAN
@@ -193,6 +390,7 @@ int base16384_decode(const char* data, int dlen, char* buf) {
buf[i++] = ((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28);
if(offset--) {
buf[i] = (sum & 0x0f000000) >> 20;
if(*(uint8_t*)(&vals[n]) == '=') return outlen;
// 这里有读取越界
#ifdef WORDS_BIGENDIAN
sum = __builtin_bswap32(vals[n]);
@@ -214,3 +412,69 @@ int base16384_decode(const char* data, int dlen, char* buf) {
}
return outlen;
}
/*
 * Fast decoder (32-bit arithmetic): decodes `dlen` encoded bytes from `data`
 * into `buf` and returns the decoded length.
 *
 * "Unsafe" because the final group is not handled byte by byte: after the
 * main loop one or two more 32-bit words are loaded from `data` and whole
 * 32-bit words are stored to `buf`, regardless of how many payload bytes the
 * last group really carries. Callers must provide slack in both buffers.
 */
int base16384_decode_unsafe(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
if(data[dlen-2] == '=') {
offset = data[dlen-1];
switch(offset) { // includes the 2 bytes taken by the offset marker
case 0: break;
case 1: outlen -= 4; break;
case 2:
case 3: outlen -= 6; break;
case 4:
case 5: outlen -= 8; break;
case 6: outlen -= 10; break;
default: break;
}
}
outlen = outlen / 8 * 7 + offset;
const uint32_t* vals = (const uint32_t*)data;
uint32_t n = 0;
int32_t i = 0;
for(; i < outlen-7; i+=7) { // n actually advances by 2 per iteration
register uint32_t sum = 0;
// remove the 0x4e00 bias from both 16-bit units, then pack their 14-bit payloads
register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00;
shift <<= 2;
sum |= shift & 0xfffc0000;
shift <<= 2;
sum |= shift & 0x0003fff0;
shift = htobe32(vals[n++]) - 0x4e004e00;
// top 4 payload bits of the second word complete the first 32 output bits
sum |= shift >> 26;
*(uint32_t*)(buf+i) = be32toh(sum);
sum = 0;
shift <<= 6;
sum |= shift & 0xffc00000;
shift <<= 2;
sum |= shift & 0x003fff00;
// 4-byte store; its last byte is rewritten by the next iteration's first store
*(uint32_t*)(buf+i+4) = be32toh(sum);
}
// final group (full or partial), decoded with whole-word loads and stores
register uint32_t sum = 0;
register uint32_t shift = htobe32(vals[n++]);
// a word starting with '=' (0x3d) is the end marker: nothing left to decode
if(((shift>>24)&0xff) == 0x3d) return outlen;
// NOTE(review): a unit whose high byte is below 0x4e is not a payload unit;
// forcing that byte to 0xff presumably keeps the subtraction below from
// borrowing into the neighbouring unit -- confirm
if(((shift>>24)&0xff) < 0x4e) shift |= 0xff000000;
if(((shift>> 8)&0xff) < 0x4e) shift |= 0x0000ff00;
shift -= 0x4e004e00;
shift <<= 2;
sum |= shift & 0xfffc0000;
shift <<= 2;
sum |= shift & 0x0003fff0;
shift = htobe32(vals[n]);
// second word is the marker: flush what was decoded from the first word and stop
if(((shift>>24)&0xff) == 0x3d) {
*(uint32_t*)(buf+i) = be32toh(sum);
return outlen;
}
if(((shift>>24)&0xff) < 0x4e) shift |= 0xff000000;
if(((shift>> 8)&0xff) < 0x4e) shift |= 0x0000ff00;
shift -= 0x4e004e00;
sum |= shift >> 26;
*(uint32_t*)(buf+i) = be32toh(sum);
sum = 0;
shift <<= 6;
sum |= shift & 0xffc00000;
shift <<= 2;
sum |= shift & 0x003fff00;
*(uint32_t*)(buf+i+4) = be32toh(sum);
return outlen;
}

View File

@@ -1,6 +1,6 @@
/* base1464.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2023 Fumiama Minamoto.
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,51 +16,98 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#ifdef __linux__
# include <endian.h>
#endif
#ifdef __FreeBSD__
# include <sys/endian.h>
#endif
#ifdef __NetBSD__
# include <sys/endian.h>
#endif
#ifdef __OpenBSD__
# include <sys/types.h>
# define be16toh(x) betoh16(x)
# define be32toh(x) betoh32(x)
# define be64toh(x) betoh64(x)
#endif
#ifdef __APPLE__
# define be16toh(x) ntohs(x)
# define be32toh(x) ntohl(x)
# define be64toh(x) ntohll(x)
# define htobe16(x) ntohs(x)
# define htobe32(x) htonl(x)
# define htobe64(x) htonll(x)
#endif
#ifdef _WIN64
#ifdef WORDS_BIGENDIAN
# define be16toh(x) (x)
# define be32toh(x) (x)
# define be64toh(x) (x)
# define htobe16(x) (x)
# define htobe32(x) (x)
# define htobe64(x) (x)
#else
# define be16toh(x) _byteswap_ushort(x)
# define be32toh(x) _byteswap_ulong(x)
# define be64toh(x) _byteswap_uint64(x)
# define htobe16(x) _byteswap_ushort(x)
# define htobe32(x) _byteswap_ulong(x)
# define htobe64(x) _byteswap_uint64(x)
#endif
#ifndef __cosmopolitan
#include <string.h>
#endif
// #define DEBUG
#include "binary.h"
/* Scratch word used by the *_safe routines: partial groups are moved through
 * `buf` with memcpy and then read or written as one 64-bit value via `val`. */
typedef union {
uint8_t buf[8];
uint64_t val;
} base16384_union_remainder;
/*
 * Encode `dlen` bytes from `data` into `buf` using 64-bit arithmetic and
 * return the number of bytes written (the value computed in `outlen`).
 *
 * Every 7 input bytes become four 14-bit values, each offset by 0x4e00 and
 * stored as a 16-bit unit, i.e. 8 output bytes. A trailing group of 1..6
 * bytes is followed by the two-byte marker '=' <offset>.
 *
 * The main loop stops one group early (i < dlen - 7) so that its 8-byte load
 * stays inside `data`; the last full group and the partial tail go through
 * scratch storage and memcpy instead.
 */
int base16384_encode_safe(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
switch(offset) { // includes the 2 bytes taken by the offset marker
case 0: break;
case 1: outlen += 4; break;
case 2:
case 3: outlen += 6; break;
case 4:
case 5: outlen += 8; break;
case 6: outlen += 10; break;
default: break;
}
uint64_t* vals = (uint64_t*)buf;
uint64_t n = 0;
int64_t i = 0;
// full groups that are followed by at least one more input byte
for(; i < dlen - 7; i += 7) {
register uint64_t sum = 0;
// each further >>2 lines the next 14 input bits up with the next 16-bit unit
register uint64_t shift = htobe64(*(uint64_t*)(data+i))>>2;
sum |= shift & 0x3fff000000000000;
shift >>= 2;
sum |= shift & 0x00003fff00000000;
shift >>= 2;
sum |= shift & 0x000000003fff0000;
shift >>= 2;
sum |= shift & 0x0000000000003fff;
// add 0x4e00 to all four 16-bit units
sum += 0x4e004e004e004e00;
vals[n++] = be64toh(sum);
}
base16384_union_remainder valbuf;
// exactly one full group left: copy its 7 bytes into scratch storage first
if(dlen - i == 7) {
// valbuf.buf[7] is left unset; the shifts below (8 bits in total) drop it
memcpy(valbuf.buf, data+i, 7);
register uint64_t sum = 0;
register uint64_t shift = htobe64(valbuf.val)>>2;
sum |= shift & 0x3fff000000000000;
shift >>= 2;
sum |= shift & 0x00003fff00000000;
shift >>= 2;
sum |= shift & 0x000000003fff0000;
shift >>= 2;
sum |= shift & 0x0000000000003fff;
sum += 0x4e004e004e004e00;
vals[n++] = be64toh(sum);
return outlen;
}
// trailing 1..6 bytes: the word is assembled byte by byte in the layout that
// a little-endian store writes unchanged (big-endian builds byte-swap it),
// which is why the bias here is 0x004e... rather than 0x4e00...
int o = offset;
if(o--) {
register uint64_t sum = 0x000000000000003f & (data[i] >> 2);
sum |= ((uint64_t)data[i] << 14) & 0x000000000000c000;
if(o--) {
sum |= ((uint64_t)data[i + 1] << 6) & 0x0000000000003f00;
sum |= ((uint64_t)data[i + 1] << 20) & 0x0000000000300000;
if(o--) {
sum |= ((uint64_t)data[i + 2] << 12) & 0x00000000000f0000;
sum |= ((uint64_t)data[i + 2] << 28) & 0x00000000f0000000;
if(o--) {
sum |= ((uint64_t)data[i + 3] << 20) & 0x000000000f000000;
sum |= ((uint64_t)data[i + 3] << 34) & 0x0000003c00000000;
if(o--) {
sum |= ((uint64_t)data[i + 4] << 26) & 0x0000000300000000;
sum |= ((uint64_t)data[i + 4] << 42) & 0x0000fc0000000000;
if(o--) {
sum |= ((uint64_t)data[i + 5] << 34) & 0x0000030000000000;
sum |= ((uint64_t)data[i + 5] << 48) & 0x003f000000000000;
}
}
}
}
}
sum += 0x004e004e004e004e;
#ifdef WORDS_BIGENDIAN
valbuf.val = __builtin_bswap64(sum);
#else
valbuf.val = sum;
#endif
// copy only the encoded bytes that precede the marker
memcpy(&vals[n], valbuf.buf, outlen-2-(int)n*(int)sizeof(uint64_t));
buf[outlen - 2] = '=';
buf[outlen - 1] = offset;
}
return outlen;
}
int base16384_encode(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
@@ -75,9 +122,6 @@ int base16384_encode(const char* data, int dlen, char* buf) {
case 6: outlen += 10; break;
default: break;
}
#ifdef DEBUG
printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8);
#endif
uint64_t* vals = (uint64_t*)buf;
uint64_t n = 0;
int64_t i = 0;
@@ -93,9 +137,6 @@ int base16384_encode(const char* data, int dlen, char* buf) {
sum |= shift & 0x0000000000003fff;
sum += 0x4e004e004e004e00;
vals[n++] = be64toh(sum);
#ifdef DEBUG
printf("i: %llu, add sum: %016llx\n", i, sum);
#endif
}
int o = offset;
if(o--) {
@@ -127,15 +168,125 @@ int base16384_encode(const char* data, int dlen, char* buf) {
#else
vals[n] = sum;
#endif
#ifdef DEBUG
printf("i: %llu, add sum: %016llx\n", i, sum);
#endif
buf[outlen - 2] = '=';
buf[outlen - 1] = offset;
}
return outlen;
}
int base16384_encode_unsafe(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
switch(offset) { // 算上偏移标志字符占用的2字节
case 0: break;
case 1: outlen += 4; break;
case 2:
case 3: outlen += 6; break;
case 4:
case 5: outlen += 8; break;
case 6: outlen += 10; break;
default: break;
}
uint64_t* vals = (uint64_t*)buf;
uint64_t n = 0;
int64_t i = 0;
for(; i < dlen; i += 7) {
register uint64_t sum = 0;
register uint64_t shift = htobe64(*(uint64_t*)(data+i))>>2; // 这里有读取越界
sum |= shift & 0x3fff000000000000;
shift >>= 2;
sum |= shift & 0x00003fff00000000;
shift >>= 2;
sum |= shift & 0x000000003fff0000;
shift >>= 2;
sum |= shift & 0x0000000000003fff;
sum += 0x4e004e004e004e00;
vals[n++] = be64toh(sum);
}
if(offset) {
buf[outlen - 2] = '=';
buf[outlen - 1] = offset;
}
return outlen;
}
int base16384_decode_safe(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
if(data[dlen-2] == '=') {
offset = data[dlen-1];
switch(offset) { // 算上偏移标志字符占用的2字节
case 0: break;
case 1: outlen -= 4; break;
case 2:
case 3: outlen -= 6; break;
case 4:
case 5: outlen -= 8; break;
case 6: outlen -= 10; break;
default: break;
}
}
outlen = outlen / 8 * 7 + offset;
const uint64_t* vals = (const uint64_t*)data;
uint64_t n = 0;
int64_t i = 0;
for(; i < outlen - 7; n++, i+=7) {
register uint64_t sum = 0;
register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00;
shift <<= 2;
sum |= shift & 0xfffc000000000000;
shift <<= 2;
sum |= shift & 0x0003fff000000000;
shift <<= 2;
sum |= shift & 0x0000000fffc00000;
shift <<= 2;
sum |= shift & 0x00000000003fff00;
*(uint64_t*)(buf+i) = be64toh(sum);
}
base16384_union_remainder valbuf;
if(outlen - i == 7) {
register uint64_t sum = 0;
register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00;
shift <<= 2;
sum |= shift & 0xfffc000000000000;
shift <<= 2;
sum |= shift & 0x0003fff000000000;
shift <<= 2;
sum |= shift & 0x0000000fffc00000;
shift <<= 2;
sum |= shift & 0x00000000003fff00;
valbuf.val = be64toh(sum);
memcpy(buf+i, valbuf.buf, 7);
} else if((*(uint8_t*)(&vals[n]) != '=') && offset--) {
memcpy(valbuf.buf, &vals[n], dlen-2-(int)n*(int)sizeof(uint64_t));
#ifdef WORDS_BIGENDIAN
register uint64_t sum = __builtin_bswap64(valbuf.val) - 0x000000000000004e;
#else
register uint64_t sum = valbuf.val - 0x000000000000004e;
#endif
buf[i++] = ((sum & 0x000000000000003f) << 2) | ((sum & 0x000000000000c000) >> 14);
if(offset--) {
sum -= 0x00000000004e0000;
buf[i++] = ((sum & 0x0000000000003f00) >> 6) | ((sum & 0x0000000000300000) >> 20);
if(offset--) {
buf[i++] = ((sum & 0x00000000000f0000) >> 12) | ((sum & 0x00000000f0000000) >> 28);
if(offset--) {
sum -= 0x0000004e00000000;
buf[i++] = ((sum & 0x000000000f000000) >> 20) | ((sum & 0x0000003c00000000) >> 34);
if(offset--) {
buf[i++] = ((sum & 0x0000000300000000) >> 26) | ((sum & 0x0000fc0000000000) >> 42);
if(offset--) {
sum -= 0x004e000000000000;
buf[i] = ((sum & 0x0000030000000000) >> 34) | ((sum & 0x003f000000000000) >> 48);
}
}
}
}
}
}
return outlen;
}
int base16384_decode(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
@@ -153,7 +304,7 @@ int base16384_decode(const char* data, int dlen, char* buf) {
}
}
outlen = outlen / 8 * 7 + offset;
uint64_t* vals = (uint64_t*)data;
const uint64_t* vals = (const uint64_t*)data;
uint64_t n = 0;
int64_t i = 0;
for(; i <= outlen - 7; n++, i+=7) {
@@ -168,10 +319,8 @@ int base16384_decode(const char* data, int dlen, char* buf) {
shift <<= 2;
sum |= shift & 0x00000000003fff00;
*(uint64_t*)(buf+i) = be64toh(sum);
#ifdef DEBUG
printf("i: %llu, add sum: %016llx\n", i, sum);
#endif
}
if(*(uint8_t*)(&vals[n]) == '=') return outlen;
if(offset--) {
// 这里有读取越界
#ifdef WORDS_BIGENDIAN
@@ -201,3 +350,56 @@ int base16384_decode(const char* data, int dlen, char* buf) {
}
return outlen;
}
/*
 * Fast decoder (64-bit arithmetic): decodes `dlen` encoded bytes from `data`
 * into `buf` and returns the decoded length.
 *
 * "Unsafe" because the final group is not handled byte by byte: after the
 * main loop one more 64-bit word is loaded from `data` and a whole 64-bit
 * word is stored to `buf`, regardless of how many payload bytes the last
 * group really carries. Callers must provide slack in both buffers.
 */
int base16384_decode_unsafe(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
if(data[dlen-2] == '=') {
offset = data[dlen-1];
switch(offset) { // includes the 2 bytes taken by the offset marker
case 0: break;
case 1: outlen -= 4; break;
case 2:
case 3: outlen -= 6; break;
case 4:
case 5: outlen -= 8; break;
case 6: outlen -= 10; break;
default: break;
}
}
outlen = outlen / 8 * 7 + offset;
const uint64_t* vals = (const uint64_t*)data;
uint64_t n = 0;
int64_t i = 0;
for(; i < outlen-7; n++, i+=7) {
register uint64_t sum = 0;
// remove the 0x4e00 bias from all four units, then pack their 14-bit payloads
register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00;
shift <<= 2;
sum |= shift & 0xfffc000000000000;
shift <<= 2;
sum |= shift & 0x0003fff000000000;
shift <<= 2;
sum |= shift & 0x0000000fffc00000;
shift <<= 2;
sum |= shift & 0x00000000003fff00;
// 8-byte store; its last byte is rewritten by the next iteration's store
*(uint64_t*)(buf+i) = be64toh(sum);
}
// final group (full or partial), decoded with a whole-word load and store
register uint64_t sum = 0;
register uint64_t shift = htobe64(vals[n]);
// a word starting with '=' (0x3d) is the end marker: nothing left to decode
if(((shift>>56)&0xff) == 0x3d) return outlen;
// NOTE(review): a unit whose high byte is below 0x4e is not a payload unit
// (e.g. the marker); forcing that byte to 0xff presumably keeps the
// subtraction below from borrowing into the neighbouring unit -- confirm
if(((shift>>56)&0xff) < 0x4e) shift |= 0xff00000000000000;
if(((shift>>40)&0xff) < 0x4e) shift |= 0x0000ff0000000000;
if(((shift>>24)&0xff) < 0x4e) shift |= 0x00000000ff000000;
if(((shift>> 8)&0xff) < 0x4e) shift |= 0x000000000000ff00;
shift -= 0x4e004e004e004e00;
shift <<= 2;
sum |= shift & 0xfffc000000000000;
shift <<= 2;
sum |= shift & 0x0003fff000000000;
shift <<= 2;
sum |= shift & 0x0000000fffc00000;
shift <<= 2;
sum |= shift & 0x00000000003fff00;
*(uint64_t*)(buf+i) = be64toh(sum);
return outlen;
}

View File

@@ -1,9 +1,9 @@
.TH BASE16384 1 "26 August 2023" "GNU" "User Commands"
.TH BASE16384 1 "5 April 2024" "GNU" "User Commands"
.SH NAME
base16384 \- Encode binary files to printable utf16be
.SH SYNOPSIS
.B base16384
-[e|d|t] <\fIinputfile\fR> <\fIoutputfile\fR>
-[edtn] <\fIinputfile\fR> <\fIoutputfile\fR>
.SH DESCRIPTION
.LP
There are
@@ -27,7 +27,30 @@ to
.sp 1
.TP 0.5i
\fB\-e\fR
Read data from \fIinputfile\fR and encode them into \fIoutputfile\fR.
Read data from \fIinputfile\fR and encode them into \fIoutputfile\fR. It's the default option when neither
.B -e
nor
.B -d
is specified.
.TP 0.5i
\fB\-d\fR
Read data from \fIinputfile\fR and decode them into \fIoutputfile\fR.
.TP 0.5i
\fB\-t\fR
Show spend time.
.TP 0.5i
\fB\-n\fR
Do not write utf16be file header
.B 0xFEFF
to the output.
.TP 0.5i
\fB\-c\fR
Embed or validate checksum in remainder when using \fIstdin\fR or \fIstdout\fR or inputsize > _BASE16384_ENCBUFSZ.
.TP 0.5i
\fB\-C\fR
Always perform the
.B -c
checksum, regardless of data length.
.TP 0.5i
\fB\-d\fR
Read data from \fIinputfile\fR and decode them into \fIoutputfile\fR.
@@ -36,10 +59,14 @@ Read data from \fIinputfile\fR and decode them into \fIoutputfile\fR.
Show spend time.
.TP 0.5i
\fBinputfile\fR
An absolute or relative file path. Specially, pass - to read from stdin.
An absolute or relative file path. Specially, pass
.B -
to read from \fIstdin\fR.
.TP 0.5i
\fBoutputfile\fR
An absolute or relative file path. Specially, pass - to write to stdout.
An absolute or relative file path. Specially, pass
.B -
to write to \fIstdout\fR.
.SH "EXIT STATUS"
.TP 0.5i
\fB0\fR
@@ -68,6 +95,12 @@ Write file error in mmap.
.TP 0.5i
\fB8\fR
Invalid input/output filename.
.TP 0.5i
\fB9\fR
Invalid commandline parameter.
.TP 0.5i
\fB10\fR
Invalid decoding checksum.
.SH "SEE ALSO"
https://github.com/fumiama/base16384
.SH BUGS
@@ -77,7 +110,7 @@ on github.
.SH AUTHOR
This manual page contributed by Fumiama Minamoto.
.SH "COPYRIGHT"
Copyright \(co 2022-2023, Fumiama Minamoto
Copyright \(co 2022-2024, Fumiama Minamoto
This file is part of
.IR "base16384" .
.LP

View File

@@ -1,6 +1,6 @@
/* base16384.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2023 Fumiama Minamoto.
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -38,77 +38,103 @@ unsigned long get_start_ms() {
}
#endif
static void print_usage() {
puts("Copyright (c) 2022-2023 Fumiama Minamoto.\nBase16384 2.2.5 (August 26th 2023). Usage:");
puts("base16384 [-edt] [inputfile] [outputfile]");
puts(" -e\t\tencode");
puts(" -d\t\tdecode");
puts(" -t\t\tshow spend time");
puts(" inputfile\tpass - to read from stdin");
puts(" outputfile\tpass - to write to stdout");
static base16384_err_t print_usage() {
#ifndef BASE16384_VERSION
#define BASE16384_VERSION "dev"
#endif
#ifndef BASE16384_VERSION_DATE
#define BASE16384_VERSION_DATE "unknown date"
#endif
fputs(
"Copyright (c) 2022-2024 Fumiama Minamoto.\nBase16384 "
BASE16384_VERSION
" ("
BASE16384_VERSION_DATE
"). Usage:\n", stderr
);
fputs("base16384 [-edtn] [inputfile] [outputfile]\n", stderr);
fputs(" -e\t\tencode (default)\n", stderr);
fputs(" -d\t\tdecode\n", stderr);
fputs(" -t\t\tshow spend time\n", stderr);
fputs(" -n\t\tdon't write utf16be file header (0xFEFF)\n", stderr);
fputs(" -c\t\tembed or validate checksum in remainder\n", stderr);
fputs(" -C\t\tdo -c forcely\n", stderr);
fputs(" inputfile\tpass - to read from stdin\n", stderr);
fputs(" outputfile\tpass - to write to stdout\n", stderr);
return base16384_err_invalid_commandline_parameter;
}
int main(int argc, char** argv) {
if(argc != 4 || argv[1][0] != '-') {
print_usage();
return -1;
}
int flaglen = strlen(argv[1]);
if(flaglen <= 1 || flaglen > 3) {
print_usage();
return -2;
}
const char* cmd = argv[1];
if(argc != 4 || cmd[0] != '-') return print_usage();
int flaglen = strlen(cmd);
if(flaglen <= 1 || flaglen > 5) return print_usage();
#ifdef _WIN32
clock_t t = 0;
#else
unsigned long t = 0;
#endif
uint16_t is_encode = 1, use_timer = 0, no_header = 0, use_checksum = 0;
#define set_flag(f, v) ((f) = (((((f)>>8)+1) << 8)&0xff00) | (v&0x00ff))
#define flag_has_been_set(f) ((f)>>8)
#define set_or_test_flag(f, v) (flag_has_been_set(f)?1:(set_flag(f, v), 0))
while(--flaglen) switch(cmd[flaglen]) { // skip cmd[0] = '-'
case 'e':
if(set_or_test_flag(is_encode, 1)) return print_usage();
break;
case 'd':
if(set_or_test_flag(is_encode, 0)) return print_usage();
break;
case 't':
if(set_or_test_flag(use_timer, 1)) return print_usage();
break;
case 'n':
if(set_or_test_flag(no_header, 1)) return print_usage();
break;
case 'c':
if(set_or_test_flag(use_checksum, 1)) return print_usage();
break;
case 'C':
if(set_or_test_flag(use_checksum, 2)) return print_usage();
break;
default:
return print_usage();
break;
}
#define clear_high_byte(x) ((x) &= 0x00ff)
clear_high_byte(is_encode); clear_high_byte(use_timer);
clear_high_byte(no_header); clear_high_byte(use_checksum);
if(use_timer) {
#ifdef _WIN32
t = clock();
#else
t = get_start_ms();
#endif
}
base16384_err_t exitstat = base16384_err_ok;
char cmd = argv[1][1];
if(cmd == 't') {
if(flaglen == 2) {
print_usage(); return -3;
}
#define do_coding(method) base16384_##method##_file_detailed( \
argv[2], argv[3], encbuf, decbuf, \
(no_header?BASE16384_FLAG_NOHEADER:0) \
| ((use_checksum&1)?BASE16384_FLAG_SUM_CHECK_ON_REMAIN:0) \
| ((use_checksum&2)?BASE16384_FLAG_DO_SUM_CHECK_FORCELY:0) \
)
exitstat = is_encode?do_coding(encode):do_coding(decode);
#undef do_coding
if(t) {
#ifdef _WIN32
t = clock();
fprintf(stderr, "spend time: %lums\n", clock() - t);
#else
t = get_start_ms();
#endif
cmd = argv[1][2];
} else if(flaglen == 3) {
if(argv[1][2] != 't') {
print_usage(); return -4;
}
#ifdef _WIN32
t = clock();
#else
t = get_start_ms();
fprintf(stderr, "spend time: %lums\n", get_start_ms() - t);
#endif
}
switch(cmd) {
case 'e': exitstat = base16384_encode_file(argv[2], argv[3], encbuf, decbuf); break;
case 'd': exitstat = base16384_decode_file(argv[2], argv[3], encbuf, decbuf); break;
default: print_usage(); return -5;
}
if(t && !exitstat && *(uint16_t*)(argv[3]) != *(uint16_t*)"-") {
#ifdef _WIN32
printf("spend time: %lums\n", clock() - t);
#else
printf("spend time: %lums\n", get_start_ms() - t);
#endif
}
#define print_base16384_err(n) case base16384_err_##n: perror("base16384_err_"#n); break
if(exitstat) switch(exitstat) {
print_base16384_err(get_file_size);
print_base16384_err(fopen_output_file);
print_base16384_err(fopen_input_file);
print_base16384_err(write_file);
print_base16384_err(open_input_file);
print_base16384_err(map_input_file);
print_base16384_err(read_file);
print_base16384_err(invalid_file_name);
default: perror("base16384"); break;
}
#undef print_base16384_err
return exitstat;
return base16384_perror(exitstat);
}

View File

@@ -3,7 +3,7 @@
/* base16384.h
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2023 Fumiama Minamoto.
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,33 +24,50 @@
#include <stdio.h>
#endif
#define define_base16384_err_t(n) base16384_err_##n
// base16384_err_t is the return value of base16384_en/decode_file
enum base16384_err_t {
define_base16384_err_t(ok),
define_base16384_err_t(get_file_size),
define_base16384_err_t(fopen_output_file),
define_base16384_err_t(fopen_input_file),
define_base16384_err_t(write_file),
define_base16384_err_t(open_input_file),
define_base16384_err_t(map_input_file),
define_base16384_err_t(read_file),
define_base16384_err_t(invalid_file_name),
base16384_err_ok,
base16384_err_get_file_size,
base16384_err_fopen_output_file,
base16384_err_fopen_input_file,
base16384_err_write_file,
base16384_err_open_input_file,
base16384_err_map_input_file,
base16384_err_read_file,
base16384_err_invalid_file_name,
base16384_err_invalid_commandline_parameter,
base16384_err_invalid_decoding_checksum,
};
// base16384_err_t is the return value of base16384_en/decode_file
/**
* @brief return value of base16384_en/decode_file
*/
typedef enum base16384_err_t base16384_err_t;
#undef define_base16384_err_t
#ifndef BASE16384_BUFSZ_FACTOR
#define BASE16384_BUFSZ_FACTOR (8)
#endif
#define BASE16384_ENCBUFSZ (BUFSIZ*1024/7*7+7)
#define BASE16384_DECBUFSZ (BUFSIZ*1024/8*8+16)
#define _BASE16384_ENCBUFSZ ((BUFSIZ*BASE16384_BUFSZ_FACTOR)/7*7)
#define _BASE16384_DECBUFSZ ((BUFSIZ*BASE16384_BUFSZ_FACTOR)/8*8)
// base16384_encode_len calc min buf size to fill encode result
static inline int base16384_encode_len(int dlen) {
#define BASE16384_ENCBUFSZ (_BASE16384_ENCBUFSZ+16)
#define BASE16384_DECBUFSZ (_BASE16384_DECBUFSZ+16)
// disable 0xFEFF file header in encode
#define BASE16384_FLAG_NOHEADER (1<<0)
// enable sum check when using stdin or stdout or inputsize > _BASE16384_ENCBUFSZ
#define BASE16384_FLAG_SUM_CHECK_ON_REMAIN (1<<1)
// forcely do sumcheck without checking data length
#define BASE16384_FLAG_DO_SUM_CHECK_FORCELY (1<<2)
/**
* @brief calculate the exact encoded size
* @param dlen the data length to encode
* @return the size
*/
static inline int _base16384_encode_len(int dlen) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
switch(offset) { // 算上偏移标志字符占用的2字节
switch(offset) { // 算上偏移标志字符占用的 2 字节
case 0: break;
case 1: outlen += 4; break;
case 2:
@@ -60,13 +77,27 @@ static inline int base16384_encode_len(int dlen) {
case 6: outlen += 10; break;
default: break;
}
return outlen + 8; // 冗余的8B用于可能的结尾的覆盖
return outlen;
}
// base16384_decode_len calc min buf size to fill decode result
static inline int base16384_decode_len(int dlen, int offset) {
/**
 * @brief minimum buffer size needed to encode `dlen` bytes
 *
 * This is the exact encoded size reported by `_base16384_encode_len` plus
 * 16 spare bytes (bytes, not bits).
 * @param dlen the data length to encode
 * @return the exact encoded size plus 16 bytes of slack
 */
static inline int base16384_encode_len(int dlen) {
    const int exact = _base16384_encode_len(dlen);
    // 16 redundant bytes absorb a possible unsafe overwrite at the tail
    return exact + 16;
}
/**
* @brief calculate the exact decoded size
* @param dlen the data length to decode
* @param offset the last char `xx` of the underfilled coding (0x3Dxx) or 0 for the full coding
* @return the size
*/
static inline int _base16384_decode_len(int dlen, int offset) {
int outlen = dlen;
switch(offset) { // 算上偏移标志字符占用的2字节
switch(offset) { // 算上偏移标志字符占用的 2 字节
case 0: break;
case 1: outlen -= 4; break;
case 2:
@@ -76,39 +107,180 @@ static inline int base16384_decode_len(int dlen, int offset) {
case 6: outlen -= 10; break;
default: break;
}
return outlen / 8 * 7 + offset + 1; // 多出1字节用于循环覆盖
return outlen / 8 * 7 + offset;
}
// base16384_encode encodes data and write result into buf
/**
 * @brief minimum buffer size needed to decode `dlen` bytes
 *
 * This is the exact decoded size reported by `_base16384_decode_len` plus
 * 16 spare bytes (bytes, not bits).
 * @param dlen the data length to decode
 * @param offset the last char `xx` of the underfilled coding (0x3Dxx) or 0 for the full coding
 * @return the exact decoded size plus 16 bytes of slack
 */
static inline int base16384_decode_len(int dlen, int offset) {
    const int exact = _base16384_decode_len(dlen, offset);
    // 16 extra bytes absorb the overwrite done by the unsafe decoding loop
    return exact + 16;
}
/**
* @brief safely encode data and write result into buf
* @param data data to encode, no data overread
* @param dlen the data length
* @param buf the output buffer, whose size can be exactly `_base16384_encode_len`
* @return the total length written
*/
int base16384_encode_safe(const char* data, int dlen, char* buf);
/**
* @brief encode data and write result into buf
* @param data data to encode
* @param dlen the data length
* @param buf the output buffer, whose size must be at least `base16384_encode_len`
* @return the total length written
*/
int base16384_encode(const char* data, int dlen, char* buf);
// base16384_decode decodes data and write result into buf
/**
* @brief encode data and write result into buf without considering border condition
* @param data data to encode
* @param dlen the data length
* @param buf the output buffer, whose size must be at least `base16384_encode_len`
* @return the total length written
*/
int base16384_encode_unsafe(const char* data, int dlen, char* buf);
/**
* @brief safely decode data and write result into buf
* @param data data to decode, no data overread
* @param dlen the data length
* @param buf the output buffer, whose size can be exactly `_base16384_decode_len`
* @return the total length written
*/
int base16384_decode_safe(const char* data, int dlen, char* buf);
/**
* @brief decode data and write result into buf
* @param data data to decode
* @param dlen the data length
* @param buf the output buffer, whose size must be at least `base16384_decode_len`
* @return the total length written
*/
int base16384_decode(const char* data, int dlen, char* buf);
// base16384_encode_file encodes input file to output file.
// use `-` to specify stdin/stdout
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
base16384_err_t base16384_encode_file(const char* input, const char* output, char* encbuf, char* decbuf);
/**
* @brief decode data and write result into buf without considering border condition
* @param data data to decode
* @param dlen the data length
* @param buf the output buffer, whose size must be at least `base16384_decode_len`
* @return the total length written
*/
int base16384_decode_unsafe(const char* data, int dlen, char* buf);
// base16384_encode_fp encodes input file to output file.
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
base16384_err_t base16384_encode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf);
#define base16384_typed_params(type) type input, type output, char* encbuf, char* decbuf
#define base16384_typed_flag_params(type) base16384_typed_params(type), int flag
// base16384_encode_fd encodes input fd to output fd.
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
base16384_err_t base16384_encode_fd(int input, int output, char* encbuf, char* decbuf);
/**
* @brief encode input file to output file
* @param input filename or `-` to specify stdin
* @param output filename or `-` to specify stdout
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_encode_file_detailed(base16384_typed_flag_params(const char*));
// base16384_decode_file decodes input file to output file.
// use `-` to specify stdin/stdout
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
base16384_err_t base16384_decode_file(const char* input, const char* output, char* encbuf, char* decbuf);
/**
* @brief encode input `FILE*` to output `FILE*`
* @param input `FILE*` pointer
* @param output `FILE*` pointer
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_encode_fp_detailed(base16384_typed_flag_params(FILE*));
// base16384_decode_fp decodes input file to output file.
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
base16384_err_t base16384_decode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf);
/**
* @brief encode input stream to output stream
* @param input file descriptor
* @param output file descriptor
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_encode_fd_detailed(base16384_typed_flag_params(int));
// base16384_decode_fd decodes input fd to output fd.
// encbuf & decbuf must be no less than BASE16384_ENCBUFSZ & BASE16384_DECBUFSZ
base16384_err_t base16384_decode_fd(int input, int output, char* encbuf, char* decbuf);
/**
* @brief decode input file to output file
* @param input filename or `-` to specify stdin
* @param output filename or `-` to specify stdout
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_decode_file_detailed(base16384_typed_flag_params(const char*));
/**
* @brief decode input `FILE*` to output `FILE*`
* @param input `FILE*` pointer
* @param output `FILE*` pointer
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_decode_fp_detailed(base16384_typed_flag_params(FILE*));
/**
* @brief decode input stream to output stream
* @param input file descriptor
* @param output file descriptor
* @param encbuf must be no less than BASE16384_ENCBUFSZ
* @param decbuf must be no less than BASE16384_DECBUFSZ
* @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
* @return the error code
*/
base16384_err_t base16384_decode_fd_detailed(base16384_typed_flag_params(int));
// Declares the wrapper `base16384_<method>_<name>(input, output, encbuf, decbuf)`,
// i.e. the variant that takes no `flag` argument.
// The macro body deliberately carries no trailing `;`: every use below supplies
// its own, so each expansion is exactly one declaration and no stray empty
// declaration is left at file scope (which ISO C does not allow).
#define BASE16384_WRAP_DECL(method, name, type) \
    base16384_err_t base16384_##method##_##name(base16384_typed_params(type))

BASE16384_WRAP_DECL(encode, file, const char*);
BASE16384_WRAP_DECL(encode, fp, FILE*);
BASE16384_WRAP_DECL(encode, fd, int);

BASE16384_WRAP_DECL(decode, file, const char*);
BASE16384_WRAP_DECL(decode, fp, FILE*);
BASE16384_WRAP_DECL(decode, fd, int);

#undef BASE16384_WRAP_DECL
#undef base16384_typed_flag_params
#undef base16384_typed_params
/**
 * @brief report a non-ok error code through perror
 *
 * Nothing is printed for a zero (ok) code. For every other code, perror is
 * called once with the enumerator name as its prefix, or with the generic
 * prefix "base16384" when the code is not one of the listed enumerators.
 * @param err the error code to report
 * @return `err`, unchanged, so the call can be chained in a return statement
 */
static inline base16384_err_t base16384_perror(base16384_err_t err) {
    const char* prefix;
    if(!err) return err; // ok: stay silent
    // map the code to the prefix handed to perror
    #define base16384_perror_prefix(n) case base16384_err_##n: prefix = "base16384_err_"#n; break
    switch(err) {
        base16384_perror_prefix(get_file_size);
        base16384_perror_prefix(fopen_output_file);
        base16384_perror_prefix(fopen_input_file);
        base16384_perror_prefix(write_file);
        base16384_perror_prefix(open_input_file);
        base16384_perror_prefix(map_input_file);
        base16384_perror_prefix(read_file);
        base16384_perror_prefix(invalid_file_name);
        base16384_perror_prefix(invalid_commandline_parameter);
        base16384_perror_prefix(invalid_decoding_checksum);
        default: prefix = "base16384"; break;
    }
    #undef base16384_perror_prefix
    perror(prefix);
    return err;
}
#endif

124
binary.h Normal file
View File

@@ -0,0 +1,124 @@
#ifndef _BINARY_H_
#define _BINARY_H_
/* binary.h
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Per-platform definitions of the big-endian <-> host conversion helpers
// (be16toh/be32toh/be64toh and htobe16/htobe32/htobe64) used by this project.
// Wherever this header defines them itself, the 64-bit variants are only
// defined when IS_64BIT_PROCESSOR is set.
#ifdef __cosmopolitan // original note: always little-endian, so every conversion is a byte swap
#define be16toh(x) bswap_16(x)
#define be32toh(x) bswap_32(x)
#define htobe16(x) bswap_16(x)
#define htobe32(x) bswap_32(x)
#else
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
// Linux: the helpers come from <endian.h>
#ifdef __linux__
#include <endian.h>
#endif
// FreeBSD / NetBSD: the helpers come from <sys/endian.h>
#ifdef __FreeBSD__
#include <sys/endian.h>
#endif
#ifdef __NetBSD__
#include <sys/endian.h>
#endif
// OpenBSD: map the beNNtoh spellings onto the betohNN names.
// Only the be->host direction is mapped here; presumably htobeNN is already
// available from <sys/types.h> — TODO confirm.
#ifdef __OpenBSD__
#include <sys/types.h>
#define be16toh(x) betoh16(x)
#define be32toh(x) betoh32(x)
#ifdef IS_64BIT_PROCESSOR
#define be64toh(x) betoh64(x)
#endif
#endif
// Apple: express the helpers through the network byte-order functions.
// NOTE(review): no header declaring ntohs/ntohl/htons/htonl is included in
// this file — confirm that every includer pulls one in first.
#ifdef __APPLE__
#define be16toh(x) ntohs(x)
#define be32toh(x) ntohl(x)
#ifdef IS_64BIT_PROCESSOR
#define be64toh(x) ntohll(x)
#endif
#define htobe16(x) htons(x)
#define htobe32(x) htonl(x)
#ifdef IS_64BIT_PROCESSOR
#define htobe64(x) htonll(x)
#endif
#endif
// Windows: byte order is chosen at build time through WORDS_BIGENDIAN.
#ifdef _WIN32
#ifdef WORDS_BIGENDIAN
// big-endian host: the conversions are the identity
#define be16toh(x) (x)
#define be32toh(x) (x)
#ifdef IS_64BIT_PROCESSOR
#define be64toh(x) (x)
#endif
#define htobe16(x) (x)
#define htobe32(x) (x)
#ifdef IS_64BIT_PROCESSOR
#define htobe64(x) (x)
#endif
#else
// little-endian host: swap bytes with the _byteswap_* functions
#define be16toh(x) _byteswap_ushort(x)
#define be32toh(x) _byteswap_ulong(x)
#ifdef IS_64BIT_PROCESSOR
#define be64toh(x) _byteswap_uint64(x)
#endif
#define htobe16(x) _byteswap_ushort(x)
#define htobe32(x) _byteswap_ulong(x)
#ifdef IS_64BIT_PROCESSOR
#define htobe64(x) _byteswap_uint64(x)
#endif
#endif
#endif
#endif
// rotate x left by c bits; the width is taken from sizeof(x)
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (sizeof(x)*8 - (c))))

// initial sum value used in BASE16384_FLAG_SUM_CHECK_ON_REMAIN
#define BASE16384_SIMPLE_SUM_INIT_VALUE (0x8e29c213)

/**
 * @brief fold `cnt` bytes of `encbuf` into the running checksum `sum`
 *
 * Each byte is split into its four 2-bit pairs, one pair is placed at the
 * bottom of each byte of a 32-bit word, the word is added to the sum, and the
 * sum is then rotated left by 3 bits and inverted.
 * @param sum the checksum accumulated so far
 * @param cnt number of bytes to process
 * @param encbuf the bytes to process
 * @return the updated checksum (`sum` unchanged when `cnt` is 0)
 */
static inline uint32_t calc_sum(uint32_t sum, size_t cnt, const char* encbuf) {
    const char* cursor = encbuf;
    const char* const stop = encbuf + cnt;
    while(cursor != stop) {
        const uint32_t byte = (uint32_t)(unsigned char)(*cursor++);
        uint32_t spread = byte & 0x03;       // bits 1..0 stay at 1..0
        spread |= (byte & 0x0c) << (8-2);    // bits 3..2 -> 9..8
        spread |= (byte & 0x30) << (16-4);   // bits 5..4 -> 17..16
        spread |= (byte & 0xc0) << (24-6);   // bits 7..6 -> 25..24
        sum += spread;
        sum = ~LEFTROTATE(sum, 3);
    }
    return sum;
}
static inline int check_sum(uint32_t sum, uint32_t sum_read_raw, int offset) {
offset = offset%7;
if(!offset--) return 0; // no remain bits, pass
// offset 1: 0011 1111 1100 0000 remain: 3*2 bits
// offset 2: 0011 1111 1111 1111 0011 0000 0000 0000 remain: 6*2 bits
// offset 3: 0011 1111 1111 0000 remain: 2*2 bits
// offset 4: 0011 1111 1111 1111 0011 1100 0000 0000 remain: 5*2 bits
// offset 5: 0011 1111 1111 1100 remain: 1*2 bits
// offset 6: 0011 1111 1111 1111 0011 1111 0000 0000 remain: 4*2 bits
// encode: 0415263 (6-1) per 3bits, thus 0x021ab3
int shift = sizeof(uint32_t)*8 - ((0x021ab3>>(offset*3))&0x07)*2;
uint32_t sum_read = be32toh(sum_read_raw) >> shift;
sum >>= shift;
#ifdef DEBUG
fprintf(stderr, "shift: %d, offset: %d, mysum: %08x, sumrd: %08x\n", shift, offset+1, sum, sum_read);
#endif
return sum != sum_read;
}
#endif

350
file.c
View File

@@ -1,6 +1,6 @@
/* file.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2023 Fumiama Minamoto.
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#ifdef _WIN32
#include <windows.h>
#include <io.h>
@@ -33,6 +34,7 @@
#endif
#endif
#include "base16384.h"
#include "binary.h"
#ifdef __cosmopolitan
#define get_file_size(filepath) ((off_t)GetFileSize(filepath))
@@ -45,100 +47,166 @@ static inline off_t get_file_size(const char* filepath) {
#define is_standard_io(filename) (*(uint16_t*)(filename) == *(uint16_t*)"-")
base16384_err_t base16384_encode_file(const char* input, const char* output, char* encbuf, char* decbuf) {
#define goto_base16384_file_detailed_cleanup(method, reason, dobeforereturn) { \
errnobak = errno; \
retval = reason; \
dobeforereturn; \
goto base16384_##method##_file_detailed_cleanup; \
}
#define do_sum_check(flag) ((flag)&(BASE16384_FLAG_DO_SUM_CHECK_FORCELY|BASE16384_FLAG_SUM_CHECK_ON_REMAIN))
base16384_err_t base16384_encode_file_detailed(const char* input, const char* output, char* encbuf, char* decbuf, int flag) {
off_t inputsize;
FILE* fp = NULL;
FILE* fpo;
if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) return base16384_err_invalid_file_name;
if(is_standard_io(input)) { // read from stdin
inputsize = 0;
FILE *fp = NULL, *fpo;
int errnobak = 0, is_stdin = is_standard_io(input);
base16384_err_t retval = base16384_err_ok;
if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) {
errno = EINVAL;
return base16384_err_invalid_file_name;
}
if(is_stdin) { // read from stdin
inputsize = _BASE16384_ENCBUFSZ;
fp = stdin;
} else inputsize = get_file_size(input);
if(inputsize < 0) {
if(inputsize <= 0) {
if(!inputsize) errno = EINVAL;
return base16384_err_get_file_size;
}
fpo = is_standard_io(output)?stdout:fopen(output, "wb");
if(!fpo) {
return base16384_err_fopen_output_file;
}
if(!inputsize || inputsize > BASE16384_ENCBUFSZ) { // stdin or big file, use encbuf & fread
inputsize = BASE16384_ENCBUFSZ-7;
if(flag&BASE16384_FLAG_DO_SUM_CHECK_FORCELY || inputsize >= _BASE16384_ENCBUFSZ) { // stdin or big file, use encbuf & fread
inputsize = _BASE16384_ENCBUFSZ;
#if defined _WIN32 || defined __cosmopolitan
}
#endif
if(!fp) fp = fopen(input, "rb");
if(!fp) {
return base16384_err_fopen_input_file;
goto_base16384_file_detailed_cleanup(encode, base16384_err_fopen_input_file, {});
}
size_t cnt = 0;
fputc(0xFE, fpo);
fputc(0xFF, fpo);
if(!(flag&BASE16384_FLAG_NOHEADER)) {
fputc(0xFE, fpo);
fputc(0xFF, fpo);
}
size_t cnt;
uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
while((cnt = fread(encbuf, sizeof(char), inputsize, fp)) > 0) {
int n = base16384_encode(encbuf, cnt, decbuf);
if(fwrite(decbuf, n, 1, fpo) <= 0) {
return base16384_err_write_file;
int n;
while(cnt%7) {
n = fread(encbuf+cnt, sizeof(char), 1, fp);
if(n > 0) cnt++;
else break;
}
if(do_sum_check(flag)) {
sum = calc_sum(sum, cnt, encbuf);
if(cnt%7) { // last encode
*(uint32_t*)(&encbuf[cnt]) = htobe32(sum);
#ifdef DEBUG
fprintf(stderr, "writesum: %08x\n", sum);
#endif
}
}
n = base16384_encode_unsafe(encbuf, cnt, decbuf);
if(n && fwrite(decbuf, n, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(encode, base16384_err_write_file, {});
}
}
if(!is_standard_io(output)) fclose(fpo);
if(!is_standard_io(input)) fclose(fp);
#if !defined _WIN32 && !defined __cosmopolitan
} else { // small file, use mmap & fwrite
int fd = open(input, O_RDONLY);
if(fd < 0) {
return base16384_err_open_input_file;
goto_base16384_file_detailed_cleanup(encode, base16384_err_open_input_file, {});
}
char *input_file = mmap(NULL, (size_t)inputsize+16, PROT_READ, MAP_PRIVATE, fd, 0);
if(input_file == MAP_FAILED) {
return base16384_err_map_input_file;
goto_base16384_file_detailed_cleanup(encode, base16384_err_map_input_file, close(fd));
}
fputc(0xFE, fpo);
fputc(0xFF, fpo);
int n = base16384_encode(input_file, (int)inputsize, decbuf);
if(fwrite(decbuf, n, 1, fpo) <= 0) {
return base16384_err_write_file;
if(!(flag&BASE16384_FLAG_NOHEADER)) {
fputc(0xFE, fpo);
fputc(0xFF, fpo);
}
int n = base16384_encode_safe(input_file, (int)inputsize, decbuf);
if(n && fwrite(decbuf, n, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(encode, base16384_err_write_file, {
munmap(input_file, (size_t)inputsize);
close(fd);
});
}
munmap(input_file, (size_t)inputsize);
if(!is_standard_io(output)) fclose(fpo);
close(fd);
}
#endif
return base16384_err_ok;
base16384_encode_file_detailed_cleanup:
if(fpo && !is_standard_io(output)) fclose(fpo);
if(fp && !is_stdin) fclose(fp);
if(errnobak) errno = errnobak;
return retval;
}
base16384_err_t base16384_encode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf) {
base16384_err_t base16384_encode_fp_detailed(FILE* input, FILE* output, char* encbuf, char* decbuf, int flag) {
if(!input) {
return base16384_err_fopen_input_file;
}
if(!output) {
return base16384_err_fopen_output_file;
}
off_t inputsize = BASE16384_ENCBUFSZ-7;
size_t cnt = 0;
fputc(0xFE, output);
fputc(0xFF, output);
if(!(flag&BASE16384_FLAG_NOHEADER)) {
fputc(0xFE, output);
fputc(0xFF, output);
}
off_t inputsize = _BASE16384_ENCBUFSZ;
uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
size_t cnt;
while((cnt = fread(encbuf, sizeof(char), inputsize, input)) > 0) {
int n = base16384_encode(encbuf, cnt, decbuf);
if(fwrite(decbuf, n, 1, output) <= 0) {
int n;
while(cnt%7) {
n = fread(encbuf+cnt, sizeof(char), 1, input);
if(n > 0) cnt++;
else break;
}
if(do_sum_check(flag)) {
sum = calc_sum(sum, cnt, encbuf);
if(cnt%7) { // last encode
*(uint32_t*)(&encbuf[cnt]) = htobe32(sum);
}
}
n = base16384_encode_unsafe(encbuf, cnt, decbuf);
if(n && fwrite(decbuf, n, 1, output) <= 0) {
return base16384_err_write_file;
}
}
return base16384_err_ok;
}
base16384_err_t base16384_encode_fd(int input, int output, char* encbuf, char* decbuf) {
base16384_err_t base16384_encode_fd_detailed(int input, int output, char* encbuf, char* decbuf, int flag) {
if(input < 0) {
return base16384_err_fopen_input_file;
}
if(output < 0) {
return base16384_err_fopen_output_file;
}
off_t inputsize = BASE16384_ENCBUFSZ-7;
off_t inputsize = _BASE16384_ENCBUFSZ;
size_t cnt = 0;
write(output, "\xfe\xff", 2);
uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
if(!(flag&BASE16384_FLAG_NOHEADER)) write(output, "\xfe\xff", 2);
while((cnt = read(input, encbuf, inputsize)) > 0) {
int n = base16384_encode(encbuf, cnt, decbuf);
if(write(output, decbuf, n) < n) {
int n;
while(cnt%7) {
n = read(input, encbuf+cnt, sizeof(char));
if(n > 0) cnt++;
else break;
}
if(do_sum_check(flag)) {
sum = calc_sum(sum, cnt, encbuf);
if(cnt%7) { // last encode
*(uint32_t*)(&encbuf[cnt]) = htobe32(sum);
}
}
n = base16384_encode_unsafe(encbuf, cnt, decbuf);
if(n && write(output, decbuf, n) < n) {
return base16384_err_write_file;
}
}
@@ -161,128 +229,232 @@ static inline int is_next_end(FILE* fp) {
return 0;
}
base16384_err_t base16384_decode_file(const char* input, const char* output, char* encbuf, char* decbuf) {
base16384_err_t base16384_decode_file_detailed(const char* input, const char* output, char* encbuf, char* decbuf, int flag) {
off_t inputsize;
FILE* fp = NULL;
FILE* fpo;
if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) return base16384_err_invalid_file_name;
if(is_standard_io(input)) { // read from stdin
inputsize = 0;
uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
base16384_err_t retval = base16384_err_ok;
int errnobak = 0, is_stdin = is_standard_io(input);
if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) {
errno = EINVAL;
return base16384_err_invalid_file_name;
}
if(is_stdin) { // read from stdin
inputsize = _BASE16384_DECBUFSZ;
fp = stdin;
} else inputsize = get_file_size(input);
if(inputsize < 0) {
if(inputsize <= 0) {
if(!inputsize) errno = EINVAL;
return base16384_err_get_file_size;
}
fpo = is_standard_io(output)?stdout:fopen(output, "wb");
if(!fpo) {
return base16384_err_fopen_output_file;
}
if(!inputsize || inputsize > BASE16384_DECBUFSZ) { // stdin or big file, use decbuf & fread
inputsize = BASE16384_DECBUFSZ/8*8;
int loop_count = 0;
if(inputsize >= _BASE16384_DECBUFSZ) { // stdin or big file, use decbuf & fread
if(!is_stdin) loop_count = inputsize/_BASE16384_DECBUFSZ;
inputsize = _BASE16384_DECBUFSZ;
#if defined _WIN32 || defined __cosmopolitan
}
#endif
if(!fp) fp = fopen(input, "rb");
if(!fp) {
return base16384_err_fopen_input_file;
goto_base16384_file_detailed_cleanup(decode, base16384_err_fopen_input_file, {});
}
int cnt = 0;
int end = 0;
rm_head(fp);
if(errno) {
goto_base16384_file_detailed_cleanup(decode, base16384_err_read_file, {});
}
int cnt, last_encbuf_cnt = 0, last_decbuf_cnt = 0, offset = 0;
size_t total_decoded_len = 0;
while((cnt = fread(decbuf, sizeof(char), inputsize, fp)) > 0) {
int n;
while(cnt%8) {
n = fread(decbuf+cnt, sizeof(char), 1, fp);
if(n > 0) cnt++;
else break;
}
int end;
if((end = is_next_end(fp))) {
decbuf[cnt++] = '=';
decbuf[cnt++] = end;
}
if(fwrite(encbuf, base16384_decode(decbuf, cnt, encbuf), 1, fpo) <= 0) {
return base16384_err_write_file;
if(errno) goto_base16384_file_detailed_cleanup(decode, base16384_err_read_file, {});
offset = decbuf[cnt-1];
last_decbuf_cnt = cnt;
cnt = base16384_decode_unsafe(decbuf, cnt, encbuf);
if(cnt && fwrite(encbuf, cnt, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(decode, base16384_err_write_file, {});
}
total_decoded_len += cnt;
if(do_sum_check(flag)) sum = calc_sum(sum, cnt, encbuf);
last_encbuf_cnt = cnt;
}
if(do_sum_check(flag)
&& (flag&BASE16384_FLAG_DO_SUM_CHECK_FORCELY || total_decoded_len >= _BASE16384_ENCBUFSZ)
&& last_decbuf_cnt > 2
&& decbuf[last_decbuf_cnt-2] == '='
&& check_sum(sum, *(uint32_t*)(&encbuf[last_encbuf_cnt]), offset)) {
errno = EINVAL;
goto_base16384_file_detailed_cleanup(decode, base16384_err_invalid_decoding_checksum, {});
}
if(!is_standard_io(output)) fclose(fpo);
if(!is_standard_io(input)) fclose(fp);
#if !defined _WIN32 && !defined __cosmopolitan
} else { // small file, use mmap & fwrite
int fd = open(input, O_RDONLY);
if(fd < 0) {
return base16384_err_open_input_file;
goto_base16384_file_detailed_cleanup(decode, base16384_err_open_input_file, {});
}
char *input_file = mmap(NULL, (size_t)inputsize+16, PROT_READ, MAP_PRIVATE, fd, 0);
if(input_file == MAP_FAILED) {
return base16384_err_map_input_file;
goto_base16384_file_detailed_cleanup(decode, base16384_err_map_input_file, close(fd));
}
int off = skip_offset(input_file);
if(fwrite(encbuf, base16384_decode(input_file+off, inputsize-off, encbuf), 1, fpo) <= 0) {
return base16384_err_write_file;
int n = skip_offset(input_file);
n = base16384_decode_safe(input_file+n, inputsize-n, encbuf);
if(n && fwrite(encbuf, n, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(decode, base16384_err_write_file, {
munmap(input_file, (size_t)inputsize);
close(fd);
});
}
munmap(input_file, (size_t)inputsize);
if(!is_standard_io(output)) fclose(fpo);
close(fd);
}
#endif
return base16384_err_ok;
base16384_decode_file_detailed_cleanup:
if(fpo && !is_standard_io(output)) fclose(fpo);
if(fp && !is_stdin) fclose(fp);
if(errnobak) errno = errnobak;
return retval;
}
base16384_err_t base16384_decode_fp(FILE* input, FILE* output, char* encbuf, char* decbuf) {
base16384_err_t base16384_decode_fp_detailed(FILE* input, FILE* output, char* encbuf, char* decbuf, int flag) {
if(!input) {
errno = EINVAL;
return base16384_err_fopen_input_file;
}
if(!output) {
errno = EINVAL;
return base16384_err_fopen_output_file;
}
off_t inputsize = BASE16384_DECBUFSZ/8*8;
int cnt = 0;
int end = 0;
off_t inputsize = _BASE16384_DECBUFSZ;
uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
rm_head(input);
if(errno) {
return base16384_err_read_file;
}
int cnt, last_encbuf_cnt = 0, last_decbuf_cnt = 0, offset = 0;
size_t total_decoded_len = 0;
while((cnt = fread(decbuf, sizeof(char), inputsize, input)) > 0) {
int n;
while(cnt%8) {
n = fread(decbuf+cnt, sizeof(char), 1, input);
if(n > 0) cnt++;
else break;
}
int end;
if((end = is_next_end(input))) {
decbuf[cnt++] = '=';
decbuf[cnt++] = end;
}
if(fwrite(encbuf, base16384_decode(decbuf, cnt, encbuf), 1, output) <= 0) {
if(errno) return base16384_err_read_file;
offset = decbuf[cnt-1];
last_decbuf_cnt = cnt;
cnt = base16384_decode_unsafe(decbuf, cnt, encbuf);
if(cnt && fwrite(encbuf, cnt, 1, output) <= 0) {
return base16384_err_write_file;
}
total_decoded_len += cnt;
if(do_sum_check(flag)) sum = calc_sum(sum, cnt, encbuf);
last_encbuf_cnt = cnt;
}
if(do_sum_check(flag)
&& (flag&BASE16384_FLAG_DO_SUM_CHECK_FORCELY || total_decoded_len >= _BASE16384_ENCBUFSZ)
&& last_decbuf_cnt > 2
&& decbuf[last_decbuf_cnt-2] == '='
&& check_sum(sum, *(uint32_t*)(&encbuf[last_encbuf_cnt]), offset)) {
errno = EINVAL;
return base16384_err_invalid_decoding_checksum;
}
return base16384_err_ok;
}
static inline int is_next_end_fd(int fd) {
char ch = 0;
read(fd, &ch, 1);
static inline uint16_t is_next_end_fd(int fd) {
uint8_t ch = 0;
if(read(fd, &ch, 1) != 1) return (uint16_t)EOF;
uint16_t ret = (uint16_t)ch & 0x00ff;
if(ch == '=') {
read(fd, &ch, 1);
if(read(fd, &ch, 1) != 1) return (uint16_t)EOF;
ret <<= 8;
ret |= (uint16_t)ch & 0x00ff;
}
return (int)ch;
return ret;
}
base16384_err_t base16384_decode_fd(int input, int output, char* encbuf, char* decbuf) {
base16384_err_t base16384_decode_fd_detailed(int input, int output, char* encbuf, char* decbuf, int flag) {
if(input < 0) {
errno = EINVAL;
return base16384_err_fopen_input_file;
}
if(output < 0) {
errno = EINVAL;
return base16384_err_fopen_output_file;
}
off_t inputsize = BASE16384_DECBUFSZ/8*8;
int cnt = 0;
int end = 0;
off_t inputsize = _BASE16384_DECBUFSZ;
uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
uint8_t remains[8];
decbuf[0] = 0;
if(read(input, decbuf, 2) < 2) {
if(read(input, remains, 2) != 2) {
return base16384_err_read_file;
}
if(decbuf[0] != (char)(0xfe)) cnt = 2;
while((end = read(input, decbuf+cnt, inputsize-cnt)) > 0 || cnt > 0) {
if(end > 0) {
cnt += end;
if((end = is_next_end_fd(input))) {
decbuf[cnt++] = '=';
decbuf[cnt++] = end;
end = 0;
} else end = 1;
} else end = 0;
cnt = base16384_decode(decbuf, cnt, encbuf);
if(write(output, encbuf, cnt) < cnt) {
int p = 0;
if(remains[0] != (uint8_t)(0xfe)) p = 2;
int n, last_encbuf_cnt = 0, last_decbuf_cnt = 0, offset = 0;
size_t total_decoded_len = 0;
while((n = read(input, decbuf+p, inputsize-p)) > 0) {
if(p) {
memcpy(decbuf, remains, p);
n += p;
p = 0;
}
int x;
while(n%8) {
x = read(input, decbuf+n, sizeof(char));
if(x > 0) n++;
else break;
}
uint16_t next = is_next_end_fd(input);
if(errno) {
return base16384_err_read_file;
}
if((uint16_t)(~next)) {
if(next&0xff00) {
decbuf[n++] = '=';
decbuf[n++] = (char)(next&0x00ff);
} else remains[p++] = (char)(next&0x00ff);
}
offset = decbuf[n-1];
last_decbuf_cnt = n;
n = base16384_decode_unsafe(decbuf, n, encbuf);
if(n && write(output, encbuf, n) != n) {
return base16384_err_write_file;
}
cnt = end;
total_decoded_len += n;
if(do_sum_check(flag)) sum = calc_sum(sum, n, encbuf);
last_encbuf_cnt = n;
}
if(do_sum_check(flag)
&& (flag&BASE16384_FLAG_DO_SUM_CHECK_FORCELY || total_decoded_len >= _BASE16384_ENCBUFSZ)
&& last_decbuf_cnt > 2
&& decbuf[last_decbuf_cnt-2] == '='
&& check_sum(sum, *(uint32_t*)(&encbuf[last_encbuf_cnt]), offset)) {
errno = EINVAL;
return base16384_err_invalid_decoding_checksum;
}
return base16384_err_ok;
}

16
test/CMakeLists.txt Normal file
View File

@@ -0,0 +1,16 @@
cmake_minimum_required(VERSION 2.8.12)
if(POLICY CMP0048)
  # CMP0048 NEW lets project() manage the VERSION variables
  cmake_policy(SET CMP0048 NEW)
endif()
project(base16384_test VERSION 1.0.0)

# Build one test executable per C file found under this directory, link it
# against the static library target base16384_s and register it as a test
# named do_<file name without extension>.
# NOTE(review): add_test() only has an effect when enable_testing() (or
# include(CTest)) has run in this or a parent directory - confirm.
file(GLOB_RECURSE C_FILES "*.c")
foreach(C_FILE ${C_FILES})
  get_filename_component(FILE_NAME "${C_FILE}" NAME_WE)
  message(STATUS "Add test ${FILE_NAME}")
  add_executable(${FILE_NAME} "${C_FILE}")
  # the library headers live one directory up; keep the path target-scoped
  target_include_directories(${FILE_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
  target_link_libraries(${FILE_NAME} PRIVATE base16384_s)
  add_test(NAME do_${FILE_NAME} COMMAND ${FILE_NAME})
endforeach()

91
test/coder_test.c Normal file
View File

@@ -0,0 +1,91 @@
/* test/coder_test.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "base16384.h"
// largest input length (in bytes) exercised by the round-trip loops
#define TEST_SIZE (4096)

// random source data; the +16 leaves room past index TEST_SIZE
char encbuf[TEST_SIZE+16];
// encoded form of up to TEST_SIZE bytes: 8 bytes per whole 7-byte group,
// up to 10 more bytes for a partial trailing group, plus the 16 bytes of
// slack that base16384_encode_len adds. The previous size dropped the
// partial-group term and was smaller than base16384_encode_len(TEST_SIZE).
char decbuf[TEST_SIZE/7*8+10+16];
// decoded result that is compared against encbuf
char tstbuf[TEST_SIZE+16];
/* loop_diff(target): walk indexes [start, end) and print, in hex, the bytes of
 * `target` at every position where encbuf and tstbuf differ.
 * A run of differing bytes is introduced once by " @<index>"; the first equal
 * byte after such a run prints " ..." instead.
 * Uses `i`, `n`, `start` and `end` from the expansion site; `n` must be
 * non-zero on entry so that the first difference prints its index. */
#define loop_diff(target) \
for(i = start; i < end; i++) { \
if (encbuf[i] != tstbuf[i]) { \
if(n) { \
fprintf(stderr, " @%d", i); \
n = 0; \
} \
fprintf(stderr, " %02x", (uint8_t)(target[i])); \
} else if(!n) { \
n = 1; \
fprintf(stderr, " ..."); \
} \
}
/* return_error(i, n): report a round-trip mismatch and return 1 from the
 * enclosing function.
 * Finds the first differing index below `i`, prints the loop number, the
 * decoded size `n` and that index, then dumps the differing bytes twice:
 * once from encbuf ("expect") and once from tstbuf ("got").
 * Both `i` and `n` are overwritten by the expansion. */
#define return_error(i, n) { \
int end = i; \
int start; \
for(start = 0; start < end; start++) { \
if(encbuf[start] != tstbuf[start]) break; \
} \
fprintf(stderr, "result mismatch @ loop %d, decsz: %d, first diff @ %d\n", i, n, start); \
fprintf(stderr, "expect"); \
n = 1; \
loop_diff(encbuf); \
fprintf(stderr, "\ngot "); \
n = 1; \
loop_diff(tstbuf); \
fputc('\n', stderr); \
return 1; \
}
/* test_batch(encode, decode): for every length i in 0..TEST_SIZE, encode the
 * first i bytes of encbuf into decbuf with base16384_<encode>, decode the
 * result into tstbuf with base16384_<decode>, and compare the first n bytes
 * (n = the length returned by the decoder) against encbuf.
 * Uses `i` and `n` from the expansion site.
 * NOTE(review): only n bytes are compared and n is never checked against i,
 * so a decoder returning too short a length would still pass - consider
 * asserting n == i. */
#define test_batch(encode, decode) \
fputs("testing base16384_"#encode"/base16384_"#decode"...\n", stderr); \
for(i = 0; i <= TEST_SIZE; i++) { \
n = base16384_##encode(encbuf, i, decbuf); \
n = base16384_##decode(decbuf, n, tstbuf); \
if (memcmp(encbuf, tstbuf, n)) return_error(i, n); \
}
/**
 * Fill encbuf with random data, then round-trip every input length through
 * all nine encoder/decoder combinations.
 * @return 0 when every combination round-trips, 1 (from test_batch) on the first mismatch
 */
int main(void) {
    int i, n;
    srand(time(NULL));
    for(i = 0; i <= TEST_SIZE; i += sizeof(int)) {
        // copy the random value bytewise: encbuf is a char array, so storing
        // an int through a cast pointer breaks the aliasing rules and assumes
        // an alignment that a char array does not guarantee
        const int value = rand();
        memcpy(&encbuf[i], &value, sizeof(value));
    }

    test_batch(encode, decode);
    test_batch(encode, decode_unsafe);
    test_batch(encode, decode_safe);

    test_batch(encode_unsafe, decode);
    test_batch(encode_unsafe, decode_unsafe);
    test_batch(encode_unsafe, decode_safe);

    test_batch(encode_safe, decode);
    test_batch(encode_safe, decode_unsafe);
    test_batch(encode_safe, decode_safe);

    return 0;
}

165
test/file_test.c Normal file
View File

@@ -0,0 +1,165 @@
/* test/file_test.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef _WIN32
#include <io.h>
#define ftruncate _chsize_s
#else
#define _POSIX1_SOURCE 2
#include <unistd.h>
#endif
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "base16384.h"
#include "binary.h"
#include "file_test.h"
/* Largest input-file length, in bytes, exercised by the loops below. */
#define TEST_SIZE (4096)
/* Random source data; truncated to the length under test on every iteration. */
#define TEST_INPUT_FILENAME "file_test_input.bin"
/* Receives the encoded form of the input file. */
#define TEST_OUTPUT_FILENAME "file_test_output.bin"
/* Receives the decoded output; its checksum is compared with the input's. */
#define TEST_VALIDATE_FILENAME "file_test_validate.bin"
/* Holds the random data written to the input file and is then passed to the
 * library as its encbuf argument. */
char encbuf[BASE16384_ENCBUFSZ];
/* Passed to the library as its decbuf argument. */
char decbuf[BASE16384_DECBUFSZ];
/* NOTE(review): not referenced by the visible code of this test. */
char tstbuf[BASE16384_ENCBUFSZ];
/* init_input_file(): fill encbuf with rand() output and (re)create
 * TEST_INPUT_FILENAME holding exactly those BASE16384_ENCBUFSZ bytes.
 *
 * Requires int i and FILE* fp in the caller's scope. On any I/O failure the
 * ok() checks print the reason and make the enclosing function return 1.
 *
 * The random words are copied with memcpy and the final copy is clamped to
 * the bytes that are left, so nothing is written past the end of encbuf when
 * BASE16384_ENCBUFSZ is not a multiple of sizeof(int), and no misaligned int
 * store is performed on the char array.
 */
#define init_input_file() \
    for(i = 0; i < BASE16384_ENCBUFSZ; i += sizeof(int)) { \
        int rnd = rand(); \
        size_t room = (size_t)(BASE16384_ENCBUFSZ - i); \
        memcpy(&encbuf[i], &rnd, room < sizeof(rnd) ? room : sizeof(rnd)); \
    } \
    fp = fopen(TEST_INPUT_FILENAME, "wb"); \
    ok(!fp, "fopen"); \
    ok(fwrite(encbuf, BASE16384_ENCBUFSZ, 1, fp) != 1, "fwrite"); \
    ok(fclose(fp), "fclose"); \
    fputs("input file created.\n", stderr);
/* test_file_detailed(flag): round-trip the input file through the path-based
 * API, base16384_encode_file_detailed / base16384_decode_file_detailed, with
 * the given flag for every length from TEST_SIZE down to 1.
 *
 * Each iteration truncates the input file to i bytes, encodes it into the
 * output file, decodes that into the validate file, and compares checksums.
 * Requires fp, fd, i and err in the caller's scope; any failure makes the
 * enclosing function return 1.
 */
#define test_file_detailed(flag) \
    fputs("testing base16384_en/decode_file with flag "#flag"...\n", stderr); \
    init_input_file(); \
    i = TEST_SIZE; \
    while(i > 0) { \
        /* shrink the input to the length under test; only the path is used below */ \
        reset_and_truncate(fd, i); \
        loop_ok(close(fd) != 0, i, "close"); \
        \
        /* input -> output */ \
        err = base16384_encode_file_detailed(TEST_INPUT_FILENAME, TEST_OUTPUT_FILENAME, encbuf, decbuf, flag); \
        base16384_loop_ok(err); \
        \
        /* output -> validate */ \
        err = base16384_decode_file_detailed(TEST_OUTPUT_FILENAME, TEST_VALIDATE_FILENAME, encbuf, decbuf, flag); \
        base16384_loop_ok(err); \
        \
        validate_result(); \
        i--; \
    }
/* test_fp_detailed(flag): round-trip the input file through the FILE*-based
 * API, base16384_encode_fp_detailed / base16384_decode_fp_detailed, with the
 * given flag for every length from TEST_SIZE down to 1.
 *
 * The encoded stream is written to a "wb+" stream which is rewound and then
 * read back by the decoder. Requires fp, fd, i and err in the caller's scope;
 * any failure makes the enclosing function return 1.
 */
#define test_fp_detailed(flag) \
    fputs("testing base16384_en/decode_fp with flag "#flag"...\n", stderr); \
    init_input_file(); \
    i = TEST_SIZE; \
    while(i > 0) { \
        reset_and_truncate(fd, i); \
        loop_ok(close(fd) != 0, i, "close"); \
        \
        FILE* src_fp = fopen(TEST_INPUT_FILENAME, "rb"); \
        loop_ok(src_fp == NULL, i, "fopen"); \
        \
        FILE* enc_fp = fopen(TEST_OUTPUT_FILENAME, "wb+"); \
        loop_ok(enc_fp == NULL, i, "fopen"); \
        \
        /* input -> output */ \
        err = base16384_encode_fp_detailed(src_fp, enc_fp, encbuf, decbuf, flag); \
        base16384_loop_ok(err); \
        \
        loop_ok(fclose(src_fp) != 0, i, "fclose"); \
        \
        FILE* dec_fp = fopen(TEST_VALIDATE_FILENAME, "wb"); \
        loop_ok(dec_fp == NULL, i, "fopen"); \
        \
        /* read the encoded stream back from its beginning */ \
        rewind(enc_fp); \
        \
        /* output -> validate */ \
        err = base16384_decode_fp_detailed(enc_fp, dec_fp, encbuf, decbuf, flag); \
        base16384_loop_ok(err); \
        \
        loop_ok(fclose(enc_fp) != 0, i, "fclose"); \
        loop_ok(fclose(dec_fp) != 0, i, "fclose"); \
        \
        validate_result(); \
        i--; \
    }
/* test_fd_detailed(flag): round-trip the input file through the
 * descriptor-based API, base16384_encode_fd_detailed /
 * base16384_decode_fd_detailed, with the given flag for every length from
 * TEST_SIZE down to 1.
 *
 * Requires fp, fd, i and err in the caller's scope; any failure makes the
 * enclosing function return 1.
 *
 * Differences from the previous revision:
 *  - `flag` is now forwarded to the library; a literal 0 used to be passed,
 *    so every flag combination exercised the same code path.
 *  - open() with O_CREAT is given an explicit mode. 0600 is owner read/write,
 *    which has the same value as _S_IREAD|_S_IWRITE on Windows.
 *  - open() failure is detected as a negative descriptor; `!fd` only matched
 *    descriptor 0, which is a valid descriptor.
 */
#define test_fd_detailed(flag) \
    fputs("testing base16384_en/decode_fd with flag "#flag"...\n", stderr); \
    init_input_file(); \
    for(i = TEST_SIZE; i > 0; i--) { \
        reset_and_truncate(fd, i); \
        \
        int fdout = open(TEST_OUTPUT_FILENAME, O_RDWR|O_TRUNC|O_CREAT|O_APPEND, 0600); \
        loop_ok(fdout < 0, i, "open"); \
        \
        /* input -> output */ \
        err = base16384_encode_fd_detailed(fd, fdout, encbuf, decbuf, flag); \
        base16384_loop_ok(err); \
        loop_ok(close(fd), i, "close"); \
        \
        int fdval = open(TEST_VALIDATE_FILENAME, O_WRONLY|O_TRUNC|O_CREAT, 0600); \
        loop_ok(fdval < 0, i, "open"); \
        \
        /* read the encoded data back from its beginning; lseek returns the \
         * new offset, so 0 means success here */ \
        loop_ok(lseek(fdout, 0, SEEK_SET), i, "lseek"); \
        \
        /* output -> validate */ \
        err = base16384_decode_fd_detailed(fdout, fdval, encbuf, decbuf, flag); \
        base16384_loop_ok(err); \
        \
        loop_ok(close(fdout), i, "close"); \
        loop_ok(close(fdval), i, "close"); \
        \
        validate_result(); \
    }
/* test_detailed(name): run test_<name>_detailed once for each of the eight
 * combinations of BASE16384_FLAG_NOHEADER, BASE16384_FLAG_SUM_CHECK_ON_REMAIN
 * and BASE16384_FLAG_DO_SUM_CHECK_FORCELY: no flag, each flag alone, each
 * pair, and all three together. `name` is one of file, fp or fd.
 */
#define test_detailed(name) \
test_##name##_detailed(0); \
\
test_##name##_detailed(BASE16384_FLAG_NOHEADER); \
test_##name##_detailed(BASE16384_FLAG_SUM_CHECK_ON_REMAIN); \
test_##name##_detailed(BASE16384_FLAG_DO_SUM_CHECK_FORCELY); \
\
test_##name##_detailed(BASE16384_FLAG_NOHEADER|BASE16384_FLAG_SUM_CHECK_ON_REMAIN); \
test_##name##_detailed(BASE16384_FLAG_NOHEADER|BASE16384_FLAG_DO_SUM_CHECK_FORCELY); \
\
test_##name##_detailed(BASE16384_FLAG_SUM_CHECK_ON_REMAIN|BASE16384_FLAG_DO_SUM_CHECK_FORCELY); \
\
test_##name##_detailed(BASE16384_FLAG_NOHEADER|BASE16384_FLAG_SUM_CHECK_ON_REMAIN|BASE16384_FLAG_DO_SUM_CHECK_FORCELY);
/* remove_test_files(): delete the three scratch files used by this test.
 * The results of remove() are ignored. */
#define remove_test_files() \
    do { \
        remove(TEST_INPUT_FILENAME); \
        remove(TEST_OUTPUT_FILENAME); \
        remove(TEST_VALIDATE_FILENAME); \
    } while(0)
/* Entry point of the file test: runs every flag combination against the
 * path-based, FILE*-based and descriptor-based APIs, then deletes the scratch
 * files. Returns 0 on success; the test macros return 1 from here on the
 * first failure, in which case the scratch files are left in place.
 */
int main() {
    /* fp, fd, i and err are used by name inside the test macros */
    base16384_err_t err;
    FILE* fp;
    int i, fd;
    srand((unsigned int)time(NULL));
    test_detailed(file);
    test_detailed(fp);
    test_detailed(fd);
    remove_test_files();
    return 0;
}

85
test/file_test.h Normal file
View File

@@ -0,0 +1,85 @@
#ifndef _FILE_TEST_H_
#define _FILE_TEST_H_
/* test/file_test.h
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* ok(has_failed, reason): when has_failed is non-zero, print `reason` together
 * with the current errno text via perror() and make the enclosing function
 * return 1. Wrapped in do/while(0) so that the invocation plus its trailing
 * semicolon forms a single statement. */
#define ok(has_failed, reason) \
    do { \
        if (has_failed) { \
            perror(reason); \
            return 1; \
        } \
    } while(0)
/* loop_ok(has_failed, i, reason): like ok(), but the perror() output is
 * prefixed with "loop @<i>: " to identify the failing iteration. Makes the
 * enclosing function return 1 when has_failed is non-zero. */
#define loop_ok(has_failed, i, reason) \
    do { \
        if (has_failed) { \
            fprintf(stderr, "loop @%d: ", i); \
            perror(reason); \
            return 1; \
        } \
    } while(0)
/* reset_and_truncate(fd, i): open TEST_INPUT_FILENAME read/write into fd,
 * position it at offset 0 and truncate the file to i bytes. The descriptor is
 * left open for the caller. Any failure is reported with the loop index and
 * makes the enclosing function return 1.
 *
 * open() signals failure with -1, so the check is `fd < 0`; testing `!fd`
 * never caught a failed open and would have rejected a valid descriptor 0.
 */
#define reset_and_truncate(fd, i) { \
    fd = open(TEST_INPUT_FILENAME, O_RDWR); \
    loop_ok(fd < 0, i, "open"); \
    /* lseek returns the resulting offset, so 0 means success here */ \
    loop_ok(lseek(fd, 0, SEEK_SET), i, "lseek"); \
    loop_ok(ftruncate(fd, i), i, "ftruncate"); \
}
/* base16384_loop_ok(err): when err is non-zero, print "loop @<i>: " followed
 * by the library's description of err via base16384_perror() and make the
 * enclosing function return 1. Uses the caller's variable named i. */
#define base16384_loop_ok(err) \
    do { \
        if ((err) != 0) { \
            fprintf(stderr, "loop @%d: ", i); \
            base16384_perror(err); \
            return 1; \
        } \
    } while(0)
/* validate_result(): compare TEST_INPUT_FILENAME with TEST_VALIDATE_FILENAME
 * by checksum and make the enclosing function return 1 when they differ.
 *
 * Each file is read as a sequence of 8-byte words that are summed into a
 * uint64_t; a final partial word is padded with zero bytes. Requires FILE* fp
 * and int i in the caller's scope and declares buf, sum_input and
 * sum_validate in the scope where it is expanded.
 *
 * buf is cleared BEFORE each read. Clearing it after the read, as was done
 * previously, discarded the data just read, so both sums were built from
 * zeros and the comparison could not detect a mismatch.
 */
#define validate_result() \
    uint64_t buf, sum_input = 0, sum_validate = 0; \
    fp = fopen(TEST_INPUT_FILENAME, "rb"); { \
        loop_ok(!fp, i, "fopen"); \
        size_t cnt; \
        while (buf = 0, (cnt = fread(&buf, 1, sizeof(buf), fp)) > 0) { \
            /* top up a short read byte by byte until the word is full or EOF */ \
            while(cnt < sizeof(buf)) { \
                if (fread((uint8_t*)(&buf)+cnt, 1, 1, fp) != 1) break; \
                cnt++; \
            } \
            sum_input += buf; \
        } \
    } fclose(fp); \
    fp = fopen(TEST_VALIDATE_FILENAME, "rb"); { \
        loop_ok(!fp, i, "fopen"); \
        size_t cnt; \
        while (buf = 0, (cnt = fread(&buf, 1, sizeof(buf), fp)) > 0) { \
            while(cnt < sizeof(buf)) { \
                if (fread((uint8_t*)(&buf)+cnt, 1, 1, fp) != 1) break; \
                cnt++; \
            } \
            sum_validate += buf; \
        } \
    } fclose(fp); \
    if (sum_input != sum_validate) { \
        fprintf(stderr, "loop @%d, expect: %016llx, got: %016llx: ", i, (unsigned long long)sum_input, (unsigned long long)sum_validate); \
        fputs(TEST_INPUT_FILENAME " and " TEST_VALIDATE_FILENAME " mismatch.", stderr); \
        return 1; \
    }
#endif

140
test/wrap_test.c Normal file
View File

@@ -0,0 +1,140 @@
/* test/wrap_test.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef _WIN32
#include <io.h>
#define ftruncate _chsize_s
#else
#define _POSIX1_SOURCE 2
#include <unistd.h>
#endif
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "base16384.h"
#include "binary.h"
#include "file_test.h"
/* Largest input-file length, in bytes, exercised by the loops in main(). */
#define TEST_SIZE (4096)
/* Random source data; truncated to the length under test on every iteration. */
#define TEST_INPUT_FILENAME "wrap_test_input.bin"
/* Receives the encoded form of the input file. */
#define TEST_OUTPUT_FILENAME "wrap_test_output.bin"
/* Receives the decoded output; its checksum is compared with the input's. */
#define TEST_VALIDATE_FILENAME "wrap_test_validate.bin"
/* Holds the random data written to the input file and is then passed to the
 * library as its encbuf argument. */
char encbuf[BASE16384_ENCBUFSZ];
/* Passed to the library as its decbuf argument. */
char decbuf[BASE16384_DECBUFSZ];
/* NOTE(review): not referenced by the visible code of this test. */
char tstbuf[BASE16384_ENCBUFSZ];
/* init_input_file(): fill encbuf with rand() output and (re)create
 * TEST_INPUT_FILENAME holding exactly those BASE16384_ENCBUFSZ bytes.
 *
 * Requires int i and FILE* fp in the caller's scope. On any I/O failure the
 * ok() checks print the reason and make the enclosing function return 1.
 *
 * The random words are copied with memcpy and the final copy is clamped to
 * the bytes that are left, so nothing is written past the end of encbuf when
 * BASE16384_ENCBUFSZ is not a multiple of sizeof(int), and no misaligned int
 * store is performed on the char array.
 */
#define init_input_file() \
    for(i = 0; i < BASE16384_ENCBUFSZ; i += sizeof(int)) { \
        int rnd = rand(); \
        size_t room = (size_t)(BASE16384_ENCBUFSZ - i); \
        memcpy(&encbuf[i], &rnd, room < sizeof(rnd) ? room : sizeof(rnd)); \
    } \
    fp = fopen(TEST_INPUT_FILENAME, "wb"); \
    ok(!fp, "fopen"); \
    ok(fwrite(encbuf, BASE16384_ENCBUFSZ, 1, fp) != 1, "fwrite"); \
    ok(fclose(fp), "fclose"); \
    fputs("input file created.\n", stderr);
/* Entry point of the wrapper test: round-trips the input file through the
 * flag-less wrappers base16384_{en,de}code_{file,fp,fd} for every length from
 * TEST_SIZE down to 1 and compares the checksum of the decoded file with that
 * of the input. Returns 0 on success and 1 on the first failure, in which
 * case the scratch files are left in place.
 *
 * In the descriptor-based section open() with O_CREAT is given an explicit
 * mode (0600, owner read/write) and failure is detected as a negative
 * descriptor; `!fd` only matched descriptor 0, which is a valid descriptor.
 */
int main() {
    srand(time(NULL));
    /* fp, fd, i and err are used by name inside the helper macros */
    FILE* fp;
    int fd, i;
    base16384_err_t err;

    /* path-based API */
    fputs("testing base16384_en/decode_file...\n", stderr);
    init_input_file();
    for(i = TEST_SIZE; i > 0; i--) {
        reset_and_truncate(fd, i);
        loop_ok(close(fd), i, "close");
        err = base16384_encode_file(TEST_INPUT_FILENAME, TEST_OUTPUT_FILENAME, encbuf, decbuf);
        base16384_loop_ok(err);
        err = base16384_decode_file(TEST_OUTPUT_FILENAME, TEST_VALIDATE_FILENAME, encbuf, decbuf);
        base16384_loop_ok(err);
        validate_result();
    }

    /* FILE*-based API */
    fputs("testing base16384_en/decode_fp...\n", stderr);
    init_input_file();
    for(i = TEST_SIZE; i > 0; i--) {
        reset_and_truncate(fd, i);
        loop_ok(close(fd), i, "close");
        FILE* fpin = fopen(TEST_INPUT_FILENAME, "rb");
        loop_ok(!fpin, i, "fopen");
        FILE* fpout = fopen(TEST_OUTPUT_FILENAME, "wb+");
        loop_ok(!fpout, i, "fopen");
        err = base16384_encode_fp(fpin, fpout, encbuf, decbuf);
        base16384_loop_ok(err);
        loop_ok(fclose(fpin), i, "fclose");
        FILE* fpval = fopen(TEST_VALIDATE_FILENAME, "wb");
        loop_ok(!fpval, i, "fopen");
        /* read the encoded stream back from its beginning */
        rewind(fpout);
        err = base16384_decode_fp(fpout, fpval, encbuf, decbuf);
        base16384_loop_ok(err);
        loop_ok(fclose(fpout), i, "fclose");
        loop_ok(fclose(fpval), i, "fclose");
        validate_result();
    }

    /* descriptor-based API */
    fputs("testing base16384_en/decode_fd...\n", stderr);
    init_input_file();
    for(i = TEST_SIZE; i > 0; i--) {
        reset_and_truncate(fd, i);
        int fdout = open(TEST_OUTPUT_FILENAME, O_RDWR|O_TRUNC|O_CREAT|O_APPEND, 0600);
        loop_ok(fdout < 0, i, "open");
        err = base16384_encode_fd(fd, fdout, encbuf, decbuf);
        base16384_loop_ok(err);
        loop_ok(close(fd), i, "close");
        int fdval = open(TEST_VALIDATE_FILENAME, O_WRONLY|O_TRUNC|O_CREAT, 0600);
        loop_ok(fdval < 0, i, "open");
        /* lseek returns the resulting offset, so 0 means success here */
        loop_ok(lseek(fdout, 0, SEEK_SET), i, "lseek");
        err = base16384_decode_fd(fdout, fdval, encbuf, decbuf);
        base16384_loop_ok(err);
        loop_ok(close(fdout), i, "close");
        loop_ok(close(fdval), i, "close");
        validate_result();
    }

    remove(TEST_INPUT_FILENAME);
    remove(TEST_OUTPUT_FILENAME);
    remove(TEST_VALIDATE_FILENAME);
    return 0;
}

38
wrap.c Normal file
View File

@@ -0,0 +1,38 @@
/* wrap.c
* This file is part of the base16384 distribution (https://github.com/fumiama/base16384).
* Copyright (c) 2022-2024 Fumiama Minamoto.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "base16384.h"
/* Parameter list shared by every flag-less wrapper: an input and an output of
 * the same `type`, followed by the two work buffers. */
#define base16384_typed_params(type) type input, type output, char* encbuf, char* decbuf
/* BASE16384_WRAP_DECL(method, name, type): define
 * base16384_<method>_<name>(), which forwards its arguments unchanged to
 * base16384_<method>_<name>_detailed() with the flag argument set to 0 and
 * returns that call's result. */
#define BASE16384_WRAP_DECL(method, name, type) \
    base16384_err_t base16384_##method##_##name(base16384_typed_params(type)) { \
        base16384_err_t result = base16384_##method##_##name##_detailed(input, output, encbuf, decbuf, 0); \
        return result; \
    }
/* encoders: by path, by FILE*, by descriptor */
BASE16384_WRAP_DECL(encode, file, const char*)
BASE16384_WRAP_DECL(encode, fp, FILE*)
BASE16384_WRAP_DECL(encode, fd, int)
/* decoders: by path, by FILE*, by descriptor */
BASE16384_WRAP_DECL(decode, file, const char*)
BASE16384_WRAP_DECL(decode, fp, FILE*)
BASE16384_WRAP_DECL(decode, fd, int)
/* the helper macros are private to this file */
#undef BASE16384_WRAP_DECL
#undef base16384_typed_params