1
0
mirror of https://github.com/fumiama/base16384.git synced 2026-06-05 02:00:31 +08:00

feat(coder): add safe encode/decode

This commit is contained in:
源文雨
2024-04-04 23:58:05 +09:00
parent b451127638
commit 2622a5ad69
8 changed files with 435 additions and 29 deletions

View File

@@ -15,10 +15,19 @@ jobs:
sudo apt-get update
sudo apt-get install -y gcc cmake
- name: Build and Run Tests
- name: Build and Run 64bit Tests
run: |
mkdir build
cd build
cmake -DBUILD=test ..
make
make test || ctest --rerun-failed --output-on-failure
- name: Build and Run 32bit Tests
run: |
rm -rf build
mkdir build
cd build
cmake -DBUILD=test -DFORCE_32BIT=1 ..
make
make test || ctest --rerun-failed --output-on-failure

View File

@@ -17,7 +17,7 @@ endif ()
add_executable(base16384_b base16384.c)
IF (CMAKE_SIZEOF_VOID_P EQUAL 8)
IF ((NOT FORCE_32BIT) AND CMAKE_SIZEOF_VOID_P EQUAL 8)
message(STATUS "Adding 64bit libraries...")
add_definitions(-DIS_64BIT_PROCESSOR)
add_library(base16384 SHARED file.c base1464.c)

View File

@@ -16,8 +16,113 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <string.h>
#include "binary.h"
/**
 * Scratch word used when a group has to be assembled or split byte by byte:
 * `buf` and `val` overlay the same 4 bytes.
 */
typedef union remainder {
	uint8_t buf[4];
	uint32_t val;
} remainder;
/**
 * @brief encode dlen bytes of data into buf, working on 32-bit words
 *
 * Every complete 7-byte input group is written as 8 output bytes (two 32-bit
 * words). A trailing group of 1..6 bytes is written as 2..8 bytes followed by
 * the two marker bytes '=' and the number of trailing input bytes.
 *
 * @param data input bytes
 * @param dlen number of input bytes
 * @param buf output buffer; writes stay within the first outlen bytes
 * @return outlen, the encoded length including the 2-byte marker if present
 */
int base16384_encode_safe(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
switch(offset) { // including the 2 bytes taken by the offset marker
case 0: break;
case 1: outlen += 4; break;
case 2:
case 3: outlen += 6; break;
case 4:
case 5: outlen += 8; break;
case 6: outlen += 10; break;
default: break;
}
uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0;
int32_t i = 0;
// Groups that are followed by at least one more input byte: the two 4-byte
// loads below (8 bytes in total) stay inside data because i + 7 < dlen.
for(; i < dlen - 7; i += 7) {
register uint32_t sum = 0;
register uint32_t shift = htobe32(*(uint32_t*)(data+i));
// first output word: 14 bits in each 16-bit half, then 0x4e00 added to each half
sum |= (shift>>2) & 0x3fff0000;
sum |= (shift>>4) & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
// carry the low 4 bits of the first load into the top of the second word
shift <<= 26;
shift &= 0x3c000000;
sum = 0;
shift |= (htobe32(*(uint32_t*)(data+i+4))>>6)&0x03fffffc;
sum |= shift & 0x3fff0000;
shift >>= 2;
sum |= shift & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
}
remainder valbuf;
// Exactly one full group is left: its last 3 bytes are copied with memcpy
// instead of a 4-byte load, so nothing past data + dlen is read.
if(dlen - i == 7) {
register uint32_t sum = 0;
register uint32_t shift = htobe32(*(uint32_t*)(data+i));
sum |= (shift>>2) & 0x3fff0000;
sum |= (shift>>4) & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
shift <<= 26;
shift &= 0x3c000000;
sum = 0;
// valbuf.buf[3] is left unset; after the >>6 its bits fall outside the
// 0x03fffffc mask, so it does not reach the output
memcpy(valbuf.buf, data+i+4, 3);
shift |= (htobe32(valbuf.val)>>6)&0x03fffffc;
sum |= shift & 0x3fff0000;
shift >>= 2;
sum |= shift & 0x00003fff;
sum += 0x4e004e00;
vals[n++] = be32toh(sum);
return outlen;
}
// 1..6 trailing bytes: the words are built byte by byte; o counts the
// trailing bytes that have not been consumed yet.
uint8_t o = offset;
if(o--) {
register uint32_t sum = 0x0000003f & (data[i] >> 2);
sum |= ((uint32_t)data[i] << 14) & 0x0000c000;
if(o--) {
sum |= ((uint32_t)data[i + 1] << 6) & 0x00003f00;
sum |= ((uint32_t)data[i + 1] << 20) & 0x00300000;
if(o--) {
sum |= ((uint32_t)data[i + 2] << 12) & 0x000f0000;
sum |= ((uint32_t)data[i + 2] << 28) & 0xf0000000;
if(o--) {
// a 4th byte completes the first word; store it and start the second
sum |= ((uint32_t)data[i + 3] << 20) & 0x0f000000;
sum += 0x004e004e;
// safe, because it will never go over 0x3dxx
#ifdef WORDS_BIGENDIAN
vals[n++] = __builtin_bswap32(sum);
#else
vals[n++] = sum;
#endif
sum = (((uint32_t)data[i + 3] << 2)) & 0x0000003c;
if(o--) {
sum |= (((uint32_t)data[i + 4] >> 6)) & 0x00000003;
sum |= ((uint32_t)data[i + 4] << 10) & 0x0000fc00;
if(o--) {
sum |= ((uint32_t)data[i + 5] << 2) & 0x00000300;
sum |= ((uint32_t)data[i + 5] << 16) & 0x003f0000;
}
}
}
}
}
sum += 0x004e004e;
// safe, because it will never go over 0x3dxx
#ifdef WORDS_BIGENDIAN
vals[n] = __builtin_bswap32(sum);
#else
vals[n] = sum;
#endif
// marker: '=' followed by the number of trailing input bytes; these two
// bytes are written last and may overwrite the end of the word stored above
buf[outlen - 2] = '=';
buf[outlen - 1] = offset;
}
return outlen;
}
int base16384_encode(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
@@ -31,9 +136,6 @@ int base16384_encode(const char* data, int dlen, char* buf) {
case 6: outlen += 10; break;
default: break;
}
#ifdef DEBUG
printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8);
#endif
uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0;
int32_t i = 0;
@@ -109,9 +211,6 @@ int base16384_encode_unsafe(const char* data, int dlen, char* buf) {
case 6: outlen += 10; break;
default: break;
}
#ifdef DEBUG
printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8);
#endif
uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0;
int32_t i = 0;
@@ -139,6 +238,102 @@ int base16384_encode_unsafe(const char* data, int dlen, char* buf) {
return outlen;
}
/**
 * @brief decode dlen bytes of data into buf, working on 32-bit words
 *
 * If the last two input bytes are '=' followed by a count, the count gives the
 * number of bytes in the final, incomplete 7-byte group. Every 8 input bytes
 * before that decode to 7 output bytes.
 *
 * Input shorter than 2 bytes cannot carry a marker and decodes to nothing.
 * Apart from that and the bounded scratch copies below, the input is assumed
 * to be well formed; a marker count outside 0..6 is not rejected.
 *
 * @param data encoded input
 * @param dlen number of input bytes
 * @param buf output buffer
 * @return the decoded length
 */
int base16384_decode_safe(const char* data, int dlen, char* buf) {
	int outlen = dlen;
	int offset = 0;
	// Check dlen first: with fewer than 2 bytes, data[dlen-2] would be read
	// from before the start of the input.
	if(dlen >= 2 && data[dlen-2] == '=') {
		offset = data[dlen-1];
		switch(offset) { // including the 2 bytes taken by the offset marker
			case 0: break;
			case 1: outlen -= 4; break;
			case 2:
			case 3: outlen -= 6; break;
			case 4:
			case 5: outlen -= 8; break;
			case 6: outlen -= 10; break;
			default: break;
		}
	}
	outlen = outlen / 8 * 7 + offset;
	const uint32_t* vals = (const uint32_t*)data;
	uint32_t n = 0;
	int32_t i = 0;
	// Groups followed by at least one more output byte: the two 4-byte stores
	// (8 bytes in total) stay inside the first outlen bytes of buf.
	for(; i < outlen - 7; i+=7) { // n advances by 2 per iteration
		register uint32_t sum = 0;
		register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00;
		shift <<= 2;
		sum |= shift & 0xfffc0000;
		shift <<= 2;
		sum |= shift & 0x0003fff0;
		shift = htobe32(vals[n++]) - 0x4e004e00;
		sum |= shift >> 26;
		*(uint32_t*)(buf+i) = be32toh(sum);
		sum = 0;
		shift <<= 6;
		sum |= shift & 0xffc00000;
		shift <<= 2;
		sum |= shift & 0x003fff00;
		*(uint32_t*)(buf+i+4) = be32toh(sum);
	}
	remainder valbuf;
	valbuf.val = 0; // bytes not covered by a partial copy below are defined
	if(outlen - i == 7) {
		// Last full group: the final 3 bytes go through valbuf so that only
		// 7 bytes are written to buf.
		register uint32_t sum = 0;
		register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00;
		shift <<= 2;
		sum |= shift & 0xfffc0000;
		shift <<= 2;
		sum |= shift & 0x0003fff0;
		shift = htobe32(vals[n]) - 0x4e004e00;
		sum |= shift >> 26;
		*(uint32_t*)(buf+i) = be32toh(sum);
		sum = 0;
		shift <<= 6;
		sum |= shift & 0xffc00000;
		shift <<= 2;
		sum |= shift & 0x003fff00;
		valbuf.val = be32toh(sum);
		memcpy(buf+i+4, valbuf.buf, 3);
	} else if(offset--) {
		// Incomplete final group: copy only the input bytes that precede the
		// marker, never more than the scratch word holds.
		int cnt = dlen-2-(int)n*(int)sizeof(uint32_t);
		if (cnt > (int)sizeof(valbuf.buf)) cnt = (int)sizeof(valbuf.buf);
		if (cnt < 0) cnt = 0;
		memcpy(valbuf.buf, &vals[n], cnt);
		n++;
		#ifdef WORDS_BIGENDIAN
			register uint32_t sum = __builtin_bswap32(valbuf.val);
		#else
			register uint32_t sum = valbuf.val;
		#endif
		sum -= 0x0000004e;
		buf[i++] = ((sum & 0x0000003f) << 2) | ((sum & 0x0000c000) >> 14);
		if(offset--) {
			sum -= 0x004e0000;
			buf[i++] = ((sum & 0x00003f00) >> 6) | ((sum & 0x00300000) >> 20);
			if(offset--) {
				buf[i++] = ((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28);
				if(offset--) {
					buf[i] = (sum & 0x0f000000) >> 20;
					// second word of the tail, bounded the same way as the first
					cnt = dlen-2-(int)n*(int)sizeof(uint32_t);
					if (cnt > (int)sizeof(valbuf.buf)) cnt = (int)sizeof(valbuf.buf);
					if (cnt < 0) cnt = 0;
					memcpy(valbuf.buf, &vals[n], cnt);
					#ifdef WORDS_BIGENDIAN
						sum = __builtin_bswap32(valbuf.val);
					#else
						sum = valbuf.val;
					#endif
					sum -= 0x0000004e;
					buf[i++] |= (sum & 0x0000003c) >> 2;
					if(offset--) {
						buf[i++] = ((sum & 0x00000003) << 6) | ((sum & 0x0000fc00) >> 10);
						if(offset--) {
							sum -= 0x004e0000;
							buf[i] = ((sum & 0x00000300) >> 2) | ((sum & 0x003f0000) >> 16);
						}
					}
				}
			}
		}
	}
	return outlen;
}
int base16384_decode(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;

View File

@@ -16,8 +16,107 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <string.h>
#include "binary.h"
/**
 * Scratch word used when a group has to be assembled or split byte by byte:
 * `buf` and `val` overlay the same 8 bytes.
 */
typedef union remainder {
	uint8_t buf[8];
	uint64_t val;
} remainder;
/**
 * @brief encode dlen bytes of data into buf, working on 64-bit words
 *
 * Every complete 7-byte input group is written as one 8-byte word. A trailing
 * group of 1..6 bytes is written as 2..8 bytes followed by the two marker
 * bytes '=' and the number of trailing input bytes.
 *
 * @param data input bytes; nothing past data + dlen is read
 * @param dlen number of input bytes
 * @param buf output buffer; writes stay within the first outlen bytes
 * @return outlen, the encoded length including the 2-byte marker if present
 */
int base16384_encode_safe(const char* data, int dlen, char* buf) {
	int outlen = dlen / 7 * 8;
	int offset = dlen % 7;
	switch(offset) { // including the 2 bytes taken by the offset marker
		case 0: break;
		case 1: outlen += 4; break;
		case 2:
		case 3: outlen += 6; break;
		case 4:
		case 5: outlen += 8; break;
		case 6: outlen += 10; break;
		default: break;
	}
	#ifdef DEBUG
		// arguments are cast so that they match the %llu / %u conversions
		printf("outlen: %llu, offset: %u, malloc: %llu\n", (unsigned long long)outlen, (unsigned)offset, (unsigned long long)(outlen + 8));
	#endif
	uint64_t* vals = (uint64_t*)buf;
	uint64_t n = 0;
	int64_t i = 0;
	// Groups that are followed by at least one more input byte: the 8-byte
	// load stays inside data because i + 7 < dlen. Only the first 7 of the
	// loaded bytes contribute to the output word.
	for(; i < dlen - 7; i += 7) {
		register uint64_t sum = 0;
		register uint64_t shift = htobe64(*(uint64_t*)(data+i))>>2;
		sum |= shift & 0x3fff000000000000;
		shift >>= 2;
		sum |= shift & 0x00003fff00000000;
		shift >>= 2;
		sum |= shift & 0x000000003fff0000;
		shift >>= 2;
		sum |= shift & 0x0000000000003fff;
		sum += 0x4e004e004e004e00; // 0x4e00 added to each 16-bit unit
		vals[n++] = be64toh(sum);
		#ifdef DEBUG
			printf("i: %llu, add sum: %016llx\n", (unsigned long long)i, (unsigned long long)sum);
		#endif
	}
	remainder valbuf;
	if(dlen - i == 7) {
		// Exactly one full group is left: copy its 7 bytes instead of loading
		// 8, so nothing past data + dlen is read. The eighth scratch byte is
		// zeroed first; it does not contribute to the output.
		valbuf.val = 0;
		memcpy(valbuf.buf, data+i, 7);
		register uint64_t sum = 0;
		register uint64_t shift = htobe64(valbuf.val)>>2;
		sum |= shift & 0x3fff000000000000;
		shift >>= 2;
		sum |= shift & 0x00003fff00000000;
		shift >>= 2;
		sum |= shift & 0x000000003fff0000;
		shift >>= 2;
		sum |= shift & 0x0000000000003fff;
		sum += 0x4e004e004e004e00;
		vals[n++] = be64toh(sum);
		return outlen;
	}
	// 1..6 trailing bytes: the word is built byte by byte; o counts the
	// trailing bytes that have not been consumed yet.
	int o = offset;
	if(o--) {
		register uint64_t sum = 0x000000000000003f & (data[i] >> 2);
		sum |= ((uint64_t)data[i] << 14) & 0x000000000000c000;
		if(o--) {
			sum |= ((uint64_t)data[i + 1] << 6) & 0x0000000000003f00;
			sum |= ((uint64_t)data[i + 1] << 20) & 0x0000000000300000;
			if(o--) {
				sum |= ((uint64_t)data[i + 2] << 12) & 0x00000000000f0000;
				sum |= ((uint64_t)data[i + 2] << 28) & 0x00000000f0000000;
				if(o--) {
					sum |= ((uint64_t)data[i + 3] << 20) & 0x000000000f000000;
					sum |= ((uint64_t)data[i + 3] << 34) & 0x0000003c00000000;
					if(o--) {
						sum |= ((uint64_t)data[i + 4] << 26) & 0x0000000300000000;
						sum |= ((uint64_t)data[i + 4] << 42) & 0x0000fc0000000000;
						if(o--) {
							sum |= ((uint64_t)data[i + 5] << 34) & 0x0000030000000000;
							sum |= ((uint64_t)data[i + 5] << 48) & 0x003f000000000000;
						}
					}
				}
			}
		}
		sum += 0x004e004e004e004e;
		#ifdef WORDS_BIGENDIAN
			valbuf.val = __builtin_bswap64(sum);
		#else
			valbuf.val = sum;
		#endif
		// store only the bytes that precede the 2-byte marker
		memcpy(&vals[n], valbuf.buf, outlen-2-(int)n*(int)sizeof(uint64_t));
		#ifdef DEBUG
			printf("i: %llu, add sum: %016llx\n", (unsigned long long)i, (unsigned long long)sum);
		#endif
		// marker: '=' followed by the number of trailing input bytes
		buf[outlen - 2] = '=';
		buf[outlen - 1] = offset;
	}
	return outlen;
}
int base16384_encode(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
@@ -134,6 +233,86 @@ int base16384_encode_unsafe(const char* data, int dlen, char* buf) {
return outlen;
}
/**
 * @brief decode dlen bytes of data into buf, working on 64-bit words
 *
 * If the last two input bytes are '=' followed by a count, the count gives the
 * number of bytes in the final, incomplete 7-byte group. Every 8 input bytes
 * before that decode to 7 output bytes.
 *
 * Input shorter than 2 bytes cannot carry a marker and decodes to nothing.
 * Apart from that and the bounded scratch copy below, the input is assumed
 * to be well formed; a marker count outside 0..6 is not rejected.
 *
 * @param data encoded input
 * @param dlen number of input bytes
 * @param buf output buffer
 * @return the decoded length
 */
int base16384_decode_safe(const char* data, int dlen, char* buf) {
	int outlen = dlen;
	int offset = 0;
	// Check dlen first: with fewer than 2 bytes, data[dlen-2] would be read
	// from before the start of the input.
	if(dlen >= 2 && data[dlen-2] == '=') {
		offset = data[dlen-1];
		switch(offset) { // including the 2 bytes taken by the offset marker
			case 0: break;
			case 1: outlen -= 4; break;
			case 2:
			case 3: outlen -= 6; break;
			case 4:
			case 5: outlen -= 8; break;
			case 6: outlen -= 10; break;
			default: break;
		}
	}
	outlen = outlen / 8 * 7 + offset;
	const uint64_t* vals = (const uint64_t*)data;
	uint64_t n = 0;
	int64_t i = 0;
	// Groups followed by at least one more output byte: the 8-byte store
	// stays inside the first outlen bytes of buf.
	for(; i < outlen - 7; n++, i+=7) {
		register uint64_t sum = 0;
		register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00;
		shift <<= 2;
		sum |= shift & 0xfffc000000000000;
		shift <<= 2;
		sum |= shift & 0x0003fff000000000;
		shift <<= 2;
		sum |= shift & 0x0000000fffc00000;
		shift <<= 2;
		sum |= shift & 0x00000000003fff00;
		*(uint64_t*)(buf+i) = be64toh(sum);
		#ifdef DEBUG
			// arguments are cast so that they match the %llu / %llx conversions
			printf("i: %llu, add sum: %016llx\n", (unsigned long long)i, (unsigned long long)sum);
		#endif
	}
	remainder valbuf;
	valbuf.val = 0; // bytes not covered by the partial copy below are defined
	if(outlen - i == 7) {
		// Last full group: go through valbuf so that only 7 bytes are written.
		register uint64_t sum = 0;
		register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00;
		shift <<= 2;
		sum |= shift & 0xfffc000000000000;
		shift <<= 2;
		sum |= shift & 0x0003fff000000000;
		shift <<= 2;
		sum |= shift & 0x0000000fffc00000;
		shift <<= 2;
		sum |= shift & 0x00000000003fff00;
		valbuf.val = be64toh(sum);
		memcpy(buf+i, valbuf.buf, 7);
	} else if(offset--) {
		// Incomplete final group: copy only the input bytes that precede the
		// marker. The length is bounded by the scratch word so that a
		// malformed marker cannot overflow valbuf.
		int cnt = dlen-2-(int)n*(int)sizeof(uint64_t);
		if (cnt > (int)sizeof(valbuf.buf)) cnt = (int)sizeof(valbuf.buf);
		if (cnt < 0) cnt = 0;
		memcpy(valbuf.buf, &vals[n], cnt);
		#ifdef WORDS_BIGENDIAN
			register uint64_t sum = __builtin_bswap64(valbuf.val) - 0x000000000000004e;
		#else
			register uint64_t sum = valbuf.val - 0x000000000000004e;
		#endif
		buf[i++] = ((sum & 0x000000000000003f) << 2) | ((sum & 0x000000000000c000) >> 14);
		if(offset--) {
			sum -= 0x00000000004e0000;
			buf[i++] = ((sum & 0x0000000000003f00) >> 6) | ((sum & 0x0000000000300000) >> 20);
			if(offset--) {
				buf[i++] = ((sum & 0x00000000000f0000) >> 12) | ((sum & 0x00000000f0000000) >> 28);
				if(offset--) {
					sum -= 0x0000004e00000000;
					buf[i++] = ((sum & 0x000000000f000000) >> 20) | ((sum & 0x0000003c00000000) >> 34);
					if(offset--) {
						buf[i++] = ((sum & 0x0000000300000000) >> 26) | ((sum & 0x0000fc0000000000) >> 42);
						if(offset--) {
							sum -= 0x004e000000000000;
							buf[i] = ((sum & 0x0000030000000000) >> 34) | ((sum & 0x003f000000000000) >> 48);
						}
					}
				}
			}
		}
	}
	return outlen;
}
int base16384_decode(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;

View File

@@ -116,6 +116,15 @@ static inline int base16384_decode_len(int dlen, int offset) {
return _base16384_decode_len(dlen, offset) + 16; // 多出 16 字节用于 unsafe 循环覆盖
}
/**
 * @brief encode data into buf without reading past the end of data
 * @param data data to encode; bytes beyond the first dlen are never read
 * @param dlen the length of data in bytes
 * @param buf the output buffer, whose size may be exactly `_base16384_encode_len`
 * @return the total length written
 */
int base16384_encode_safe(const char* data, int dlen, char* buf);
/**
* @brief encode data and write result into buf
* @param data data to encode
@@ -134,6 +143,15 @@ int base16384_encode(const char* data, int dlen, char* buf);
*/
int base16384_encode_unsafe(const char* data, int dlen, char* buf);
/**
 * @brief decode data into buf without reading past the end of data
 * @param data data to decode; for well-formed input, bytes beyond the first dlen are never read
 * @param dlen the length of data in bytes
 * @param buf the output buffer, whose size may be exactly `_base16384_decode_len`
 * @return the total length written
 */
int base16384_decode_safe(const char* data, int dlen, char* buf);
/**
* @brief decode data and write result into buf
* @param data data to decode

6
file.c
View File

@@ -170,7 +170,7 @@ base16384_err_t base16384_encode_file_detailed(const char* input, const char* ou
fputc(0xFE, fpo);
fputc(0xFF, fpo);
}
int n = base16384_encode(input_file, (int)inputsize, decbuf);
int n = base16384_encode_safe(input_file, (int)inputsize, decbuf);
if(n && fwrite(decbuf, n, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(encode, base16384_err_write_file, {
munmap(input_file, (size_t)inputsize+16);
@@ -317,8 +317,8 @@ base16384_err_t base16384_decode_file_detailed(const char* input, const char* ou
if(input_file == MAP_FAILED) {
goto_base16384_file_detailed_cleanup(decode, base16384_err_map_input_file, close(fd));
}
int off = skip_offset(input_file);
int n = base16384_decode(input_file+off, inputsize-off, encbuf);
int n = skip_offset(input_file);
n = base16384_decode_safe(input_file+n, inputsize-n, encbuf);
if(n && fwrite(encbuf, n, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(decode, base16384_err_write_file, {
munmap(input_file, (size_t)inputsize+16);

View File

@@ -43,24 +43,31 @@ char tstbuf[TEST_SIZE+16];
return 1; \
}
/*
 * Round-trip check for one encoder/decoder pair. For every length i in
 * 0..TEST_SIZE it encodes the first i bytes of encbuf into decbuf with
 * base16384_<encode>, decodes the result into tstbuf with base16384_<decode>,
 * and invokes return_error when the decoded bytes differ from encbuf.
 * The macro expands to several statements and uses the caller's `i` and `n`.
 * Only the first n decoded bytes are compared; n itself is not checked
 * against i.
 */
#define test_batch(encode, decode) \
fputs("testing base16384_"#encode"/base16384_"#decode"...\n", stderr); \
for(i = 0; i <= TEST_SIZE; i++) { \
n = base16384_##encode(encbuf, i, decbuf); \
n = base16384_##decode(decbuf, n, tstbuf); \
if (memcmp(encbuf, tstbuf, n)) return_error(i, n); \
}
int main() {
srand(time(NULL));
int i, n;
for(i = 0; i <= TEST_SIZE; i += sizeof(int)) {
*(int*)(&encbuf[i]) = rand();
}
fputs("testing base16384_en/decode...\n", stderr);
for(i = 0; i <= TEST_SIZE; i++) {
n = base16384_encode(encbuf, i, decbuf);
n = base16384_decode(decbuf, n, tstbuf);
int decn = n;
if (memcmp(encbuf, tstbuf, n)) return_error(i, n);
}
fputs("testing base16384_en/ecode_unsafe...\n", stderr);
for(i = 0; i <= TEST_SIZE; i++) {
n = base16384_encode_unsafe(encbuf, i, decbuf);
n = base16384_decode_unsafe(decbuf, n, tstbuf);
if ((n = memcmp(encbuf, tstbuf, n))) return_error(i, n);
}
test_batch(encode, decode);
test_batch(encode, decode_unsafe);
test_batch(encode, decode_safe);
test_batch(encode_unsafe, decode);
test_batch(encode_unsafe, decode_unsafe);
test_batch(encode_unsafe, decode_safe);
test_batch(encode_safe, decode);
test_batch(encode_safe, decode_unsafe);
test_batch(encode_safe, decode_safe);
return 0;
}

View File

@@ -98,18 +98,16 @@ int main() {
fputs("testing base16384_en/decode_file...\n", stderr);
init_input_file();
for(i = TEST_SIZE; i > 0; i--) {
fprintf(stderr, "loop@%d\n", i);
reset_and_truncate(fd, i);
loop_ok(close(fd), i, "close");
fputs("base16384_encode_file\n", stderr);
err = base16384_encode_file(TEST_INPUT_FILENAME, TEST_OUTPUT_FILENAME, encbuf, decbuf);
base16384_loop_ok(err);
fputs("base16384_decode_file\n", stderr);
err = base16384_decode_file(TEST_OUTPUT_FILENAME, TEST_VALIDATE_FILENAME, encbuf, decbuf);
base16384_loop_ok(err);
fputs("validate_result\n", stderr);
validate_result();
fputs("fin\n\n", stderr);
}
fputs("testing base16384_en/decode_fp...\n", stderr);