From 2622a5ad692f9b3db35683dad9ac30cca1d87cd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Thu, 4 Apr 2024 23:58:05 +0900 Subject: [PATCH] feat(coder): add safe encode/decode --- .github/workflows/test.yml | 11 +- CMakeLists.txt | 2 +- base1432.c | 207 +++++++++++++++++++++++++++++++++++-- base1464.c | 179 ++++++++++++++++++++++++++++++++ base16384.h | 18 ++++ file.c | 6 +- test/coder_test.c | 33 +++--- test/wrap_test.c | 8 +- 8 files changed, 435 insertions(+), 29 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b81fc5a..ced3f8f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,10 +15,19 @@ jobs: sudo apt-get update sudo apt-get install -y gcc cmake - - name: Build and Run Tests + - name: Build and Run 64bit Tests run: | mkdir build cd build cmake -DBUILD=test .. make make test || ctest --rerun-failed --output-on-failure + + - name: Build and Run 32bit Tests + run: | + rm -rf build + mkdir build + cd build + cmake -DBUILD=test -DFORCE_32BIT=1 .. + make + make test || ctest --rerun-failed --output-on-failure diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fd796a..0600cec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ endif () add_executable(base16384_b base16384.c) -IF (CMAKE_SIZEOF_VOID_P EQUAL 8) +IF ((NOT FORCE_32BIT) AND CMAKE_SIZEOF_VOID_P EQUAL 8) message(STATUS "Adding 64bit libraries...") add_definitions(-DIS_64BIT_PROCESSOR) add_library(base16384 SHARED file.c base1464.c) diff --git a/base1432.c b/base1432.c index df723e2..e991d2a 100644 --- a/base1432.c +++ b/base1432.c @@ -16,8 +16,113 @@ * along with this program. If not, see . */ +#include + #include "binary.h" +union remainder { + uint8_t buf[4]; + uint32_t val; +}; +typedef union remainder remainder; + +int base16384_encode_safe(const char* data, int dlen, char* buf) { + int outlen = dlen / 7 * 8; + int offset = dlen % 7; + switch(offset) { // 算上偏移标志字符占用的2字节 + case 0: break; + case 1: outlen += 4; break; + case 2: + case 3: outlen += 6; break; + case 4: + case 5: outlen += 8; break; + case 6: outlen += 10; break; + default: break; + } + uint32_t* vals = (uint32_t*)buf; + uint32_t n = 0; + int32_t i = 0; + for(; i < dlen - 7; i += 7) { + register uint32_t sum = 0; + register uint32_t shift = htobe32(*(uint32_t*)(data+i)); + sum |= (shift>>2) & 0x3fff0000; + sum |= (shift>>4) & 0x00003fff; + sum += 0x4e004e00; + vals[n++] = be32toh(sum); + shift <<= 26; + shift &= 0x3c000000; + sum = 0; + shift |= (htobe32(*(uint32_t*)(data+i+4))>>6)&0x03fffffc; + sum |= shift & 0x3fff0000; + shift >>= 2; + sum |= shift & 0x00003fff; + sum += 0x4e004e00; + vals[n++] = be32toh(sum); + } + remainder valbuf; + if(dlen - i == 7) { + register uint32_t sum = 0; + register uint32_t shift = htobe32(*(uint32_t*)(data+i)); + sum |= (shift>>2) & 0x3fff0000; + sum |= (shift>>4) & 0x00003fff; + sum += 0x4e004e00; + vals[n++] = be32toh(sum); + shift <<= 26; + shift &= 0x3c000000; + sum = 0; + memcpy(valbuf.buf, data+i+4, 3); + shift |= (htobe32(valbuf.val)>>6)&0x03fffffc; + sum |= shift & 0x3fff0000; + shift >>= 2; + sum |= shift & 0x00003fff; + sum += 0x4e004e00; + vals[n++] = be32toh(sum); + return outlen; + } + uint8_t o = offset; + if(o--) { + register uint32_t sum = 0x0000003f & (data[i] >> 2); + sum |= ((uint32_t)data[i] << 14) & 0x0000c000; + if(o--) { + sum |= ((uint32_t)data[i + 1] << 6) & 0x00003f00; + sum |= ((uint32_t)data[i + 1] << 20) & 0x00300000; + if(o--) { + sum |= ((uint32_t)data[i + 2] << 12) & 0x000f0000; + sum |= ((uint32_t)data[i + 2] << 28) & 0xf0000000; + if(o--) { + sum |= ((uint32_t)data[i + 3] << 20) & 0x0f000000; + sum += 0x004e004e; + // safe, because it will never go over 0x3dxx + #ifdef WORDS_BIGENDIAN + vals[n++] = __builtin_bswap32(sum); + #else + vals[n++] = sum; + #endif + sum = (((uint32_t)data[i + 3] << 2)) & 0x0000003c; + if(o--) { + sum |= (((uint32_t)data[i + 4] >> 6)) & 0x00000003; + sum |= ((uint32_t)data[i + 4] << 10) & 0x0000fc00; + if(o--) { + sum |= ((uint32_t)data[i + 5] << 2) & 0x00000300; + sum |= ((uint32_t)data[i + 5] << 16) & 0x003f0000; + } + } + } + } + } + sum += 0x004e004e; + // safe, because it will never go over 0x3dxx + #ifdef WORDS_BIGENDIAN + vals[n] = __builtin_bswap32(sum); + #else + vals[n] = sum; + #endif + buf[outlen - 2] = '='; + buf[outlen - 1] = offset; + } + return outlen; +} + int base16384_encode(const char* data, int dlen, char* buf) { int outlen = dlen / 7 * 8; int offset = dlen % 7; @@ -31,9 +136,6 @@ int base16384_encode(const char* data, int dlen, char* buf) { case 6: outlen += 10; break; default: break; } - #ifdef DEBUG - printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8); - #endif uint32_t* vals = (uint32_t*)buf; uint32_t n = 0; int32_t i = 0; @@ -109,9 +211,6 @@ int base16384_encode_unsafe(const char* data, int dlen, char* buf) { case 6: outlen += 10; break; default: break; } - #ifdef DEBUG - printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8); - #endif uint32_t* vals = (uint32_t*)buf; uint32_t n = 0; int32_t i = 0; @@ -139,6 +238,102 @@ int base16384_encode_unsafe(const char* data, int dlen, char* buf) { return outlen; } +int base16384_decode_safe(const char* data, int dlen, char* buf) { + int outlen = dlen; + int offset = 0; + if(data[dlen-2] == '=') { + offset = data[dlen-1]; + switch(offset) { // 算上偏移标志字符占用的2字节 + case 0: break; + case 1: outlen -= 4; break; + case 2: + case 3: outlen -= 6; break; + case 4: + case 5: outlen -= 8; break; + case 6: outlen -= 10; break; + default: break; + } + } + outlen = outlen / 8 * 7 + offset; + uint32_t* vals = (uint32_t*)data; + uint32_t n = 0; + int32_t i = 0; + for(; i < outlen - 7; i+=7) { // n实际每次自增2 + register uint32_t sum = 0; + register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00; + shift <<= 2; + sum |= shift & 0xfffc0000; + shift <<= 2; + sum |= shift & 0x0003fff0; + shift = htobe32(vals[n++]) - 0x4e004e00; + sum |= shift >> 26; + *(uint32_t*)(buf+i) = be32toh(sum); + sum = 0; + shift <<= 6; + sum |= shift & 0xffc00000; + shift <<= 2; + sum |= shift & 0x003fff00; + *(uint32_t*)(buf+i+4) = be32toh(sum); + } + remainder valbuf; + if(outlen - i == 7) { + register uint32_t sum = 0; + register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00; + shift <<= 2; + sum |= shift & 0xfffc0000; + shift <<= 2; + sum |= shift & 0x0003fff0; + shift = htobe32(vals[n]) - 0x4e004e00; + sum |= shift >> 26; + *(uint32_t*)(buf+i) = be32toh(sum); + sum = 0; + shift <<= 6; + sum |= shift & 0xffc00000; + shift <<= 2; + sum |= shift & 0x003fff00; + valbuf.val = be32toh(sum); + memcpy(buf+i+4, valbuf.buf, 3); + } else if(offset--) { + int cnt = dlen-2-(int)n*(int)sizeof(uint32_t); + if (cnt > 4) cnt = 4; + memcpy(valbuf.buf, &vals[n], cnt); + n++; + #ifdef WORDS_BIGENDIAN + register uint32_t sum = __builtin_bswap32(valbuf.val); + #else + register uint32_t sum = valbuf.val; + #endif + sum -= 0x0000004e; + buf[i++] = ((sum & 0x0000003f) << 2) | ((sum & 0x0000c000) >> 14); + if(offset--) { + sum -= 0x004e0000; + buf[i++] = ((sum & 0x00003f00) >> 6) | ((sum & 0x00300000) >> 20); + if(offset--) { + buf[i++] = ((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28); + if(offset--) { + buf[i] = (sum & 0x0f000000) >> 20; + memcpy(valbuf.buf, &vals[n], dlen-2-(int)n*(int)sizeof(uint32_t)); + #ifdef WORDS_BIGENDIAN + sum = __builtin_bswap32(valbuf.val); + #else + sum = valbuf.val; + #endif + sum -= 0x0000004e; + buf[i++] |= (sum & 0x0000003c) >> 2; + if(offset--) { + buf[i++] = ((sum & 0x00000003) << 6) | ((sum & 0x0000fc00) >> 10); + if(offset--) { + sum -= 0x004e0000; + buf[i] = ((sum & 0x00000300) >> 2) | ((sum & 0x003f0000) >> 16); + } + } + } + } + } + } + return outlen; +} + int base16384_decode(const char* data, int dlen, char* buf) { int outlen = dlen; int offset = 0; diff --git a/base1464.c b/base1464.c index 77d1ff3..23d3f69 100644 --- a/base1464.c +++ b/base1464.c @@ -16,8 +16,107 @@ * along with this program. If not, see . */ +#include + #include "binary.h" +union remainder { + uint8_t buf[8]; + uint64_t val; +}; +typedef union remainder remainder; + +int base16384_encode_safe(const char* data, int dlen, char* buf) { + int outlen = dlen / 7 * 8; + int offset = dlen % 7; + switch(offset) { // 算上偏移标志字符占用的2字节 + case 0: break; + case 1: outlen += 4; break; + case 2: + case 3: outlen += 6; break; + case 4: + case 5: outlen += 8; break; + case 6: outlen += 10; break; + default: break; + } + #ifdef DEBUG + printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8); + #endif + uint64_t* vals = (uint64_t*)buf; + uint64_t n = 0; + int64_t i = 0; + for(; i < dlen - 7; i += 7) { + register uint64_t sum = 0; + register uint64_t shift = htobe64(*(uint64_t*)(data+i))>>2; + sum |= shift & 0x3fff000000000000; + shift >>= 2; + sum |= shift & 0x00003fff00000000; + shift >>= 2; + sum |= shift & 0x000000003fff0000; + shift >>= 2; + sum |= shift & 0x0000000000003fff; + sum += 0x4e004e004e004e00; + vals[n++] = be64toh(sum); + #ifdef DEBUG + printf("i: %llu, add sum: %016llx\n", i, sum); + #endif + } + remainder valbuf; + if(dlen - i == 7) { + memcpy(valbuf.buf, data+i, 7); + register uint64_t sum = 0; + register uint64_t shift = htobe64(valbuf.val)>>2; + sum |= shift & 0x3fff000000000000; + shift >>= 2; + sum |= shift & 0x00003fff00000000; + shift >>= 2; + sum |= shift & 0x000000003fff0000; + shift >>= 2; + sum |= shift & 0x0000000000003fff; + sum += 0x4e004e004e004e00; + vals[n++] = be64toh(sum); + return outlen; + } + int o = offset; + if(o--) { + register uint64_t sum = 0x000000000000003f & (data[i] >> 2); + sum |= ((uint64_t)data[i] << 14) & 0x000000000000c000; + if(o--) { + sum |= ((uint64_t)data[i + 1] << 6) & 0x0000000000003f00; + sum |= ((uint64_t)data[i + 1] << 20) & 0x0000000000300000; + if(o--) { + sum |= ((uint64_t)data[i + 2] << 12) & 0x00000000000f0000; + sum |= ((uint64_t)data[i + 2] << 28) & 0x00000000f0000000; + if(o--) { + sum |= ((uint64_t)data[i + 3] << 20) & 0x000000000f000000; + sum |= ((uint64_t)data[i + 3] << 34) & 0x0000003c00000000; + if(o--) { + sum |= ((uint64_t)data[i + 4] << 26) & 0x0000000300000000; + sum |= ((uint64_t)data[i + 4] << 42) & 0x0000fc0000000000; + if(o--) { + sum |= ((uint64_t)data[i + 5] << 34) & 0x0000030000000000; + sum |= ((uint64_t)data[i + 5] << 48) & 0x003f000000000000; + } + } + } + } + } + sum += 0x004e004e004e004e; + #ifdef WORDS_BIGENDIAN + valbuf.val = __builtin_bswap64(sum); + #else + valbuf.val = sum; + #endif + memcpy(&vals[n], valbuf.buf, outlen-2-(int)n*(int)sizeof(uint64_t)); + #ifdef DEBUG + printf("i: %llu, add sum: %016llx\n", i, sum); + #endif + buf[outlen - 2] = '='; + buf[outlen - 1] = offset; + } + return outlen; +} + int base16384_encode(const char* data, int dlen, char* buf) { int outlen = dlen / 7 * 8; int offset = dlen % 7; @@ -134,6 +233,86 @@ int base16384_encode_unsafe(const char* data, int dlen, char* buf) { return outlen; } +int base16384_decode_safe(const char* data, int dlen, char* buf) { + int outlen = dlen; + int offset = 0; + if(data[dlen-2] == '=') { + offset = data[dlen-1]; + switch(offset) { // 算上偏移标志字符占用的2字节 + case 0: break; + case 1: outlen -= 4; break; + case 2: + case 3: outlen -= 6; break; + case 4: + case 5: outlen -= 8; break; + case 6: outlen -= 10; break; + default: break; + } + } + outlen = outlen / 8 * 7 + offset; + uint64_t* vals = (uint64_t*)data; + uint64_t n = 0; + int64_t i = 0; + for(; i < outlen - 7; n++, i+=7) { + register uint64_t sum = 0; + register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00; + shift <<= 2; + sum |= shift & 0xfffc000000000000; + shift <<= 2; + sum |= shift & 0x0003fff000000000; + shift <<= 2; + sum |= shift & 0x0000000fffc00000; + shift <<= 2; + sum |= shift & 0x00000000003fff00; + *(uint64_t*)(buf+i) = be64toh(sum); + #ifdef DEBUG + printf("i: %llu, add sum: %016llx\n", i, sum); + #endif + } + remainder valbuf; + if(outlen - i == 7) { + register uint64_t sum = 0; + register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00; + shift <<= 2; + sum |= shift & 0xfffc000000000000; + shift <<= 2; + sum |= shift & 0x0003fff000000000; + shift <<= 2; + sum |= shift & 0x0000000fffc00000; + shift <<= 2; + sum |= shift & 0x00000000003fff00; + valbuf.val = be64toh(sum); + memcpy(buf+i, valbuf.buf, 7); + } else if(offset--) { + memcpy(valbuf.buf, &vals[n], dlen-2-(int)n*(int)sizeof(uint64_t)); + #ifdef WORDS_BIGENDIAN + register uint64_t sum = __builtin_bswap64(valbuf.val) - 0x000000000000004e; + #else + register uint64_t sum = valbuf.val - 0x000000000000004e; + #endif + buf[i++] = ((sum & 0x000000000000003f) << 2) | ((sum & 0x000000000000c000) >> 14); + if(offset--) { + sum -= 0x00000000004e0000; + buf[i++] = ((sum & 0x0000000000003f00) >> 6) | ((sum & 0x0000000000300000) >> 20); + if(offset--) { + buf[i++] = ((sum & 0x00000000000f0000) >> 12) | ((sum & 0x00000000f0000000) >> 28); + if(offset--) { + sum -= 0x0000004e00000000; + buf[i++] = ((sum & 0x000000000f000000) >> 20) | ((sum & 0x0000003c00000000) >> 34); + if(offset--) { + buf[i++] = ((sum & 0x0000000300000000) >> 26) | ((sum & 0x0000fc0000000000) >> 42); + if(offset--) { + sum -= 0x004e000000000000; + buf[i] = ((sum & 0x0000030000000000) >> 34) | ((sum & 0x003f000000000000) >> 48); + } + } + } + } + } + } + return outlen; +} + int base16384_decode(const char* data, int dlen, char* buf) { int outlen = dlen; int offset = 0; diff --git a/base16384.h b/base16384.h index 7ea5d88..5707ca3 100644 --- a/base16384.h +++ b/base16384.h @@ -116,6 +116,15 @@ static inline int base16384_decode_len(int dlen, int offset) { return _base16384_decode_len(dlen, offset) + 16; // 多出 16 字节用于 unsafe 循环覆盖 } +/** + * @brief safely encode data and write result into buf + * @param data data to encode, no data overread + * @param dlen the data length + * @param buf the output buffer, whose size can be exactly `_base16384_encode_len` + * @return the total length written +*/ +int base16384_encode_safe(const char* data, int dlen, char* buf); + /** * @brief encode data and write result into buf * @param data data to encode @@ -134,6 +143,15 @@ int base16384_encode(const char* data, int dlen, char* buf); */ int base16384_encode_unsafe(const char* data, int dlen, char* buf); +/** + * @brief safely decode data and write result into buf + * @param data data to decode, no data overread + * @param dlen the data length + * @param buf the output buffer, whose size can be exactly `_base16384_decode_len` + * @return the total length written +*/ +int base16384_decode_safe(const char* data, int dlen, char* buf); + /** * @brief decode data and write result into buf * @param data data to decode diff --git a/file.c b/file.c index c6953ff..049b18e 100644 --- a/file.c +++ b/file.c @@ -170,7 +170,7 @@ base16384_err_t base16384_encode_file_detailed(const char* input, const char* ou fputc(0xFE, fpo); fputc(0xFF, fpo); } - int n = base16384_encode(input_file, (int)inputsize, decbuf); + int n = base16384_encode_safe(input_file, (int)inputsize, decbuf); if(n && fwrite(decbuf, n, 1, fpo) <= 0) { goto_base16384_file_detailed_cleanup(encode, base16384_err_write_file, { munmap(input_file, (size_t)inputsize+16); @@ -317,8 +317,8 @@ base16384_err_t base16384_decode_file_detailed(const char* input, const char* ou if(input_file == MAP_FAILED) { goto_base16384_file_detailed_cleanup(decode, base16384_err_map_input_file, close(fd)); } - int off = skip_offset(input_file); - int n = base16384_decode(input_file+off, inputsize-off, encbuf); + int n = skip_offset(input_file); + n = base16384_decode_safe(input_file+n, inputsize-n, encbuf); if(n && fwrite(encbuf, n, 1, fpo) <= 0) { goto_base16384_file_detailed_cleanup(decode, base16384_err_write_file, { munmap(input_file, (size_t)inputsize+16); diff --git a/test/coder_test.c b/test/coder_test.c index fe391f6..10d7769 100644 --- a/test/coder_test.c +++ b/test/coder_test.c @@ -43,24 +43,31 @@ char tstbuf[TEST_SIZE+16]; return 1; \ } +#define test_batch(encode, decode) \ + fputs("testing base16384_"#encode"/base16384_"#decode"...\n", stderr); \ + for(i = 0; i <= TEST_SIZE; i++) { \ + n = base16384_##encode(encbuf, i, decbuf); \ + n = base16384_##decode(decbuf, n, tstbuf); \ + if (memcmp(encbuf, tstbuf, n)) return_error(i, n); \ + } + int main() { srand(time(NULL)); int i, n; for(i = 0; i <= TEST_SIZE; i += sizeof(int)) { *(int*)(&encbuf[i]) = rand(); } - fputs("testing base16384_en/decode...\n", stderr); - for(i = 0; i <= TEST_SIZE; i++) { - n = base16384_encode(encbuf, i, decbuf); - n = base16384_decode(decbuf, n, tstbuf); - int decn = n; - if (memcmp(encbuf, tstbuf, n)) return_error(i, n); - } - fputs("testing base16384_en/ecode_unsafe...\n", stderr); - for(i = 0; i <= TEST_SIZE; i++) { - n = base16384_encode_unsafe(encbuf, i, decbuf); - n = base16384_decode_unsafe(decbuf, n, tstbuf); - if ((n = memcmp(encbuf, tstbuf, n))) return_error(i, n); - } + + test_batch(encode, decode); + test_batch(encode, decode_unsafe); + test_batch(encode, decode_safe); + + test_batch(encode_unsafe, decode); + test_batch(encode_unsafe, decode_unsafe); + test_batch(encode_unsafe, decode_safe); + + test_batch(encode_safe, decode); + test_batch(encode_safe, decode_unsafe); + test_batch(encode_safe, decode_safe); return 0; } diff --git a/test/wrap_test.c b/test/wrap_test.c index b6abb1b..2aa7b5b 100644 --- a/test/wrap_test.c +++ b/test/wrap_test.c @@ -98,18 +98,16 @@ int main() { fputs("testing base16384_en/decode_file...\n", stderr); init_input_file(); for(i = TEST_SIZE; i > 0; i--) { - fprintf(stderr, "loop@%d\n", i); reset_and_truncate(fd, i); loop_ok(close(fd), i, "close"); - fputs("base16384_encode_file\n", stderr); + err = base16384_encode_file(TEST_INPUT_FILENAME, TEST_OUTPUT_FILENAME, encbuf, decbuf); base16384_loop_ok(err); - fputs("base16384_decode_file\n", stderr); + err = base16384_decode_file(TEST_OUTPUT_FILENAME, TEST_VALIDATE_FILENAME, encbuf, decbuf); base16384_loop_ok(err); - fputs("validate_result\n", stderr); + validate_result(); - fputs("fin\n\n", stderr); } fputs("testing base16384_en/decode_fp...\n", stderr);