diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b81fc5a..ced3f8f 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -15,10 +15,19 @@ jobs:
sudo apt-get update
sudo apt-get install -y gcc cmake
- - name: Build and Run Tests
+ - name: Build and Run 64bit Tests
run: |
mkdir build
cd build
cmake -DBUILD=test ..
make
make test || ctest --rerun-failed --output-on-failure
+
+ - name: Build and Run 32bit Tests
+ run: |
+ rm -rf build
+ mkdir build
+ cd build
+ cmake -DBUILD=test -DFORCE_32BIT=1 ..
+ make
+ make test || ctest --rerun-failed --output-on-failure
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9fd796a..0600cec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,7 @@ endif ()
add_executable(base16384_b base16384.c)
-IF (CMAKE_SIZEOF_VOID_P EQUAL 8)
+IF ((NOT FORCE_32BIT) AND CMAKE_SIZEOF_VOID_P EQUAL 8)
message(STATUS "Adding 64bit libraries...")
add_definitions(-DIS_64BIT_PROCESSOR)
add_library(base16384 SHARED file.c base1464.c)
diff --git a/base1432.c b/base1432.c
index df723e2..e991d2a 100644
--- a/base1432.c
+++ b/base1432.c
@@ -16,8 +16,113 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
+#include
+
#include "binary.h"
+union remainder { // 4-byte scratch word that can be accessed as raw bytes or as one uint32_t
+ uint8_t buf[4]; // byte view, used as the memcpy source/destination by the *_safe coders
+ uint32_t val; // word view of the same storage
+};
+typedef union remainder remainder;
+/* Encodes dlen bytes of data into buf: every 7 input bytes become 8 output bytes, and 1..6 leftover bytes become a short tail ending in '=' plus the leftover count. Returns the output length. */
+int base16384_encode_safe(const char* data, int dlen, char* buf) {
+ int outlen = dlen / 7 * 8; // each full 7-byte group yields 8 output bytes
+ int offset = dlen % 7; // leftover input bytes after the full groups
+ switch(offset) { // add the tail size, including the 2 bytes taken by the offset marker
+ case 0: break;
+ case 1: outlen += 4; break; // 2 tail bytes + 2 marker bytes
+ case 2:
+ case 3: outlen += 6; break; // 4 tail bytes + 2 marker bytes
+ case 4:
+ case 5: outlen += 8; break; // 6 tail bytes + 2 marker bytes
+ case 6: outlen += 10; break; // 8 tail bytes + 2 marker bytes
+ default: break;
+ }
+ uint32_t* vals = (uint32_t*)buf; // output viewed as 32-bit words
+ uint32_t n = 0; // index of the next output word
+ int32_t i = 0; // index of the next input byte
+ for(; i < dlen - 7; i += 7) { // full groups while more than 7 bytes remain, so reading bytes i..i+7 stays inside data
+ register uint32_t sum = 0;
+ register uint32_t shift = htobe32(*(uint32_t*)(data+i)); // group bytes 0..3, data[i] in the most significant byte
+ sum |= (shift>>2) & 0x3fff0000; // first 14 bits -> upper 16-bit unit
+ sum |= (shift>>4) & 0x00003fff; // next 14 bits -> lower 16-bit unit
+ sum += 0x4e004e00; // add 0x4e00 to each 16-bit unit
+ vals[n++] = be32toh(sum);
+ shift <<= 26; // move the 4 bits not emitted yet to the top of the next word
+ shift &= 0x3c000000;
+ sum = 0;
+ shift |= (htobe32(*(uint32_t*)(data+i+4))>>6)&0x03fffffc; // append group bytes 4..6; the 4th byte loaded (start of the next group) is masked off
+ sum |= shift & 0x3fff0000; // third 14-bit unit
+ shift >>= 2;
+ sum |= shift & 0x00003fff; // fourth 14-bit unit
+ sum += 0x4e004e00;
+ vals[n++] = be32toh(sum);
+ }
+ remainder valbuf; // scratch word for copies that must not touch bytes outside data
+ if(dlen - i == 7) { // exactly one full group left: same math as the loop, without loading an 8th byte
+ register uint32_t sum = 0;
+ register uint32_t shift = htobe32(*(uint32_t*)(data+i));
+ sum |= (shift>>2) & 0x3fff0000;
+ sum |= (shift>>4) & 0x00003fff;
+ sum += 0x4e004e00;
+ vals[n++] = be32toh(sum);
+ shift <<= 26;
+ shift &= 0x3c000000;
+ sum = 0;
+ memcpy(valbuf.buf, data+i+4, 3); // only 3 bytes are copied; valbuf.buf[3] stays unset and is masked off on the next line
+ shift |= (htobe32(valbuf.val)>>6)&0x03fffffc;
+ sum |= shift & 0x3fff0000;
+ shift >>= 2;
+ sum |= shift & 0x00003fff;
+ sum += 0x4e004e00;
+ vals[n++] = be32toh(sum);
+ return outlen; // no tail marker when dlen is a multiple of 7
+ }
+ uint8_t o = offset; // countdown of leftover bytes still to pack
+ if(o--) { // at least 1 leftover byte; sum is assembled in little-endian byte layout
+ register uint32_t sum = 0x0000003f & (data[i] >> 2); // upper 6 bits of leftover byte 0
+ sum |= ((uint32_t)data[i] << 14) & 0x0000c000; // lower 2 bits of leftover byte 0
+ if(o--) { // 2nd leftover byte
+ sum |= ((uint32_t)data[i + 1] << 6) & 0x00003f00;
+ sum |= ((uint32_t)data[i + 1] << 20) & 0x00300000;
+ if(o--) { // 3rd leftover byte
+ sum |= ((uint32_t)data[i + 2] << 12) & 0x000f0000;
+ sum |= ((uint32_t)data[i + 2] << 28) & 0xf0000000;
+ if(o--) { // 4th leftover byte: completes the first tail word and starts a second one
+ sum |= ((uint32_t)data[i + 3] << 20) & 0x0f000000;
+ sum += 0x004e004e; // adds 0x4e to bytes 0 and 2 of the stored word
+ // safe, because it will never go over 0x3dxx
+ #ifdef WORDS_BIGENDIAN
+ vals[n++] = __builtin_bswap32(sum);
+ #else
+ vals[n++] = sum;
+ #endif
+ sum = (((uint32_t)data[i + 3] << 2)) & 0x0000003c; // remaining 4 bits of the 4th leftover byte
+ if(o--) { // 5th leftover byte
+ sum |= (((uint32_t)data[i + 4] >> 6)) & 0x00000003;
+ sum |= ((uint32_t)data[i + 4] << 10) & 0x0000fc00;
+ if(o--) { // 6th leftover byte
+ sum |= ((uint32_t)data[i + 5] << 2) & 0x00000300;
+ sum |= ((uint32_t)data[i + 5] << 16) & 0x003f0000;
+ }
+ }
+ }
+ }
+ }
+ sum += 0x004e004e; // adds 0x4e to bytes 0 and 2 of the stored word
+ // safe, because it will never go over 0x3dxx
+ #ifdef WORDS_BIGENDIAN
+ vals[n] = __builtin_bswap32(sum);
+ #else
+ vals[n] = sum;
+ #endif
+ buf[outlen - 2] = '='; // tail marker: '=' followed by the leftover byte count
+ buf[outlen - 1] = offset;
+ }
+ return outlen;
+}
+
int base16384_encode(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
@@ -31,9 +136,6 @@ int base16384_encode(const char* data, int dlen, char* buf) {
case 6: outlen += 10; break;
default: break;
}
- #ifdef DEBUG
- printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8);
- #endif
uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0;
int32_t i = 0;
@@ -109,9 +211,6 @@ int base16384_encode_unsafe(const char* data, int dlen, char* buf) {
case 6: outlen += 10; break;
default: break;
}
- #ifdef DEBUG
- printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8);
- #endif
uint32_t* vals = (uint32_t*)buf;
uint32_t n = 0;
int32_t i = 0;
@@ -139,6 +238,102 @@ int base16384_encode_unsafe(const char* data, int dlen, char* buf) {
return outlen;
}
+int base16384_decode_safe(const char* data, int dlen, char* buf) { // decodes dlen encoded bytes from data into buf and returns the decoded length
+ int outlen = dlen;
+ int offset = 0;
+ if(data[dlen-2] == '=') { // tail marker present; NOTE(review): indexed unconditionally, so dlen < 2 reads before data
+ offset = data[dlen-1]; // leftover byte count stored right after '='
+ switch(offset) { // drop the tail size, including the 2 bytes taken by the offset marker
+ case 0: break;
+ case 1: outlen -= 4; break;
+ case 2:
+ case 3: outlen -= 6; break;
+ case 4:
+ case 5: outlen -= 8; break;
+ case 6: outlen -= 10; break;
+ default: break; // NOTE(review): any other marker value is used below without a size adjustment
+ }
+ }
+ outlen = outlen / 8 * 7 + offset; // 7 decoded bytes per 8 encoded bytes, plus the leftover bytes
+ uint32_t* vals = (uint32_t*)data; // input viewed as 32-bit words
+ uint32_t n = 0; // index of the next input word
+ int32_t i = 0; // index of the next output byte
+ for(; i < outlen - 7; i+=7) { // n actually advances by 2 per iteration
+ register uint32_t sum = 0;
+ register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00; // first two 16-bit units with 0x4e00 removed from each
+ shift <<= 2;
+ sum |= shift & 0xfffc0000; // first 14-bit unit
+ shift <<= 2;
+ sum |= shift & 0x0003fff0; // second 14-bit unit
+ shift = htobe32(vals[n++]) - 0x4e004e00; // third and fourth units
+ sum |= shift >> 26; // low bits of the first output word come from the top of the third unit
+ *(uint32_t*)(buf+i) = be32toh(sum); // decoded bytes 0..3 of the group
+ sum = 0;
+ shift <<= 6;
+ sum |= shift & 0xffc00000; // rest of the third unit
+ shift <<= 2;
+ sum |= shift & 0x003fff00; // fourth unit; the low byte of sum stays zero
+ *(uint32_t*)(buf+i+4) = be32toh(sum); // decoded bytes 4..6 plus a zero byte at buf[i+7], still inside outlen and rewritten by the next group
+ }
+ remainder valbuf; // scratch word for copies that must stay inside the buffers
+ if(outlen - i == 7) { // exactly one full group left: store only 7 bytes
+ register uint32_t sum = 0;
+ register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00;
+ shift <<= 2;
+ sum |= shift & 0xfffc0000;
+ shift <<= 2;
+ sum |= shift & 0x0003fff0;
+ shift = htobe32(vals[n]) - 0x4e004e00;
+ sum |= shift >> 26;
+ *(uint32_t*)(buf+i) = be32toh(sum);
+ sum = 0;
+ shift <<= 6;
+ sum |= shift & 0xffc00000;
+ shift <<= 2;
+ sum |= shift & 0x003fff00;
+ valbuf.val = be32toh(sum);
+ memcpy(buf+i+4, valbuf.buf, 3); // write bytes 4..6 only, so nothing lands past outlen
+ } else if(offset--) { // otherwise decode the leftover bytes, if any
+ int cnt = dlen-2-(int)n*(int)sizeof(uint32_t); // encoded bytes left before the 2-byte marker
+ if (cnt > 4) cnt = 4; // copy at most one word
+ memcpy(valbuf.buf, &vals[n], cnt);
+ n++;
+ #ifdef WORDS_BIGENDIAN
+ register uint32_t sum = __builtin_bswap32(valbuf.val);
+ #else
+ register uint32_t sum = valbuf.val;
+ #endif
+ sum -= 0x0000004e; // remove 0x4e from byte 0 of the word
+ buf[i++] = ((sum & 0x0000003f) << 2) | ((sum & 0x0000c000) >> 14); // 1st leftover byte
+ if(offset--) {
+ sum -= 0x004e0000; // remove 0x4e from byte 2 of the word
+ buf[i++] = ((sum & 0x00003f00) >> 6) | ((sum & 0x00300000) >> 20); // 2nd leftover byte
+ if(offset--) {
+ buf[i++] = ((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28); // 3rd leftover byte
+ if(offset--) {
+ buf[i] = (sum & 0x0f000000) >> 20; // upper 4 bits of the 4th leftover byte
+ memcpy(valbuf.buf, &vals[n], dlen-2-(int)n*(int)sizeof(uint32_t)); // second tail word; NOTE(review): this length is not clamped the way cnt is above
+ #ifdef WORDS_BIGENDIAN
+ sum = __builtin_bswap32(valbuf.val);
+ #else
+ sum = valbuf.val;
+ #endif
+ sum -= 0x0000004e;
+ buf[i++] |= (sum & 0x0000003c) >> 2; // lower 4 bits of the 4th leftover byte
+ if(offset--) {
+ buf[i++] = ((sum & 0x00000003) << 6) | ((sum & 0x0000fc00) >> 10); // 5th leftover byte
+ if(offset--) {
+ sum -= 0x004e0000;
+ buf[i] = ((sum & 0x00000300) >> 2) | ((sum & 0x003f0000) >> 16); // 6th leftover byte
+ }
+ }
+ }
+ }
+ }
+ }
+ return outlen;
+}
+
int base16384_decode(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
diff --git a/base1464.c b/base1464.c
index 77d1ff3..23d3f69 100644
--- a/base1464.c
+++ b/base1464.c
@@ -16,8 +16,107 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
+#include
+
#include "binary.h"
+union remainder { // 8-byte scratch word that can be accessed as raw bytes or as one uint64_t
+ uint8_t buf[8]; // byte view, used as the memcpy source/destination by the *_safe coders
+ uint64_t val; // word view of the same storage
+};
+typedef union remainder remainder;
+/* Encodes dlen bytes of data into buf: every 7 input bytes become 8 output bytes, and 1..6 leftover bytes become a short tail ending in '=' plus the leftover count. Returns the output length. */
+int base16384_encode_safe(const char* data, int dlen, char* buf) {
+ int outlen = dlen / 7 * 8; // each full 7-byte group yields 8 output bytes
+ int offset = dlen % 7; // leftover input bytes after the full groups
+ switch(offset) { // add the tail size, including the 2 bytes taken by the offset marker
+ case 0: break;
+ case 1: outlen += 4; break; // 2 tail bytes + 2 marker bytes
+ case 2:
+ case 3: outlen += 6; break; // 4 tail bytes + 2 marker bytes
+ case 4:
+ case 5: outlen += 8; break; // 6 tail bytes + 2 marker bytes
+ case 6: outlen += 10; break; // 8 tail bytes + 2 marker bytes
+ default: break;
+ }
+ #ifdef DEBUG
+ printf("outlen: %llu, offset: %u, malloc: %llu\n", outlen, offset, outlen + 8); // NOTE(review): %llu/%u are given int arguments here
+ #endif
+ uint64_t* vals = (uint64_t*)buf; // output viewed as 64-bit words
+ uint64_t n = 0; // index of the next output word
+ int64_t i = 0; // index of the next input byte
+ for(; i < dlen - 7; i += 7) { // full groups while more than 7 bytes remain, so the 8-byte load stays inside data
+ register uint64_t sum = 0;
+ register uint64_t shift = htobe64(*(uint64_t*)(data+i))>>2; // 8 bytes with data[i] most significant; the 8th byte is never selected by the masks below
+ sum |= shift & 0x3fff000000000000; // first 14-bit unit
+ shift >>= 2;
+ sum |= shift & 0x00003fff00000000; // second 14-bit unit
+ shift >>= 2;
+ sum |= shift & 0x000000003fff0000; // third 14-bit unit
+ shift >>= 2;
+ sum |= shift & 0x0000000000003fff; // fourth 14-bit unit
+ sum += 0x4e004e004e004e00; // add 0x4e00 to each 16-bit unit
+ vals[n++] = be64toh(sum);
+ #ifdef DEBUG
+ printf("i: %llu, add sum: %016llx\n", i, sum);
+ #endif
+ }
+ remainder valbuf; // scratch word for copies that must stay inside the buffers
+ if(dlen - i == 7) { // exactly one full group left: same math as the loop, without loading an 8th byte
+ memcpy(valbuf.buf, data+i, 7); // only 7 bytes are copied; valbuf.buf[7] stays unset and is never selected by the masks
+ register uint64_t sum = 0;
+ register uint64_t shift = htobe64(valbuf.val)>>2;
+ sum |= shift & 0x3fff000000000000;
+ shift >>= 2;
+ sum |= shift & 0x00003fff00000000;
+ shift >>= 2;
+ sum |= shift & 0x000000003fff0000;
+ shift >>= 2;
+ sum |= shift & 0x0000000000003fff;
+ sum += 0x4e004e004e004e00;
+ vals[n++] = be64toh(sum);
+ return outlen; // no tail marker when dlen is a multiple of 7
+ }
+ int o = offset; // countdown of leftover bytes still to pack
+ if(o--) { // at least 1 leftover byte; sum is assembled in little-endian byte layout
+ register uint64_t sum = 0x000000000000003f & (data[i] >> 2); // upper 6 bits of leftover byte 0
+ sum |= ((uint64_t)data[i] << 14) & 0x000000000000c000; // lower 2 bits of leftover byte 0
+ if(o--) { // 2nd leftover byte
+ sum |= ((uint64_t)data[i + 1] << 6) & 0x0000000000003f00;
+ sum |= ((uint64_t)data[i + 1] << 20) & 0x0000000000300000;
+ if(o--) { // 3rd leftover byte
+ sum |= ((uint64_t)data[i + 2] << 12) & 0x00000000000f0000;
+ sum |= ((uint64_t)data[i + 2] << 28) & 0x00000000f0000000;
+ if(o--) { // 4th leftover byte
+ sum |= ((uint64_t)data[i + 3] << 20) & 0x000000000f000000;
+ sum |= ((uint64_t)data[i + 3] << 34) & 0x0000003c00000000;
+ if(o--) { // 5th leftover byte
+ sum |= ((uint64_t)data[i + 4] << 26) & 0x0000000300000000;
+ sum |= ((uint64_t)data[i + 4] << 42) & 0x0000fc0000000000;
+ if(o--) { // 6th leftover byte
+ sum |= ((uint64_t)data[i + 5] << 34) & 0x0000030000000000;
+ sum |= ((uint64_t)data[i + 5] << 48) & 0x003f000000000000;
+ }
+ }
+ }
+ }
+ }
+ sum += 0x004e004e004e004e; // adds 0x4e to bytes 0, 2, 4 and 6 of the stored word
+ #ifdef WORDS_BIGENDIAN
+ valbuf.val = __builtin_bswap64(sum);
+ #else
+ valbuf.val = sum;
+ #endif
+ memcpy(&vals[n], valbuf.buf, outlen-2-(int)n*(int)sizeof(uint64_t)); // write only the tail bytes that precede the 2-byte marker
+ #ifdef DEBUG
+ printf("i: %llu, add sum: %016llx\n", i, sum);
+ #endif
+ buf[outlen - 2] = '='; // tail marker: '=' followed by the leftover byte count
+ buf[outlen - 1] = offset;
+ }
+ return outlen;
+}
+
int base16384_encode(const char* data, int dlen, char* buf) {
int outlen = dlen / 7 * 8;
int offset = dlen % 7;
@@ -134,6 +233,86 @@ int base16384_encode_unsafe(const char* data, int dlen, char* buf) {
return outlen;
}
+int base16384_decode_safe(const char* data, int dlen, char* buf) { // decodes dlen encoded bytes from data into buf and returns the decoded length
+ int outlen = dlen;
+ int offset = 0;
+ if(data[dlen-2] == '=') { // tail marker present; NOTE(review): indexed unconditionally, so dlen < 2 reads before data
+ offset = data[dlen-1]; // leftover byte count stored right after '='
+ switch(offset) { // drop the tail size, including the 2 bytes taken by the offset marker
+ case 0: break;
+ case 1: outlen -= 4; break;
+ case 2:
+ case 3: outlen -= 6; break;
+ case 4:
+ case 5: outlen -= 8; break;
+ case 6: outlen -= 10; break;
+ default: break; // NOTE(review): any other marker value is used below without a size adjustment
+ }
+ }
+ outlen = outlen / 8 * 7 + offset; // 7 decoded bytes per 8 encoded bytes, plus the leftover bytes
+ uint64_t* vals = (uint64_t*)data; // input viewed as 64-bit words
+ uint64_t n = 0; // index of the next input word
+ int64_t i = 0; // index of the next output byte
+ for(; i < outlen - 7; n++, i+=7) { // one 8-byte word in, 7 bytes out, while more than 7 output bytes remain
+ register uint64_t sum = 0;
+ register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00; // four 16-bit units with 0x4e00 removed from each
+ shift <<= 2;
+ sum |= shift & 0xfffc000000000000; // first 14-bit unit
+ shift <<= 2;
+ sum |= shift & 0x0003fff000000000; // second 14-bit unit
+ shift <<= 2;
+ sum |= shift & 0x0000000fffc00000; // third 14-bit unit
+ shift <<= 2;
+ sum |= shift & 0x00000000003fff00; // fourth 14-bit unit; the low byte of sum stays zero
+ *(uint64_t*)(buf+i) = be64toh(sum); // 7 decoded bytes plus a zero byte at buf[i+7], still inside outlen and rewritten by the next group
+ #ifdef DEBUG
+ printf("i: %llu, add sum: %016llx\n", i, sum);
+ #endif
+ }
+ remainder valbuf; // scratch word for copies that must stay inside the buffers
+ if(outlen - i == 7) { // exactly one full group left: store only 7 bytes
+ register uint64_t sum = 0;
+ register uint64_t shift = htobe64(vals[n]) - 0x4e004e004e004e00;
+ shift <<= 2;
+ sum |= shift & 0xfffc000000000000;
+ shift <<= 2;
+ sum |= shift & 0x0003fff000000000;
+ shift <<= 2;
+ sum |= shift & 0x0000000fffc00000;
+ shift <<= 2;
+ sum |= shift & 0x00000000003fff00;
+ valbuf.val = be64toh(sum);
+ memcpy(buf+i, valbuf.buf, 7); // write 7 bytes only, so nothing lands past outlen
+ } else if(offset--) { // otherwise decode the leftover bytes, if any
+ memcpy(valbuf.buf, &vals[n], dlen-2-(int)n*(int)sizeof(uint64_t)); // encoded tail without the 2-byte marker; NOTE(review): length is not bounded to sizeof valbuf here
+ #ifdef WORDS_BIGENDIAN
+ register uint64_t sum = __builtin_bswap64(valbuf.val) - 0x000000000000004e;
+ #else
+ register uint64_t sum = valbuf.val - 0x000000000000004e;
+ #endif
+ buf[i++] = ((sum & 0x000000000000003f) << 2) | ((sum & 0x000000000000c000) >> 14); // 1st leftover byte
+ if(offset--) {
+ sum -= 0x00000000004e0000; // remove 0x4e from byte 2 of the word
+ buf[i++] = ((sum & 0x0000000000003f00) >> 6) | ((sum & 0x0000000000300000) >> 20); // 2nd leftover byte
+ if(offset--) {
+ buf[i++] = ((sum & 0x00000000000f0000) >> 12) | ((sum & 0x00000000f0000000) >> 28); // 3rd leftover byte
+ if(offset--) {
+ sum -= 0x0000004e00000000; // remove 0x4e from byte 4 of the word
+ buf[i++] = ((sum & 0x000000000f000000) >> 20) | ((sum & 0x0000003c00000000) >> 34); // 4th leftover byte
+ if(offset--) {
+ buf[i++] = ((sum & 0x0000000300000000) >> 26) | ((sum & 0x0000fc0000000000) >> 42); // 5th leftover byte
+ if(offset--) {
+ sum -= 0x004e000000000000; // remove 0x4e from byte 6 of the word
+ buf[i] = ((sum & 0x0000030000000000) >> 34) | ((sum & 0x003f000000000000) >> 48); // 6th leftover byte
+ }
+ }
+ }
+ }
+ }
+ }
+ return outlen;
+}
+
int base16384_decode(const char* data, int dlen, char* buf) {
int outlen = dlen;
int offset = 0;
diff --git a/base16384.h b/base16384.h
index 7ea5d88..5707ca3 100644
--- a/base16384.h
+++ b/base16384.h
@@ -116,6 +116,15 @@ static inline int base16384_decode_len(int dlen, int offset) {
return _base16384_decode_len(dlen, offset) + 16; // 多出 16 字节用于 unsafe 循环覆盖
}
+/**
+ * @brief safely encode data and write the result into buf
+ * @param data data to encode; it is not read beyond `dlen` bytes
+ * @param dlen the length of data in bytes
+ * @param buf the output buffer, whose size can be exactly `_base16384_encode_len`
+ * @return the total length written
+*/
+int base16384_encode_safe(const char* data, int dlen, char* buf);
+
/**
* @brief encode data and write result into buf
* @param data data to encode
@@ -134,6 +143,15 @@ int base16384_encode(const char* data, int dlen, char* buf);
*/
int base16384_encode_unsafe(const char* data, int dlen, char* buf);
+/**
+ * @brief safely decode data and write the result into buf
+ * @param data data to decode; it is not read beyond `dlen` bytes
+ * @param dlen the length of data in bytes
+ * @param buf the output buffer, whose size can be exactly `_base16384_decode_len`
+ * @return the total length written
+*/
+int base16384_decode_safe(const char* data, int dlen, char* buf);
+
/**
* @brief decode data and write result into buf
* @param data data to decode
diff --git a/file.c b/file.c
index c6953ff..049b18e 100644
--- a/file.c
+++ b/file.c
@@ -170,7 +170,7 @@ base16384_err_t base16384_encode_file_detailed(const char* input, const char* ou
fputc(0xFE, fpo);
fputc(0xFF, fpo);
}
- int n = base16384_encode(input_file, (int)inputsize, decbuf);
+ int n = base16384_encode_safe(input_file, (int)inputsize, decbuf);
if(n && fwrite(decbuf, n, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(encode, base16384_err_write_file, {
munmap(input_file, (size_t)inputsize+16);
@@ -317,8 +317,8 @@ base16384_err_t base16384_decode_file_detailed(const char* input, const char* ou
if(input_file == MAP_FAILED) {
goto_base16384_file_detailed_cleanup(decode, base16384_err_map_input_file, close(fd));
}
- int off = skip_offset(input_file);
- int n = base16384_decode(input_file+off, inputsize-off, encbuf);
+ int n = skip_offset(input_file);
+ n = base16384_decode_safe(input_file+n, inputsize-n, encbuf);
if(n && fwrite(encbuf, n, 1, fpo) <= 0) {
goto_base16384_file_detailed_cleanup(decode, base16384_err_write_file, {
munmap(input_file, (size_t)inputsize+16);
diff --git a/test/coder_test.c b/test/coder_test.c
index fe391f6..10d7769 100644
--- a/test/coder_test.c
+++ b/test/coder_test.c
@@ -43,24 +43,31 @@ char tstbuf[TEST_SIZE+16];
return 1; \
}
+#define test_batch(encode, decode) /* round-trip check for one encoder/decoder pair; uses i, n and the buffers of the enclosing scope */ \
+ fputs("testing base16384_"#encode"/base16384_"#decode"...\n", stderr); \
+ for(i = 0; i <= TEST_SIZE; i++) { /* every input length from 0 to TEST_SIZE */ \
+ n = base16384_##encode(encbuf, i, decbuf); /* encode the first i bytes of encbuf into decbuf */ \
+ n = base16384_##decode(decbuf, n, tstbuf); /* decode them back into tstbuf */ \
+ if (memcmp(encbuf, tstbuf, n)) return_error(i, n); /* the first n decoded bytes must equal the input */ \
+ }
+
int main() {
srand(time(NULL));
int i, n;
for(i = 0; i <= TEST_SIZE; i += sizeof(int)) {
*(int*)(&encbuf[i]) = rand();
}
- fputs("testing base16384_en/decode...\n", stderr);
- for(i = 0; i <= TEST_SIZE; i++) {
- n = base16384_encode(encbuf, i, decbuf);
- n = base16384_decode(decbuf, n, tstbuf);
- int decn = n;
- if (memcmp(encbuf, tstbuf, n)) return_error(i, n);
- }
- fputs("testing base16384_en/ecode_unsafe...\n", stderr);
- for(i = 0; i <= TEST_SIZE; i++) {
- n = base16384_encode_unsafe(encbuf, i, decbuf);
- n = base16384_decode_unsafe(decbuf, n, tstbuf);
- if ((n = memcmp(encbuf, tstbuf, n))) return_error(i, n);
- }
+ // round-trip every encoder variant against every decoder variant
+ test_batch(encode, decode);
+ test_batch(encode, decode_unsafe);
+ test_batch(encode, decode_safe);
+
+ test_batch(encode_unsafe, decode);
+ test_batch(encode_unsafe, decode_unsafe);
+ test_batch(encode_unsafe, decode_safe);
+
+ test_batch(encode_safe, decode);
+ test_batch(encode_safe, decode_unsafe);
+ test_batch(encode_safe, decode_safe);
return 0;
}
diff --git a/test/wrap_test.c b/test/wrap_test.c
index b6abb1b..2aa7b5b 100644
--- a/test/wrap_test.c
+++ b/test/wrap_test.c
@@ -98,18 +98,16 @@ int main() {
fputs("testing base16384_en/decode_file...\n", stderr);
init_input_file();
for(i = TEST_SIZE; i > 0; i--) {
- fprintf(stderr, "loop@%d\n", i);
reset_and_truncate(fd, i);
loop_ok(close(fd), i, "close");
- fputs("base16384_encode_file\n", stderr);
+
err = base16384_encode_file(TEST_INPUT_FILENAME, TEST_OUTPUT_FILENAME, encbuf, decbuf);
base16384_loop_ok(err);
- fputs("base16384_decode_file\n", stderr);
+
err = base16384_decode_file(TEST_OUTPUT_FILENAME, TEST_VALIDATE_FILENAME, encbuf, decbuf);
base16384_loop_ok(err);
- fputs("validate_result\n", stderr);
+
validate_result();
- fputs("fin\n\n", stderr);
}
fputs("testing base16384_en/decode_fp...\n", stderr);