From c64fdb6dc9124b09a0d17c3bf58310a8d460f357 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com>
Date: Sat, 6 Apr 2024 18:49:23 +0900
Subject: [PATCH] feat(file): add custom stream api

---
Review notes (text in this section is commentary, not part of the commit):
  * The patch is laid out one diff line per source line. Indentation is
    reconstructed with tabs and must be checked against the target tree.
  * All review changes touch only `+` lines and keep every hunk's line
    count, so the hunk headers and the diffstat below are unchanged.
    The `index` blob ids still describe the content before these changes.
  * base16384.h: the reader/writer callback docs now state the return
    contract that file.c relies on.
  * file.c: the reader()/writer() macros call the callback of the stream
    they are given instead of the hard-coded `input`/`output`.
  * file.c, encode: the read count is signed, so a negative reader result
    no longer turns into a huge size_t; the header write is checked and a
    failing reader yields base16384_err_read_file.
  * file.c, decode: errno is cleared before the look-ahead read whose
    failure is detected through errno; a failing reader that ends the
    main loop yields base16384_err_read_file.
  * test/file_test.h: the writer now writes the inverted copy (`wbuf`)
    that it builds, mirroring the inverting reader. The three #include
    names are a reconstruction from the identifiers the code uses.

 base16384.h      |  54 ++++++++++++++++++++
 file.c           | 130 ++++++++++++++++++++++++++++++++++++++++++++---
 test/file_test.c |  44 +++++++++++++++-
 test/file_test.h |  32 ++++++++++++
 test/wrap_test.c |  38 ++++++++++++++
 wrap.c           |   2 +
 6 files changed, 290 insertions(+), 10 deletions(-)

diff --git a/base16384.h b/base16384.h
index b5be8a5..288380e 100644
--- a/base16384.h
+++ b/base16384.h
@@ -59,6 +59,36 @@ typedef enum base16384_err_t base16384_err_t;
 // forcely do sumcheck without checking data length
 #define BASE16384_FLAG_DO_SUM_CHECK_FORCELY (1<<2)
 
+/**
+ * @brief custom reader function interface
+ * @param client_data the data pointer defined by the client
+ * @param buffer where to put the data that was read
+ * @param count the maximum number of bytes to read
+ * @return the number of bytes read, 0 at end of stream, negative on error
+*/
+typedef ssize_t(*base16384_reader_t)(const void *client_data, void *buffer, size_t count);
+
+/**
+ * @brief custom writer function interface
+ * @param client_data the data pointer defined by the client
+ * @param buffer the data to write
+ * @param count the number of bytes to write
+ * @return the number of bytes written, which must equal count on success
+*/
+typedef ssize_t(*base16384_writer_t)(const void *client_data, const void *buffer, size_t count);
+
+struct base16384_stream_t {
+	union {
+		base16384_reader_t reader; // set when the stream is used as input
+		base16384_writer_t writer; // set when the stream is used as output
+	} f;
+	const void *client_data; // passed unchanged to the callback
+};
+/**
+ * @brief for stream encode/decode
+*/
+typedef struct base16384_stream_t base16384_stream_t;
+
 /**
  * @brief calculate the exact encoded size
  * @param dlen the data length to encode
@@ -210,6 +240,17 @@ base16384_err_t base16384_encode_fp_detailed(base16384_typed_flag_params(FILE*))
  */
 base16384_err_t base16384_encode_fd_detailed(base16384_typed_flag_params(int));
 
+/**
+ * @brief encode custom input reader to custom output writer
+ * @param input custom input reader
+ * @param output custom output writer
+ * @param encbuf must be no less than BASE16384_ENCBUFSZ
+ * @param decbuf must be no less than BASE16384_DECBUFSZ
+ * @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
+ * @return the error code
+*/
+base16384_err_t base16384_encode_stream_detailed(base16384_typed_flag_params(base16384_stream_t*));
+
 /**
  * @brief decode input file to output file
  * @param input filename or `-` to specify stdin
@@ -243,16 +284,29 @@ base16384_err_t base16384_decode_fp_detailed(base16384_typed_flag_params(FILE*))
  */
 base16384_err_t base16384_decode_fd_detailed(base16384_typed_flag_params(int));
 
+/**
+ * @brief decode custom input reader to custom output writer
+ * @param input custom input reader
+ * @param output custom output writer
+ * @param encbuf must be no less than BASE16384_ENCBUFSZ
+ * @param decbuf must be no less than BASE16384_DECBUFSZ
+ * @param flag BASE16384_FLAG_xxx value, add multiple flags by `|`
+ * @return the error code
+*/
+base16384_err_t base16384_decode_stream_detailed(base16384_typed_flag_params(base16384_stream_t*));
+
 #define BASE16384_WRAP_DECL(method, name, type) \
 	base16384_err_t base16384_##method##_##name(base16384_typed_params(type));
 
 	BASE16384_WRAP_DECL(encode, file, const char*);
 	BASE16384_WRAP_DECL(encode, fp, FILE*);
 	BASE16384_WRAP_DECL(encode, fd, int);
+	BASE16384_WRAP_DECL(encode, stream, base16384_stream_t*);
 
 	BASE16384_WRAP_DECL(decode, file, const char*);
 	BASE16384_WRAP_DECL(decode, fp, FILE*);
 	BASE16384_WRAP_DECL(decode, fd, int);
+	BASE16384_WRAP_DECL(decode, stream, base16384_stream_t*);
 
 #undef BASE16384_WRAP_DECL
 
diff --git a/file.c b/file.c
index 36fb7aa..faa7549 100644
--- a/file.c
+++ b/file.c
@@ -57,14 +57,14 @@ static inline off_t get_file_size(const char* filepath) {
 #define do_sum_check(flag) ((flag)&(BASE16384_FLAG_DO_SUM_CHECK_FORCELY|BASE16384_FLAG_SUM_CHECK_ON_REMAIN))
 
 base16384_err_t base16384_encode_file_detailed(const char* input, const char* output, char* encbuf, char* decbuf, int flag) {
-	off_t inputsize;
-	FILE *fp = NULL, *fpo;
-	int errnobak = 0, is_stdin = is_standard_io(input);
-	base16384_err_t retval = base16384_err_ok;
 	if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) {
 		errno = EINVAL;
 		return base16384_err_invalid_file_name;
 	}
+	base16384_err_t retval = base16384_err_ok;
+	off_t inputsize;
+	FILE *fp = NULL, *fpo;
+	int errnobak = 0, is_stdin = is_standard_io(input);
 	if(is_stdin) { // read from stdin
 		inputsize = _BASE16384_ENCBUFSZ;
 		fp = stdin;
@@ -213,6 +213,41 @@ base16384_err_t base16384_encode_fd_detailed(int input, int output, char* encbuf
 	return base16384_err_ok;
 }
 
+#define reader(cd, buf, n) ((cd)->f.reader((cd)->client_data, (buf), (n))) // call the callback of the given stream
+#define writer(cd, buf, n) ((cd)->f.writer((cd)->client_data, (buf), (n))) // call the callback of the given stream
+
+base16384_err_t base16384_encode_stream_detailed(base16384_stream_t* input, base16384_stream_t* output, char* encbuf, char* decbuf, int flag) {
+	if(!input || !input->f.reader) {
+		return base16384_err_fopen_input_file;
+	}
+	if(!output || !output->f.writer) {
+		return base16384_err_fopen_output_file;
+	}
+	off_t inputsize = _BASE16384_ENCBUFSZ;
+	ssize_t cnt = 0; // signed: a negative reader result must not become a huge length
+	uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
+	if(!(flag&BASE16384_FLAG_NOHEADER) && writer(output, "\xfe\xff", 2) != 2) return base16384_err_write_file;
+	while((cnt = reader(input, encbuf, inputsize)) > 0) {
+		int n;
+		while(cnt%7) { // top up a short read to a multiple of 7 bytes
+			n = reader(input, encbuf+cnt, sizeof(char));
+			if(n > 0) cnt++;
+			else break;
+		}
+		if(do_sum_check(flag)) {
+			sum = calc_sum(sum, cnt, encbuf);
+			if(cnt%7) { // last encode
+				*(uint32_t*)(&encbuf[cnt]) = htobe32(sum);
+			}
+		}
+		n = base16384_encode_unsafe(encbuf, cnt, decbuf);
+		if(n && writer(output, decbuf, n) < n) {
+			return base16384_err_write_file;
+		}
+	}
+	return cnt < 0 ? base16384_err_read_file : base16384_err_ok; // the loop also ends when the reader fails
+}
+
 #define rm_head(fp) {\
 	int ch = fgetc(fp);\
 	if(ch == 0xFE) fgetc(fp);\
@@ -230,16 +265,16 @@ static inline int is_next_end(FILE* fp) {
 }
 
 base16384_err_t base16384_decode_file_detailed(const char* input, const char* output, char* encbuf, char* decbuf, int flag) {
+	if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) {
+		errno = EINVAL;
+		return base16384_err_invalid_file_name;
+	}
 	off_t inputsize;
 	FILE* fp = NULL;
 	FILE* fpo;
 	uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
 	base16384_err_t retval = base16384_err_ok;
 	int errnobak = 0, is_stdin = is_standard_io(input);
-	if(!input || !output || strlen(input) <= 0 || strlen(output) <= 0) {
-		errno = EINVAL;
-		return base16384_err_invalid_file_name;
-	}
 	if(is_stdin) { // read from stdin
 		inputsize = _BASE16384_DECBUFSZ;
 		fp = stdin;
@@ -458,3 +493,82 @@ base16384_err_t base16384_decode_fd_detailed(int input, int output, char* encbuf
 	}
 	return base16384_err_ok;
 }
+
+static inline uint16_t is_next_end_stream(base16384_stream_t* input) {
+	uint8_t ch = 0;
+	if(reader(input, &ch, 1) != 1) return (uint16_t)EOF;
+	uint16_t ret = (uint16_t)ch & 0x00ff;
+	if(ch == '=') { // '=' is followed by one more byte, returned in the low 8 bits
+		if(reader(input, &ch, 1) != 1) return (uint16_t)EOF;
+		ret <<= 8;
+		ret |= (uint16_t)ch & 0x00ff;
+	}
+	return ret;
+}
+
+base16384_err_t base16384_decode_stream_detailed(base16384_stream_t* input, base16384_stream_t* output, char* encbuf, char* decbuf, int flag) {
+	if(!input || !input->f.reader) {
+		errno = EINVAL;
+		return base16384_err_fopen_input_file;
+	}
+	if(!output || !output->f.writer) {
+		errno = EINVAL;
+		return base16384_err_fopen_output_file;
+	}
+
+	off_t inputsize = _BASE16384_DECBUFSZ;
+	uint32_t sum = BASE16384_SIMPLE_SUM_INIT_VALUE;
+	uint8_t remains[8];
+
+	decbuf[0] = 0;
+	if(reader(input, remains, 2) != 2) {
+		return base16384_err_read_file;
+	}
+
+	int p = 0;
+	if(remains[0] != (uint8_t)(0xfe)) p = 2; // no header: the 2 bytes already read are data
+
+	int n, last_encbuf_cnt = 0, last_decbuf_cnt = 0, offset = 0;
+	size_t total_decoded_len = 0;
+	while((n = reader(input, decbuf+p, inputsize-p)) > 0) {
+		if(p) {
+			memcpy(decbuf, remains, p);
+			n += p;
+			p = 0;
+		}
+		while(n%8) { // top up a short read to a multiple of 8 bytes
+			int x = reader(input, decbuf+n, sizeof(char));
+			if(x > 0) n++;
+			else break;
+		}
+		errno = 0; // the check below must not trip on a stale errno
+		uint16_t next = is_next_end_stream(input);
+		if(errno) {
+			return base16384_err_read_file;
+		}
+		if((uint16_t)(~next)) {
+			if(next&0xff00) {
+				decbuf[n++] = '=';
+				decbuf[n++] = (char)(next&0x00ff);
+			} else remains[p++] = (char)(next&0x00ff);
+		}
+		offset = decbuf[n-1];
+		last_decbuf_cnt = n;
+		n = base16384_decode_unsafe(decbuf, n, encbuf);
+		if(n && writer(output, encbuf, n) != n) {
+			return base16384_err_write_file;
+		}
+		total_decoded_len += n;
+		if(do_sum_check(flag)) sum = calc_sum(sum, n, encbuf);
+		last_encbuf_cnt = n;
+	}
+	if(do_sum_check(flag)
+		&& (flag&BASE16384_FLAG_DO_SUM_CHECK_FORCELY || total_decoded_len >= _BASE16384_ENCBUFSZ)
+		&& last_decbuf_cnt > 2
+		&& decbuf[last_decbuf_cnt-2] == '='
+		&& check_sum(sum, *(uint32_t*)(&encbuf[last_encbuf_cnt]), offset)) {
+		errno = EINVAL;
+		return base16384_err_invalid_decoding_checksum;
+	}
+	return n < 0 ? base16384_err_read_file : base16384_err_ok; // n holds the reader result that ended the loop
+}
diff --git a/test/file_test.c b/test/file_test.c
index 8910277..754cd72 100644
--- a/test/file_test.c
+++ b/test/file_test.c
@@ -100,7 +100,7 @@ static char tstbuf[BASE16384_ENCBUFSZ];
 		int fdout = open(TEST_OUTPUT_FILENAME, O_RDWR|O_TRUNC|O_CREAT|O_APPEND); \
 		loop_ok(!fdout, i, "open"); \
 		\
-		err = base16384_encode_fd_detailed(fd, fdout, encbuf, decbuf, 0); \
+		err = base16384_encode_fd_detailed(fd, fdout, encbuf, decbuf, flag); \
 		base16384_loop_ok(err); \
 		loop_ok(close(fd), i, "close"); \
 		\
@@ -109,7 +109,46 @@ static char tstbuf[BASE16384_ENCBUFSZ];
 		\
 		loop_ok(lseek(fdout, 0, SEEK_SET), i, "lseek"); \
 		\
-		err = base16384_decode_fd_detailed(fdout, fdval, encbuf, decbuf, 0); \
+		err = base16384_decode_fd_detailed(fdout, fdval, encbuf, decbuf, flag); \
+		base16384_loop_ok(err); \
+		\
+		loop_ok(close(fdout), i, "close"); \
+		loop_ok(close(fdval), i, "close"); \
+		\
+		validate_result(); \
+	}
+
+#define test_stream_detailed(flag) \
+	fputs("testing base16384_en/decode_stream with flag "#flag"...\n", stderr); \
+	init_input_file(); \
+	for(i = TEST_SIZE; i > 0; i--) { \
+		reset_and_truncate(fd, i); \
+		\
+		int fdout = open(TEST_OUTPUT_FILENAME, O_RDWR|O_TRUNC|O_CREAT|O_APPEND); \
+		loop_ok(!fdout, i, "open"); \
+		\
+		err = base16384_encode_stream_detailed(&(base16384_stream_t){ \
+			.client_data = (void*)(uintptr_t)fd, \
+			.f.reader = base16384_test_file_reader, \
+		}, &(base16384_stream_t){ \
+			.client_data = (void*)(uintptr_t)fdout, \
+			.f.writer = base16384_test_file_writer, \
+		}, encbuf, decbuf, flag); \
+		base16384_loop_ok(err); \
+		loop_ok(close(fd), i, "close"); \
+		\
+		int fdval = open(TEST_VALIDATE_FILENAME, O_WRONLY|O_TRUNC|O_CREAT); \
+		loop_ok(!fdval, i, "open"); \
+		\
+		loop_ok(lseek(fdout, 0, SEEK_SET), i, "lseek"); \
+		\
+		err = base16384_decode_stream_detailed(&(base16384_stream_t){ \
+			.client_data = (void*)(uintptr_t)fdout, \
+			.f.reader = base16384_test_file_reader, \
+		}, &(base16384_stream_t){ \
+			.client_data = (void*)(uintptr_t)fdval, \
+			.f.writer = base16384_test_file_writer, \
+		}, encbuf, decbuf, flag); \
 		base16384_loop_ok(err); \
 		\
 		loop_ok(close(fdout), i, "close"); \
@@ -148,6 +187,7 @@ int main() {
 	test_detailed(file);
 	test_detailed(fp);
 	test_detailed(fd);
+	test_detailed(stream);
 
 	remove_test_files();
 
diff --git a/test/file_test.h b/test/file_test.h
index 2109bd8..06f6745 100644
--- a/test/file_test.h
+++ b/test/file_test.h
@@ -92,4 +92,36 @@
 	ok(fclose(fp), "fclose"); \
 	fputs("input file created.\n", stderr);
 
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static ssize_t base16384_test_file_reader(const void *client_data, void *buffer, size_t count) {
+	int fd = (int)((uintptr_t)client_data);
+	ssize_t ret = read(fd, buffer, count);
+	if(ret < 0) return ret;
+	for(ssize_t i = 0; i < ret; i++) { // hand back the bitwise complement of what was read
+		((uint8_t*)(buffer))[i] = ~((uint8_t*)(buffer))[i];
+	}
+	return ret;
+}
+
+static ssize_t base16384_test_file_writer(const void *client_data, const void *buffer, size_t count) {
+	int fd = (int)((uintptr_t)client_data);
+	if(count <= 0) {
+		errno = EINVAL;
+		return -100;
+	}
+	uint8_t* wbuf = (uint8_t*)malloc(count);
+	if(!wbuf) return -200;
+	for(size_t i = 0; i < count; i++) {
+		wbuf[i] = ~((const uint8_t*)(buffer))[i];
+	}
+	ssize_t ret = write(fd, wbuf, count); // store the complemented copy so the reader restores the bytes
+	int errnobak = errno;
+	free(wbuf);
+	errno = errnobak; // report the errno of write(), not one changed by free()
+	return ret;
+}
+
 #endif
\ No newline at end of file
diff --git a/test/wrap_test.c b/test/wrap_test.c
index 6cf84cd..e2345b4 100644
--- a/test/wrap_test.c
+++ b/test/wrap_test.c
@@ -122,6 +122,44 @@ int main() {
 		validate_result();
 	}
 
+	fputs("testing base16384_en/decode_stream...\n", stderr);
+	init_input_file();
+	for(i = TEST_SIZE; i > 0; i--) {
+		reset_and_truncate(fd, i);
+
+		int fdout = open(TEST_OUTPUT_FILENAME, O_RDWR|O_TRUNC|O_CREAT|O_APPEND);
+		loop_ok(!fdout, i, "open");
+
+		err = base16384_encode_stream(&(base16384_stream_t){
+			.client_data = (void*)(uintptr_t)fd,
+			.f.reader = base16384_test_file_reader,
+		}, &(base16384_stream_t){
+			.client_data = (void*)(uintptr_t)fdout,
+			.f.writer = base16384_test_file_writer,
+		}, encbuf, decbuf);
+		base16384_loop_ok(err);
+		loop_ok(close(fd), i, "close");
+
+		int fdval = open(TEST_VALIDATE_FILENAME, O_WRONLY|O_TRUNC|O_CREAT);
+		loop_ok(!fdval, i, "open");
+
+		loop_ok(lseek(fdout, 0, SEEK_SET), i, "lseek");
+
+		err = base16384_decode_stream(&(base16384_stream_t){
+			.client_data = (void*)(uintptr_t)fdout,
+			.f.reader = base16384_test_file_reader,
+		}, &(base16384_stream_t){
+			.client_data = (void*)(uintptr_t)fdval,
+			.f.writer = base16384_test_file_writer,
+		}, encbuf, decbuf);
+		base16384_loop_ok(err);
+
+		loop_ok(close(fdout), i, "close");
+		loop_ok(close(fdval), i, "close");
+
+		validate_result();
+	}
+
 	remove(TEST_INPUT_FILENAME);
 	remove(TEST_OUTPUT_FILENAME);
 	remove(TEST_VALIDATE_FILENAME);
diff --git a/wrap.c b/wrap.c
index 5358b5f..4d1334b 100644
--- a/wrap.c
+++ b/wrap.c
@@ -28,10 +28,12 @@
 	BASE16384_WRAP_DECL(encode, file, const char*);
 	BASE16384_WRAP_DECL(encode, fp, FILE*);
 	BASE16384_WRAP_DECL(encode, fd, int);
+	BASE16384_WRAP_DECL(encode, stream, base16384_stream_t*);
 
 	BASE16384_WRAP_DECL(decode, file, const char*);
 	BASE16384_WRAP_DECL(decode, fp, FILE*);
 	BASE16384_WRAP_DECL(decode, fd, int);
+	BASE16384_WRAP_DECL(decode, stream, base16384_stream_t*);
 
 #undef BASE16384_WRAP_DECL
 