1
0
mirror of https://github.com/fumiama/go-base16384.git synced 2026-06-05 08:40:37 +08:00
Files
go-base16384/c/base1432.c
源文雨 369cf02def finish arm64 decode
name            old time/op    new time/op     delta
EncodeTo/16-8     10.6ns ± 0%     10.4ns ± 0%    -2.07%  (p=0.008 n=5+5)
EncodeTo/256-8    80.8ns ± 0%     55.7ns ± 0%   -31.11%  (p=0.008 n=5+5)
EncodeTo/4K-8     1.21µs ± 0%     0.82µs ± 0%   -32.67%  (p=0.016 n=4+5)
EncodeTo/32K-8    9.64µs ± 0%     6.47µs ± 0%   -32.90%  (p=0.008 n=5+5)
DecodeTo/16-8     9.79ns ± 0%    26.02ns ± 0%  +165.85%  (p=0.016 n=4+5)
DecodeTo/256-8    80.9ns ± 0%    111.6ns ± 0%   +37.98%  (p=0.008 n=5+5)
DecodeTo/4K-8     1.22µs ± 0%     1.17µs ± 0%    -3.73%  (p=0.008 n=5+5)
DecodeTo/32K-8    9.71µs ± 1%     8.80µs ± 1%    -9.37%  (p=0.008 n=5+5)
Encoder/16-8      76.5ns ± 0%     76.5ns ± 0%      ~     (p=0.810 n=5+5)
Encoder/256-8      356ns ± 0%      291ns ± 0%   -18.22%  (p=0.008 n=5+5)
Encoder/4K-8      4.05µs ± 0%     3.70µs ± 0%    -8.76%  (p=0.008 n=5+5)
Encoder/32K-8     34.1µs ± 0%     29.2µs ± 0%   -14.30%  (p=0.008 n=5+5)
Decoder/16-8       205ns ± 0%      207ns ± 0%    +1.08%  (p=0.008 n=5+5)
Decoder/256-8      262ns ± 0%      244ns ± 1%    -6.94%  (p=0.008 n=5+5)
Decoder/4K-8      1.49µs ± 0%     1.12µs ± 0%   -24.87%  (p=0.008 n=5+5)
Decoder/32K-8     11.0µs ± 0%      8.0µs ± 0%   -27.00%  (p=0.008 n=5+5)

name            old speed      new speed       delta
EncodeTo/16-8   1.50GB/s ± 0%   1.54GB/s ± 0%    +2.11%  (p=0.008 n=5+5)
EncodeTo/256-8  3.17GB/s ± 0%   4.60GB/s ± 0%   +45.15%  (p=0.008 n=5+5)
EncodeTo/4K-8   3.37GB/s ± 0%   5.01GB/s ± 0%   +48.51%  (p=0.008 n=5+5)
EncodeTo/32K-8  3.40GB/s ± 0%   5.06GB/s ± 0%   +49.02%  (p=0.008 n=5+5)
DecodeTo/16-8   2.25GB/s ± 0%   0.85GB/s ± 0%   -62.39%  (p=0.016 n=4+5)
DecodeTo/256-8  3.66GB/s ± 0%   2.65GB/s ± 0%   -27.54%  (p=0.008 n=5+5)
DecodeTo/4K-8   3.84GB/s ± 0%   3.99GB/s ± 0%    +3.87%  (p=0.008 n=5+5)
DecodeTo/32K-8  3.86GB/s ± 1%   4.26GB/s ± 1%   +10.33%  (p=0.008 n=5+5)
Encoder/16-8     209MB/s ± 0%    209MB/s ± 0%      ~     (p=0.802 n=5+5)
Encoder/256-8    720MB/s ± 0%    880MB/s ± 0%   +22.28%  (p=0.008 n=5+5)
Encoder/4K-8    1.01GB/s ± 0%   1.11GB/s ± 0%    +9.60%  (p=0.008 n=5+5)
Encoder/32K-8    962MB/s ± 0%   1122MB/s ± 0%   +16.69%  (p=0.008 n=5+5)
Decoder/16-8    78.1MB/s ± 0%   77.3MB/s ± 0%    -1.08%  (p=0.008 n=5+5)
Decoder/256-8    977MB/s ± 0%   1050MB/s ± 1%    +7.47%  (p=0.008 n=5+5)
Decoder/4K-8    2.76GB/s ± 0%   3.67GB/s ± 0%   +33.10%  (p=0.008 n=5+5)
Decoder/32K-8   2.98GB/s ± 0%   4.08GB/s ± 0%   +36.98%  (p=0.008 n=5+5)

name            old alloc/op   new alloc/op    delta
EncodeTo/16-8      0.00B           0.00B           ~     (all equal)
EncodeTo/256-8     0.00B           0.00B           ~     (all equal)
EncodeTo/4K-8      0.00B           0.00B           ~     (all equal)
EncodeTo/32K-8     0.00B           0.00B           ~     (all equal)
DecodeTo/16-8      0.00B          48.00B ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/256-8     0.00B         576.00B ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/4K-8      0.00B        6144.00B ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/32K-8     0.00B       49152.00B ± 0%     +Inf%  (p=0.008 n=5+5)
Encoder/16-8       24.0B ± 0%      24.0B ± 0%      ~     (all equal)
Encoder/256-8       472B ± 0%        24B ± 0%   -94.92%  (p=0.008 n=5+5)
Encoder/4K-8       24.0B ± 0%      24.0B ± 0%      ~     (all equal)
Encoder/32K-8     41.0kB ± 0%      0.0kB ± 0%   -99.94%  (p=0.008 n=5+5)
Decoder/16-8      1.39kB ± 0%     1.39kB ± 0%      ~     (all equal)
Decoder/256-8     1.39kB ± 0%     1.39kB ± 0%      ~     (all equal)
Decoder/4K-8      4.98kB ± 0%     4.98kB ± 0%      ~     (all equal)
Decoder/32K-8     41.1kB ± 0%     41.1kB ± 0%      ~     (all equal)

name            old allocs/op  new allocs/op   delta
EncodeTo/16-8       0.00            0.00           ~     (all equal)
EncodeTo/256-8      0.00            0.00           ~     (all equal)
EncodeTo/4K-8       0.00            0.00           ~     (all equal)
EncodeTo/32K-8      0.00            0.00           ~     (all equal)
DecodeTo/16-8       0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/256-8      0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/4K-8       0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/32K-8      0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
Encoder/16-8        1.00 ± 0%       1.00 ± 0%      ~     (all equal)
Encoder/256-8       2.00 ± 0%       1.00 ± 0%   -50.00%  (p=0.008 n=5+5)
Encoder/4K-8        1.00 ± 0%       1.00 ± 0%      ~     (all equal)
Encoder/32K-8       2.00 ± 0%       1.00 ± 0%   -50.00%  (p=0.008 n=5+5)
Decoder/16-8        3.00 ± 0%       3.00 ± 0%      ~     (all equal)
Decoder/256-8       3.00 ± 0%       3.00 ± 0%      ~     (all equal)
Decoder/4K-8        3.00 ± 0%       3.00 ± 0%      ~     (all equal)
Decoder/32K-8       3.00 ± 0%       3.00 ± 0%      ~     (all equal)
2022-12-14 01:14:03 +08:00

162 lines
4.2 KiB
C

// Endianness helpers: make be16toh/be32toh/htobe16/htobe32 available on each
// supported toolchain so the codec below can convert between host byte order
// and big-endian 16/32-bit values.
#ifdef __cosmopolitan // always little-endian
// Every conversion is an unconditional byte swap on this target.
# define be16toh(x) bswap_16(x)
# define be32toh(x) bswap_32(x)
# define htobe16(x) bswap_16(x)
# define htobe32(x) bswap_32(x)
#else
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
// Linux: the conversion helpers are taken from <endian.h>.
#ifdef __linux__
# include <endian.h>
#endif
// FreeBSD / NetBSD: the helpers are taken from <sys/endian.h>.
#ifdef __FreeBSD__
# include <sys/endian.h>
#endif
#ifdef __NetBSD__
# include <sys/endian.h>
#endif
// OpenBSD: alias the be*toh spellings used in this file to betoh16/betoh32.
#ifdef __OpenBSD__
# include <sys/types.h>
# define be16toh(x) betoh16(x)
# define be32toh(x) betoh32(x)
#endif
// macOS (detected through __MAC_10_0): reuse the network byte order helpers.
// NOTE(review): htobe16 is mapped to ntohs rather than htons; both perform the
// same conversion, but confirm this is intentional. No header declaring
// ntohs/ntohl/htonl is included in this section - verify it arrives elsewhere.
#ifdef __MAC_10_0
# define be16toh(x) ntohs(x)
# define be32toh(x) ntohl(x)
# define htobe16(x) ntohs(x)
# define htobe32(x) htonl(x)
#endif
// Windows: identity on big-endian builds (WORDS_BIGENDIAN), otherwise the
// _byteswap_* intrinsics.
#ifdef _WIN32
#ifdef WORDS_BIGENDIAN
# define be16toh(x) (x)
# define be32toh(x) (x)
# define htobe16(x) (x)
# define htobe32(x) (x)
#else
# define be16toh(x) _byteswap_ushort(x)
# define be32toh(x) _byteswap_ulong(x)
# define htobe16(x) _byteswap_ushort(x)
# define htobe32(x) _byteswap_ulong(x)
#endif
#endif
#endif
/* Store a 32-bit value at p, most significant byte first. */
static void base16384_enc_put_be32(unsigned char *p, uint32_t v) {
	p[0] = (unsigned char)(v >> 24);
	p[1] = (unsigned char)(v >> 16);
	p[2] = (unsigned char)(v >> 8);
	p[3] = (unsigned char)v;
}

/* Store a 32-bit value at p, least significant byte first. */
static void base16384_enc_put_le32(unsigned char *p, uint32_t v) {
	p[0] = (unsigned char)v;
	p[1] = (unsigned char)(v >> 8);
	p[2] = (unsigned char)(v >> 16);
	p[3] = (unsigned char)(v >> 24);
}

/**
 * Encode the bytes in `data` as base16384 into `buf`.
 *
 * Every complete 7-byte group of input is split into four 14-bit slices; each
 * slice has 0x4e00 added and is stored as two bytes, high byte first (8 output
 * bytes per group). When `offset` is non-zero, that many bytes following the
 * last complete group are encoded as well, and the two bytes
 * buf[outlen - 2] = '=' and buf[outlen - 1] = offset are written as a trailer.
 *
 * All memory access is done byte by byte, so `data` and `buf` need no
 * particular alignment, the result does not depend on host byte order, and no
 * byte at or beyond data[dlen] is read for complete groups.
 *
 * @param offset number of trailing input bytes after the last complete group
 *               (presumably dlen % 7 - confirm against the caller). Only the
 *               low 8 bits select the tail length; values above 6 encode six
 *               tail bytes.
 * @param outlen total encoded length as computed by the caller; used only to
 *               place the trailer and as the return value. Must be at least 2
 *               when `offset` is non-zero.
 * @param data   input bytes.
 * @param dlen   number of input bytes.
 * @param dcap   unused.
 * @param buf    output buffer; needs room for 8 bytes per complete group plus
 *               up to 8 bytes for the tail, and for `outlen` bytes overall.
 * @param blen   unused.
 * @param bcap   unused.
 * @return `outlen`, unchanged.
 */
int base16384_encode(int offset, int outlen, const char* data, int dlen, int dcap, char* buf, int blen, int bcap) {
	const unsigned char *src = (const unsigned char *)data;
	unsigned char *dst = (unsigned char *)buf;
	uint32_t w = 0; // write position in buf, in bytes
	int32_t i = 0;  // read position in data, in bytes

	(void)dcap;
	(void)blen;
	(void)bcap;

	for (; i <= dlen - 7; i += 7) {
		// Bytes 0..3 of the group as one big-endian word.
		uint32_t head = ((uint32_t)src[i] << 24) | ((uint32_t)src[i + 1] << 16)
		              | ((uint32_t)src[i + 2] << 8) | (uint32_t)src[i + 3];
		// Bytes 4..6 in the top 24 bits; the low 8 bits never reach the output,
		// so no eighth byte is read.
		uint32_t rest = ((uint32_t)src[i + 4] << 24) | ((uint32_t)src[i + 5] << 16)
		              | ((uint32_t)src[i + 6] << 8);

		// First two 14-bit slices come from the top 28 bits of head.
		uint32_t sum = ((head >> 2) & 0x3fff0000) | ((head >> 4) & 0x00003fff);
		base16384_enc_put_be32(dst + w, sum + 0x4e004e00);
		w += 4;

		// The low 4 bits of head are joined with the 24 bits of rest to form
		// the remaining two slices.
		uint32_t carry = ((head << 26) & 0x3c000000) | ((rest >> 6) & 0x03fffffc);
		sum = (carry & 0x3fff0000) | ((carry >> 2) & 0x00003fff);
		base16384_enc_put_be32(dst + w, sum + 0x4e004e00);
		w += 4;
	}

	uint8_t rem = (uint8_t)offset;
	if (rem != 0) {
		// The tail word is assembled so that its least significant byte is the
		// first output byte, hence the little-endian store below.
		uint32_t sum = ((uint32_t)src[i] >> 2) & 0x0000003f;
		sum |= ((uint32_t)src[i] << 14) & 0x0000c000;
		if (rem >= 2) {
			sum |= ((uint32_t)src[i + 1] << 6) & 0x00003f00;
			sum |= ((uint32_t)src[i + 1] << 20) & 0x00300000;
		}
		if (rem >= 3) {
			sum |= ((uint32_t)src[i + 2] << 12) & 0x000f0000;
			sum |= ((uint32_t)src[i + 2] << 28) & 0xf0000000;
		}
		if (rem >= 4) {
			sum |= ((uint32_t)src[i + 3] << 20) & 0x0f000000;
			base16384_enc_put_le32(dst + w, sum + 0x004e004e);
			w += 4;
			// Start a second tail word with the low 4 bits of the fourth byte.
			sum = ((uint32_t)src[i + 3] << 2) & 0x0000003c;
			if (rem >= 5) {
				sum |= ((uint32_t)src[i + 4] >> 6) & 0x00000003;
				sum |= ((uint32_t)src[i + 4] << 10) & 0x0000fc00;
			}
			if (rem >= 6) {
				sum |= ((uint32_t)src[i + 5] << 2) & 0x00000300;
				sum |= ((uint32_t)src[i + 5] << 16) & 0x003f0000;
			}
		}
		// The last tail word is always stored as 4 bytes; the trailer written
		// next may overwrite its unused upper bytes.
		base16384_enc_put_le32(dst + w, sum + 0x004e004e);
		buf[outlen - 2] = '=';
		buf[outlen - 1] = offset;
	}
	return outlen;
}
/* Load 4 bytes at p as a 32-bit value, most significant byte first. */
static uint32_t base16384_dec_get_be32(const unsigned char *p) {
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16)
	     | ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

/**
 * Decode base16384 text in `data` into raw bytes in `buf`.
 *
 * For every complete 7-byte output group, 8 input bytes are consumed: each
 * pair of bytes (high byte first) has 0x4e00 subtracted and contributes 14
 * bits. When `offset` is non-zero, that many additional bytes are decoded
 * from the input that follows the last complete group.
 *
 * All memory access is done byte by byte: `data` and `buf` need no particular
 * alignment, the result does not depend on host byte order, exactly 7 bytes
 * are stored per complete group (nothing is written at buf[outlen]), and the
 * tail reads only the input bytes that carry data for the requested `offset`.
 *
 * @param offset number of output bytes to produce after the last complete
 *               group (0..6). Any other value decodes six tail bytes, which
 *               matches how non-zero out-of-range values were always handled.
 * @param outlen number of decoded bytes; only its complete 7-byte groups are
 *               handled by the main loop (presumably offset == outlen % 7 -
 *               confirm against the caller).
 * @param data   encoded input; 8 bytes per complete group followed by the
 *               encoded tail. The trailing '=' marker bytes are never read.
 * @param dlen   unused.
 * @param dcap   unused.
 * @param buf    output buffer.
 * @param blen   unused.
 * @param bcap   unused.
 */
void base16384_decode(int offset, int outlen, const char* data, int dlen, int dcap, char* buf, int blen, int bcap) {
	const unsigned char *src = (const unsigned char *)data;
	unsigned char *dst = (unsigned char *)buf;
	uint32_t r = 0; // read position in data, in bytes
	int32_t i = 0;  // write position in buf, in bytes

	(void)dlen;
	(void)dcap;
	(void)blen;
	(void)bcap;

	for (; i <= outlen - 7; i += 7) { // consumes 8 input bytes per iteration
		uint32_t shift = base16384_dec_get_be32(src + r) - 0x4e004e00;
		uint32_t sum = 0;
		r += 4;
		shift <<= 2;
		sum |= shift & 0xfffc0000;
		shift <<= 2;
		sum |= shift & 0x0003fff0;
		shift = base16384_dec_get_be32(src + r) - 0x4e004e00;
		r += 4;
		sum |= shift >> 26;
		dst[i] = (unsigned char)(sum >> 24);
		dst[i + 1] = (unsigned char)(sum >> 16);
		dst[i + 2] = (unsigned char)(sum >> 8);
		dst[i + 3] = (unsigned char)sum;
		sum = 0;
		shift <<= 6;
		sum |= shift & 0xffc00000;
		shift <<= 2;
		sum |= shift & 0x003fff00;
		// Only three bytes remain in this group; the low byte of sum is zero
		// and is deliberately not stored.
		dst[i + 4] = (unsigned char)(sum >> 24);
		dst[i + 5] = (unsigned char)(sum >> 16);
		dst[i + 6] = (unsigned char)(sum >> 8);
	}

	int rem = (offset < 0 || offset > 6) ? 6 : offset;
	if (rem == 0) {
		return;
	}

	// First tail word, assembled with the first input byte as the least
	// significant byte. Bytes 2 and 3 only matter from the second tail byte on.
	uint32_t sum = (uint32_t)src[r] | ((uint32_t)src[r + 1] << 8);
	if (rem >= 2) {
		sum |= ((uint32_t)src[r + 2] << 16) | ((uint32_t)src[r + 3] << 24);
	}
	r += 4;
	sum -= 0x0000004e;
	dst[i++] = (unsigned char)(((sum & 0x0000003f) << 2) | ((sum & 0x0000c000) >> 14));
	if (rem == 1) {
		return;
	}
	sum -= 0x004e0000;
	dst[i++] = (unsigned char)(((sum & 0x00003f00) >> 6) | ((sum & 0x00300000) >> 20));
	if (rem == 2) {
		return;
	}
	dst[i++] = (unsigned char)(((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28));
	if (rem == 3) {
		return;
	}
	dst[i] = (unsigned char)((sum & 0x0f000000) >> 20);

	// Second tail word: two bytes are enough unless a sixth tail byte is
	// requested, which also needs the third input byte.
	sum = (uint32_t)src[r] | ((uint32_t)src[r + 1] << 8);
	if (rem >= 6) {
		sum |= (uint32_t)src[r + 2] << 16;
	}
	sum -= 0x0000004e;
	dst[i++] |= (unsigned char)((sum & 0x0000003c) >> 2);
	if (rem == 4) {
		return;
	}
	dst[i++] = (unsigned char)(((sum & 0x00000003) << 6) | ((sum & 0x0000fc00) >> 10));
	if (rem == 5) {
		return;
	}
	sum -= 0x004e0000;
	dst[i] = (unsigned char)(((sum & 0x00000300) >> 2) | ((sum & 0x003f0000) >> 16));
	return;
}