1
0
mirror of https://github.com/fumiama/go-base16384.git synced 2026-06-08 20:10:32 +08:00

finish arm64 decode

name            old time/op    new time/op     delta
EncodeTo/16-8     10.6ns ± 0%     10.4ns ± 0%    -2.07%  (p=0.008 n=5+5)
EncodeTo/256-8    80.8ns ± 0%     55.7ns ± 0%   -31.11%  (p=0.008 n=5+5)
EncodeTo/4K-8     1.21µs ± 0%     0.82µs ± 0%   -32.67%  (p=0.016 n=4+5)
EncodeTo/32K-8    9.64µs ± 0%     6.47µs ± 0%   -32.90%  (p=0.008 n=5+5)
DecodeTo/16-8     9.79ns ± 0%    26.02ns ± 0%  +165.85%  (p=0.016 n=4+5)
DecodeTo/256-8    80.9ns ± 0%    111.6ns ± 0%   +37.98%  (p=0.008 n=5+5)
DecodeTo/4K-8     1.22µs ± 0%     1.17µs ± 0%    -3.73%  (p=0.008 n=5+5)
DecodeTo/32K-8    9.71µs ± 1%     8.80µs ± 1%    -9.37%  (p=0.008 n=5+5)
Encoder/16-8      76.5ns ± 0%     76.5ns ± 0%      ~     (p=0.810 n=5+5)
Encoder/256-8      356ns ± 0%      291ns ± 0%   -18.22%  (p=0.008 n=5+5)
Encoder/4K-8      4.05µs ± 0%     3.70µs ± 0%    -8.76%  (p=0.008 n=5+5)
Encoder/32K-8     34.1µs ± 0%     29.2µs ± 0%   -14.30%  (p=0.008 n=5+5)
Decoder/16-8       205ns ± 0%      207ns ± 0%    +1.08%  (p=0.008 n=5+5)
Decoder/256-8      262ns ± 0%      244ns ± 1%    -6.94%  (p=0.008 n=5+5)
Decoder/4K-8      1.49µs ± 0%     1.12µs ± 0%   -24.87%  (p=0.008 n=5+5)
Decoder/32K-8     11.0µs ± 0%      8.0µs ± 0%   -27.00%  (p=0.008 n=5+5)

name            old speed      new speed       delta
EncodeTo/16-8   1.50GB/s ± 0%   1.54GB/s ± 0%    +2.11%  (p=0.008 n=5+5)
EncodeTo/256-8  3.17GB/s ± 0%   4.60GB/s ± 0%   +45.15%  (p=0.008 n=5+5)
EncodeTo/4K-8   3.37GB/s ± 0%   5.01GB/s ± 0%   +48.51%  (p=0.008 n=5+5)
EncodeTo/32K-8  3.40GB/s ± 0%   5.06GB/s ± 0%   +49.02%  (p=0.008 n=5+5)
DecodeTo/16-8   2.25GB/s ± 0%   0.85GB/s ± 0%   -62.39%  (p=0.016 n=4+5)
DecodeTo/256-8  3.66GB/s ± 0%   2.65GB/s ± 0%   -27.54%  (p=0.008 n=5+5)
DecodeTo/4K-8   3.84GB/s ± 0%   3.99GB/s ± 0%    +3.87%  (p=0.008 n=5+5)
DecodeTo/32K-8  3.86GB/s ± 1%   4.26GB/s ± 1%   +10.33%  (p=0.008 n=5+5)
Encoder/16-8     209MB/s ± 0%    209MB/s ± 0%      ~     (p=0.802 n=5+5)
Encoder/256-8    720MB/s ± 0%    880MB/s ± 0%   +22.28%  (p=0.008 n=5+5)
Encoder/4K-8    1.01GB/s ± 0%   1.11GB/s ± 0%    +9.60%  (p=0.008 n=5+5)
Encoder/32K-8    962MB/s ± 0%   1122MB/s ± 0%   +16.69%  (p=0.008 n=5+5)
Decoder/16-8    78.1MB/s ± 0%   77.3MB/s ± 0%    -1.08%  (p=0.008 n=5+5)
Decoder/256-8    977MB/s ± 0%   1050MB/s ± 1%    +7.47%  (p=0.008 n=5+5)
Decoder/4K-8    2.76GB/s ± 0%   3.67GB/s ± 0%   +33.10%  (p=0.008 n=5+5)
Decoder/32K-8   2.98GB/s ± 0%   4.08GB/s ± 0%   +36.98%  (p=0.008 n=5+5)

name            old alloc/op   new alloc/op    delta
EncodeTo/16-8      0.00B           0.00B           ~     (all equal)
EncodeTo/256-8     0.00B           0.00B           ~     (all equal)
EncodeTo/4K-8      0.00B           0.00B           ~     (all equal)
EncodeTo/32K-8     0.00B           0.00B           ~     (all equal)
DecodeTo/16-8      0.00B          48.00B ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/256-8     0.00B         576.00B ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/4K-8      0.00B        6144.00B ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/32K-8     0.00B       49152.00B ± 0%     +Inf%  (p=0.008 n=5+5)
Encoder/16-8       24.0B ± 0%      24.0B ± 0%      ~     (all equal)
Encoder/256-8       472B ± 0%        24B ± 0%   -94.92%  (p=0.008 n=5+5)
Encoder/4K-8       24.0B ± 0%      24.0B ± 0%      ~     (all equal)
Encoder/32K-8     41.0kB ± 0%      0.0kB ± 0%   -99.94%  (p=0.008 n=5+5)
Decoder/16-8      1.39kB ± 0%     1.39kB ± 0%      ~     (all equal)
Decoder/256-8     1.39kB ± 0%     1.39kB ± 0%      ~     (all equal)
Decoder/4K-8      4.98kB ± 0%     4.98kB ± 0%      ~     (all equal)
Decoder/32K-8     41.1kB ± 0%     41.1kB ± 0%      ~     (all equal)

name            old allocs/op  new allocs/op   delta
EncodeTo/16-8       0.00            0.00           ~     (all equal)
EncodeTo/256-8      0.00            0.00           ~     (all equal)
EncodeTo/4K-8       0.00            0.00           ~     (all equal)
EncodeTo/32K-8      0.00            0.00           ~     (all equal)
DecodeTo/16-8       0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/256-8      0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/4K-8       0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/32K-8      0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
Encoder/16-8        1.00 ± 0%       1.00 ± 0%      ~     (all equal)
Encoder/256-8       2.00 ± 0%       1.00 ± 0%   -50.00%  (p=0.008 n=5+5)
Encoder/4K-8        1.00 ± 0%       1.00 ± 0%      ~     (all equal)
Encoder/32K-8       2.00 ± 0%       1.00 ± 0%   -50.00%  (p=0.008 n=5+5)
Decoder/16-8        3.00 ± 0%       3.00 ± 0%      ~     (all equal)
Decoder/256-8       3.00 ± 0%       3.00 ± 0%      ~     (all equal)
Decoder/4K-8        3.00 ± 0%       3.00 ± 0%      ~     (all equal)
Decoder/32K-8       3.00 ± 0%       3.00 ± 0%      ~     (all equal)
This commit is contained in:
源文雨
2022-12-14 01:14:03 +08:00
parent 5e0f486237
commit 369cf02def
4 changed files with 166 additions and 13 deletions

View File

@@ -104,3 +104,58 @@ int base16384_encode(int offset, int outlen, const char* data, int dlen, int dca
}
return outlen;
}
/* Read four bytes at p as a big-endian 32-bit value, independent of host
 * byte order and of the alignment of p. */
static uint32_t base16384_dec_load_be32(const unsigned char *p) {
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

/* Read the first `count` (0..4) bytes at p as a little-endian value.
 * Bytes beyond `count` are treated as zero and are never read from memory. */
static uint32_t base16384_dec_load_le_prefix(const unsigned char *p, int count) {
	uint32_t v = 0;
	for (int k = 0; k < count; k++) {
		v |= (uint32_t)p[k] << (8 * k);
	}
	return v;
}

/*
 * Decode base16384 data from `data` into `buf`.
 *
 * offset: number of trailing output bytes to decode after the whole 7-byte
 *         groups. 0 decodes no tail; 1..5 decode that many bytes; any other
 *         non-zero value decodes six bytes (same as the previous
 *         implementation's nested `if(offset--)` chain).
 * outlen: total decoded length; floor(outlen / 7) full groups are decoded,
 *         each consuming 8 input bytes and producing 7 output bytes.
 * data:   encoded input; every 16-bit big-endian unit carries 14 payload
 *         bits biased by 0x4e00.
 * buf:    output buffer; exactly 7 bytes per full group plus the tail bytes
 *         are written, nothing beyond that.
 * dlen, dcap, blen, bcap: unused here (presumably slice length/capacity
 *         values supplied by the caller -- confirm against the call site).
 *
 * All memory accesses are byte-wise, so the function does not depend on host
 * endianness or pointer alignment, and it only reads the input bytes that
 * actually contribute to the requested output.
 */
void base16384_decode(int offset, int outlen, const char* data, int dlen, int dcap, char* buf, int blen, int bcap) {
	const unsigned char *src = (const unsigned char *)data;
	unsigned char *dst = (unsigned char *)buf;
	int32_t i = 0;

	(void)dlen;
	(void)dcap;
	(void)blen;
	(void)bcap;

	/* Full groups: 4 x 14 payload bits (8 input bytes) -> 56 bits (7 output bytes). */
	for (; i <= outlen - 7; i += 7, src += 8) {
		uint32_t hi = base16384_dec_load_be32(src) - 0x4e004e00u;
		uint32_t lo = base16384_dec_load_be32(src + 4) - 0x4e004e00u;
		/* First 32 output bits: unit 0, unit 1 and the top 4 bits of unit 2. */
		uint32_t head = ((hi << 2) & 0xfffc0000u) | ((hi << 4) & 0x0003fff0u) | (lo >> 26);
		/* Remaining 24 output bits: low 10 bits of unit 2 and unit 3. */
		uint32_t rest = ((lo << 6) & 0xffc00000u) | ((lo << 8) & 0x003fff00u);

		dst[i]     = (unsigned char)(head >> 24);
		dst[i + 1] = (unsigned char)(head >> 16);
		dst[i + 2] = (unsigned char)(head >> 8);
		dst[i + 3] = (unsigned char)head;
		/* Only three bytes are stored here; the low byte of `rest` is always
		 * zero and storing it would write one byte past the decoded data. */
		dst[i + 4] = (unsigned char)(rest >> 24);
		dst[i + 5] = (unsigned char)(rest >> 16);
		dst[i + 6] = (unsigned char)(rest >> 8);
	}

	/* Tail: decode `tail` more bytes from a partial group. */
	int tail = (offset < 0 || offset > 6) ? 6 : offset;
	if (tail == 0) {
		return;
	}

	/* Output byte k of the tail depends on input bytes 0..k only (k <= 3), so
	 * load just those; the subtraction's borrow only travels upwards and
	 * cannot change the bits that are used. */
	uint32_t w = base16384_dec_load_le_prefix(src, tail < 3 ? tail + 1 : 4);
	w -= 0x0000004eu;
	dst[i++] = (unsigned char)(((w & 0x0000003fu) << 2) | ((w & 0x0000c000u) >> 14));
	if (tail == 1) {
		return;
	}

	w -= 0x004e0000u;
	dst[i++] = (unsigned char)(((w & 0x00003f00u) >> 6) | ((w & 0x00300000u) >> 20));
	if (tail == 2) {
		return;
	}

	dst[i++] = (unsigned char)(((w & 0x000f0000u) >> 12) | ((w & 0xf0000000u) >> 28));
	if (tail == 3) {
		return;
	}

	/* The fourth byte straddles both input words: keep the high nibble from
	 * the first word, then load only the needed bytes of the second word. */
	unsigned char high_nibble = (unsigned char)((w & 0x0f000000u) >> 20);
	w = base16384_dec_load_le_prefix(src + 4, tail - 3);
	w -= 0x0000004eu;
	dst[i++] = (unsigned char)(high_nibble | ((w & 0x0000003cu) >> 2));
	if (tail == 4) {
		return;
	}

	dst[i++] = (unsigned char)(((w & 0x00000003u) << 6) | ((w & 0x0000fc00u) >> 10));
	if (tail == 5) {
		return;
	}

	w -= 0x004e0000u;
	dst[i] = (unsigned char)(((w & 0x00000300u) >> 2) | ((w & 0x003f0000u) >> 16));
	return;
}