1
0
mirror of https://github.com/fumiama/go-base16384.git synced 2026-06-05 00:32:52 +08:00

finish arm64 decode

name            old time/op    new time/op     delta
EncodeTo/16-8     10.6ns ± 0%     10.4ns ± 0%    -2.07%  (p=0.008 n=5+5)
EncodeTo/256-8    80.8ns ± 0%     55.7ns ± 0%   -31.11%  (p=0.008 n=5+5)
EncodeTo/4K-8     1.21µs ± 0%     0.82µs ± 0%   -32.67%  (p=0.016 n=4+5)
EncodeTo/32K-8    9.64µs ± 0%     6.47µs ± 0%   -32.90%  (p=0.008 n=5+5)
DecodeTo/16-8     9.79ns ± 0%    26.02ns ± 0%  +165.85%  (p=0.016 n=4+5)
DecodeTo/256-8    80.9ns ± 0%    111.6ns ± 0%   +37.98%  (p=0.008 n=5+5)
DecodeTo/4K-8     1.22µs ± 0%     1.17µs ± 0%    -3.73%  (p=0.008 n=5+5)
DecodeTo/32K-8    9.71µs ± 1%     8.80µs ± 1%    -9.37%  (p=0.008 n=5+5)
Encoder/16-8      76.5ns ± 0%     76.5ns ± 0%      ~     (p=0.810 n=5+5)
Encoder/256-8      356ns ± 0%      291ns ± 0%   -18.22%  (p=0.008 n=5+5)
Encoder/4K-8      4.05µs ± 0%     3.70µs ± 0%    -8.76%  (p=0.008 n=5+5)
Encoder/32K-8     34.1µs ± 0%     29.2µs ± 0%   -14.30%  (p=0.008 n=5+5)
Decoder/16-8       205ns ± 0%      207ns ± 0%    +1.08%  (p=0.008 n=5+5)
Decoder/256-8      262ns ± 0%      244ns ± 1%    -6.94%  (p=0.008 n=5+5)
Decoder/4K-8      1.49µs ± 0%     1.12µs ± 0%   -24.87%  (p=0.008 n=5+5)
Decoder/32K-8     11.0µs ± 0%      8.0µs ± 0%   -27.00%  (p=0.008 n=5+5)

name            old speed      new speed       delta
EncodeTo/16-8   1.50GB/s ± 0%   1.54GB/s ± 0%    +2.11%  (p=0.008 n=5+5)
EncodeTo/256-8  3.17GB/s ± 0%   4.60GB/s ± 0%   +45.15%  (p=0.008 n=5+5)
EncodeTo/4K-8   3.37GB/s ± 0%   5.01GB/s ± 0%   +48.51%  (p=0.008 n=5+5)
EncodeTo/32K-8  3.40GB/s ± 0%   5.06GB/s ± 0%   +49.02%  (p=0.008 n=5+5)
DecodeTo/16-8   2.25GB/s ± 0%   0.85GB/s ± 0%   -62.39%  (p=0.016 n=4+5)
DecodeTo/256-8  3.66GB/s ± 0%   2.65GB/s ± 0%   -27.54%  (p=0.008 n=5+5)
DecodeTo/4K-8   3.84GB/s ± 0%   3.99GB/s ± 0%    +3.87%  (p=0.008 n=5+5)
DecodeTo/32K-8  3.86GB/s ± 1%   4.26GB/s ± 1%   +10.33%  (p=0.008 n=5+5)
Encoder/16-8     209MB/s ± 0%    209MB/s ± 0%      ~     (p=0.802 n=5+5)
Encoder/256-8    720MB/s ± 0%    880MB/s ± 0%   +22.28%  (p=0.008 n=5+5)
Encoder/4K-8    1.01GB/s ± 0%   1.11GB/s ± 0%    +9.60%  (p=0.008 n=5+5)
Encoder/32K-8    962MB/s ± 0%   1122MB/s ± 0%   +16.69%  (p=0.008 n=5+5)
Decoder/16-8    78.1MB/s ± 0%   77.3MB/s ± 0%    -1.08%  (p=0.008 n=5+5)
Decoder/256-8    977MB/s ± 0%   1050MB/s ± 1%    +7.47%  (p=0.008 n=5+5)
Decoder/4K-8    2.76GB/s ± 0%   3.67GB/s ± 0%   +33.10%  (p=0.008 n=5+5)
Decoder/32K-8   2.98GB/s ± 0%   4.08GB/s ± 0%   +36.98%  (p=0.008 n=5+5)

name            old alloc/op   new alloc/op    delta
EncodeTo/16-8      0.00B           0.00B           ~     (all equal)
EncodeTo/256-8     0.00B           0.00B           ~     (all equal)
EncodeTo/4K-8      0.00B           0.00B           ~     (all equal)
EncodeTo/32K-8     0.00B           0.00B           ~     (all equal)
DecodeTo/16-8      0.00B          48.00B ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/256-8     0.00B         576.00B ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/4K-8      0.00B        6144.00B ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/32K-8     0.00B       49152.00B ± 0%     +Inf%  (p=0.008 n=5+5)
Encoder/16-8       24.0B ± 0%      24.0B ± 0%      ~     (all equal)
Encoder/256-8       472B ± 0%        24B ± 0%   -94.92%  (p=0.008 n=5+5)
Encoder/4K-8       24.0B ± 0%      24.0B ± 0%      ~     (all equal)
Encoder/32K-8     41.0kB ± 0%      0.0kB ± 0%   -99.94%  (p=0.008 n=5+5)
Decoder/16-8      1.39kB ± 0%     1.39kB ± 0%      ~     (all equal)
Decoder/256-8     1.39kB ± 0%     1.39kB ± 0%      ~     (all equal)
Decoder/4K-8      4.98kB ± 0%     4.98kB ± 0%      ~     (all equal)
Decoder/32K-8     41.1kB ± 0%     41.1kB ± 0%      ~     (all equal)

name            old allocs/op  new allocs/op   delta
EncodeTo/16-8       0.00            0.00           ~     (all equal)
EncodeTo/256-8      0.00            0.00           ~     (all equal)
EncodeTo/4K-8       0.00            0.00           ~     (all equal)
EncodeTo/32K-8      0.00            0.00           ~     (all equal)
DecodeTo/16-8       0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/256-8      0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/4K-8       0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
DecodeTo/32K-8      0.00            1.00 ± 0%     +Inf%  (p=0.008 n=5+5)
Encoder/16-8        1.00 ± 0%       1.00 ± 0%      ~     (all equal)
Encoder/256-8       2.00 ± 0%       1.00 ± 0%   -50.00%  (p=0.008 n=5+5)
Encoder/4K-8        1.00 ± 0%       1.00 ± 0%      ~     (all equal)
Encoder/32K-8       2.00 ± 0%       1.00 ± 0%   -50.00%  (p=0.008 n=5+5)
Decoder/16-8        3.00 ± 0%       3.00 ± 0%      ~     (all equal)
Decoder/256-8       3.00 ± 0%       3.00 ± 0%      ~     (all equal)
Decoder/4K-8        3.00 ± 0%       3.00 ± 0%      ~     (all equal)
Decoder/32K-8       3.00 ± 0%       3.00 ± 0%      ~     (all equal)
This commit is contained in:
源文雨
2022-12-14 01:14:03 +08:00
parent 5e0f486237
commit 369cf02def
4 changed files with 166 additions and 13 deletions

View File

@@ -25,7 +25,6 @@ func encode(offset, outlen int, b, encd []byte) {
return
}
n := valn - (uintptr)(*(*unsafe.Pointer)(unsafe.Pointer(&encd)))
println(sum, n)
var tmp [4]byte
binary.LittleEndian.PutUint32(tmp[:], uint32(sum))
copy(encd[n:], tmp[:])
@@ -34,12 +33,8 @@ func encode(offset, outlen int, b, encd []byte) {
}
// decode writes the decoded form of b into decd. offset and outlen are
// passed through unchanged to the underlying decoders.
//
// NOTE(review): this span comes from a diff view that shows removed and added
// lines together. The commented-out block and the decodeGeneric call look
// like the pre-change side, and the padding + _decode call like the
// post-change side — confirm which lines are live in the real file.
func decode(offset, outlen int, b, decd []byte) {
/*
if offset != 0 && cap(b) == len(b) {
b = append(b, make([]byte, 8)...)
}
_decode(offset, outlen, b, decd)
*/
decodeGeneric(offset, outlen, b, decd)
// When there is a tail (offset != 0) and b has no spare capacity, append 8
// zero bytes. Because cap(b) == len(b), this append reallocates and copies
// all of b, which is consistent with the one allocation per DecodeTo call
// shown in the commit's benchmark table.
// NOTE(review): presumably the padding keeps _decode's whole-word tail loads
// inside allocated memory — confirm that a cap(b) only slightly larger than
// len(b) is also safe.
if offset != 0 && cap(b) == len(b) {
b = append(b, make([]byte, 8)...)
}
_decode(offset, outlen, b, decd)
}

View File

@@ -9,7 +9,7 @@ TEXT ·_encode(SB), NOSPLIT, $0-81
MOVD ·data+16(FP), R9
MOVD ·dlen+24(FP), R3
MOVD ·encd+40(FP), R5
SUBW $6, R3, R3
CMPW $0, R3
BLE enctil
@@ -87,7 +87,7 @@ encrem:
ANDW $0xf000000, R4, R4
ORRW R3, R4, R3
ADDW $0x4e0000, R3, R3
ADDW $78, R3, R3
ADDW $0x4e, R3, R3
MOVW R3, (R5)(R8<<2)
MOVBU (R2)(R10), R3
UBFIZW $2, R3, $4, R3
@@ -120,4 +120,105 @@ enctil:
// _decode is the ARM64 implementation of the decoder.
//
// Register use on entry:
//   R0 = offset  (number of tail bytes handled after the main loop)
//   R1 = outlen
//   R2 = pointer to the encoded input
//   R5 = pointer to the output buffer
//
// The main loop (declop) consumes two 32-bit input words and stores two
// 32-bit output words per iteration while advancing the output cursor by 7,
// so each iteration writes 8 bytes. The tail (decrem) emits up to six more
// bytes one at a time and loads whole 32-bit words from the input.
// NOTE(review): the instruction sequence appears to be a scheduled
// translation of a C routine; the interleaved CMPW instructions rely on the
// following ADDW/ORRW/etc. not modifying flags.
// NOTE(review): the frame references use the name "data" while the Go
// declaration below names the slice "b"; go vet's asmdecl check may flag this.
//
// func _decode(offset, outlen int, b, decd []byte)
TEXT ·_decode(SB), NOSPLIT, $0-64
MOVD ·offset+0(FP), R0
MOVD ·outlen+8(FP), R1
MOVD ·data+16(FP), R2
MOVD ·decd+40(FP), R5
// R1 = outlen - 6; when that is <= 0 there is no full 7-byte group.
SUBW $6, R1, R1
CMPW $0, R1
BLE dectil
// R11 = 0xb1ffb200 (low half here, high half via MOVK below), which is
// -0x4e004e00 modulo 2^32: adding it subtracts 0x4e00 from both 16-bit halves.
MOVW $0xb200, R11
// R9 = output cursor.
MOVD R5, R9
// R14 = input-8 and R13 = input-4, indexed by R8<<2 with R8 starting at 2,
// so the first iteration loads input words 0 and 1.
SUB $8, R2, R14
SUB $4, R2, R13
MOVD $2, R8
// R10 = number of output bytes produced so far.
MOVW $0, R10
MOVK $(0xb1ff<<16), R11
declop:
MOVW (R14)(R8<<2), R4
ADDW $7, R10, R10
MOVW (R13)(R8<<2), R3
// R12 = index (relative to R2) of the first input word not yet consumed
// once this iteration finishes; decrem uses it.
MOVW R8, R12
// Byte-swap each loaded word before the arithmetic.
REVW R4, R4
// Sets the flags tested by BLT at the bottom of the loop.
CMPW R1, R10
ADDW R11, R4, R4
REVW R3, R3
ADDW R11, R3, R3
ADD $2, R8, R8
// Pack the bit fields of the two adjusted words into two output words
// (R3 and R4 after the ORRs).
LSLW $2, R4, R7
UBFIZW $4, R4, $14, R4
LSLW $6, R3, R6
ANDW $-262144, R7, R7
ORRW R4, R7, R7
ANDW $-4194304, R6, R4
UBFIZW $8, R3, $14, R6
ORRW R3>>26, R7, R3
ORRW R6, R4, R4
// Swap back to memory byte order and store both words (8 bytes) at R9.
REVW R3, R3
REVW R4, R4
STPW (R3, R4), (R9)
// Advance the output cursor by 7, not 8: the next store overlaps the last byte.
ADD $7, R9, R9
// Continue while R10 < R1, i.e. another full group remains.
BLT declop
decrem:
// No tail bytes requested: done.
CBZW R0, decret
// Load the next 32-bit input word (index R12).
MOVW (R2)(R12.UXTW<<2), R1
CMPW $1, R0
// Tail byte 0, stored at output index R10.
SUBW $0x4e, R1, R3
UBFX $14, R3, $2, R4
ORRW R3<<2, R4, R3
MOVB R3, (R5)(R10.SXTW)
BEQ decret
// R7 = 0xffb1ffb2, which is -0x004e004e modulo 2^32.
MOVW $0xffb2, R7
ADDW $1, R10, R4
MOVK $(0xffb1<<16), R7
ADDW R7, R1, R1
CMPW $2, R0
// Tail byte 1, stored at output index R10+1.
UBFX $20, R1, $8, R6
LSRW $6, R1, R3
ANDW $3, R6, R8
ANDW $-4, R3, R3
ORRW R8, R3, R3
MOVB R3, (R5)(R4.SXTW)
BEQ decret
// Tail byte 2, stored at output index R10+2.
ADDW $2, R10, R3
LSRW $12, R1, R4
ANDW $-16, R4, R4
CMPW $3, R0
ORRW R1>>28, R4, R1
MOVB R1, (R5)(R3.SXTW)
BEQ decret
// Tail byte 3 combines bits kept in R6 from the first word with bits from
// the next input word (index R12+1), stored at output index R10+3.
ADDW $3, R10, R1
ADDW $1, R12, R12
ANDW $0xf0, R6, R6
CMPW $4, R0
MOVW (R2)(R12<<2), R3
// R2 (the input pointer) is overwritten here; it is not read again.
SUBW $0x4e, R3, R2
UBFX $2, R2, $4, R4
ORRW R6, R4, R4
MOVB R4, (R5)(R1.SXTW)
BEQ decret
// Tail byte 4, stored at output index R10+4.
ADDW $4, R10, R1
UBFX $10, R2, $6, R4
ORRW R2<<6, R4, R2
CMPW $5, R0
MOVB R2, (R5)(R1.SXTW)
BEQ decret
// Tail byte 5, stored at output index R10+5.
ADDW R7, R3, R3
ADDW $5, R10, R10
LSRW $2, R3, R0
UBFX $16, R3, $6, R3
ANDW $-64, R0, R0
ORRW R3, R0, R3
MOVB R3, (R5)(R10.SXTW)
decret:
RET
dectil:
// No full group: start the tail at output index 0 and input word 0.
MOVW $0, R10
MOVW $0, R12
JMP decrem

View File

@@ -24,7 +24,9 @@ func TestBase14(t *testing.T) {
for i := 1; i < 4096; i++ {
rand.Read(buf[:i])
out := Decode(Encode(buf[:i]))
assert.Equal(t, hex.EncodeToString(buf[:i]), hex.EncodeToString(out))
if !assert.Equal(t, hex.EncodeToString(buf[:i]), hex.EncodeToString(out)) {
t.Fatal()
}
}
}

View File

@@ -104,3 +104,58 @@ int base16384_encode(int offset, int outlen, const char* data, int dlen, int dca
}
return outlen;
}
void base16384_decode(int offset, int outlen, const char* data, int dlen, int dcap, char* buf, int blen, int bcap) {
uint32_t* vals = (uint32_t*)data;
uint32_t n = 0;
int32_t i = 0;
for(; i <= outlen - 7; i+=7) { // n实际每次自增2
register uint32_t sum = 0;
register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00;
shift <<= 2;
sum |= shift & 0xfffc0000;
shift <<= 2;
sum |= shift & 0x0003fff0;
shift = htobe32(vals[n++]) - 0x4e004e00;
sum |= shift >> 26;
*(uint32_t*)(buf+i) = be32toh(sum);
sum = 0;
shift <<= 6;
sum |= shift & 0xffc00000;
shift <<= 2;
sum |= shift & 0x003fff00;
*(uint32_t*)(buf+i+4) = be32toh(sum);
}
if(offset--) {
// 这里有读取越界
#ifdef WORDS_BIGENDIAN
register uint32_t sum = __builtin_bswap32(vals[n++]);
#else
register uint32_t sum = vals[n++];
#endif
sum -= 0x0000004e;
buf[i++] = ((sum & 0x0000003f) << 2) | ((sum & 0x0000c000) >> 14);
if(offset--) {
sum -= 0x004e0000;
buf[i++] = ((sum & 0x00003f00) >> 6) | ((sum & 0x00300000) >> 20);
if(offset--) {
buf[i++] = ((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28);
if(offset--) {
buf[i] = (sum & 0x0f000000) >> 20;
// 这里有读取越界
sum = vals[n];
sum -= 0x0000004e;
buf[i++] |= (sum & 0x0000003c) >> 2;
if(offset--) {
buf[i++] = ((sum & 0x00000003) << 6) | ((sum & 0x0000fc00) >> 10);
if(offset--) {
sum -= 0x004e0000;
buf[i] = ((sum & 0x00000300) >> 2) | ((sum & 0x003f0000) >> 16);
}
}
}
}
}
}
return;
}