mirror of
https://github.com/fumiama/go-base16384.git
synced 2026-06-05 00:32:52 +08:00
finish arm64 decode
name old time/op new time/op delta EncodeTo/16-8 10.6ns ± 0% 10.4ns ± 0% -2.07% (p=0.008 n=5+5) EncodeTo/256-8 80.8ns ± 0% 55.7ns ± 0% -31.11% (p=0.008 n=5+5) EncodeTo/4K-8 1.21µs ± 0% 0.82µs ± 0% -32.67% (p=0.016 n=4+5) EncodeTo/32K-8 9.64µs ± 0% 6.47µs ± 0% -32.90% (p=0.008 n=5+5) DecodeTo/16-8 9.79ns ± 0% 26.02ns ± 0% +165.85% (p=0.016 n=4+5) DecodeTo/256-8 80.9ns ± 0% 111.6ns ± 0% +37.98% (p=0.008 n=5+5) DecodeTo/4K-8 1.22µs ± 0% 1.17µs ± 0% -3.73% (p=0.008 n=5+5) DecodeTo/32K-8 9.71µs ± 1% 8.80µs ± 1% -9.37% (p=0.008 n=5+5) Encoder/16-8 76.5ns ± 0% 76.5ns ± 0% ~ (p=0.810 n=5+5) Encoder/256-8 356ns ± 0% 291ns ± 0% -18.22% (p=0.008 n=5+5) Encoder/4K-8 4.05µs ± 0% 3.70µs ± 0% -8.76% (p=0.008 n=5+5) Encoder/32K-8 34.1µs ± 0% 29.2µs ± 0% -14.30% (p=0.008 n=5+5) Decoder/16-8 205ns ± 0% 207ns ± 0% +1.08% (p=0.008 n=5+5) Decoder/256-8 262ns ± 0% 244ns ± 1% -6.94% (p=0.008 n=5+5) Decoder/4K-8 1.49µs ± 0% 1.12µs ± 0% -24.87% (p=0.008 n=5+5) Decoder/32K-8 11.0µs ± 0% 8.0µs ± 0% -27.00% (p=0.008 n=5+5) name old speed new speed delta EncodeTo/16-8 1.50GB/s ± 0% 1.54GB/s ± 0% +2.11% (p=0.008 n=5+5) EncodeTo/256-8 3.17GB/s ± 0% 4.60GB/s ± 0% +45.15% (p=0.008 n=5+5) EncodeTo/4K-8 3.37GB/s ± 0% 5.01GB/s ± 0% +48.51% (p=0.008 n=5+5) EncodeTo/32K-8 3.40GB/s ± 0% 5.06GB/s ± 0% +49.02% (p=0.008 n=5+5) DecodeTo/16-8 2.25GB/s ± 0% 0.85GB/s ± 0% -62.39% (p=0.016 n=4+5) DecodeTo/256-8 3.66GB/s ± 0% 2.65GB/s ± 0% -27.54% (p=0.008 n=5+5) DecodeTo/4K-8 3.84GB/s ± 0% 3.99GB/s ± 0% +3.87% (p=0.008 n=5+5) DecodeTo/32K-8 3.86GB/s ± 1% 4.26GB/s ± 1% +10.33% (p=0.008 n=5+5) Encoder/16-8 209MB/s ± 0% 209MB/s ± 0% ~ (p=0.802 n=5+5) Encoder/256-8 720MB/s ± 0% 880MB/s ± 0% +22.28% (p=0.008 n=5+5) Encoder/4K-8 1.01GB/s ± 0% 1.11GB/s ± 0% +9.60% (p=0.008 n=5+5) Encoder/32K-8 962MB/s ± 0% 1122MB/s ± 0% +16.69% (p=0.008 n=5+5) Decoder/16-8 78.1MB/s ± 0% 77.3MB/s ± 0% -1.08% (p=0.008 n=5+5) Decoder/256-8 977MB/s ± 0% 1050MB/s ± 1% +7.47% (p=0.008 n=5+5) Decoder/4K-8 2.76GB/s ± 0% 3.67GB/s ± 0% +33.10% 
(p=0.008 n=5+5) Decoder/32K-8 2.98GB/s ± 0% 4.08GB/s ± 0% +36.98% (p=0.008 n=5+5) name old alloc/op new alloc/op delta EncodeTo/16-8 0.00B 0.00B ~ (all equal) EncodeTo/256-8 0.00B 0.00B ~ (all equal) EncodeTo/4K-8 0.00B 0.00B ~ (all equal) EncodeTo/32K-8 0.00B 0.00B ~ (all equal) DecodeTo/16-8 0.00B 48.00B ± 0% +Inf% (p=0.008 n=5+5) DecodeTo/256-8 0.00B 576.00B ± 0% +Inf% (p=0.008 n=5+5) DecodeTo/4K-8 0.00B 6144.00B ± 0% +Inf% (p=0.008 n=5+5) DecodeTo/32K-8 0.00B 49152.00B ± 0% +Inf% (p=0.008 n=5+5) Encoder/16-8 24.0B ± 0% 24.0B ± 0% ~ (all equal) Encoder/256-8 472B ± 0% 24B ± 0% -94.92% (p=0.008 n=5+5) Encoder/4K-8 24.0B ± 0% 24.0B ± 0% ~ (all equal) Encoder/32K-8 41.0kB ± 0% 0.0kB ± 0% -99.94% (p=0.008 n=5+5) Decoder/16-8 1.39kB ± 0% 1.39kB ± 0% ~ (all equal) Decoder/256-8 1.39kB ± 0% 1.39kB ± 0% ~ (all equal) Decoder/4K-8 4.98kB ± 0% 4.98kB ± 0% ~ (all equal) Decoder/32K-8 41.1kB ± 0% 41.1kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta EncodeTo/16-8 0.00 0.00 ~ (all equal) EncodeTo/256-8 0.00 0.00 ~ (all equal) EncodeTo/4K-8 0.00 0.00 ~ (all equal) EncodeTo/32K-8 0.00 0.00 ~ (all equal) DecodeTo/16-8 0.00 1.00 ± 0% +Inf% (p=0.008 n=5+5) DecodeTo/256-8 0.00 1.00 ± 0% +Inf% (p=0.008 n=5+5) DecodeTo/4K-8 0.00 1.00 ± 0% +Inf% (p=0.008 n=5+5) DecodeTo/32K-8 0.00 1.00 ± 0% +Inf% (p=0.008 n=5+5) Encoder/16-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Encoder/256-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.008 n=5+5) Encoder/4K-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Encoder/32K-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.008 n=5+5) Decoder/16-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) Decoder/256-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) Decoder/4K-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) Decoder/32K-8 3.00 ± 0% 3.00 ± 0% ~ (all equal)
This commit is contained in:
@@ -25,7 +25,6 @@ func encode(offset, outlen int, b, encd []byte) {
|
||||
return
|
||||
}
|
||||
n := valn - (uintptr)(*(*unsafe.Pointer)(unsafe.Pointer(&encd)))
|
||||
println(sum, n)
|
||||
var tmp [4]byte
|
||||
binary.LittleEndian.PutUint32(tmp[:], uint32(sum))
|
||||
copy(encd[n:], tmp[:])
|
||||
@@ -34,12 +33,8 @@ func encode(offset, outlen int, b, encd []byte) {
|
||||
}
|
||||
|
||||
// decode is the arm64 entry point for decoding: it hands offset, outlen, b and
// decd to the assembly routine _decode.
//
// NOTE(review): this span is a rendered diff hunk (header "-34,12 +33,8") shown
// without +/- markers, so lines of the old and the new body appear together.
// Presumably the commented-out block and the decodeGeneric call are the removed
// lines and the second if/_decode sequence is the new body — confirm against
// the repository before relying on this text as code.
func decode(offset, outlen int, b, decd []byte) {
|
||||
/*
|
||||
if offset != 0 && cap(b) == len(b) {
|
||||
b = append(b, make([]byte, 8)...)
|
||||
}
|
||||
_decode(offset, outlen, b, decd)
|
||||
*/
|
||||
|
||||
decodeGeneric(offset, outlen, b, decd)
|
||||
// When there is a tail (offset != 0) and b has no spare capacity, append
// grows b by 8 zero bytes, which reallocates the slice.
// NOTE(review): presumably this padding exists because the tail of _decode
// loads whole 32-bit words past the encoded data — confirm.
if offset != 0 && cap(b) == len(b) {
|
||||
b = append(b, make([]byte, 8)...)
|
||||
}
|
||||
_decode(offset, outlen, b, decd)
|
||||
}
|
||||
|
||||
107
base14_arm64.s
107
base14_arm64.s
@@ -9,7 +9,7 @@ TEXT ·_encode(SB), NOSPLIT, $0-81
|
||||
MOVD ·data+16(FP), R9
|
||||
MOVD ·dlen+24(FP), R3
|
||||
MOVD ·encd+40(FP), R5
|
||||
|
||||
|
||||
SUBW $6, R3, R3
|
||||
CMPW $0, R3
|
||||
BLE enctil
|
||||
@@ -87,7 +87,7 @@ encrem:
|
||||
ANDW $0xf000000, R4, R4
|
||||
ORRW R3, R4, R3
|
||||
ADDW $0x4e0000, R3, R3
|
||||
ADDW $78, R3, R3
|
||||
ADDW $0x4e, R3, R3
|
||||
MOVW R3, (R5)(R8<<2)
|
||||
MOVBU (R2)(R10), R3
|
||||
UBFIZW $2, R3, $4, R3
|
||||
@@ -120,4 +120,105 @@ enctil:
|
||||
|
||||
// func _decode(offset, outlen int, b, decd []byte)
// Layout: a main loop (declop) that turns two 32-bit source words into 7 output
// bytes per pass, followed by a tail (decrem) that emits up to 6 further bytes,
// stopping once the number of tail bytes written equals offset.
// NOTE(review): the signature names the third parameter b, while the body
// refers to it as data+16(FP); presumably go vet would expect b_base+16(FP)
// and decd_base+40(FP) — confirm.
|
||||
TEXT ·_decode(SB), NOSPLIT, $0-64
|
||||
|
||||
// R0 = offset, R1 = outlen, R2 = source pointer (frame slot +16),
// R5 = destination pointer (frame slot +40).
MOVD ·offset+0(FP), R0
|
||||
MOVD ·outlen+8(FP), R1
|
||||
MOVD ·data+16(FP), R2
|
||||
MOVD ·decd+40(FP), R5
|
||||
|
||||
// R1 = outlen - 6. If that is <= 0 there is no complete 7-byte group, so the
// main loop is skipped.
SUBW $6, R1, R1
|
||||
CMPW $0, R1
|
||||
BLE dectil
|
||||
// R11 is assembled by this MOVW plus the MOVK below into 0xb1ffb200, which is
// -0x4e004e00 modulo 2^32: adding it subtracts 0x4e00 from both halfwords.
MOVW $0xb200, R11
|
||||
// R9 = output cursor. R14 = source - 8 and R13 = source - 4, so indexing them
// with R8<<2 (R8 starts at 2 and grows by 2) addresses the current word pair.
// R10 counts the output bytes produced so far.
MOVD R5, R9
|
||||
SUB $8, R2, R14
|
||||
SUB $4, R2, R13
|
||||
MOVD $2, R8
|
||||
MOVW $0, R10
|
||||
MOVK $(0xb1ff<<16), R11
|
||||
declop:
|
||||
MOVW (R14)(R8<<2), R4
|
||||
ADDW $7, R10, R10
|
||||
MOVW (R13)(R8<<2), R3
|
||||
// R12 keeps the pre-increment R8, i.e. the index of the first source word
// that has not been consumed when the loop exits.
MOVW R8, R12
|
||||
// Byte-reverse each loaded word before removing the bias.
REVW R4, R4
|
||||
// Sets the flags (R10 compared with R1) for the BLT at the bottom of the loop;
// none of the instructions in between is a flag-setting form.
CMPW R1, R10
|
||||
ADDW R11, R4, R4
|
||||
REVW R3, R3
|
||||
ADDW R11, R3, R3
|
||||
ADD $2, R8, R8
|
||||
// First output word: (w0<<2 & 0xfffc0000) | ((w0 & 0x3fff) << 4) | (w1 >> 26).
LSLW $2, R4, R7
|
||||
UBFIZW $4, R4, $14, R4
|
||||
LSLW $6, R3, R6
|
||||
ANDW $-262144, R7, R7
|
||||
ORRW R4, R7, R7
|
||||
// Second output word: (w1<<6 & 0xffc00000) | ((w1 & 0x3fff) << 8).
ANDW $-4194304, R6, R4
|
||||
UBFIZW $8, R3, $14, R6
|
||||
ORRW R3>>26, R7, R3
|
||||
ORRW R6, R4, R4
|
||||
// Reverse the bytes again before storing.
REVW R3, R3
|
||||
REVW R4, R4
|
||||
// Stores two adjacent 32-bit words (8 bytes) but advances the cursor by 7, so
// the eighth byte is overwritten by whatever is stored next.
STPW (R3, R4), (R9)
|
||||
ADD $7, R9, R9
|
||||
// Continue while R10 < R1 (signed), using the flags from the CMPW above.
BLT declop
|
||||
decrem:
|
||||
// offset == 0: there is no tail to decode.
CBZW R0, decret
|
||||
// NOTE(review): this is a full 32-bit load at source + R12*4 regardless of
// offset; the C version of this routine marks the same read as out of bounds.
MOVW (R2)(R12.UXTW<<2), R1
|
||||
// Each CMPW $k, R0 below feeds the next BEQ decret, which returns once k tail
// bytes have been written.
CMPW $1, R0
|
||||
// Tail byte 0 = ((word - 0x4e) & 0x3f) << 2 | bits 14..15.
SUBW $0x4e, R1, R3
|
||||
UBFX $14, R3, $2, R4
|
||||
ORRW R3<<2, R4, R3
|
||||
MOVB R3, (R5)(R10.SXTW)
|
||||
BEQ decret
|
||||
|
||||
// R7 = 0xffb1ffb2 (MOVW + MOVK), i.e. -0x004e004e modulo 2^32; R1 becomes the
// raw word with 0x4e removed from bytes 0 and 2.
MOVW $0xffb2, R7
|
||||
ADDW $1, R10, R4
|
||||
MOVK $(0xffb1<<16), R7
|
||||
ADDW R7, R1, R1
|
||||
CMPW $2, R0
|
||||
// R6 = bits 20..27 of R1 (reused for tail byte 3).
// Tail byte 1 = ((R1 >> 6) & 0xfc) | bits 20..21.
UBFX $20, R1, $8, R6
|
||||
LSRW $6, R1, R3
|
||||
ANDW $3, R6, R8
|
||||
ANDW $-4, R3, R3
|
||||
ORRW R8, R3, R3
|
||||
MOVB R3, (R5)(R4.SXTW)
|
||||
BEQ decret
|
||||
|
||||
// Tail byte 2 = ((R1 >> 12) & 0xf0) | (R1 >> 28).
ADDW $2, R10, R3
|
||||
LSRW $12, R1, R4
|
||||
ANDW $-16, R4, R4
|
||||
CMPW $3, R0
|
||||
ORRW R1>>28, R4, R1
|
||||
MOVB R1, (R5)(R3.SXTW)
|
||||
BEQ decret
|
||||
|
||||
// Tail byte 3 combines bits 24..27 of the first word (kept in R6 as 0xf0) with
// bits 2..5 of the next source word after subtracting 0x4e.
ADDW $3, R10, R1
|
||||
ADDW $1, R12, R12
|
||||
ANDW $0xf0, R6, R6
|
||||
CMPW $4, R0
|
||||
// Second full 32-bit load of the tail; R2 (the source pointer) is overwritten
// just below and is not needed afterwards.
MOVW (R2)(R12<<2), R3
|
||||
SUBW $0x4e, R3, R2
|
||||
UBFX $2, R2, $4, R4
|
||||
ORRW R6, R4, R4
|
||||
MOVB R4, (R5)(R1.SXTW)
|
||||
BEQ decret
|
||||
|
||||
// Tail byte 4 = (R2 & 3) << 6 | bits 10..15 of R2 (only the low byte is stored).
ADDW $4, R10, R1
|
||||
UBFX $10, R2, $6, R4
|
||||
ORRW R2<<6, R4, R2
|
||||
CMPW $5, R0
|
||||
MOVB R2, (R5)(R1.SXTW)
|
||||
BEQ decret
|
||||
|
||||
// Tail byte 5: remove 0x4e from bytes 0 and 2 of the second word, then
// ((R3 >> 2) & 0xc0) | bits 16..21.
ADDW R7, R3, R3
|
||||
ADDW $5, R10, R10
|
||||
LSRW $2, R3, R0
|
||||
UBFX $16, R3, $6, R3
|
||||
ANDW $-64, R0, R0
|
||||
ORRW R3, R0, R3
|
||||
MOVB R3, (R5)(R10.SXTW)
|
||||
decret:
|
||||
RET
|
||||
// No full group: start the tail at output byte 0 and source word 0.
dectil:
|
||||
MOVW $0, R10
|
||||
MOVW $0, R12
|
||||
JMP decrem
|
||||
|
||||
@@ -24,7 +24,9 @@ func TestBase14(t *testing.T) {
|
||||
for i := 1; i < 4096; i++ {
|
||||
rand.Read(buf[:i])
|
||||
out := Decode(Encode(buf[:i]))
|
||||
assert.Equal(t, hex.EncodeToString(buf[:i]), hex.EncodeToString(out))
|
||||
if !assert.Equal(t, hex.EncodeToString(buf[:i]), hex.EncodeToString(out)) {
|
||||
t.Fatal()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
55
c/base1432.c
55
c/base1432.c
@@ -104,3 +104,58 @@ int base16384_encode(int offset, int outlen, const char* data, int dlen, int dca
|
||||
}
|
||||
return outlen;
|
||||
}
|
||||
|
||||
/* Reads the four bytes at p as one big-endian 32-bit value; p needs no alignment. */
static uint32_t base16384_load_be32(const unsigned char *p) {
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

/*
 * Reads only the first `count` (0..4) bytes at p as a little-endian value.
 * Bytes beyond `count` are never touched and contribute zero bits.
 */
static uint32_t base16384_load_le_prefix(const unsigned char *p, int count) {
    uint32_t value = 0;
    for (int k = 0; k < count; k++) {
        value |= (uint32_t)p[k] << (8 * k);
    }
    return value;
}

/*
 * Decodes base16384 data into raw bytes.
 *
 * The input is a sequence of big-endian 16-bit units, each carrying 14 payload
 * bits biased by 0x4e00. Every 4 units (8 input bytes) yield 7 output bytes;
 * a final partial group yields `offset` (1..6) bytes.
 *
 *   offset  number of bytes in the trailing partial group; 0 means none.
 *           Any other value outside 1..6 is handled like 6, which is what the
 *           original nested `if (offset--)` countdown did.
 *   outlen  total number of bytes to produce, including the trailing group.
 *   data    encoded input.
 *   buf     output buffer; exactly the decoded bytes are written.
 *   dlen, dcap, blen, bcap  unused here; kept so the signature is unchanged.
 *
 * All memory accesses are byte-wise, so the function does not depend on host
 * endianness or pointer alignment, never stores the scratch eighth byte of a
 * group, and reads only the source bytes the requested output depends on
 * (offset + 1 bytes for the trailing group).
 */
void base16384_decode(int offset, int outlen, const char* data, int dlen, int dcap, char* buf, int blen, int bcap) {
    const unsigned char *in = (const unsigned char *)data;
    unsigned char *out = (unsigned char *)buf;
    int32_t i = 0;

    (void)dlen;
    (void)dcap;
    (void)blen;
    (void)bcap;

    /* Full groups: two 32-bit source words in, seven bytes out. */
    for (; i <= outlen - 7; i += 7, in += 8) {
        uint32_t w0 = base16384_load_be32(in) - 0x4e004e00U;
        uint32_t w1 = base16384_load_be32(in + 4) - 0x4e004e00U;
        /* Units 0 and 1 plus the top 4 payload bits of unit 2. */
        uint32_t head = ((w0 << 2) & 0xfffc0000U) | ((w0 << 4) & 0x0003fff0U) | (w1 >> 26);
        /* Remaining 10 bits of unit 2 and all of unit 3; the low byte is unused. */
        uint32_t rest = ((w1 << 6) & 0xffc00000U) | ((w1 << 8) & 0x003fff00U);

        out[i]     = (unsigned char)(head >> 24);
        out[i + 1] = (unsigned char)(head >> 16);
        out[i + 2] = (unsigned char)(head >> 8);
        out[i + 3] = (unsigned char)head;
        out[i + 4] = (unsigned char)(rest >> 24);
        out[i + 5] = (unsigned char)(rest >> 16);
        out[i + 6] = (unsigned char)(rest >> 8);
    }

    if (offset == 0) {
        return;
    }
    int rem = (offset < 0 || offset > 6) ? 6 : offset;

    /*
     * Trailing group. The words are interpreted little-endian, as the original
     * did on little-endian hosts. Output byte k depends only on source bytes
     * 0..k+1, and a borrow from the bias subtraction only travels upwards, so
     * loading just the needed prefix gives the same bits as a full word load.
     */
    uint32_t lo = base16384_load_le_prefix(in, rem < 3 ? rem + 1 : 4);
    lo -= 0x0000004eU;
    out[i++] = (unsigned char)(((lo & 0x0000003fU) << 2) | ((lo & 0x0000c000U) >> 14));
    if (rem == 1) {
        return;
    }

    lo -= 0x004e0000U;
    out[i++] = (unsigned char)(((lo & 0x00003f00U) >> 6) | ((lo & 0x00300000U) >> 20));
    if (rem == 2) {
        return;
    }

    out[i++] = (unsigned char)(((lo & 0x000f0000U) >> 12) | ((lo & 0xf0000000U) >> 28));
    if (rem == 3) {
        return;
    }

    /* Bytes 3..5 also need 1..3 bytes of the following source word. */
    uint32_t hi = base16384_load_le_prefix(in + 4, rem - 3);
    hi -= 0x0000004eU;
    out[i++] = (unsigned char)(((lo & 0x0f000000U) >> 20) | ((hi & 0x0000003cU) >> 2));
    if (rem == 4) {
        return;
    }

    out[i++] = (unsigned char)(((hi & 0x00000003U) << 6) | ((hi & 0x0000fc00U) >> 10));
    if (rem == 5) {
        return;
    }

    hi -= 0x004e0000U;
    out[i] = (unsigned char)(((hi & 0x00000300U) >> 2) | ((hi & 0x003f0000U) >> 16));
}
|
||||
|
||||
Reference in New Issue
Block a user