mirror of
https://github.com/fumiama/go-base16384.git
synced 2026-06-05 00:32:52 +08:00
finish arm64 decode
name old time/op new time/op delta
EncodeTo/16-8 10.6ns ± 0% 10.4ns ± 0% -2.07% (p=0.008 n=5+5)
EncodeTo/256-8 80.8ns ± 0% 55.7ns ± 0% -31.11% (p=0.008 n=5+5)
EncodeTo/4K-8 1.21µs ± 0% 0.82µs ± 0% -32.67% (p=0.016 n=4+5)
EncodeTo/32K-8 9.64µs ± 0% 6.47µs ± 0% -32.90% (p=0.008 n=5+5)
DecodeTo/16-8 9.79ns ± 0% 26.02ns ± 0% +165.85% (p=0.016 n=4+5)
DecodeTo/256-8 80.9ns ± 0% 111.6ns ± 0% +37.98% (p=0.008 n=5+5)
DecodeTo/4K-8 1.22µs ± 0% 1.17µs ± 0% -3.73% (p=0.008 n=5+5)
DecodeTo/32K-8 9.71µs ± 1% 8.80µs ± 1% -9.37% (p=0.008 n=5+5)
Encoder/16-8 76.5ns ± 0% 76.5ns ± 0% ~ (p=0.810 n=5+5)
Encoder/256-8 356ns ± 0% 291ns ± 0% -18.22% (p=0.008 n=5+5)
Encoder/4K-8 4.05µs ± 0% 3.70µs ± 0% -8.76% (p=0.008 n=5+5)
Encoder/32K-8 34.1µs ± 0% 29.2µs ± 0% -14.30% (p=0.008 n=5+5)
Decoder/16-8 205ns ± 0% 207ns ± 0% +1.08% (p=0.008 n=5+5)
Decoder/256-8 262ns ± 0% 244ns ± 1% -6.94% (p=0.008 n=5+5)
Decoder/4K-8 1.49µs ± 0% 1.12µs ± 0% -24.87% (p=0.008 n=5+5)
Decoder/32K-8 11.0µs ± 0% 8.0µs ± 0% -27.00% (p=0.008 n=5+5)

name old speed new speed delta
EncodeTo/16-8 1.50GB/s ± 0% 1.54GB/s ± 0% +2.11% (p=0.008 n=5+5)
EncodeTo/256-8 3.17GB/s ± 0% 4.60GB/s ± 0% +45.15% (p=0.008 n=5+5)
EncodeTo/4K-8 3.37GB/s ± 0% 5.01GB/s ± 0% +48.51% (p=0.008 n=5+5)
EncodeTo/32K-8 3.40GB/s ± 0% 5.06GB/s ± 0% +49.02% (p=0.008 n=5+5)
DecodeTo/16-8 2.25GB/s ± 0% 0.85GB/s ± 0% -62.39% (p=0.016 n=4+5)
DecodeTo/256-8 3.66GB/s ± 0% 2.65GB/s ± 0% -27.54% (p=0.008 n=5+5)
DecodeTo/4K-8 3.84GB/s ± 0% 3.99GB/s ± 0% +3.87% (p=0.008 n=5+5)
DecodeTo/32K-8 3.86GB/s ± 1% 4.26GB/s ± 1% +10.33% (p=0.008 n=5+5)
Encoder/16-8 209MB/s ± 0% 209MB/s ± 0% ~ (p=0.802 n=5+5)
Encoder/256-8 720MB/s ± 0% 880MB/s ± 0% +22.28% (p=0.008 n=5+5)
Encoder/4K-8 1.01GB/s ± 0% 1.11GB/s ± 0% +9.60% (p=0.008 n=5+5)
Encoder/32K-8 962MB/s ± 0% 1122MB/s ± 0% +16.69% (p=0.008 n=5+5)
Decoder/16-8 78.1MB/s ± 0% 77.3MB/s ± 0% -1.08% (p=0.008 n=5+5)
Decoder/256-8 977MB/s ± 0% 1050MB/s ± 1% +7.47% (p=0.008 n=5+5)
Decoder/4K-8 2.76GB/s ± 0% 3.67GB/s ± 0% +33.10% (p=0.008 n=5+5)
Decoder/32K-8 2.98GB/s ± 0% 4.08GB/s ± 0% +36.98% (p=0.008 n=5+5)

name old alloc/op new alloc/op delta
EncodeTo/16-8 0.00B 0.00B ~ (all equal)
EncodeTo/256-8 0.00B 0.00B ~ (all equal)
EncodeTo/4K-8 0.00B 0.00B ~ (all equal)
EncodeTo/32K-8 0.00B 0.00B ~ (all equal)
DecodeTo/16-8 0.00B 48.00B ± 0% +Inf% (p=0.008 n=5+5)
DecodeTo/256-8 0.00B 576.00B ± 0% +Inf% (p=0.008 n=5+5)
DecodeTo/4K-8 0.00B 6144.00B ± 0% +Inf% (p=0.008 n=5+5)
DecodeTo/32K-8 0.00B 49152.00B ± 0% +Inf% (p=0.008 n=5+5)
Encoder/16-8 24.0B ± 0% 24.0B ± 0% ~ (all equal)
Encoder/256-8 472B ± 0% 24B ± 0% -94.92% (p=0.008 n=5+5)
Encoder/4K-8 24.0B ± 0% 24.0B ± 0% ~ (all equal)
Encoder/32K-8 41.0kB ± 0% 0.0kB ± 0% -99.94% (p=0.008 n=5+5)
Decoder/16-8 1.39kB ± 0% 1.39kB ± 0% ~ (all equal)
Decoder/256-8 1.39kB ± 0% 1.39kB ± 0% ~ (all equal)
Decoder/4K-8 4.98kB ± 0% 4.98kB ± 0% ~ (all equal)
Decoder/32K-8 41.1kB ± 0% 41.1kB ± 0% ~ (all equal)

name old allocs/op new allocs/op delta
EncodeTo/16-8 0.00 0.00 ~ (all equal)
EncodeTo/256-8 0.00 0.00 ~ (all equal)
EncodeTo/4K-8 0.00 0.00 ~ (all equal)
EncodeTo/32K-8 0.00 0.00 ~ (all equal)
DecodeTo/16-8 0.00 1.00 ± 0% +Inf% (p=0.008 n=5+5)
DecodeTo/256-8 0.00 1.00 ± 0% +Inf% (p=0.008 n=5+5)
DecodeTo/4K-8 0.00 1.00 ± 0% +Inf% (p=0.008 n=5+5)
DecodeTo/32K-8 0.00 1.00 ± 0% +Inf% (p=0.008 n=5+5)
Encoder/16-8 1.00 ± 0% 1.00 ± 0% ~ (all equal)
Encoder/256-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.008 n=5+5)
Encoder/4K-8 1.00 ± 0% 1.00 ± 0% ~ (all equal)
Encoder/32K-8 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.008 n=5+5)
Decoder/16-8 3.00 ± 0% 3.00 ± 0% ~ (all equal)
Decoder/256-8 3.00 ± 0% 3.00 ± 0% ~ (all equal)
Decoder/4K-8 3.00 ± 0% 3.00 ± 0% ~ (all equal)
Decoder/32K-8 3.00 ± 0% 3.00 ± 0% ~ (all equal)
This commit is contained in:
@@ -25,7 +25,6 @@ func encode(offset, outlen int, b, encd []byte) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
n := valn - (uintptr)(*(*unsafe.Pointer)(unsafe.Pointer(&encd)))
|
n := valn - (uintptr)(*(*unsafe.Pointer)(unsafe.Pointer(&encd)))
|
||||||
println(sum, n)
|
|
||||||
var tmp [4]byte
|
var tmp [4]byte
|
||||||
binary.LittleEndian.PutUint32(tmp[:], uint32(sum))
|
binary.LittleEndian.PutUint32(tmp[:], uint32(sum))
|
||||||
copy(encd[n:], tmp[:])
|
copy(encd[n:], tmp[:])
|
||||||
@@ -34,12 +33,8 @@ func encode(offset, outlen int, b, encd []byte) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func decode(offset, outlen int, b, decd []byte) {
|
func decode(offset, outlen int, b, decd []byte) {
|
||||||
/*
|
if offset != 0 && cap(b) == len(b) {
|
||||||
if offset != 0 && cap(b) == len(b) {
|
b = append(b, make([]byte, 8)...)
|
||||||
b = append(b, make([]byte, 8)...)
|
}
|
||||||
}
|
_decode(offset, outlen, b, decd)
|
||||||
_decode(offset, outlen, b, decd)
|
|
||||||
*/
|
|
||||||
|
|
||||||
decodeGeneric(offset, outlen, b, decd)
|
|
||||||
}
|
}
|
||||||
|
|||||||
107
base14_arm64.s
107
base14_arm64.s
@@ -9,7 +9,7 @@ TEXT ·_encode(SB), NOSPLIT, $0-81
|
|||||||
MOVD ·data+16(FP), R9
|
MOVD ·data+16(FP), R9
|
||||||
MOVD ·dlen+24(FP), R3
|
MOVD ·dlen+24(FP), R3
|
||||||
MOVD ·encd+40(FP), R5
|
MOVD ·encd+40(FP), R5
|
||||||
|
|
||||||
SUBW $6, R3, R3
|
SUBW $6, R3, R3
|
||||||
CMPW $0, R3
|
CMPW $0, R3
|
||||||
BLE enctil
|
BLE enctil
|
||||||
@@ -87,7 +87,7 @@ encrem:
|
|||||||
ANDW $0xf000000, R4, R4
|
ANDW $0xf000000, R4, R4
|
||||||
ORRW R3, R4, R3
|
ORRW R3, R4, R3
|
||||||
ADDW $0x4e0000, R3, R3
|
ADDW $0x4e0000, R3, R3
|
||||||
ADDW $78, R3, R3
|
ADDW $0x4e, R3, R3
|
||||||
MOVW R3, (R5)(R8<<2)
|
MOVW R3, (R5)(R8<<2)
|
||||||
MOVBU (R2)(R10), R3
|
MOVBU (R2)(R10), R3
|
||||||
UBFIZW $2, R3, $4, R3
|
UBFIZW $2, R3, $4, R3
|
||||||
@@ -120,4 +120,105 @@ enctil:
|
|||||||
|
|
||||||
// func _decode(offset, outlen int, b, decd []byte)
|
// func _decode(offset, outlen int, b, decd []byte)
|
||||||
TEXT ·_decode(SB), NOSPLIT, $0-64
|
// _decode is the arm64 decoder body. It reads 32-bit words from the input
// buffer and writes decoded bytes to decd: the main loop (declop) turns two
// input words into 7 output bytes per iteration, and decrem then emits up to
// six trailing bytes, returning as soon as `offset` bytes have been written.
// NOTE(review): the input slice is addressed as ·data+16(FP) here; confirm
// that name against the Go declaration of _decode.
TEXT ·_decode(SB), NOSPLIT, $0-64
|
||||||
|
MOVD ·offset+0(FP), R0 // R0 = offset (number of tail bytes handled in decrem)
|
||||||
|
MOVD ·outlen+8(FP), R1 // R1 = outlen
|
||||||
|
MOVD ·data+16(FP), R2 // R2 = input base pointer
|
||||||
|
MOVD ·decd+40(FP), R5 // R5 = output base pointer
|
||||||
|
|
||||||
|
SUBW $6, R1, R1 // R1 = outlen - 6, the loop bound compared against R10
|
||||||
|
CMPW $0, R1
|
||||||
|
BLE dectil // outlen - 6 <= 0: skip the main loop entirely
|
||||||
|
MOVW $0xb200, R11 // low half of 0xb1ffb200 (high half set by MOVK below)
|
||||||
|
MOVD R5, R9 // R9 = output write cursor
|
||||||
|
SUB $8, R2, R14 // R14 = data - 8, so (R14)(R8<<2) with R8 = 2 is word 0
|
||||||
|
SUB $4, R2, R13 // R13 = data - 4, so (R13)(R8<<2) with R8 = 2 is word 1
|
||||||
|
MOVD $2, R8 // R8 = word index, biased by 2 to match R14/R13
|
||||||
|
MOVW $0, R10 // R10 = count of output bytes produced
|
||||||
|
MOVK $(0xb1ff<<16), R11 // R11 = 0xb1ffb200; adding it equals subtracting 0x4e004e00 (mod 2^32)
|
||||||
|
declop: // main loop: two input words -> 7 output bytes
|
||||||
|
MOVW (R14)(R8<<2), R4 // R4 = first word of the pair
|
||||||
|
ADDW $7, R10, R10 // account for the 7 bytes this iteration produces
|
||||||
|
MOVW (R13)(R8<<2), R3 // R3 = second word of the pair
|
||||||
|
MOVW R8, R12 // R12 = index of the next unread word relative to R2 (used by decrem)
|
||||||
|
REVW R4, R4 // byte-swap the first word
|
||||||
|
CMPW R1, R10 // flags for the BLT at the bottom of the loop; nothing in between sets flags
|
||||||
|
ADDW R11, R4, R4 // first word -= 0x4e004e00
|
||||||
|
REVW R3, R3 // byte-swap the second word
|
||||||
|
ADDW R11, R3, R3 // second word -= 0x4e004e00
|
||||||
|
ADD $2, R8, R8 // advance to the next word pair
|
||||||
|
LSLW $2, R4, R7
|
||||||
|
UBFIZW $4, R4, $14, R4 // low 14 bits of the first word, shifted left by 4
|
||||||
|
LSLW $6, R3, R6
|
||||||
|
ANDW $-262144, R7, R7 // mask 0xfffc0000
|
||||||
|
ORRW R4, R7, R7
|
||||||
|
ANDW $-4194304, R6, R4 // mask 0xffc00000
|
||||||
|
UBFIZW $8, R3, $14, R6 // low 14 bits of the second word, shifted left by 8
|
||||||
|
ORRW R3>>26, R7, R3 // R3 = first output word (before the final byte swap)
|
||||||
|
ORRW R6, R4, R4 // R4 = second output word (before the final byte swap)
|
||||||
|
REVW R3, R3
|
||||||
|
REVW R4, R4
|
||||||
|
STPW (R3, R4), (R9) // stores 8 bytes although the cursor advances by only 7
|
||||||
|
ADD $7, R9, R9
|
||||||
|
BLT declop // loop while R10 < outlen - 6 (flags from CMPW above)
|
||||||
|
decrem: // tail: emit up to 6 more bytes, one per offset step
|
||||||
|
CBZW R0, decret // offset == 0: nothing left to do
|
||||||
|
MOVW (R2)(R12.UXTW<<2), R1 // R1 = next unread input word (no byte swap in the tail)
|
||||||
|
CMPW $1, R0 // flags consumed by the BEQ after the store
|
||||||
|
SUBW $0x4e, R1, R3
|
||||||
|
UBFX $14, R3, $2, R4
|
||||||
|
ORRW R3<<2, R4, R3
|
||||||
|
MOVB R3, (R5)(R10.SXTW) // tail byte 0
|
||||||
|
BEQ decret // offset == 1
|
||||||
|
|
||||||
|
|
||||||
|
MOVW $0xffb2, R7 // low half of 0xffb1ffb2 (high half set by MOVK below)
|
||||||
|
ADDW $1, R10, R4
|
||||||
|
MOVK $(0xffb1<<16), R7 // R7 = 0xffb1ffb2; adding it equals subtracting 0x004e004e (mod 2^32)
|
||||||
|
ADDW R7, R1, R1 // R1 = word - 0x004e004e
|
||||||
|
CMPW $2, R0
|
||||||
|
UBFX $20, R1, $8, R6 // R6 = bits 20..27, reused for tail bytes 1 and 3
|
||||||
|
LSRW $6, R1, R3
|
||||||
|
ANDW $3, R6, R8
|
||||||
|
ANDW $-4, R3, R3
|
||||||
|
ORRW R8, R3, R3
|
||||||
|
MOVB R3, (R5)(R4.SXTW) // tail byte 1
|
||||||
|
BEQ decret // offset == 2
|
||||||
|
|
||||||
|
|
||||||
|
ADDW $2, R10, R3
|
||||||
|
LSRW $12, R1, R4
|
||||||
|
ANDW $-16, R4, R4
|
||||||
|
CMPW $3, R0
|
||||||
|
ORRW R1>>28, R4, R1
|
||||||
|
MOVB R1, (R5)(R3.SXTW) // tail byte 2
|
||||||
|
BEQ decret // offset == 3
|
||||||
|
|
||||||
|
|
||||||
|
ADDW $3, R10, R1
|
||||||
|
ADDW $1, R12, R12 // step to the following input word
|
||||||
|
ANDW $0xf0, R6, R6
|
||||||
|
CMPW $4, R0
|
||||||
|
MOVW (R2)(R12<<2), R3 // R3 = second tail word
|
||||||
|
SUBW $0x4e, R3, R2 // R2 is reused as scratch; it is not used as a base pointer after this
|
||||||
|
UBFX $2, R2, $4, R4
|
||||||
|
ORRW R6, R4, R4
|
||||||
|
MOVB R4, (R5)(R1.SXTW) // tail byte 3 (combines bits of both tail words)
|
||||||
|
BEQ decret // offset == 4
|
||||||
|
|
||||||
|
|
||||||
|
ADDW $4, R10, R1
|
||||||
|
UBFX $10, R2, $6, R4
|
||||||
|
ORRW R2<<6, R4, R2
|
||||||
|
CMPW $5, R0
|
||||||
|
MOVB R2, (R5)(R1.SXTW) // tail byte 4
|
||||||
|
BEQ decret // offset == 5
|
||||||
|
|
||||||
|
|
||||||
|
ADDW R7, R3, R3 // second tail word - 0x004e004e
|
||||||
|
ADDW $5, R10, R10
|
||||||
|
LSRW $2, R3, R0
|
||||||
|
UBFX $16, R3, $6, R3
|
||||||
|
ANDW $-64, R0, R0
|
||||||
|
ORRW R3, R0, R3
|
||||||
|
MOVB R3, (R5)(R10.SXTW) // tail byte 5
|
||||||
|
decret:
|
||||||
|
RET
|
||||||
|
dectil: // entry used when the main loop is skipped
|
||||||
|
MOVW $0, R10 // no output bytes produced yet
|
||||||
|
MOVW $0, R12 // tail starts at input word 0
|
||||||
|
JMP decrem
|
||||||
|
|||||||
@@ -24,7 +24,9 @@ func TestBase14(t *testing.T) {
|
|||||||
for i := 1; i < 4096; i++ {
|
for i := 1; i < 4096; i++ {
|
||||||
rand.Read(buf[:i])
|
rand.Read(buf[:i])
|
||||||
out := Decode(Encode(buf[:i]))
|
out := Decode(Encode(buf[:i]))
|
||||||
assert.Equal(t, hex.EncodeToString(buf[:i]), hex.EncodeToString(out))
|
if !assert.Equal(t, hex.EncodeToString(buf[:i]), hex.EncodeToString(out)) {
|
||||||
|
t.Fatal()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
55
c/base1432.c
55
c/base1432.c
@@ -104,3 +104,58 @@ int base16384_encode(int offset, int outlen, const char* data, int dlen, int dca
|
|||||||
}
|
}
|
||||||
return outlen;
|
return outlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// base16384_decode decodes 32-bit words read from `data` into bytes written
// to `buf`.
//
// The main loop runs while i <= outlen - 7 and turns two input words into
// 7 output bytes per iteration. Afterwards `offset` selects how many trailing
// bytes (up to six, one per nested `if(offset--)`) are decoded from at most
// two further input words.
//
// dlen, dcap, blen and bcap are not referenced in this function body, so no
// bounds are checked here.
void base16384_decode(int offset, int outlen, const char* data, int dlen, int dcap, char* buf, int blen, int bcap) {
|
||||||
|
uint32_t* vals = (uint32_t*)data; // input viewed as 32-bit words
|
||||||
|
uint32_t n = 0; // index of the next input word
|
||||||
|
int32_t i = 0; // index of the next output byte
|
||||||
|
for(; i <= outlen - 7; i+=7) { // n actually advances by 2 per iteration
|
||||||
|
register uint32_t sum = 0;
|
||||||
|
register uint32_t shift = htobe32(vals[n++]) - 0x4e004e00; // byte-swapped word with the 0x4e00 bias removed from each 16-bit half
|
||||||
|
shift <<= 2;
|
||||||
|
sum |= shift & 0xfffc0000;
|
||||||
|
shift <<= 2;
|
||||||
|
sum |= shift & 0x0003fff0;
|
||||||
|
shift = htobe32(vals[n++]) - 0x4e004e00;
|
||||||
|
sum |= shift >> 26;
|
||||||
|
*(uint32_t*)(buf+i) = be32toh(sum); // output bytes i .. i+3
|
||||||
|
sum = 0;
|
||||||
|
shift <<= 6;
|
||||||
|
sum |= shift & 0xffc00000;
|
||||||
|
shift <<= 2;
|
||||||
|
sum |= shift & 0x003fff00;
|
||||||
|
*(uint32_t*)(buf+i+4) = be32toh(sum); // stores bytes i+4 .. i+7: one byte more than the 7 this iteration advances by
|
||||||
|
}
|
||||||
|
if(offset--) {
|
||||||
|
// NOTE (translated from the original comment): an out-of-bounds read happens here
|
||||||
|
#ifdef WORDS_BIGENDIAN
|
||||||
|
register uint32_t sum = __builtin_bswap32(vals[n++]);
|
||||||
|
#else
|
||||||
|
register uint32_t sum = vals[n++];
|
||||||
|
#endif
|
||||||
|
sum -= 0x0000004e;
|
||||||
|
buf[i++] = ((sum & 0x0000003f) << 2) | ((sum & 0x0000c000) >> 14); // tail byte 0
|
||||||
|
if(offset--) {
|
||||||
|
sum -= 0x004e0000;
|
||||||
|
buf[i++] = ((sum & 0x00003f00) >> 6) | ((sum & 0x00300000) >> 20); // tail byte 1
|
||||||
|
if(offset--) {
|
||||||
|
buf[i++] = ((sum & 0x000f0000) >> 12) | ((sum & 0xf0000000) >> 28); // tail byte 2
|
||||||
|
if(offset--) {
|
||||||
|
buf[i] = (sum & 0x0f000000) >> 20; // high nibble of tail byte 3; the low nibble comes from the next word
|
||||||
|
// NOTE (translated from the original comment): an out-of-bounds read happens here
|
||||||
|
sum = vals[n]; // NOTE(review): unlike the first tail word, this load is not byte-swapped under WORDS_BIGENDIAN; confirm intended
|
||||||
|
sum -= 0x0000004e;
|
||||||
|
buf[i++] |= (sum & 0x0000003c) >> 2; // completes tail byte 3
|
||||||
|
if(offset--) {
|
||||||
|
buf[i++] = ((sum & 0x00000003) << 6) | ((sum & 0x0000fc00) >> 10); // tail byte 4
|
||||||
|
if(offset--) {
|
||||||
|
sum -= 0x004e0000;
|
||||||
|
buf[i] = ((sum & 0x00000300) >> 2) | ((sum & 0x003f0000) >> 16); // tail byte 5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user