From 5e0f486237718f3c048a578eae9a5f76a23f32b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com>
Date: Tue, 13 Dec 2022 17:08:02 +0800
Subject: [PATCH] finish arm64 encode

---
Review notes (below the "---" marker, so not part of the commit message):
 * base14_arm64.go: removed the leftover println(sum, n) debug output and
   added doc comments.
 * base14_arm64.s: _encode now loads the data pointer of b into R2. The tail
   at encrem indexes b through R2, but the routine is ABI0 (arguments on the
   stack) and R2 was never loaded. The frame spec is $0-80 (64 bytes of
   arguments plus 16 bytes of results), the FP names follow the Go
   declaration, and the _decode stub has a RET and a final newline.
 * c/base1432.c: htobe16 maps to htons on macOS. The header names on the
   #include lines were missing from the reviewed copy; see the NOTE in the file.
 * The leading whitespace of context lines and the blob ids on the "index"
   lines were not regenerated; re-create the patch from a work tree before
   relying on them.

 base14_amd64.s  |   2 +-
 base14_arm64.go |  62 ++++++++++++++++++++++
 base14_arm64.s  | 136 ++++++++++++++++++++++++++++++++++++++++++++++++
 base14_noasm.go |   4 +-
 c/base1432.c    | 117 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 318 insertions(+), 3 deletions(-)
 create mode 100644 base14_arm64.go
 create mode 100644 base14_arm64.s
 create mode 100644 c/base1432.c

diff --git a/base14_amd64.s b/base14_amd64.s
index 685b3c6..29eadda 100644
--- a/base14_amd64.s
+++ b/base14_amd64.s
@@ -118,7 +118,7 @@ encsav:
 	ADDQ CX, DX
 	SHLQ $3, SI
 	MOVQ DX, ·sum+64(FP)
-	MOVQ SI, ·sum+72(FP)
+	MOVQ SI, ·n+72(FP)
 
 encend:
 	RET
diff --git a/base14_arm64.go b/base14_arm64.go
new file mode 100644
index 0000000..254b93a
--- /dev/null
+++ b/base14_arm64.go
@@ -0,0 +1,62 @@
+//go:build arm64
+// +build arm64
+
+package base14
+
+import (
+	"encoding/binary"
+	"unsafe"
+)
+
+// _encode is implemented in base14_arm64.s. It returns the pending tail word
+// in sum and, in valn, the address inside encd where that word belongs. Both
+// results are only written when the low byte of offset is non-zero.
+//
+//go:noescape
+//go:nosplit
+func _encode(offset, outlen int, b, encd []byte) (sum uint64, valn uintptr)
+
+// _decode is declared for the assembly stub in base14_arm64.s; decode does not
+// call it yet.
+//
+//go:noescape
+//go:nosplit
+func _decode(offset, outlen int, b, decd []byte)
+
+// encode encodes b into encd using the arm64 assembly routine. When offset is
+// non-zero it stores the tail word returned by _encode and then writes the
+// trailer bytes '=' and byte(offset) at encd[outlen-2] and encd[outlen-1].
+func encode(offset, outlen int, b, encd []byte) {
+	// The assembly loop loads 4 bytes starting at b[i+4], i.e. one byte past
+	// each 7-byte group, so a 7-byte input gets a padding byte appended.
+	// NOTE(review): inputs of 14, 21, ... bytes are not padded here; confirm
+	// that callers guarantee a readable spare byte for those lengths.
+	if len(b) == 7 {
+		b = append(b, 0)
+	}
+	sum, valn := _encode(offset, outlen, b, encd)
+	if offset == 0 {
+		return
+	}
+	// valn is an absolute address; subtracting the data pointer of encd turns
+	// it into an index into encd.
+	n := valn - (uintptr)(*(*unsafe.Pointer)(unsafe.Pointer(&encd)))
+	var tmp [4]byte
+	binary.LittleEndian.PutUint32(tmp[:], uint32(sum))
+	// copy stops at len(encd), so a short tail is truncated, not overrun.
+	copy(encd[n:], tmp[:])
+	encd[outlen-2] = '='
+	encd[outlen-1] = byte(offset)
+}
+
+// decode delegates to decodeGeneric; the arm64 _decode routine is still a stub.
+func decode(offset, outlen int, b, decd []byte) {
+	/*
+		if offset != 0 && cap(b) == len(b) {
+			b = append(b, make([]byte, 8)...)
+		}
+		_decode(offset, outlen, b, decd)
+	*/
+
+	decodeGeneric(offset, outlen, b, decd)
+}
diff --git a/base14_arm64.s b/base14_arm64.s
new file mode 100644
index 0000000..8889c57
--- /dev/null
+++ b/base14_arm64.s
@@ -0,0 +1,136 @@
+//go:build arm64
+// +build arm64
+
+#include "textflag.h"
+
+// func _encode(offset, outlen int, b, encd []byte) (sum uint64, valn uintptr)
+//
+// Register use:
+//   R0  offset (only its low byte is tested)
+//   R2  &b[0]; reused as scratch once the tail has done its last (R2) load
+//   R3  len(b)-6 during the loop, then the tail accumulator
+//   R5  &encd[0]
+//   R9  read cursor in the loop, &b[i] in the tail
+//   R10 i, the byte index into b
+//   R12 n, the index of the next 32-bit word in encd
+// sum and valn are stored only when the low byte of offset is non-zero.
+TEXT ·_encode(SB), NOSPLIT, $0-80
+	MOVD ·offset+0(FP), R0
+	MOVD ·b_base+16(FP), R2 // the tail at encrem indexes b through R2
+	MOVD ·b_len+24(FP), R3
+	MOVD ·encd_base+40(FP), R5
+	MOVD R2, R9
+
+	SUBW $6, R3, R3 // R3 = len(b) - 6
+	CMPW $0, R3
+	BLE enctil // fewer than 7 bytes: skip the main loop
+	MOVW $0x4e00, R11
+	SUB $8, R5, R14 // R14 and R13 are biased so that R8 can start at 2
+	SUB $4, R5, R13
+	MOVD $2, R8
+	MOVW $0, R10 // int32_t i = 0
+	MOVK $(0x4e00<<16), R11 // R11 = 0x4e004e00
+enclop:
+	MOVW (R9), R4 // first 4 bytes of the 7-byte group
+	ADDW $7, R10, R10
+	MOVW R8, R12
+	CMPW R3, R10 // sets the flags consumed by BLT at the bottom of the loop
+	REVW R4, R4
+	ADD $7, R9, R9
+	LSRW $2, R4, R6
+	UBFX $4, R4, $14, R15
+	ANDW $0x3fff0000, R6, R6
+	UBFIZW $26, R4, $4, R7
+	ORRW R15, R6, R6
+	ADDW R11, R6, R6
+	REVW R6, R6
+	MOVW R6, (R14)(R8<<2)
+	MOVW -3(R9), R4 // 4 bytes at b[i+4]; R9 was already advanced by 7
+	REVW R4, R4
+	LSRW $6, R4, R4
+	ANDW $0x3fffffc, R4, R4
+	ORRW R7, R4, R4
+	ANDW $0x3fff0000, R4, R6
+	UBFX $2, R4, $14, R4
+	ORRW R6, R4, R4
+	ADDW R11, R4, R4
+	REVW R4, R4
+	MOVW R4, (R13)(R8<<2)
+	ADDW $2, R8, R8
+	BLT enclop // loop while i < len(b) - 6
+encrem:
+	ANDSW $0xff, R0, R0 // uint8_t o = offset
+	BEQ encret // o == 0: no tail, the results are not written
+
+	MOVBU (R2)(R10.SXTW), R3 // data[i]
+	UXTW R12, R8
+	CMPW $1, R0
+	SXTW R10, R10
+	ADD R8<<2, R5, R7 // R7 = &vals[n]
+	UBFIZW $14, R3, $2, R4
+	ORRW R3>>2, R4, R3
+	BEQ encsum
+
+	ADD R10, R2, R9 // R9 = &data[i]
+	CMPW $2, R0
+	MOVBU 1(R9), R6
+	LSLW $6, R6, R4
+	UBFIZW $20, R6, $2, R6
+	ANDW $0x3f00, R4, R4
+	ORRW R3, R4, R3
+	ORRW R3, R6, R3
+	BEQ encsum
+
+	MOVBU 2(R9), R4
+	CMPW $3, R0
+	LSLW $12, R4, R6
+	ANDW $0xf0000, R6, R6
+	ORRW R4<<28, R6, R4
+	ORRW R4, R3, R3
+	BEQ encsum
+
+	ADD $3, R10, R10
+	ADDW $1, R12, R12
+	CMPW $4, R0
+	ADD R12<<2, R5, R7 // R7 = &vals[n+1]
+	MOVBU (R2)(R10), R4 // data[i+3]
+	LSLW $20, R4, R4
+	ANDW $0xf000000, R4, R4
+	ORRW R3, R4, R3
+	ADDW $0x4e0000, R3, R3
+	ADDW $78, R3, R3
+	MOVW R3, (R5)(R8<<2) // vals[n] = sum; the fourth byte completes a word
+	MOVBU (R2)(R10), R3
+	UBFIZW $2, R3, $4, R3
+	BEQ encsum
+
+	MOVBU 4(R9), R4
+	CMPW $5, R0
+	UBFIZW $10, R4, $6, R2 // R2 is free from here on
+	ORRW R3, R2, R3
+	ORRW R4>>6, R3, R3
+	BEQ encsum
+
+	MOVBU 5(R9), R4
+	LSLW $2, R4, R2
+	UBFIZW $16, R4, $6, R4
+	ANDW $0x300, R2, R2
+	ORRW R4, R2, R2
+	ORRW R2, R3, R3
+encsum:
+	ADDW $0x4e0000, R3, R3
+	ADDW $0x4e, R3, R3
+	MOVD R3, ·sum+64(FP)
+	MOVD R7, ·valn+72(FP)
+encret:
+	RET
+enctil:
+	MOVW $0, R10 // i = 0
+	MOVW $0, R12 // n = 0
+	JMP encrem
+
+// func _decode(offset, outlen int, b, decd []byte)
+// Not implemented yet; the stub only returns so that a stray call cannot run
+// past the end of the symbol.
+TEXT ·_decode(SB), NOSPLIT, $0-64
+	RET
diff --git a/base14_noasm.go b/base14_noasm.go
index 4c8d769..dff3200 100644
--- a/base14_noasm.go
+++ b/base14_noasm.go
@@ -1,5 +1,5 @@
-//go:build !amd64
-// +build !amd64
+//go:build !amd64 && !arm64
+// +build !amd64,!arm64
 
 package base14
 
diff --git a/c/base1432.c b/c/base1432.c
new file mode 100644
index 0000000..cf065f3
--- /dev/null
+++ b/c/base1432.c
@@ -0,0 +1,117 @@
+#ifdef __cosmopolitan // always le
+# define be16toh(x) bswap_16(x)
+# define be32toh(x) bswap_32(x)
+# define htobe16(x) bswap_16(x)
+# define htobe32(x) bswap_32(x)
+#else
+// NOTE(review): the header names on the #include lines in this #else branch
+// were missing from the copy under review. <stdint.h> is needed for the
+// fixed-width integer types; every other header name below is a best guess
+// and must be confirmed against the original file.
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef __linux__
+# include <endian.h>
+#endif
+#ifdef __FreeBSD__
+# include <sys/endian.h>
+#endif
+#ifdef __NetBSD__
+# include <sys/endian.h>
+#endif
+#ifdef __OpenBSD__
+# include <sys/endian.h>
+# define be16toh(x) betoh16(x)
+# define be32toh(x) betoh32(x)
+#endif
+#ifdef __MAC_10_0
+# define be16toh(x) ntohs(x)
+# define be32toh(x) ntohl(x)
+# define htobe16(x) htons(x)
+# define htobe32(x) htonl(x)
+#endif
+#ifdef _WIN32
+	#ifdef WORDS_BIGENDIAN
+	# define be16toh(x) (x)
+	# define be32toh(x) (x)
+	# define htobe16(x) (x)
+	# define htobe32(x) (x)
+	#else
+	# define be16toh(x) _byteswap_ushort(x)
+	# define be32toh(x) _byteswap_ulong(x)
+	# define htobe16(x) _byteswap_ushort(x)
+	# define htobe32(x) _byteswap_ulong(x)
+	#endif
+#endif
+#endif
+
+// base16384_encode encodes dlen bytes of data into buf and returns outlen.
+// Each full 7-byte group becomes four 16-bit units, every unit holding 14
+// input bits plus 0x4e00, written big-endian. When offset is non-zero, offset
+// (at most six) bytes after the last full group are encoded the same way and
+// the last two output bytes, buf[outlen-2] and buf[outlen-1], are set to '='
+// and offset. dcap, blen and bcap are not used.
+// The main loop loads 4 bytes at data+i+4, one byte past the current group.
+int base16384_encode(int offset, int outlen, const char* data, int dlen, int dcap, char* buf, int blen, int bcap) {
+	uint32_t* vals = (uint32_t*)buf;
+	uint32_t n = 0;
+	int32_t i = 0;
+	for(; i <= dlen - 7; i += 7) {
+		register uint32_t sum = 0;
+		register uint32_t shift = htobe32(*(uint32_t*)(data+i));
+		sum |= (shift>>2) & 0x3fff0000;
+		sum |= (shift>>4) & 0x00003fff;
+		sum += 0x4e004e00;
+		vals[n++] = be32toh(sum);
+		shift <<= 26;
+		shift &= 0x3c000000;
+		sum = 0;
+		shift |= (htobe32(*(uint32_t*)(data+i+4))>>6)&0x03fffffc;
+		sum |= shift & 0x3fff0000;
+		shift >>= 2;
+		sum |= shift & 0x00003fff;
+		sum += 0x4e004e00;
+		vals[n++] = be32toh(sum);
+	}
+	uint8_t o = offset;
+	if(o--) {
+		register uint32_t sum = 0x0000003f & (data[i] >> 2);
+		sum |= ((uint32_t)data[i] << 14) & 0x0000c000;
+		if(o--) {
+			sum |= ((uint32_t)data[i + 1] << 6) & 0x00003f00;
+			sum |= ((uint32_t)data[i + 1] << 20) & 0x00300000;
+			if(o--) {
+				sum |= ((uint32_t)data[i + 2] << 12) & 0x000f0000;
+				sum |= ((uint32_t)data[i + 2] << 28) & 0xf0000000;
+				if(o--) {
+					sum |= ((uint32_t)data[i + 3] << 20) & 0x0f000000;
+					sum += 0x004e004e;
+					#ifdef WORDS_BIGENDIAN
+						vals[n++] = __builtin_bswap32(sum);
+					#else
+						vals[n++] = sum;
+					#endif
+					sum = (((uint32_t)data[i + 3] << 2)) & 0x0000003c;
+					if(o--) {
+						sum |= (((uint32_t)data[i + 4] >> 6)) & 0x00000003;
+						sum |= ((uint32_t)data[i + 4] << 10) & 0x0000fc00;
+						if(o--) {
+							sum |= ((uint32_t)data[i + 5] << 2) & 0x00000300;
+							sum |= ((uint32_t)data[i + 5] << 16) & 0x003f0000;
+						}
+					}
+				}
+			}
+		}
+		sum += 0x004e004e;
+		#ifdef WORDS_BIGENDIAN
+			vals[n] = __builtin_bswap32(sum);
+		#else
+			vals[n] = sum;
+		#endif
+		buf[outlen - 2] = '=';
+		buf[outlen - 1] = offset;
+	}
+	return outlen;
+}