diff --git a/base14_arm64.go b/base14_arm64.go index 8aab1a5..64c8adf 100644 --- a/base14_arm64.go +++ b/base14_arm64.go @@ -5,26 +5,21 @@ package base14 import ( "encoding/binary" - "unsafe" ) //go:noescape //go:nosplit -func _encode(offset, outlen int, b, encd []byte) (sum uint64, valn uintptr) +func _encode(offset int, b, encd []byte) (sum uint64, n int) //go:noescape //go:nosplit func _decode(offset, outlen int, b, decd []byte) func encode(offset, outlen int, b, encd []byte) { - if len(b) == 7 { - b = append(b, 0) - } - sum, valn := _encode(offset, outlen, b, encd) + sum, n := _encode(offset, b, encd) if offset == 0 { return } - n := valn - (uintptr)(*(*unsafe.Pointer)(unsafe.Pointer(&encd))) var tmp [4]byte binary.LittleEndian.PutUint32(tmp[:], uint32(sum)) copy(encd[n:], tmp[:]) @@ -33,8 +28,5 @@ func encode(offset, outlen int, b, encd []byte) { } func decode(offset, outlen int, b, decd []byte) { - if offset != 0 && cap(b) == len(b) { - b = append(b, make([]byte, 8)...) - } _decode(offset, outlen, b, decd) } diff --git a/base14_arm64.s b/base14_arm64.s index d704a7e..9974b75 100644 --- a/base14_arm64.s +++ b/base14_arm64.s @@ -3,12 +3,12 @@ #include "textflag.h" -// func _encode(offset, outlen int, b, encd []byte) (sum uint64, &vals[n] uintptr) -TEXT ·_encode(SB), NOSPLIT, $0-81 +// func _encode(offset int, b, encd []byte) (sum uint64, n int) +TEXT ·_encode(SB), NOSPLIT, $0-72 MOVD ·offset+0(FP), R0 - MOVD ·data+16(FP), R9 - MOVD ·dlen+24(FP), R3 - MOVD ·encd+40(FP), R5 + MOVD ·data+8(FP), R9 + MOVD ·dlen+16(FP), R3 + MOVD ·encd+32(FP), R5 SUBW $6, R3, R3 CMPW $0, R3 @@ -109,8 +109,9 @@ encrem: encsum: ADDW $0x4e0000, R3, R3 ADDW $0x4e, R3, R3 - MOVD R3, ·sum+64(FP) - MOVD R7, ·n+72(FP) + SUB R5, R7, R7 + MOVD R3, ·sum+56(FP) + MOVD R7, ·n+64(FP) encret: RET enctil: @@ -125,16 +126,16 @@ TEXT ·_decode(SB), NOSPLIT, $0-64 MOVD ·data+16(FP), R2 MOVD ·decd+40(FP), R5 - SUBW $6, R1, R1 - CMPW $0, R1 - BLE dectil - MOVW $0xb200, R11 - MOVD R5, R9 - SUB $8, R2, R14 - 
SUB $4, R2, R13 - MOVD $2, R8 - MOVW $0, R10 - MOVK $(0xb1ff<<16), R11 + SUBW $6, R1, R1 // sub w1, w1, #6 + CMPW $0, R1 // cmp w1, 0 + BLE dectil // ble .L7 + MOVW $0xb200, R11 // mov w11, 45568 + MOVD R5, R9 // mov x9, x5 + SUB $8, R2, R14 // sub x14, x2, #8 + SUB $4, R2, R13 // sub x13, x2, #4 + MOVD $2, R8 // mov x8, 2 + MOVW $0, R10 // mov w10, 0 + MOVK $(0xb1ff<<16), R11 // movk w11, 0xb1ff, lsl 16 declop: MOVW (R14)(R8<<2), R4 ADDW $7, R10, R10 @@ -161,61 +162,61 @@ declop: ADD $7, R9, R9 BLT declop decrem: - CBZW R0, decret - MOVW (R2)(R12.UXTW<<2), R1 - CMPW $1, R0 - SUBW $0x4e, R1, R3 - UBFX $14, R3, $2, R4 - ORRW R3<<2, R4, R3 - MOVB R3, (R5)(R10.SXTW) - BEQ decret + CBZW R0, decret // cbz w0, .L1 + MOVW (R2)(R12.UXTW<<2), R1 // ldr w1, [x2, w12, uxtw 2] + CMPW $1, R0 // cmp w0, 1 + SUBW $0x4e, R1, R3 // sub w3, w1, #78 + UBFX $14, R3, $2, R4 // ubfx x4, x3, 14, 2 + ORRW R3<<2, R4, R3 // orr w3, w4, w3, lsl 2 + MOVB R3, (R5)(R10.SXTW) // strb w3, [x5, w10, sxtw] + BEQ decret // beq .L1 - MOVW $0xffb2, R7 - ADDW $1, R10, R4 - MOVK $(0xffb1<<16), R7 - ADDW R7, R1, R1 - CMPW $2, R0 - UBFX $20, R1, $8, R6 - LSRW $6, R1, R3 - ANDW $3, R6, R8 - ANDW $-4, R3, R3 - ORRW R8, R3, R3 - MOVB R3, (R5)(R4.SXTW) - BEQ decret + MOVW $0xffb2, R7 // mov w7, 65458 + ADDW $1, R10, R6 // add w6, w10, 1 + MOVK $(0xffb1<<16), R7 // movk w7, 0xffb1, lsl 16 + ADDW R7, R1, R1 // add w1, w1, w7 + CMPW $2, R0 // cmp w0, 2 + UBFX $20, R1, $8, R4 // ubfx x4, x1, 20, 8 + LSRW $6, R1, R3 // lsr w3, w1, 6 + ANDW $3, R4, R8 // and w8, w4, 3 + ANDW $-4, R3, R3 // and w3, w3, -4 + ORRW R8, R3, R3 // orr w3, w3, w8 + MOVB R3, (R5)(R6.SXTW) // strb w3, [x5, w6, sxtw] + BEQ decret // beq .L1 - ADDW $2, R10, R3 - LSRW $12, R1, R4 - ANDW $-16, R4, R4 - CMPW $3, R0 - ORRW R1>>28, R4, R1 - MOVB R1, (R5)(R3.SXTW) - BEQ decret + ADDW $2, R10, R3 // add w3, w10, 2 + LSRW $12, R1, R6 // lsr w6, w1, 12 + ANDW $-16, R6, R6 // and w6, w6, -16 + CMPW $3, R0 // cmp w0, 3 + ORRW R1>>28, R6, R1 // orr w1, 
w6, w1, lsr 28 + MOVB R1, (R5)(R3.SXTW) // strb w1, [x5, w3, sxtw] + BEQ decret // beq .L1 - ADDW $3, R10, R1 - ADDW $1, R12, R12 - ANDW $0xf0, R6, R6 - CMPW $4, R0 - MOVW (R2)(R12<<2), R3 - SUBW $0x4e, R3, R2 - UBFX $2, R2, $4, R4 - ORRW R6, R4, R4 - MOVB R4, (R5)(R1.SXTW) - BEQ decret + ADDW $3, R10, R1 // add w1, w10, 3 + ADDW $1, R12, R12 // add w12, w12, 1 + ANDW $0xf0, R4, R4 // and w4, w4, 240 + CMPW $4, R0 // cmp w0, 4 + MOVW (R2)(R12<<2), R3 // ldr w3, [x2, x12, lsl 2] + SUBW $0x4e, R3, R2 // sub w2, w3, #78 + UBFX $2, R2, $4, R6 // ubfx x6, x2, 2, 4 + ORRW R6, R4, R4 // orr w4, w4, w6 + MOVB R4, (R5)(R1.SXTW) // strb w4, [x5, w1, sxtw] + BEQ decret // beq .L1 - ADDW $4, R10, R1 - UBFX $10, R2, $6, R4 - ORRW R2<<6, R4, R2 - CMPW $5, R0 - MOVB R2, (R5)(R1.SXTW) - BEQ decret + ADDW $4, R10, R1 // add w1, w10, 4 + UBFX $10, R2, $6, R4 // ubfx x4, x2, 10, 6 + ORRW R2<<6, R4, R2 // orr w2, w4, w2, lsl 6 + CMPW $5, R0 // cmp w0, 5 + MOVB R2, (R5)(R1.SXTW) // strb w2, [x5, w1, sxtw] + BEQ decret // beq .L1 - ADDW R7, R3, R3 - ADDW $5, R10, R10 - LSRW $2, R3, R0 - UBFX $16, R3, $6, R3 - ANDW $-64, R0, R0 - ORRW R3, R0, R3 - MOVB R3, (R5)(R10.SXTW) + ADDW R7, R3, R3 // add w3, w3, w7 + ADDW $5, R10, R10 // add w10, w10, 5 + LSRW $2, R3, R0 // lsr w0, w3, 2 + UBFX $16, R3, $6, R3 // ubfx x3, x3, 16, 6 + ANDW $-64, R0, R0 // and w0, w0, -64 + ORRW R3, R0, R3 // orr w3, w0, w3 + MOVB R3, (R5)(R10.SXTW) // strb w3, [x5, w10, sxtw] decret: RET dectil: