From f6d737d0a618014f79bc05209a246e918a68752e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com>
Date: Sun, 2 Oct 2022 22:49:06 +0800
Subject: [PATCH] add amd64 asm ver enc16blk1

---
 encode.go       | 17 -----------
 encode_amd64.go | 12 ++++++++
 encode_amd64.s  | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
 encode_noasm.go | 26 +++++++++++++++++
 4 files changed, 114 insertions(+), 17 deletions(-)
 create mode 100644 encode_amd64.go
 create mode 100644 encode_amd64.s
 create mode 100644 encode_noasm.go

diff --git a/encode.go b/encode.go
index 7042007..fcaee96 100644
--- a/encode.go
+++ b/encode.go
@@ -36,23 +36,6 @@ func (bs Base) Encode(data []byte) (out []byte) {
 	return
 }
 
-// enc16blk1 for bit 1 (actual enc128blk1)
-// len(in)!=0, len(out)==len(in)*16
-//go:nosplit
-func enc16blk1(mask uint16, in, out []byte) {
-	for i, n := range in {
-		c := i * 16
-		binary.BigEndian.PutUint16(out[c:c+2], uint16(n>>7)+mask)
-		binary.BigEndian.PutUint16(out[c+2:c+4], uint16(n>>6&1)+mask)
-		binary.BigEndian.PutUint16(out[c+4:c+6], uint16(n>>5&1)+mask)
-		binary.BigEndian.PutUint16(out[c+6:c+8], uint16(n>>4&1)+mask)
-		binary.BigEndian.PutUint16(out[c+8:c+10], uint16(n>>3&1)+mask)
-		binary.BigEndian.PutUint16(out[c+10:c+12], uint16(n>>2&1)+mask)
-		binary.BigEndian.PutUint16(out[c+12:c+14], uint16(n>>1&1)+mask)
-		binary.BigEndian.PutUint16(out[c+14:c+16], uint16(n&1)+mask)
-	}
-}
-
 // enc64blk2 for bit 2
 // len(in)!=0, len(out)==len(in)*8
 //go:nosplit
diff --git a/encode_amd64.go b/encode_amd64.go
new file mode 100644
index 0000000..03b7b5f
--- /dev/null
+++ b/encode_amd64.go
@@ -0,0 +1,12 @@
+//go:build amd64
+// +build amd64
+
+package unibase2n
+
+// enc16blk1 for bit 1 (actual enc128blk1), implemented in encode_amd64.s.
+// Every bit of in, most significant first, is written to out as the
+// big-endian uint16 mask+bit, so each input byte yields 16 output bytes.
+// len(out)==len(in)*16; the assembly does not bounds-check out.
+//
+//go:noescape
+func enc16blk1(mask uint16, in, out []byte)
diff --git a/encode_amd64.s b/encode_amd64.s
new file mode 100644
index 0000000..4cb0cf9
--- /dev/null
+++ b/encode_amd64.s
@@ -0,0 +1,76 @@
+//go:build amd64
+// +build amd64
+
+#include "textflag.h"
+
+// func enc16blk1(mask uint16, in, out []byte)
+// for bit 1 (actual enc128blk1)
+//
+// Every bit of in, most significant first, is written to out as the
+// big-endian uint16 mask+bit, so each input byte yields 16 output bytes.
+// The caller must guarantee len(out) >= len(in)*16.
+TEXT ·enc16blk1(SB), NOSPLIT, $0-56
+	// load source len; return early on empty input because
+	// LOOP decrements CX before testing it and would wrap around
+	MOVQ in_len+16(FP), CX
+	TESTQ CX, CX
+	JZ done
+	// create mask: copy it into all four 16-bit lanes of BX in
+	// native byte order, the BSWAPQ before each store turns every
+	// lane into big endian
+	MOVWQZX mask+0(FP), BX
+	SHLQ $48, BX
+	MOVQ BX, AX
+	SHRQ $16, AX
+	ORQ AX, BX
+	SHRQ $16, AX
+	ORQ AX, BX
+	SHRQ $16, AX
+	ORQ AX, BX
+	// load source addr
+	MOVQ in_base+8(FP), SI
+	// load dest addr
+	MOVQ out_base+32(FP), DI
+	// go forward
+	CLD
+lop:
+	LODSB
+	// 8 -> 64: high nibble, SHLB moves the top bit of AL into CF
+	// and SETCS drops it into the lowest lane of DX
+	XORQ DX, DX
+	SHLB $1, AX
+	SETCS DX
+	SHLQ $16, DX
+	SHLB $1, AX
+	SETCS DX
+	SHLQ $16, DX
+	SHLB $1, AX
+	SETCS DX
+	SHLQ $16, DX
+	SHLB $1, AX
+	SETCS DX
+	// add mask, AX is saved first as AL still holds the unprocessed low nibble
+	MOVQ AX, R8
+	LEAQ 0(DX)(BX*1), AX
+	BSWAPQ AX
+	STOSQ
+	// 8 -> 64: low nibble
+	XORQ AX, AX
+	SHLB $1, R8
+	SETCS AX
+	SHLQ $16, AX
+	SHLB $1, R8
+	SETCS AX
+	SHLQ $16, AX
+	SHLB $1, R8
+	SETCS AX
+	SHLQ $16, AX
+	SHLB $1, R8
+	SETCS AX
+	// add mask
+	ADDQ BX, AX
+	BSWAPQ AX
+	STOSQ
+	LOOP lop
+done:
+	RET
diff --git a/encode_noasm.go b/encode_noasm.go
new file mode 100644
index 0000000..919d351
--- /dev/null
+++ b/encode_noasm.go
@@ -0,0 +1,26 @@
+//go:build !amd64
+// +build !amd64
+
+package unibase2n
+
+import "encoding/binary"
+
+// enc16blk1 for bit 1 (actual enc128blk1)
+// Every bit of in, most significant first, is written to out as the
+// big-endian uint16 mask+bit, so each input byte yields 16 output bytes.
+// len(out)==len(in)*16
+//
+//go:nosplit
+func enc16blk1(mask uint16, in, out []byte) {
+	for i, n := range in {
+		c := i * 16
+		binary.BigEndian.PutUint16(out[c:c+2], uint16(n>>7)+mask)
+		binary.BigEndian.PutUint16(out[c+2:c+4], uint16(n>>6&1)+mask)
+		binary.BigEndian.PutUint16(out[c+4:c+6], uint16(n>>5&1)+mask)
+		binary.BigEndian.PutUint16(out[c+6:c+8], uint16(n>>4&1)+mask)
+		binary.BigEndian.PutUint16(out[c+8:c+10], uint16(n>>3&1)+mask)
+		binary.BigEndian.PutUint16(out[c+10:c+12], uint16(n>>2&1)+mask)
+		binary.BigEndian.PutUint16(out[c+12:c+14], uint16(n>>1&1)+mask)
+		binary.BigEndian.PutUint16(out[c+14:c+16], uint16(n&1)+mask)
+	}
+}