From f084ae51285d57b964df94d2780bfb0ef4ea2148 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com>
Date: Sun, 2 Oct 2022 23:57:35 +0800
Subject: [PATCH] add more asm

The amd64 enc16blk8 clears the bits of AX above AL after LODSB, because
LODSB only writes AL and the following add is 16 bits wide.
---
 encode.go       | 32 ------------------
 encode_amd64.go | 12 +++++++
 encode_amd64.s  | 90 +++++++++++++++++++++++++++++++++++++++++++++++++
 encode_noasm.go | 32 ++++++++++++++++++
 4 files changed, 134 insertions(+), 32 deletions(-)

diff --git a/encode.go b/encode.go
index fcaee96..f6c1b32 100644
--- a/encode.go
+++ b/encode.go
@@ -36,38 +36,6 @@ func (bs Base) Encode(data []byte) (out []byte) {
 	return
 }
 
-// enc64blk2 for bit 2
-// len(in)!=0, len(out)==len(in)*8
-//go:nosplit
-func enc64blk2(mask uint64, in, out []byte) {
-	for i, n := range in {
-		c := i * 8
-		x := (uint64(n)<<42 | uint64(n)<<28 | uint64(n)<<14 | uint64(n)) & 0x00030003_00030003
-		binary.BigEndian.PutUint64(out[c:c+8], x+mask)
-	}
-}
-
-// enc32blk4 for bit 4
-// len(in)!=0, len(out)==len(in)*4
-//go:nosplit
-func enc32blk4(mask uint32, in, out []byte) {
-	for i, n := range in {
-		c := i * 4
-		x := (uint32(n)<<12 | uint32(n)) & 0x000f000f
-		binary.BigEndian.PutUint32(out[c:c+4], x+mask)
-	}
-}
-
-// enc16blk8 for bit 8
-// len(in)!=0, len(out)==len(in)*2
-//go:nosplit
-func enc16blk8(mask uint16, in, out []byte) {
-	for i, n := range in {
-		c := i * 2
-		binary.BigEndian.PutUint16(out[c:c+2], uint16(n)+mask)
-	}
-}
-
 // enc128blk for bit 3 5 6 7 9 11 13 15
 // len(in)>0, len(out)==len(in)/bit*16
 //go:nosplit
diff --git a/encode_amd64.go b/encode_amd64.go
index 03b7b5f..8555070 100644
--- a/encode_amd64.go
+++ b/encode_amd64.go
@@ -6,3 +6,15 @@ package unibase2n
 // enc16blk1 for bit 1 (actual enc128blk1)
 // len(in)!=0, len(out)==len(in)*16
 func enc16blk1(mask uint16, in, out []byte)
+
+// enc64blk2 for bit 2
+// len(in)!=0, len(out)==len(in)*8
+func enc64blk2(mask uint64, in, out []byte)
+
+// enc32blk4 for bit 4
+// len(in)!=0, len(out)==len(in)*4
+func enc32blk4(mask uint32, in, out []byte)
+
+// enc16blk8 for bit 8
+// len(in)!=0, len(out)==len(in)*2
+func enc16blk8(mask uint16, in, out []byte)
diff --git a/encode_amd64.s b/encode_amd64.s
index 4cb0cf9..f735295 100644
--- a/encode_amd64.s
+++ b/encode_amd64.s
@@ -64,3 +64,93 @@ lop:
 	STOSQ
 	LOOP lop
 	RET
+
+// enc64blk2(mask uint64, in, out []byte)
+// len(in)!=0, len(out)==len(in)*8
+TEXT ·enc64blk2(SB), NOSPLIT, $0-56
+	// load mask
+	MOVQ ·mask+0(FP), BX
+	// load source addr
+	MOVQ ·in+8(FP), SI
+	// load source len
+	MOVQ ·inlen+16(FP), CX
+	// load dest addr
+	MOVQ ·out+32(FP), DI
+	// go forward
+	CLD
+lop:
+	LODSB
+	// 8 -> 64
+	SHLL $10, AX
+	MOVL AX, DX
+	ANDL $0x00030000, DX
+	RORQ $14, AX
+	MOVL AX, R8
+	ANDL $0x00000003, R8
+	ORL R8, DX
+	SHLQ $32, DX
+	ROLQ $4, AX
+	MOVL AX, R8
+	ANDL $0x00000003, R8
+	ORQ R8, DX
+	SHLL $14, AX
+	ANDL $0x00030000, AX
+	ORQ AX, DX
+	// add mask
+	LEAQ 0(DX)(BX*1), AX
+	BSWAPQ AX
+	STOSQ
+	LOOP lop
+	RET
+
+// enc32blk4(mask uint32, in, out []byte)
+// len(in)!=0, len(out)==len(in)*4
+TEXT ·enc32blk4(SB), NOSPLIT, $0-56
+	// load mask
+	MOVQ ·mask+0(FP), BX
+	// load source addr
+	MOVQ ·in+8(FP), SI
+	// load source len
+	MOVQ ·inlen+16(FP), CX
+	// load dest addr
+	MOVQ ·out+32(FP), DI
+	// go forward
+	CLD
+lop:
+	LODSB
+	// 8 -> 32
+	MOVL AX, DX
+	ANDL $0x0f, DX
+	SHLL $12, AX
+	ANDL $0x000f0000, AX
+	ORL AX, DX
+	// add mask
+	LEAL 0(DX)(BX*1), AX
+	BSWAPL AX
+	STOSL
+	LOOP lop
+	RET
+
+// func enc16blk8(mask uint16, in, out []byte)
+// len(in)!=0, len(out)==len(in)*2
+TEXT ·enc16blk8(SB), NOSPLIT, $0-56
+	// load mask
+	MOVQ ·mask+0(FP), BX
+	// load source addr
+	MOVQ ·in+8(FP), SI
+	// load source len
+	MOVQ ·inlen+16(FP), CX
+	// load dest addr
+	MOVQ ·out+32(FP), DI
+	// go forward
+	CLD
+lop:
+	LODSB
+	// LODSB only writes AL: drop the stale bits above it so the 16-bit add sees uint16(n)
+	ANDL $0xff, AX
+	// add mask
+	LEAW 0(AX)(BX*1), AX
+	RORW $8, AX
+	STOSW
+	LOOP lop
+	RET
diff --git a/encode_noasm.go b/encode_noasm.go
index 919d351..79e4547 100644
--- a/encode_noasm.go
+++ b/encode_noasm.go
@@ -21,3 +21,35 @@ func enc16blk1(mask uint16, in, out []byte) {
 		binary.BigEndian.PutUint16(out[c+14:c+16], uint16(n&1)+mask)
 	}
 }
+
+// enc64blk2 for bit 2
+// len(in)!=0, len(out)==len(in)*8
+//go:nosplit
+func enc64blk2(mask uint64, in, out []byte) {
+	for i, n := range in {
+		c := i * 8
+		x := (uint64(n)<<42 | uint64(n)<<28 | uint64(n)<<14 | uint64(n)) & 0x00030003_00030003
+		binary.BigEndian.PutUint64(out[c:c+8], x+mask)
+	}
+}
+
+// enc32blk4 for bit 4
+// len(in)!=0, len(out)==len(in)*4
+//go:nosplit
+func enc32blk4(mask uint32, in, out []byte) {
+	for i, n := range in {
+		c := i * 4
+		x := (uint32(n)<<12 | uint32(n)) & 0x000f000f
+		binary.BigEndian.PutUint32(out[c:c+4], x+mask)
+	}
+}
+
+// enc16blk8 for bit 8
+// len(in)!=0, len(out)==len(in)*2
+//go:nosplit
+func enc16blk8(mask uint16, in, out []byte) {
+	for i, n := range in {
+		c := i * 2
+		binary.BigEndian.PutUint16(out[c:c+2], uint16(n)+mask)
+	}
+}