mirror of
https://github.com/fumiama/go-base16384.git
synced 2026-06-05 00:32:52 +08:00
goos: darwin goarch: amd64 pkg: github.com/fumiama/go-base16384 cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz name old time/op new time/op delta EncodeTo/16-8 16.9ns ± 3% 16.7ns ± 1% -1.62% (p=0.048 n=5+5) EncodeTo/256-8 78.0ns ± 1% 77.6ns ± 0% ~ (p=0.286 n=5+4) EncodeTo/4K-8 942ns ± 0% 943ns ± 0% ~ (p=0.841 n=5+5) EncodeTo/32K-8 7.59µs ± 1% 7.53µs ± 1% ~ (p=0.222 n=5+5) DecodeTo/16-8 43.1ns ± 1% 12.2ns ± 0% -71.70% (p=0.008 n=5+5) DecodeTo/256-8 179ns ± 1% 74ns ± 1% -58.93% (p=0.008 n=5+5) DecodeTo/4K-8 1.67µs ± 1% 0.94µs ± 0% -43.89% (p=0.008 n=5+5) DecodeTo/32K-8 13.2µs ± 0% 7.5µs ± 1% -43.48% (p=0.008 n=5+5) Encoder/16-8 118ns ± 4% 112ns ± 0% -5.01% (p=0.008 n=5+5) Encoder/256-8 350ns ± 0% 341ns ± 0% -2.48% (p=0.008 n=5+5) Encoder/4K-8 3.86µs ± 2% 3.83µs ± 0% ~ (p=0.238 n=5+5) Encoder/32K-8 29.6µs ± 0% 29.4µs ± 1% ~ (p=0.095 n=5+5) Decoder/16-8 417ns ± 6% 406ns ± 1% ~ (p=0.056 n=5+5) Decoder/256-8 471ns ± 1% 467ns ± 1% ~ (p=0.222 n=5+5) Decoder/4K-8 1.65µs ± 1% 1.65µs ± 2% ~ (p=0.500 n=5+5) Decoder/32K-8 14.3µs ±21% 12.7µs ± 1% ~ (p=0.151 n=5+5) name old speed new speed delta EncodeTo/16-8 946MB/s ± 3% 961MB/s ± 1% ~ (p=0.056 n=5+5) EncodeTo/256-8 3.28GB/s ± 1% 3.30GB/s ± 0% ~ (p=0.286 n=5+4) EncodeTo/4K-8 4.35GB/s ± 0% 4.34GB/s ± 0% ~ (p=0.841 n=5+5) EncodeTo/32K-8 4.32GB/s ± 1% 4.35GB/s ± 1% ~ (p=0.222 n=5+5) DecodeTo/16-8 510MB/s ± 1% 1803MB/s ± 0% +253.37% (p=0.008 n=5+5) DecodeTo/256-8 1.65GB/s ± 1% 4.02GB/s ± 1% +143.45% (p=0.008 n=5+5) DecodeTo/4K-8 2.80GB/s ± 1% 4.99GB/s ± 0% +78.22% (p=0.008 n=5+5) DecodeTo/32K-8 2.83GB/s ± 0% 5.00GB/s ± 1% +76.93% (p=0.008 n=5+5) Encoder/16-8 135MB/s ± 4% 142MB/s ± 0% +5.22% (p=0.008 n=5+5) Encoder/256-8 731MB/s ± 0% 750MB/s ± 0% +2.55% (p=0.008 n=5+5) Encoder/4K-8 1.06GB/s ± 2% 1.07GB/s ± 0% ~ (p=0.310 n=5+5) Encoder/32K-8 1.11GB/s ± 0% 1.12GB/s ± 1% ~ (p=0.095 n=5+5) Decoder/16-8 38.4MB/s ± 6% 39.4MB/s ± 1% ~ (p=0.056 n=5+5) Decoder/256-8 544MB/s ± 1% 548MB/s ± 1% ~ (p=0.222 n=5+5) Decoder/4K-8 2.49GB/s ± 1% 
2.48GB/s ± 2% ~ (p=0.548 n=5+5) Decoder/32K-8 2.32GB/s ±18% 2.59GB/s ± 1% ~ (p=0.151 n=5+5) name old alloc/op new alloc/op delta EncodeTo/16-8 0.00B 0.00B ~ (all equal) EncodeTo/256-8 0.00B 0.00B ~ (all equal) EncodeTo/4K-8 0.00B 0.00B ~ (all equal) EncodeTo/32K-8 0.00B 0.00B ~ (all equal) DecodeTo/16-8 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) DecodeTo/256-8 576B ± 0% 0B -100.00% (p=0.008 n=5+5) DecodeTo/4K-8 6.14kB ± 0% 0.00kB -100.00% (p=0.008 n=5+5) DecodeTo/32K-8 49.2kB ± 0% 0.0kB -100.00% (p=0.008 n=5+5) Encoder/16-8 24.0B ± 0% 24.0B ± 0% ~ (all equal) Encoder/256-8 24.0B ± 0% 24.0B ± 0% ~ (all equal) Encoder/4K-8 24.0B ± 0% 24.0B ± 0% ~ (all equal) Encoder/32K-8 26.0B ± 0% 26.0B ± 0% ~ (all equal) Decoder/16-8 1.39kB ± 0% 1.39kB ± 0% ~ (all equal) Decoder/256-8 1.39kB ± 0% 1.39kB ± 0% ~ (all equal) Decoder/4K-8 4.98kB ± 0% 4.98kB ± 0% ~ (all equal) Decoder/32K-8 41.1kB ± 0% 41.1kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta EncodeTo/16-8 0.00 0.00 ~ (all equal) EncodeTo/256-8 0.00 0.00 ~ (all equal) EncodeTo/4K-8 0.00 0.00 ~ (all equal) EncodeTo/32K-8 0.00 0.00 ~ (all equal) DecodeTo/16-8 1.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) DecodeTo/256-8 1.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) DecodeTo/4K-8 1.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) DecodeTo/32K-8 1.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) Encoder/16-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Encoder/256-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Encoder/4K-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Encoder/32K-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder/16-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) Decoder/256-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) Decoder/4K-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) Decoder/32K-8 3.00 ± 0% 3.00 ± 0% ~ (all equal)
235 lines
4.0 KiB
ArmAsm
235 lines
4.0 KiB
ArmAsm
//go:build amd64
|
|
// +build amd64
|
|
|
|
#include "textflag.h"
|
|
|
|
// func _encode(offset int, b, encd []byte) (sum uint64, n uint64)
//
// Bulk phase: while in < len(b)-6, load 8 bytes of b big-endian (MOVBE),
// consume 7 of them, spread the top 56 bits into four 14-bit fields (one per
// 16-bit lane), add 0x4E00 to every lane and store the 8-byte result
// big-endian into encd.
//
// Tail phase (only when offset != 0): pack up to six further bytes of b into
// a 64-bit word using the same 14-bit layout, add 0x004E to every 16-bit lane
// and return it as sum. offset values 1..5 stop after that many bytes; any
// other non-zero value takes the six-byte path.
//
// Results:
//   sum  packed tail word, or 0 when offset == 0
//   n    8 * (number of groups written by the bulk phase)
//
// Register roles:
//   DI = &b[0]        R9 = &encd[0]     R10 = offset
//   CX = input index  SI = groups written
//   R8 = len(b)-6 in the bulk phase, scratch in the tail
//   AX, BX, R11 = masks / lane bias; DX, R12, R13 = scratch
//
// BP is deliberately not used: this function has a zero-size frame, so the
// assembler emits no BP save/restore and any write to BP would leave the
// caller with a corrupted frame pointer.
//
// NOTE(review): every bulk iteration loads 8 bytes but advances by 7, so the
// last load touches one byte past the 7 it consumes; the caller presumably
// guarantees that byte is readable - confirm. Requires the MOVBE extension.
TEXT ·_encode(SB), NOSPLIT, $0-72
	MOVQ offset+0(FP), R10
	MOVQ b_base+8(FP), DI
	MOVQ b_len+16(FP), R8
	MOVQ encd_base+32(FP), R9
	XORQ CX, CX                      // input byte index
	XORQ SI, SI                      // 8-byte groups written
	SUBQ $6, R8                      // bulk loop runs while CX < len(b)-6
	JLE  encrem
	MOVQ $0x3FFF000000000000, AX     // lane 3 mask (bits 48-61)
	MOVQ $0x00003FFF00000000, BX     // lane 2 mask (bits 32-45)
	MOVQ $0x4E004E004E004E00, R11    // 0x4E00 bias for each 16-bit lane

enclop:
	MOVBEQ (DI)(CX*1), DX            // big-endian load; lowest byte is not consumed
	INCQ   SI
	ADDQ   $7, CX
	MOVQ   DX, R13
	MOVQ   DX, R12
	SHRQ   $2, R13
	SHRQ   $4, R12
	ANDQ   BX, R12                   // lane 2
	ANDQ   AX, R13                   // lane 3
	ORQ    R12, R13
	MOVQ   DX, R12
	SHRQ   $8, DX
	SHRQ   $6, R12
	ANDL   $0x3FFF, DX               // lane 0
	ANDL   $0x3FFF0000, R12          // lane 1
	ORQ    R13, R12
	ORQ    R12, DX
	ADDQ   R11, DX
	MOVBEQ DX, -8(R9)(SI*8)          // SI already incremented, hence the -8
	CMPQ   CX, R8
	JL     enclop

encrem:
	TESTQ R10, R10
	JE    encend

	// tail byte 0: bits 2-7 -> sum bits 0-5, bits 0-1 -> sum bits 14-15
	MOVBLZX (DI)(CX*1), DX
	MOVL    DX, R8
	SALQ    $14, DX
	SHRB    $2, R8
	ANDL    $0xC000, DX
	MOVBLZX R8, R8
	ORQ     R8, DX
	CMPL    R10, $1
	JE      encsav

	// tail byte 1: bits 2-7 -> sum bits 8-13, bits 0-1 -> sum bits 20-21
	// (the sign extension of the load is discarded by the masks below)
	MOVBQSX 1(DI)(CX*1), R8
	MOVQ    R8, R11
	SALQ    $20, R8
	SALQ    $6, R11
	ANDL    $0x300000, R8
	ANDL    $0x3F00, R11
	ORQ     R11, DX
	ORQ     R8, DX
	CMPL    R10, $2
	JE      encsav

	// tail byte 2: bits 4-7 -> sum bits 16-19, bits 0-3 -> sum bits 28-31
	MOVBQSX 2(DI)(CX*1), R8
	MOVQ    R8, R11
	SALQ    $28, R8
	SALQ    $12, R11
	MOVL    R8, R8                   // keep only the low 32 bits (bits 28-31 survive)
	ANDL    $0xF0000, R11
	ORQ     R11, R8
	ORQ     R8, DX
	CMPL    R10, $3
	JE      encsav

	// tail byte 3: bits 4-7 -> sum bits 24-27, bits 0-3 -> sum bits 34-37
	MOVQ    $0x0000003C00000000, BX
	MOVBQSX 3(DI)(CX*1), R8
	MOVQ    R8, R11
	SALQ    $34, R8
	SALQ    $20, R11
	ANDQ    BX, R8
	ANDL    $0xF000000, R11
	ORQ     R11, R8
	ORQ     R8, DX
	CMPL    R10, $4
	JE      encsav

	// tail byte 4: bits 6-7 -> sum bits 32-33, bits 0-5 -> sum bits 42-47
	MOVQ    $0x0000000300000000, BX
	MOVBQSX 4(DI)(CX*1), R8
	MOVQ    R8, R11
	SALQ    $42, R8
	SALQ    $26, R11
	ANDQ    BX, R11
	MOVQ    $0x0000FC0000000000, BX
	ANDQ    BX, R8
	ORQ     R11, R8
	ORQ     R8, DX
	CMPL    R10, $5
	JE      encsav

	// tail byte 5: bits 6-7 -> sum bits 40-41, bits 0-5 -> sum bits 48-53
	// CX and DI are dead after this point and are reused as scratch.
	MOVQ    $0x0000030000000000, R8
	MOVBQSX 5(DI)(CX*1), CX
	MOVQ    CX, DI
	SALQ    $48, CX
	SALQ    $34, DI
	ANDQ    R8, DI
	MOVQ    $0x003F000000000000, R8
	ANDQ    R8, CX
	ORQ     DI, CX
	ORQ     CX, DX

encsav:
	MOVQ $0x004E004E004E004E, CX     // 0x004E bias for each 16-bit lane
	ADDQ CX, DX
	SHLQ $3, SI                      // groups -> bytes
	MOVQ DX, sum+56(FP)
	MOVQ SI, n+64(FP)
	RET

encend:
	// No tail bytes: still store both results so the caller never reads
	// uninitialised stack slots.
	SHLQ $3, SI
	MOVQ $0, sum+56(FP)
	MOVQ SI, n+64(FP)
	RET
|
|
|
|
|
|
// func _decode(offset, outlen int, b, decd []byte)
//
// Bulk phase: while out < outlen-6, load one 8-byte group of b big-endian
// (MOVBE), subtract 0x4E00 from every 16-bit lane, squeeze the four 14-bit
// fields into the top 56 bits and store the word big-endian at decd[out],
// then advance out by 7.
//
// Tail phase (only when offset != 0): load the next 8 bytes of b as a
// little-endian word, remove the 0x004E lane bias and write individual bytes
// to decd. offset values 1..5 stop after that many bytes; any other non-zero
// value writes six bytes.
//
// Register roles:
//   DI = &b[0]         R9 = &decd[0]     BX = offset
//   SI = input group index               CX = output byte index
//   R8 = outlen-6 in the bulk phase, scratch in the tail
//   AX, R10, R11 = field masks; R12 = negated lane bias
//   DX, R13, R14 = scratch
//
// BP is deliberately not used: this function has a zero-size frame, so the
// assembler emits no BP save/restore and any write to BP would leave the
// caller with a corrupted frame pointer.
//
// NOTE(review): each bulk store writes 8 bytes while out advances by 7, so
// the final store spills one byte past the 7 decoded ones, and the tail
// always loads a full 8 bytes from b; the caller presumably sizes both
// buffers accordingly - confirm. Requires the MOVBE extension.
TEXT ·_decode(SB), NOSPLIT, $0-64
	MOVQ offset+0(FP), BX
	MOVQ outlen+8(FP), R8
	MOVQ b_base+16(FP), DI
	MOVQ decd_base+40(FP), R9
	XORQ CX, CX                      // output byte index
	XORQ SI, SI                      // input group index
	SUBQ $6, R8                      // bulk loop runs while CX < outlen-6
	JLE  decrem
	MOVQ $-0x4E004E004E004E00, R12   // removes the 0x4E00 bias from each lane
	MOVQ $0xFFFC000000000000, AX     // field 3 mask (bits 50-63)
	MOVQ $0x0003FFF000000000, R11    // field 2 mask (bits 36-49)
	MOVQ $0x0000000FFFC00000, R10    // field 1 mask (bits 22-35)

declop:
	MOVBEQ (DI)(SI*8), DX
	INCQ   SI
	ADDQ   R12, DX
	MOVQ   DX, R13
	LEAQ   0(DX*4), R14              // DX << 2 without touching flags
	SALQ   $4, R13
	ANDQ   AX, R14                   // field 3
	ANDQ   R11, R13                  // field 2
	ORQ    R13, R14
	MOVQ   DX, R13
	SALQ   $8, DX
	SALQ   $6, R13
	ANDL   $0x3FFF00, DX             // field 0 (bits 8-21)
	ANDQ   R10, R13                  // field 1
	ORQ    R14, R13
	ORQ    R13, DX
	MOVBEQ DX, (R9)(CX*1)            // low byte of DX is zero and lands at out+7
	ADDQ   $7, CX
	CMPQ   CX, R8
	JL     declop

decrem:
	TESTQ BX, BX
	JE    decend

	// From here DI holds the raw tail word and SI is scratch.
	MOVQ (DI)(SI*8), DI

	// output byte 0
	LEAQ -0x4E(DI), SI               // remove lane 0 bias
	MOVQ SI, DX
	SALL $2, SI
	SHRQ $14, DX
	ANDL $3, DX
	ORL  SI, DX
	MOVB DX, 0(R9)(CX*1)
	CMPL BX, $1
	JE   decend

	// output byte 1
	LEAQ -0x4E004E(DI), DX           // remove lane 0-1 bias
	MOVQ DX, SI
	MOVQ DX, R8
	SHRQ $6, SI
	SHRQ $20, R8
	ANDL $-4, SI
	ANDL $3, R8
	ORL  R8, SI
	MOVB SI, 1(R9)(CX*1)
	CMPL BX, $2
	JE   decend

	// output byte 2 (reuses the word computed for byte 1)
	MOVQ DX, SI
	SHRQ $28, DX
	SHRQ $12, SI
	ANDL $15, DX
	ANDL $-16, SI
	ORL  SI, DX
	MOVB DX, 2(R9)(CX*1)
	CMPL BX, $3
	JE   decend

	// output byte 3
	MOVQ $-0x4E004E004E, DX          // remove lane 0-2 bias
	ADDQ DI, DX
	MOVQ DX, SI
	MOVQ DX, R8
	SHRQ $20, SI
	SHRQ $34, R8
	ANDL $-16, SI
	ANDL $15, R8
	ORL  R8, SI
	MOVB SI, 3(R9)(CX*1)
	CMPL BX, $4
	JE   decend

	// output byte 4 (reuses the word computed for byte 3)
	MOVQ DX, SI
	SHRQ $42, DX
	SHRQ $26, SI
	ANDL $63, DX
	ANDL $-64, SI
	ORL  SI, DX
	MOVB DX, 4(R9)(CX*1)
	CMPL BX, $5
	JE   decend

	// output byte 5
	MOVQ $-0x004E004E004E004E, DX    // remove the bias from all four lanes
	ADDQ DX, DI
	MOVQ DI, DX
	SHRQ $48, DI
	SHRQ $34, DX
	ANDL $63, DI
	ANDL $-64, DX
	ORL  DI, DX
	MOVB DX, 5(R9)(CX*1)

decend:
	RET
|