From 75ee4a090e0327621d6bff050d8fa9092a12f4e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Wed, 14 Dec 2022 10:38:19 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=20amd64=20=E8=B0=83=E7=94=A8?= =?UTF-8?q?=E4=B8=8E=E5=86=85=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit goos: darwin goarch: amd64 pkg: github.com/fumiama/go-base16384 cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz name old time/op new time/op delta EncodeTo/16-8 16.9ns ± 3% 16.7ns ± 1% -1.62% (p=0.048 n=5+5) EncodeTo/256-8 78.0ns ± 1% 77.6ns ± 0% ~ (p=0.286 n=5+4) EncodeTo/4K-8 942ns ± 0% 943ns ± 0% ~ (p=0.841 n=5+5) EncodeTo/32K-8 7.59µs ± 1% 7.53µs ± 1% ~ (p=0.222 n=5+5) DecodeTo/16-8 43.1ns ± 1% 12.2ns ± 0% -71.70% (p=0.008 n=5+5) DecodeTo/256-8 179ns ± 1% 74ns ± 1% -58.93% (p=0.008 n=5+5) DecodeTo/4K-8 1.67µs ± 1% 0.94µs ± 0% -43.89% (p=0.008 n=5+5) DecodeTo/32K-8 13.2µs ± 0% 7.5µs ± 1% -43.48% (p=0.008 n=5+5) Encoder/16-8 118ns ± 4% 112ns ± 0% -5.01% (p=0.008 n=5+5) Encoder/256-8 350ns ± 0% 341ns ± 0% -2.48% (p=0.008 n=5+5) Encoder/4K-8 3.86µs ± 2% 3.83µs ± 0% ~ (p=0.238 n=5+5) Encoder/32K-8 29.6µs ± 0% 29.4µs ± 1% ~ (p=0.095 n=5+5) Decoder/16-8 417ns ± 6% 406ns ± 1% ~ (p=0.056 n=5+5) Decoder/256-8 471ns ± 1% 467ns ± 1% ~ (p=0.222 n=5+5) Decoder/4K-8 1.65µs ± 1% 1.65µs ± 2% ~ (p=0.500 n=5+5) Decoder/32K-8 14.3µs ±21% 12.7µs ± 1% ~ (p=0.151 n=5+5) name old speed new speed delta EncodeTo/16-8 946MB/s ± 3% 961MB/s ± 1% ~ (p=0.056 n=5+5) EncodeTo/256-8 3.28GB/s ± 1% 3.30GB/s ± 0% ~ (p=0.286 n=5+4) EncodeTo/4K-8 4.35GB/s ± 0% 4.34GB/s ± 0% ~ (p=0.841 n=5+5) EncodeTo/32K-8 4.32GB/s ± 1% 4.35GB/s ± 1% ~ (p=0.222 n=5+5) DecodeTo/16-8 510MB/s ± 1% 1803MB/s ± 0% +253.37% (p=0.008 n=5+5) DecodeTo/256-8 1.65GB/s ± 1% 4.02GB/s ± 1% +143.45% (p=0.008 n=5+5) DecodeTo/4K-8 2.80GB/s ± 1% 4.99GB/s ± 0% +78.22% (p=0.008 n=5+5) DecodeTo/32K-8 2.83GB/s ± 0% 5.00GB/s ± 1% +76.93% (p=0.008 n=5+5) Encoder/16-8 135MB/s ± 4% 142MB/s ± 0% +5.22% (p=0.008 n=5+5) Encoder/256-8 731MB/s ± 0% 750MB/s ± 0% +2.55% (p=0.008 n=5+5) Encoder/4K-8 1.06GB/s ± 2% 1.07GB/s ± 0% ~ (p=0.310 n=5+5) Encoder/32K-8 1.11GB/s ± 0% 1.12GB/s ± 1% ~ (p=0.095 n=5+5) Decoder/16-8 38.4MB/s ± 6% 39.4MB/s ± 1% ~ (p=0.056 n=5+5) Decoder/256-8 544MB/s ± 1% 548MB/s ± 1% ~ (p=0.222 n=5+5) Decoder/4K-8 2.49GB/s ± 1% 2.48GB/s ± 2% ~ (p=0.548 n=5+5) Decoder/32K-8 2.32GB/s ±18% 2.59GB/s ± 1% ~ (p=0.151 n=5+5) name old alloc/op new alloc/op delta EncodeTo/16-8 0.00B 0.00B ~ (all equal) EncodeTo/256-8 0.00B 0.00B ~ (all equal) EncodeTo/4K-8 0.00B 0.00B ~ (all equal) EncodeTo/32K-8 0.00B 0.00B ~ (all equal) DecodeTo/16-8 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) DecodeTo/256-8 576B ± 0% 0B -100.00% (p=0.008 n=5+5) DecodeTo/4K-8 6.14kB ± 0% 0.00kB -100.00% (p=0.008 n=5+5) DecodeTo/32K-8 49.2kB ± 0% 0.0kB -100.00% (p=0.008 n=5+5) Encoder/16-8 24.0B ± 0% 24.0B ± 0% ~ (all equal) Encoder/256-8 24.0B ± 0% 24.0B ± 0% ~ (all equal) Encoder/4K-8 24.0B ± 0% 24.0B ± 0% ~ (all equal) Encoder/32K-8 26.0B ± 0% 26.0B ± 0% ~ (all equal) Decoder/16-8 1.39kB ± 0% 1.39kB ± 0% ~ (all equal) Decoder/256-8 1.39kB ± 0% 1.39kB ± 0% ~ (all equal) Decoder/4K-8 4.98kB ± 0% 4.98kB ± 0% ~ (all equal) Decoder/32K-8 41.1kB ± 0% 41.1kB ± 0% ~ (all equal) name old allocs/op new allocs/op delta EncodeTo/16-8 0.00 0.00 ~ (all equal) EncodeTo/256-8 0.00 0.00 ~ (all equal) EncodeTo/4K-8 0.00 0.00 ~ (all equal) EncodeTo/32K-8 0.00 0.00 ~ (all equal) DecodeTo/16-8 1.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) DecodeTo/256-8 1.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) DecodeTo/4K-8 1.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) DecodeTo/32K-8 1.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) Encoder/16-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Encoder/256-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Encoder/4K-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Encoder/32K-8 1.00 ± 0% 1.00 ± 0% ~ (all equal) Decoder/16-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) Decoder/256-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) Decoder/4K-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) Decoder/32K-8 3.00 ± 0% 3.00 ± 0% ~ (all equal) --- base14_amd64.go | 10 ++-------- base14_amd64.s | 15 +++++++-------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/base14_amd64.go b/base14_amd64.go index 80675d6..b6c8aa5 100644 --- a/base14_amd64.go +++ b/base14_amd64.go @@ -9,7 +9,7 @@ import ( //go:noescape //go:nosplit -func _encode(offset, outlen int, b, encd []byte) (sum uint64, n uint64) +func _encode(offset int, b, encd []byte) (sum uint64, n uint64) //go:noescape //go:nosplit @@ -17,10 +17,7 @@ func _decode(offset, outlen int, b, decd []byte) func encode(offset, outlen int, b, encd []byte) { if movbe { - if len(b) == 7 { - b = append(b, 0) - } - sum, n := _encode(offset, outlen, b, encd) + sum, n := _encode(offset, b, encd) if offset == 0 { return } @@ -36,9 +33,6 @@ func encode(offset, outlen int, b, encd []byte) { func decode(offset, outlen int, b, decd []byte) { if movbe { - if offset != 0 && cap(b) == len(b) { - b = append(b, make([]byte, 8)...) - } _decode(offset, outlen, b, decd) } else { decodeGeneric(offset, outlen, b, decd) diff --git a/base14_amd64.s b/base14_amd64.s index 29eadda..f345288 100644 --- a/base14_amd64.s +++ b/base14_amd64.s @@ -3,13 +3,12 @@ #include "textflag.h" -// func _encode(offset, outlen int, b, encd []byte) (sum uint64, n uint64) -TEXT ·_encode(SB), NOSPLIT, $0-81 +// func _encode(offset int, b, encd []byte) (sum uint64, n uint64) +TEXT ·_encode(SB), NOSPLIT, $0-72 MOVQ ·offset+0(FP), R10 - MOVQ ·outlen+8(FP), AX - MOVQ ·data+16(FP), DI - MOVQ ·dlen+24(FP), R8 - MOVQ ·encd+40(FP), R9 + MOVQ ·data+8(FP), DI + MOVQ ·dlen+16(FP), R8 + MOVQ ·encd+32(FP), R9 XORQ CX, CX XORQ SI, SI SUBQ $6, R8 @@ -117,8 +116,8 @@ encsav: MOVQ $21955383195992142, CX ADDQ CX, DX SHLQ $3, SI - MOVQ DX, ·sum+64(FP) - MOVQ SI, ·n+72(FP) + MOVQ DX, ·sum+56(FP) + MOVQ SI, ·n+64(FP) encend: RET