mirror of
https://github.com/fumiama/go-base16384.git
synced 2026-06-05 00:32:52 +08:00
优化 amd64 调用与内存
goos: darwin
goarch: amd64
pkg: github.com/fumiama/go-base16384
cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz

name             old time/op     new time/op     delta
EncodeTo/16-8    16.9ns ± 3%     16.7ns ± 1%     -1.62%   (p=0.048 n=5+5)
EncodeTo/256-8   78.0ns ± 1%     77.6ns ± 0%     ~        (p=0.286 n=5+4)
EncodeTo/4K-8    942ns ± 0%      943ns ± 0%      ~        (p=0.841 n=5+5)
EncodeTo/32K-8   7.59µs ± 1%     7.53µs ± 1%     ~        (p=0.222 n=5+5)
DecodeTo/16-8    43.1ns ± 1%     12.2ns ± 0%     -71.70%  (p=0.008 n=5+5)
DecodeTo/256-8   179ns ± 1%      74ns ± 1%       -58.93%  (p=0.008 n=5+5)
DecodeTo/4K-8    1.67µs ± 1%     0.94µs ± 0%     -43.89%  (p=0.008 n=5+5)
DecodeTo/32K-8   13.2µs ± 0%     7.5µs ± 1%      -43.48%  (p=0.008 n=5+5)
Encoder/16-8     118ns ± 4%      112ns ± 0%      -5.01%   (p=0.008 n=5+5)
Encoder/256-8    350ns ± 0%      341ns ± 0%      -2.48%   (p=0.008 n=5+5)
Encoder/4K-8     3.86µs ± 2%     3.83µs ± 0%     ~        (p=0.238 n=5+5)
Encoder/32K-8    29.6µs ± 0%     29.4µs ± 1%     ~        (p=0.095 n=5+5)
Decoder/16-8     417ns ± 6%      406ns ± 1%      ~        (p=0.056 n=5+5)
Decoder/256-8    471ns ± 1%      467ns ± 1%      ~        (p=0.222 n=5+5)
Decoder/4K-8     1.65µs ± 1%     1.65µs ± 2%     ~        (p=0.500 n=5+5)
Decoder/32K-8    14.3µs ±21%     12.7µs ± 1%     ~        (p=0.151 n=5+5)

name             old speed       new speed       delta
EncodeTo/16-8    946MB/s ± 3%    961MB/s ± 1%    ~        (p=0.056 n=5+5)
EncodeTo/256-8   3.28GB/s ± 1%   3.30GB/s ± 0%   ~        (p=0.286 n=5+4)
EncodeTo/4K-8    4.35GB/s ± 0%   4.34GB/s ± 0%   ~        (p=0.841 n=5+5)
EncodeTo/32K-8   4.32GB/s ± 1%   4.35GB/s ± 1%   ~        (p=0.222 n=5+5)
DecodeTo/16-8    510MB/s ± 1%    1803MB/s ± 0%   +253.37% (p=0.008 n=5+5)
DecodeTo/256-8   1.65GB/s ± 1%   4.02GB/s ± 1%   +143.45% (p=0.008 n=5+5)
DecodeTo/4K-8    2.80GB/s ± 1%   4.99GB/s ± 0%   +78.22%  (p=0.008 n=5+5)
DecodeTo/32K-8   2.83GB/s ± 0%   5.00GB/s ± 1%   +76.93%  (p=0.008 n=5+5)
Encoder/16-8     135MB/s ± 4%    142MB/s ± 0%    +5.22%   (p=0.008 n=5+5)
Encoder/256-8    731MB/s ± 0%    750MB/s ± 0%    +2.55%   (p=0.008 n=5+5)
Encoder/4K-8     1.06GB/s ± 2%   1.07GB/s ± 0%   ~        (p=0.310 n=5+5)
Encoder/32K-8    1.11GB/s ± 0%   1.12GB/s ± 1%   ~        (p=0.095 n=5+5)
Decoder/16-8     38.4MB/s ± 6%   39.4MB/s ± 1%   ~        (p=0.056 n=5+5)
Decoder/256-8    544MB/s ± 1%    548MB/s ± 1%    ~        (p=0.222 n=5+5)
Decoder/4K-8     2.49GB/s ± 1%   2.48GB/s ± 2%   ~        (p=0.548 n=5+5)
Decoder/32K-8    2.32GB/s ±18%   2.59GB/s ± 1%   ~        (p=0.151 n=5+5)

name             old alloc/op    new alloc/op    delta
EncodeTo/16-8    0.00B           0.00B           ~        (all equal)
EncodeTo/256-8   0.00B           0.00B           ~        (all equal)
EncodeTo/4K-8    0.00B           0.00B           ~        (all equal)
EncodeTo/32K-8   0.00B           0.00B           ~        (all equal)
DecodeTo/16-8    48.0B ± 0%      0.0B            -100.00% (p=0.008 n=5+5)
DecodeTo/256-8   576B ± 0%       0B              -100.00% (p=0.008 n=5+5)
DecodeTo/4K-8    6.14kB ± 0%     0.00kB          -100.00% (p=0.008 n=5+5)
DecodeTo/32K-8   49.2kB ± 0%     0.0kB           -100.00% (p=0.008 n=5+5)
Encoder/16-8     24.0B ± 0%      24.0B ± 0%      ~        (all equal)
Encoder/256-8    24.0B ± 0%      24.0B ± 0%      ~        (all equal)
Encoder/4K-8     24.0B ± 0%      24.0B ± 0%      ~        (all equal)
Encoder/32K-8    26.0B ± 0%      26.0B ± 0%      ~        (all equal)
Decoder/16-8     1.39kB ± 0%     1.39kB ± 0%     ~        (all equal)
Decoder/256-8    1.39kB ± 0%     1.39kB ± 0%     ~        (all equal)
Decoder/4K-8     4.98kB ± 0%     4.98kB ± 0%     ~        (all equal)
Decoder/32K-8    41.1kB ± 0%     41.1kB ± 0%     ~        (all equal)

name             old allocs/op   new allocs/op   delta
EncodeTo/16-8    0.00            0.00            ~        (all equal)
EncodeTo/256-8   0.00            0.00            ~        (all equal)
EncodeTo/4K-8    0.00            0.00            ~        (all equal)
EncodeTo/32K-8   0.00            0.00            ~        (all equal)
DecodeTo/16-8    1.00 ± 0%       0.00            -100.00% (p=0.008 n=5+5)
DecodeTo/256-8   1.00 ± 0%       0.00            -100.00% (p=0.008 n=5+5)
DecodeTo/4K-8    1.00 ± 0%       0.00            -100.00% (p=0.008 n=5+5)
DecodeTo/32K-8   1.00 ± 0%       0.00            -100.00% (p=0.008 n=5+5)
Encoder/16-8     1.00 ± 0%       1.00 ± 0%       ~        (all equal)
Encoder/256-8    1.00 ± 0%       1.00 ± 0%       ~        (all equal)
Encoder/4K-8     1.00 ± 0%       1.00 ± 0%       ~        (all equal)
Encoder/32K-8    1.00 ± 0%       1.00 ± 0%       ~        (all equal)
Decoder/16-8     3.00 ± 0%       3.00 ± 0%       ~        (all equal)
Decoder/256-8    3.00 ± 0%       3.00 ± 0%       ~        (all equal)
Decoder/4K-8     3.00 ± 0%       3.00 ± 0%       ~        (all equal)
Decoder/32K-8    3.00 ± 0%       3.00 ± 0%       ~        (all equal)
This commit is contained in:
@@ -9,7 +9,7 @@ import (
|
||||
|
||||
//go:noescape
|
||||
//go:nosplit
|
||||
func _encode(offset, outlen int, b, encd []byte) (sum uint64, n uint64)
|
||||
func _encode(offset int, b, encd []byte) (sum uint64, n uint64)
|
||||
|
||||
//go:noescape
|
||||
//go:nosplit
|
||||
@@ -17,10 +17,7 @@ func _decode(offset, outlen int, b, decd []byte)
|
||||
|
||||
func encode(offset, outlen int, b, encd []byte) {
|
||||
if movbe {
|
||||
if len(b) == 7 {
|
||||
b = append(b, 0)
|
||||
}
|
||||
sum, n := _encode(offset, outlen, b, encd)
|
||||
sum, n := _encode(offset, b, encd)
|
||||
if offset == 0 {
|
||||
return
|
||||
}
|
||||
@@ -36,9 +33,6 @@ func encode(offset, outlen int, b, encd []byte) {
|
||||
|
||||
func decode(offset, outlen int, b, decd []byte) {
|
||||
if movbe {
|
||||
if offset != 0 && cap(b) == len(b) {
|
||||
b = append(b, make([]byte, 8)...)
|
||||
}
|
||||
_decode(offset, outlen, b, decd)
|
||||
} else {
|
||||
decodeGeneric(offset, outlen, b, decd)
|
||||
|
||||
@@ -3,13 +3,12 @@
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func _encode(offset, outlen int, b, encd []byte) (sum uint64, n uint64)
|
||||
TEXT ·_encode(SB), NOSPLIT, $0-81
|
||||
// func _encode(offset int, b, encd []byte) (sum uint64, n uint64)
|
||||
TEXT ·_encode(SB), NOSPLIT, $0-72
|
||||
MOVQ ·offset+0(FP), R10
|
||||
MOVQ ·outlen+8(FP), AX
|
||||
MOVQ ·data+16(FP), DI
|
||||
MOVQ ·dlen+24(FP), R8
|
||||
MOVQ ·encd+40(FP), R9
|
||||
MOVQ ·data+8(FP), DI
|
||||
MOVQ ·dlen+16(FP), R8
|
||||
MOVQ ·encd+32(FP), R9
|
||||
XORQ CX, CX
|
||||
XORQ SI, SI
|
||||
SUBQ $6, R8
|
||||
@@ -117,8 +116,8 @@ encsav:
|
||||
MOVQ $21955383195992142, CX
|
||||
ADDQ CX, DX
|
||||
SHLQ $3, SI
|
||||
MOVQ DX, ·sum+64(FP)
|
||||
MOVQ SI, ·n+72(FP)
|
||||
MOVQ DX, ·sum+56(FP)
|
||||
MOVQ SI, ·n+64(FP)
|
||||
|
||||
encend:
|
||||
RET
|
||||
|
||||
Reference in New Issue
Block a user