1
0
mirror of https://github.com/fumiama/go-base16384.git synced 2026-06-05 00:32:52 +08:00

优化 amd64 调用与内存

goos: darwin
goarch: amd64
pkg: github.com/fumiama/go-base16384
cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz

name            old time/op    new time/op    delta
EncodeTo/16-8     16.9ns ± 3%    16.7ns ± 1%    -1.62%  (p=0.048 n=5+5)
EncodeTo/256-8    78.0ns ± 1%    77.6ns ± 0%      ~     (p=0.286 n=5+4)
EncodeTo/4K-8      942ns ± 0%     943ns ± 0%      ~     (p=0.841 n=5+5)
EncodeTo/32K-8    7.59µs ± 1%    7.53µs ± 1%      ~     (p=0.222 n=5+5)
DecodeTo/16-8     43.1ns ± 1%    12.2ns ± 0%   -71.70%  (p=0.008 n=5+5)
DecodeTo/256-8     179ns ± 1%      74ns ± 1%   -58.93%  (p=0.008 n=5+5)
DecodeTo/4K-8     1.67µs ± 1%    0.94µs ± 0%   -43.89%  (p=0.008 n=5+5)
DecodeTo/32K-8    13.2µs ± 0%     7.5µs ± 1%   -43.48%  (p=0.008 n=5+5)
Encoder/16-8       118ns ± 4%     112ns ± 0%    -5.01%  (p=0.008 n=5+5)
Encoder/256-8      350ns ± 0%     341ns ± 0%    -2.48%  (p=0.008 n=5+5)
Encoder/4K-8      3.86µs ± 2%    3.83µs ± 0%      ~     (p=0.238 n=5+5)
Encoder/32K-8     29.6µs ± 0%    29.4µs ± 1%      ~     (p=0.095 n=5+5)
Decoder/16-8       417ns ± 6%     406ns ± 1%      ~     (p=0.056 n=5+5)
Decoder/256-8      471ns ± 1%     467ns ± 1%      ~     (p=0.222 n=5+5)
Decoder/4K-8      1.65µs ± 1%    1.65µs ± 2%      ~     (p=0.500 n=5+5)
Decoder/32K-8     14.3µs ±21%    12.7µs ± 1%      ~     (p=0.151 n=5+5)

name            old speed      new speed      delta
EncodeTo/16-8    946MB/s ± 3%   961MB/s ± 1%      ~     (p=0.056 n=5+5)
EncodeTo/256-8  3.28GB/s ± 1%  3.30GB/s ± 0%      ~     (p=0.286 n=5+4)
EncodeTo/4K-8   4.35GB/s ± 0%  4.34GB/s ± 0%      ~     (p=0.841 n=5+5)
EncodeTo/32K-8  4.32GB/s ± 1%  4.35GB/s ± 1%      ~     (p=0.222 n=5+5)
DecodeTo/16-8    510MB/s ± 1%  1803MB/s ± 0%  +253.37%  (p=0.008 n=5+5)
DecodeTo/256-8  1.65GB/s ± 1%  4.02GB/s ± 1%  +143.45%  (p=0.008 n=5+5)
DecodeTo/4K-8   2.80GB/s ± 1%  4.99GB/s ± 0%   +78.22%  (p=0.008 n=5+5)
DecodeTo/32K-8  2.83GB/s ± 0%  5.00GB/s ± 1%   +76.93%  (p=0.008 n=5+5)
Encoder/16-8     135MB/s ± 4%   142MB/s ± 0%    +5.22%  (p=0.008 n=5+5)
Encoder/256-8    731MB/s ± 0%   750MB/s ± 0%    +2.55%  (p=0.008 n=5+5)
Encoder/4K-8    1.06GB/s ± 2%  1.07GB/s ± 0%      ~     (p=0.310 n=5+5)
Encoder/32K-8   1.11GB/s ± 0%  1.12GB/s ± 1%      ~     (p=0.095 n=5+5)
Decoder/16-8    38.4MB/s ± 6%  39.4MB/s ± 1%      ~     (p=0.056 n=5+5)
Decoder/256-8    544MB/s ± 1%   548MB/s ± 1%      ~     (p=0.222 n=5+5)
Decoder/4K-8    2.49GB/s ± 1%  2.48GB/s ± 2%      ~     (p=0.548 n=5+5)
Decoder/32K-8   2.32GB/s ±18%  2.59GB/s ± 1%      ~     (p=0.151 n=5+5)

name            old alloc/op   new alloc/op   delta
EncodeTo/16-8      0.00B          0.00B           ~     (all equal)
EncodeTo/256-8     0.00B          0.00B           ~     (all equal)
EncodeTo/4K-8      0.00B          0.00B           ~     (all equal)
EncodeTo/32K-8     0.00B          0.00B           ~     (all equal)
DecodeTo/16-8      48.0B ± 0%      0.0B       -100.00%  (p=0.008 n=5+5)
DecodeTo/256-8      576B ± 0%        0B       -100.00%  (p=0.008 n=5+5)
DecodeTo/4K-8     6.14kB ± 0%    0.00kB       -100.00%  (p=0.008 n=5+5)
DecodeTo/32K-8    49.2kB ± 0%     0.0kB       -100.00%  (p=0.008 n=5+5)
Encoder/16-8       24.0B ± 0%     24.0B ± 0%      ~     (all equal)
Encoder/256-8      24.0B ± 0%     24.0B ± 0%      ~     (all equal)
Encoder/4K-8       24.0B ± 0%     24.0B ± 0%      ~     (all equal)
Encoder/32K-8      26.0B ± 0%     26.0B ± 0%      ~     (all equal)
Decoder/16-8      1.39kB ± 0%    1.39kB ± 0%      ~     (all equal)
Decoder/256-8     1.39kB ± 0%    1.39kB ± 0%      ~     (all equal)
Decoder/4K-8      4.98kB ± 0%    4.98kB ± 0%      ~     (all equal)
Decoder/32K-8     41.1kB ± 0%    41.1kB ± 0%      ~     (all equal)

name            old allocs/op  new allocs/op  delta
EncodeTo/16-8       0.00           0.00           ~     (all equal)
EncodeTo/256-8      0.00           0.00           ~     (all equal)
EncodeTo/4K-8       0.00           0.00           ~     (all equal)
EncodeTo/32K-8      0.00           0.00           ~     (all equal)
DecodeTo/16-8       1.00 ± 0%      0.00       -100.00%  (p=0.008 n=5+5)
DecodeTo/256-8      1.00 ± 0%      0.00       -100.00%  (p=0.008 n=5+5)
DecodeTo/4K-8       1.00 ± 0%      0.00       -100.00%  (p=0.008 n=5+5)
DecodeTo/32K-8      1.00 ± 0%      0.00       -100.00%  (p=0.008 n=5+5)
Encoder/16-8        1.00 ± 0%      1.00 ± 0%      ~     (all equal)
Encoder/256-8       1.00 ± 0%      1.00 ± 0%      ~     (all equal)
Encoder/4K-8        1.00 ± 0%      1.00 ± 0%      ~     (all equal)
Encoder/32K-8       1.00 ± 0%      1.00 ± 0%      ~     (all equal)
Decoder/16-8        3.00 ± 0%      3.00 ± 0%      ~     (all equal)
Decoder/256-8       3.00 ± 0%      3.00 ± 0%      ~     (all equal)
Decoder/4K-8        3.00 ± 0%      3.00 ± 0%      ~     (all equal)
Decoder/32K-8       3.00 ± 0%      3.00 ± 0%      ~     (all equal)
This commit is contained in:
源文雨
2022-12-14 10:38:19 +08:00
parent cdc9c6322a
commit 75ee4a090e
2 changed files with 9 additions and 16 deletions

View File

@@ -9,7 +9,7 @@ import (
//go:noescape
//go:nosplit
-func _encode(offset, outlen int, b, encd []byte) (sum uint64, n uint64)
+func _encode(offset int, b, encd []byte) (sum uint64, n uint64)
//go:noescape
//go:nosplit
@@ -17,10 +17,7 @@ func _decode(offset, outlen int, b, decd []byte)
func encode(offset, outlen int, b, encd []byte) {
if movbe {
-if len(b) == 7 {
-b = append(b, 0)
-}
-sum, n := _encode(offset, outlen, b, encd)
+sum, n := _encode(offset, b, encd)
if offset == 0 {
return
}
@@ -36,9 +33,6 @@ func encode(offset, outlen int, b, encd []byte) {
func decode(offset, outlen int, b, decd []byte) {
if movbe {
-if offset != 0 && cap(b) == len(b) {
-b = append(b, make([]byte, 8)...)
-}
_decode(offset, outlen, b, decd)
} else {
decodeGeneric(offset, outlen, b, decd)

View File

@@ -3,13 +3,12 @@
#include "textflag.h"
-// func _encode(offset, outlen int, b, encd []byte) (sum uint64, n uint64)
-TEXT ·_encode(SB), NOSPLIT, $0-81
+// func _encode(offset int, b, encd []byte) (sum uint64, n uint64)
+TEXT ·_encode(SB), NOSPLIT, $0-72
MOVQ ·offset+0(FP), R10
-MOVQ ·outlen+8(FP), AX
-MOVQ ·data+16(FP), DI
-MOVQ ·dlen+24(FP), R8
-MOVQ ·encd+40(FP), R9
+MOVQ ·data+8(FP), DI
+MOVQ ·dlen+16(FP), R8
+MOVQ ·encd+32(FP), R9
XORQ CX, CX
XORQ SI, SI
SUBQ $6, R8
@@ -117,8 +116,8 @@ encsav:
MOVQ $21955383195992142, CX
ADDQ CX, DX
SHLQ $3, SI
-MOVQ DX, ·sum+64(FP)
-MOVQ SI, ·n+72(FP)
+MOVQ DX, ·sum+56(FP)
+MOVQ SI, ·n+64(FP)
encend:
RET