From 6639cbf40b7d800c604680ea2ca087a4411f9ca7 Mon Sep 17 00:00:00 2001 From: fumiama Date: Mon, 22 Nov 2021 20:04:01 +0800 Subject: [PATCH] perf: use runtime.fastrand --- README.md | 61 ++++++++++------ new16.txt | 162 +++++++++++++++++++++---------------------- new17.txt | 162 +++++++++++++++++++++---------------------- old.txt => old16.txt | 0 old17.txt | 86 +++++++++++++++++++++++ tea_1.16.go | 5 ++ tea_1.16_asm.go | 6 +- tea_1.16_pure.go | 6 +- tea_1.17.go | 10 ++- 9 files changed, 309 insertions(+), 189 deletions(-) rename old.txt => old16.txt (100%) create mode 100644 old17.txt diff --git a/README.md b/README.md index 97d0d85..423f671 100644 --- a/README.md +++ b/README.md @@ -2,30 +2,51 @@ TEA 编码算法的 PLAN9 汇编优化实现 ## 1.17 版本及以上 -速度已经达到最优,代码与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)完全相同。 - -## 1.16 版本及以下 -使用 PLAN9 汇编编写`Encrypt`,内联编写`Decrypt`,与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)代码同在`go1.16`版本下编译相比,提升速度如下(new16.txt)。 +代码与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)相比,将填充字节的随机数来源由`math/rand`替换为`runtime.fastrand`(加密算法本身不变),提升速度如下。 ```css name old time/op new time/op delta -TEAen/16-8 252ns ± 0% 240ns ± 1% -4.50% (p=0.000 n=9+10) -TEAen/256-8 1.77µs ± 1% 1.67µs ± 1% -5.66% (p=0.000 n=9+10) -TEAen/4K-8 25.9µs ± 0% 24.8µs ± 0% -4.00% (p=0.000 n=10+9) -TEAen/32K-8 208µs ± 1% 201µs ± 0% -3.34% (p=0.000 n=10+10) -TEAde/16-8 216ns ± 1% 211ns ± 1% -2.68% (p=0.000 n=10+10) -TEAde/256-8 1.71µs ± 1% 1.66µs ± 1% -2.69% (p=0.000 n=10+10) -TEAde/4K-8 25.4µs ± 1% 24.7µs ± 1% -2.73% (p=0.000 n=10+10) -TEAde/32K-8 206µs ± 0% 200µs ± 0% -2.59% (p=0.000 n=9+10) +TEAen/16-8 241ns ± 1% 224ns ± 1% -7.05% (p=0.000 n=9+9) +TEAen/256-8 1.71µs ± 1% 1.69µs ± 1% -0.96% (p=0.000 n=10+10) +TEAen/4K-8 25.0µs ± 1% 24.9µs ± 1% ~ (p=0.052 n=10+10) +TEAen/32K-8 203µs ± 0% 202µs ± 0% -0.44% (p=0.003 n=10+10) +TEAde/16-8 208ns ± 1% 208ns ± 0%
~ (p=0.857 n=9+10) +TEAde/256-8 1.65µs ± 1% 1.65µs ± 1% ~ (p=0.859 n=9+10) +TEAde/4K-8 24.7µs ± 1% 24.6µs ± 1% ~ (p=0.133 n=9+10) +TEAde/32K-8 200µs ± 1% 199µs ± 0% ~ (p=0.243 n=9+10) name old speed new speed delta -TEAen/16-8 63.5MB/s ± 0% 66.5MB/s ± 1% +4.70% (p=0.000 n=9+10) -TEAen/256-8 145MB/s ± 1% 153MB/s ± 1% +5.98% (p=0.000 n=9+10) -TEAen/4K-8 158MB/s ± 0% 165MB/s ± 0% +4.16% (p=0.000 n=10+9) -TEAen/32K-8 158MB/s ± 1% 163MB/s ± 0% +3.45% (p=0.000 n=10+10) -TEAde/16-8 148MB/s ± 1% 152MB/s ± 1% +2.75% (p=0.000 n=10+10) -TEAde/256-8 160MB/s ± 1% 164MB/s ± 1% +2.77% (p=0.000 n=10+10) -TEAde/4K-8 162MB/s ± 1% 167MB/s ± 1% +2.80% (p=0.000 n=10+10) -TEAde/32K-8 159MB/s ± 0% 164MB/s ± 0% +2.66% (p=0.000 n=9+10) +TEAen/16-8 66.3MB/s ± 1% 71.3MB/s ± 1% +7.59% (p=0.000 n=9+9) +TEAen/256-8 150MB/s ± 1% 151MB/s ± 1% +0.97% (p=0.000 n=10+10) +TEAen/4K-8 164MB/s ± 1% 164MB/s ± 1% ~ (p=0.052 n=10+10) +TEAen/32K-8 162MB/s ± 0% 163MB/s ± 0% +0.44% (p=0.003 n=10+10) +TEAde/16-8 154MB/s ± 1% 154MB/s ± 0% ~ (p=0.905 n=9+10) +TEAde/256-8 165MB/s ± 1% 165MB/s ± 1% ~ (p=0.905 n=9+10) +TEAde/4K-8 167MB/s ± 1% 167MB/s ± 1% ~ (p=0.133 n=9+10) +TEAde/32K-8 164MB/s ± 1% 165MB/s ± 0% ~ (p=0.218 n=9+10) +``` + +## 1.16 版本及以下 +使用 PLAN9 汇编编写`Encrypt`,内联编写`Decrypt`,并将填充字节的随机数来源由`math/rand`替换为`runtime.fastrand`(加密算法本身不变),与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)代码同在`go1.16`版本下编译相比,提升速度如下(new16.txt)。 +```css +name old time/op new time/op delta +TEAen/16-8 252ns ± 0% 224ns ± 0% -10.85% (p=0.000 n=9+8) +TEAen/256-8 1.77µs ± 1% 1.67µs ± 1% -5.92% (p=0.000 n=9+9) +TEAen/4K-8 25.9µs ± 0% 24.9µs ± 0% -3.83% (p=0.000 n=10+9) +TEAen/32K-8 208µs ± 1% 201µs ± 0% -3.20% (p=0.000 n=10+9) +TEAde/16-8 216ns ± 1% 211ns ± 1% -2.41% (p=0.000 n=10+10) +TEAde/256-8 1.71µs ± 1% 1.66µs ± 1% -2.40% (p=0.000 n=10+10) +TEAde/4K-8 25.4µs ± 1% 24.8µs ± 1% -2.16% (p=0.000 n=10+10) +TEAde/32K-8 206µs ± 0% 201µs ± 0% -2.35% (p=0.000 n=9+9) + +name old speed new speed delta +TEAen/16-8
63.5MB/s ± 0% 71.3MB/s ± 0% +12.18% (p=0.000 n=9+8) +TEAen/256-8 145MB/s ± 1% 154MB/s ± 1% +6.28% (p=0.000 n=9+9) +TEAen/4K-8 158MB/s ± 0% 165MB/s ± 0% +3.98% (p=0.000 n=10+9) +TEAen/32K-8 158MB/s ± 1% 163MB/s ± 0% +3.31% (p=0.000 n=10+9) +TEAde/16-8 148MB/s ± 1% 152MB/s ± 1% +2.46% (p=0.000 n=10+10) +TEAde/256-8 160MB/s ± 1% 163MB/s ± 1% +2.46% (p=0.000 n=10+10) +TEAde/4K-8 162MB/s ± 1% 166MB/s ± 1% +2.21% (p=0.000 n=10+10) +TEAde/32K-8 159MB/s ± 0% 163MB/s ± 0% +2.41% (p=0.000 n=9+9) ``` 另外[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)本身在`go1.16`版本与在`go1.17`版本下编译相比,提升速度如下(new17.txt)。 ```css diff --git a/new16.txt b/new16.txt index 59069f6..63b4d09 100644 --- a/new16.txt +++ b/new16.txt @@ -2,85 +2,85 @@ goos: darwin goarch: amd64 pkg: github.com/fumiama/gofastTEA cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz -BenchmarkTEAen/16-8 4879810 242.4 ns/op 66.01 MB/s -BenchmarkTEAen/16-8 4984504 241.2 ns/op 66.33 MB/s -BenchmarkTEAen/16-8 5013127 240.0 ns/op 66.65 MB/s -BenchmarkTEAen/16-8 4946341 240.4 ns/op 66.55 MB/s -BenchmarkTEAen/16-8 5010094 239.6 ns/op 66.77 MB/s -BenchmarkTEAen/16-8 4979793 240.7 ns/op 66.46 MB/s -BenchmarkTEAen/16-8 4987821 240.0 ns/op 66.66 MB/s -BenchmarkTEAen/16-8 4990870 240.9 ns/op 66.42 MB/s -BenchmarkTEAen/16-8 4945104 239.6 ns/op 66.79 MB/s -BenchmarkTEAen/16-8 5003238 239.8 ns/op 66.73 MB/s -BenchmarkTEAen/256-8 712588 1680 ns/op 152.37 MB/s -BenchmarkTEAen/256-8 716322 1680 ns/op 152.39 MB/s -BenchmarkTEAen/256-8 717398 1672 ns/op 153.10 MB/s -BenchmarkTEAen/256-8 703051 1669 ns/op 153.34 MB/s -BenchmarkTEAen/256-8 739008 1663 ns/op 153.91 MB/s -BenchmarkTEAen/256-8 720760 1658 ns/op 154.39 MB/s -BenchmarkTEAen/256-8 735790 1675 ns/op 152.80 MB/s -BenchmarkTEAen/256-8 720423 1668 ns/op 153.47 MB/s -BenchmarkTEAen/256-8 721131 1672 ns/op 153.09 MB/s -BenchmarkTEAen/256-8 717520 1669 ns/op 153.37 MB/s -BenchmarkTEAen/4K-8 47542 24859 ns/op 164.77 MB/s -BenchmarkTEAen/4K-8 48340 
24879 ns/op 164.63 MB/s -BenchmarkTEAen/4K-8 48124 24807 ns/op 165.11 MB/s -BenchmarkTEAen/4K-8 48874 24841 ns/op 164.89 MB/s -BenchmarkTEAen/4K-8 48474 24819 ns/op 165.03 MB/s -BenchmarkTEAen/4K-8 48309 25202 ns/op 162.52 MB/s -BenchmarkTEAen/4K-8 47352 24930 ns/op 164.30 MB/s -BenchmarkTEAen/4K-8 47407 24760 ns/op 165.43 MB/s -BenchmarkTEAen/4K-8 47619 24841 ns/op 164.89 MB/s -BenchmarkTEAen/4K-8 48085 24779 ns/op 165.30 MB/s -BenchmarkTEAen/32K-8 5924 200510 ns/op 163.42 MB/s -BenchmarkTEAen/32K-8 5964 200683 ns/op 163.28 MB/s -BenchmarkTEAen/32K-8 5904 200746 ns/op 163.23 MB/s -BenchmarkTEAen/32K-8 5907 200587 ns/op 163.36 MB/s -BenchmarkTEAen/32K-8 6031 200538 ns/op 163.40 MB/s -BenchmarkTEAen/32K-8 6249 201163 ns/op 162.89 MB/s -BenchmarkTEAen/32K-8 5974 201777 ns/op 162.40 MB/s -BenchmarkTEAen/32K-8 6063 201619 ns/op 162.52 MB/s -BenchmarkTEAen/32K-8 6001 200751 ns/op 163.23 MB/s -BenchmarkTEAen/32K-8 5971 200775 ns/op 163.21 MB/s -BenchmarkTEAde/16-8 5743614 210.3 ns/op 152.17 MB/s -BenchmarkTEAde/16-8 5685754 210.2 ns/op 152.23 MB/s -BenchmarkTEAde/16-8 5635918 209.7 ns/op 152.56 MB/s -BenchmarkTEAde/16-8 5680320 210.1 ns/op 152.34 MB/s -BenchmarkTEAde/16-8 5719232 212.0 ns/op 150.94 MB/s -BenchmarkTEAde/16-8 5745488 210.4 ns/op 152.09 MB/s -BenchmarkTEAde/16-8 5765454 211.0 ns/op 151.69 MB/s -BenchmarkTEAde/16-8 5644918 210.3 ns/op 152.17 MB/s -BenchmarkTEAde/16-8 5681248 210.1 ns/op 152.33 MB/s -BenchmarkTEAde/16-8 5672364 211.9 ns/op 151.00 MB/s -BenchmarkTEAde/256-8 712252 1663 ns/op 163.58 MB/s -BenchmarkTEAde/256-8 724006 1668 ns/op 163.10 MB/s -BenchmarkTEAde/256-8 734743 1657 ns/op 164.15 MB/s -BenchmarkTEAde/256-8 726320 1659 ns/op 163.97 MB/s -BenchmarkTEAde/256-8 725227 1650 ns/op 164.88 MB/s -BenchmarkTEAde/256-8 726027 1663 ns/op 163.60 MB/s -BenchmarkTEAde/256-8 723242 1652 ns/op 164.61 MB/s -BenchmarkTEAde/256-8 723056 1654 ns/op 164.47 MB/s -BenchmarkTEAde/256-8 719800 1659 ns/op 163.93 MB/s -BenchmarkTEAde/256-8 727068 1667 ns/op 163.20 
MB/s -BenchmarkTEAde/4K-8 48092 24702 ns/op 166.46 MB/s -BenchmarkTEAde/4K-8 48133 24749 ns/op 166.15 MB/s -BenchmarkTEAde/4K-8 48650 24597 ns/op 167.18 MB/s -BenchmarkTEAde/4K-8 48375 24642 ns/op 166.87 MB/s -BenchmarkTEAde/4K-8 48902 24570 ns/op 167.36 MB/s -BenchmarkTEAde/4K-8 48852 24599 ns/op 167.16 MB/s -BenchmarkTEAde/4K-8 48820 24868 ns/op 165.35 MB/s -BenchmarkTEAde/4K-8 49100 24729 ns/op 166.28 MB/s -BenchmarkTEAde/4K-8 48906 24708 ns/op 166.43 MB/s -BenchmarkTEAde/4K-8 48818 24583 ns/op 167.27 MB/s -BenchmarkTEAde/32K-8 5941 200449 ns/op 163.55 MB/s -BenchmarkTEAde/32K-8 5901 200616 ns/op 163.42 MB/s -BenchmarkTEAde/32K-8 6033 200566 ns/op 163.46 MB/s -BenchmarkTEAde/32K-8 6034 199527 ns/op 164.31 MB/s -BenchmarkTEAde/32K-8 6068 200952 ns/op 163.14 MB/s -BenchmarkTEAde/32K-8 5953 200107 ns/op 163.83 MB/s -BenchmarkTEAde/32K-8 5966 200340 ns/op 163.64 MB/s -BenchmarkTEAde/32K-8 5971 199451 ns/op 164.37 MB/s -BenchmarkTEAde/32K-8 5972 200547 ns/op 163.47 MB/s -BenchmarkTEAde/32K-8 5932 200476 ns/op 163.53 MB/s +BenchmarkTEAen/16-8 5297209 226.7 ns/op 70.57 MB/s +BenchmarkTEAen/16-8 5350597 224.8 ns/op 71.18 MB/s +BenchmarkTEAen/16-8 5320868 224.4 ns/op 71.32 MB/s +BenchmarkTEAen/16-8 5365137 224.9 ns/op 71.15 MB/s +BenchmarkTEAen/16-8 5331117 223.8 ns/op 71.51 MB/s +BenchmarkTEAen/16-8 5348852 224.9 ns/op 71.14 MB/s +BenchmarkTEAen/16-8 5377758 225.0 ns/op 71.11 MB/s +BenchmarkTEAen/16-8 5290276 224.3 ns/op 71.35 MB/s +BenchmarkTEAen/16-8 5344434 228.3 ns/op 70.09 MB/s +BenchmarkTEAen/16-8 5340181 223.6 ns/op 71.55 MB/s +BenchmarkTEAen/256-8 721362 1656 ns/op 154.62 MB/s +BenchmarkTEAen/256-8 739749 1665 ns/op 153.80 MB/s +BenchmarkTEAen/256-8 737851 1671 ns/op 153.17 MB/s +BenchmarkTEAen/256-8 717496 1664 ns/op 153.81 MB/s +BenchmarkTEAen/256-8 712500 1686 ns/op 151.87 MB/s +BenchmarkTEAen/256-8 719016 1665 ns/op 153.77 MB/s +BenchmarkTEAen/256-8 724338 1670 ns/op 153.25 MB/s +BenchmarkTEAen/256-8 719326 1674 ns/op 152.96 MB/s +BenchmarkTEAen/256-8 708326 
1665 ns/op 153.76 MB/s +BenchmarkTEAen/256-8 715416 1665 ns/op 153.74 MB/s +BenchmarkTEAen/4K-8 47715 24790 ns/op 165.23 MB/s +BenchmarkTEAen/4K-8 48402 24894 ns/op 164.54 MB/s +BenchmarkTEAen/4K-8 47937 24877 ns/op 164.65 MB/s +BenchmarkTEAen/4K-8 48338 24861 ns/op 164.75 MB/s +BenchmarkTEAen/4K-8 48440 24803 ns/op 165.14 MB/s +BenchmarkTEAen/4K-8 48217 24879 ns/op 164.64 MB/s +BenchmarkTEAen/4K-8 48192 25234 ns/op 162.32 MB/s +BenchmarkTEAen/4K-8 48388 24967 ns/op 164.06 MB/s +BenchmarkTEAen/4K-8 48415 24861 ns/op 164.76 MB/s +BenchmarkTEAen/4K-8 48234 24984 ns/op 163.95 MB/s +BenchmarkTEAen/32K-8 5944 200978 ns/op 163.04 MB/s +BenchmarkTEAen/32K-8 6070 200065 ns/op 163.79 MB/s +BenchmarkTEAen/32K-8 6025 201225 ns/op 162.84 MB/s +BenchmarkTEAen/32K-8 6105 201669 ns/op 162.48 MB/s +BenchmarkTEAen/32K-8 5970 201261 ns/op 162.81 MB/s +BenchmarkTEAen/32K-8 5998 201009 ns/op 163.02 MB/s +BenchmarkTEAen/32K-8 5872 200889 ns/op 163.12 MB/s +BenchmarkTEAen/32K-8 5952 201904 ns/op 162.29 MB/s +BenchmarkTEAen/32K-8 5848 200970 ns/op 163.05 MB/s +BenchmarkTEAen/32K-8 5952 200880 ns/op 163.12 MB/s +BenchmarkTEAde/16-8 5703844 212.6 ns/op 150.48 MB/s +BenchmarkTEAde/16-8 5639527 212.1 ns/op 150.88 MB/s +BenchmarkTEAde/16-8 5680191 211.9 ns/op 151.00 MB/s +BenchmarkTEAde/16-8 5670024 210.9 ns/op 151.74 MB/s +BenchmarkTEAde/16-8 5748844 210.7 ns/op 151.85 MB/s +BenchmarkTEAde/16-8 5661036 210.1 ns/op 152.32 MB/s +BenchmarkTEAde/16-8 5634832 210.6 ns/op 151.97 MB/s +BenchmarkTEAde/16-8 5660986 211.0 ns/op 151.64 MB/s +BenchmarkTEAde/16-8 5702166 211.0 ns/op 151.68 MB/s +BenchmarkTEAde/16-8 5718448 211.0 ns/op 151.65 MB/s +BenchmarkTEAde/256-8 671943 1664 ns/op 163.45 MB/s +BenchmarkTEAde/256-8 709024 1657 ns/op 164.13 MB/s +BenchmarkTEAde/256-8 721207 1671 ns/op 162.82 MB/s +BenchmarkTEAde/256-8 725904 1656 ns/op 164.24 MB/s +BenchmarkTEAde/256-8 723692 1660 ns/op 163.86 MB/s +BenchmarkTEAde/256-8 719965 1671 ns/op 162.74 MB/s +BenchmarkTEAde/256-8 711105 1674 ns/op 162.48 MB/s 
+BenchmarkTEAde/256-8 738729 1662 ns/op 163.65 MB/s +BenchmarkTEAde/256-8 728722 1670 ns/op 162.92 MB/s +BenchmarkTEAde/256-8 709340 1656 ns/op 164.24 MB/s +BenchmarkTEAde/4K-8 48242 24857 ns/op 165.43 MB/s +BenchmarkTEAde/4K-8 48841 24836 ns/op 165.57 MB/s +BenchmarkTEAde/4K-8 47600 24782 ns/op 165.93 MB/s +BenchmarkTEAde/4K-8 48906 24954 ns/op 164.78 MB/s +BenchmarkTEAde/4K-8 48112 24860 ns/op 165.40 MB/s +BenchmarkTEAde/4K-8 48838 24831 ns/op 165.60 MB/s +BenchmarkTEAde/4K-8 48406 24797 ns/op 165.83 MB/s +BenchmarkTEAde/4K-8 48187 24672 ns/op 166.67 MB/s +BenchmarkTEAde/4K-8 48662 24761 ns/op 166.06 MB/s +BenchmarkTEAde/4K-8 47848 24832 ns/op 165.59 MB/s +BenchmarkTEAde/32K-8 5923 201091 ns/op 163.03 MB/s +BenchmarkTEAde/32K-8 5884 200839 ns/op 163.23 MB/s +BenchmarkTEAde/32K-8 5984 200686 ns/op 163.36 MB/s +BenchmarkTEAde/32K-8 5977 200524 ns/op 163.49 MB/s +BenchmarkTEAde/32K-8 5995 202499 ns/op 161.90 MB/s +BenchmarkTEAde/32K-8 5960 201497 ns/op 162.70 MB/s +BenchmarkTEAde/32K-8 6036 200497 ns/op 163.51 MB/s +BenchmarkTEAde/32K-8 5977 200972 ns/op 163.13 MB/s +BenchmarkTEAde/32K-8 5907 200726 ns/op 163.33 MB/s +BenchmarkTEAde/32K-8 5985 200288 ns/op 163.68 MB/s PASS -ok github.com/fumiama/gofastTEA 107.828s +ok github.com/fumiama/gofastTEA 108.563s diff --git a/new17.txt b/new17.txt index 96324e3..5bdcf5a 100644 --- a/new17.txt +++ b/new17.txt @@ -2,85 +2,85 @@ goos: darwin goarch: amd64 pkg: github.com/fumiama/gofastTEA cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz -BenchmarkTEAen/16-8 4937554 243.6 ns/op 65.67 MB/s -BenchmarkTEAen/16-8 4969573 242.3 ns/op 66.04 MB/s -BenchmarkTEAen/16-8 4937174 241.3 ns/op 66.30 MB/s -BenchmarkTEAen/16-8 4994734 240.0 ns/op 66.67 MB/s -BenchmarkTEAen/16-8 4988541 241.9 ns/op 66.16 MB/s -BenchmarkTEAen/16-8 4981683 240.2 ns/op 66.60 MB/s -BenchmarkTEAen/16-8 4967822 242.5 ns/op 65.99 MB/s -BenchmarkTEAen/16-8 5013471 240.8 ns/op 66.43 MB/s -BenchmarkTEAen/16-8 4906420 241.8 ns/op 66.16 MB/s -BenchmarkTEAen/16-8 4963357 240.3 
ns/op 66.58 MB/s -BenchmarkTEAen/256-8 704292 1703 ns/op 150.32 MB/s -BenchmarkTEAen/256-8 712244 1710 ns/op 149.72 MB/s -BenchmarkTEAen/256-8 687753 1698 ns/op 150.76 MB/s -BenchmarkTEAen/256-8 698522 1709 ns/op 149.78 MB/s -BenchmarkTEAen/256-8 711114 1708 ns/op 149.92 MB/s -BenchmarkTEAen/256-8 712760 1701 ns/op 150.48 MB/s -BenchmarkTEAen/256-8 705231 1701 ns/op 150.54 MB/s -BenchmarkTEAen/256-8 699147 1699 ns/op 150.72 MB/s -BenchmarkTEAen/256-8 702382 1699 ns/op 150.69 MB/s -BenchmarkTEAen/256-8 707511 1699 ns/op 150.71 MB/s -BenchmarkTEAen/4K-8 47986 24923 ns/op 164.35 MB/s -BenchmarkTEAen/4K-8 47972 24808 ns/op 165.11 MB/s -BenchmarkTEAen/4K-8 47262 24943 ns/op 164.22 MB/s -BenchmarkTEAen/4K-8 47324 24961 ns/op 164.10 MB/s -BenchmarkTEAen/4K-8 48688 25036 ns/op 163.60 MB/s -BenchmarkTEAen/4K-8 47722 24995 ns/op 163.87 MB/s -BenchmarkTEAen/4K-8 48550 24810 ns/op 165.09 MB/s -BenchmarkTEAen/4K-8 47937 25068 ns/op 163.39 MB/s -BenchmarkTEAen/4K-8 48129 24867 ns/op 164.71 MB/s -BenchmarkTEAen/4K-8 47654 24986 ns/op 163.93 MB/s -BenchmarkTEAen/32K-8 5894 200779 ns/op 163.20 MB/s -BenchmarkTEAen/32K-8 5937 201123 ns/op 162.93 MB/s -BenchmarkTEAen/32K-8 5941 199721 ns/op 164.07 MB/s -BenchmarkTEAen/32K-8 5998 199359 ns/op 164.37 MB/s -BenchmarkTEAen/32K-8 5923 199691 ns/op 164.09 MB/s -BenchmarkTEAen/32K-8 5934 199594 ns/op 164.17 MB/s -BenchmarkTEAen/32K-8 5965 200496 ns/op 163.43 MB/s -BenchmarkTEAen/32K-8 5950 199249 ns/op 164.46 MB/s -BenchmarkTEAen/32K-8 5983 200564 ns/op 163.38 MB/s -BenchmarkTEAen/32K-8 5911 199334 ns/op 164.39 MB/s -BenchmarkTEAde/16-8 5737286 207.7 ns/op 154.08 MB/s -BenchmarkTEAde/16-8 5758159 207.1 ns/op 154.48 MB/s -BenchmarkTEAde/16-8 5808830 207.8 ns/op 154.02 MB/s -BenchmarkTEAde/16-8 5745165 207.4 ns/op 154.30 MB/s -BenchmarkTEAde/16-8 5753430 208.1 ns/op 153.79 MB/s -BenchmarkTEAde/16-8 5791928 210.9 ns/op 151.74 MB/s -BenchmarkTEAde/16-8 5664402 209.1 ns/op 153.04 MB/s -BenchmarkTEAde/16-8 5726097 207.7 ns/op 154.03 MB/s 
-BenchmarkTEAde/16-8 5807385 209.1 ns/op 153.04 MB/s -BenchmarkTEAde/16-8 5702360 206.9 ns/op 154.64 MB/s -BenchmarkTEAde/256-8 721252 1653 ns/op 164.53 MB/s -BenchmarkTEAde/256-8 739063 1643 ns/op 165.52 MB/s -BenchmarkTEAde/256-8 741162 1648 ns/op 165.02 MB/s -BenchmarkTEAde/256-8 735223 1642 ns/op 165.67 MB/s -BenchmarkTEAde/256-8 729406 1649 ns/op 164.91 MB/s -BenchmarkTEAde/256-8 729562 1635 ns/op 166.38 MB/s -BenchmarkTEAde/256-8 745306 1648 ns/op 165.05 MB/s -BenchmarkTEAde/256-8 726823 1650 ns/op 164.85 MB/s -BenchmarkTEAde/256-8 736669 1646 ns/op 165.22 MB/s -BenchmarkTEAde/256-8 714020 1650 ns/op 164.85 MB/s -BenchmarkTEAde/4K-8 48386 24470 ns/op 168.04 MB/s -BenchmarkTEAde/4K-8 49455 24395 ns/op 168.56 MB/s -BenchmarkTEAde/4K-8 48878 24545 ns/op 167.53 MB/s -BenchmarkTEAde/4K-8 48738 24511 ns/op 167.76 MB/s -BenchmarkTEAde/4K-8 49315 24614 ns/op 167.06 MB/s -BenchmarkTEAde/4K-8 47276 24493 ns/op 167.88 MB/s -BenchmarkTEAde/4K-8 49026 24435 ns/op 168.28 MB/s -BenchmarkTEAde/4K-8 48810 24534 ns/op 167.60 MB/s -BenchmarkTEAde/4K-8 48943 24519 ns/op 167.71 MB/s -BenchmarkTEAde/4K-8 48555 24520 ns/op 167.70 MB/s -BenchmarkTEAde/32K-8 6025 198951 ns/op 164.78 MB/s -BenchmarkTEAde/32K-8 6050 198765 ns/op 164.94 MB/s -BenchmarkTEAde/32K-8 6034 198560 ns/op 165.11 MB/s -BenchmarkTEAde/32K-8 5997 198661 ns/op 165.03 MB/s -BenchmarkTEAde/32K-8 5952 199032 ns/op 164.72 MB/s -BenchmarkTEAde/32K-8 6106 198250 ns/op 165.37 MB/s -BenchmarkTEAde/32K-8 6075 198810 ns/op 164.90 MB/s -BenchmarkTEAde/32K-8 6056 198851 ns/op 164.87 MB/s -BenchmarkTEAde/32K-8 6085 198702 ns/op 164.99 MB/s -BenchmarkTEAde/32K-8 6079 198614 ns/op 165.06 MB/s +BenchmarkTEAen/16-8 5269038 229.5 ns/op 69.72 MB/s +BenchmarkTEAen/16-8 5148352 225.4 ns/op 70.99 MB/s +BenchmarkTEAen/16-8 5393808 224.1 ns/op 71.39 MB/s +BenchmarkTEAen/16-8 5355736 224.8 ns/op 71.19 MB/s +BenchmarkTEAen/16-8 5335281 225.3 ns/op 71.01 MB/s +BenchmarkTEAen/16-8 5354547 223.7 ns/op 71.53 MB/s +BenchmarkTEAen/16-8 5155089 
225.1 ns/op 71.07 MB/s +BenchmarkTEAen/16-8 5364086 222.8 ns/op 71.81 MB/s +BenchmarkTEAen/16-8 5354697 224.6 ns/op 71.23 MB/s +BenchmarkTEAen/16-8 5394775 223.2 ns/op 71.67 MB/s +BenchmarkTEAen/256-8 721279 1695 ns/op 150.99 MB/s +BenchmarkTEAen/256-8 719157 1686 ns/op 151.86 MB/s +BenchmarkTEAen/256-8 712452 1691 ns/op 151.38 MB/s +BenchmarkTEAen/256-8 710757 1693 ns/op 151.21 MB/s +BenchmarkTEAen/256-8 726054 1684 ns/op 152.03 MB/s +BenchmarkTEAen/256-8 719670 1688 ns/op 151.64 MB/s +BenchmarkTEAen/256-8 710749 1700 ns/op 150.61 MB/s +BenchmarkTEAen/256-8 710406 1681 ns/op 152.27 MB/s +BenchmarkTEAen/256-8 711072 1690 ns/op 151.48 MB/s +BenchmarkTEAen/256-8 705573 1691 ns/op 151.43 MB/s +BenchmarkTEAen/4K-8 47502 24856 ns/op 164.79 MB/s +BenchmarkTEAen/4K-8 47906 24808 ns/op 165.11 MB/s +BenchmarkTEAen/4K-8 47121 24822 ns/op 165.01 MB/s +BenchmarkTEAen/4K-8 48050 24900 ns/op 164.50 MB/s +BenchmarkTEAen/4K-8 47642 24996 ns/op 163.86 MB/s +BenchmarkTEAen/4K-8 48513 24923 ns/op 164.34 MB/s +BenchmarkTEAen/4K-8 48159 24987 ns/op 163.93 MB/s +BenchmarkTEAen/4K-8 48250 24948 ns/op 164.18 MB/s +BenchmarkTEAen/4K-8 47930 25095 ns/op 163.22 MB/s +BenchmarkTEAen/4K-8 47818 24880 ns/op 164.63 MB/s +BenchmarkTEAen/32K-8 6008 201270 ns/op 162.81 MB/s +BenchmarkTEAen/32K-8 5946 202390 ns/op 161.90 MB/s +BenchmarkTEAen/32K-8 4990 200935 ns/op 163.08 MB/s +BenchmarkTEAen/32K-8 5710 201290 ns/op 162.79 MB/s +BenchmarkTEAen/32K-8 5949 201151 ns/op 162.90 MB/s +BenchmarkTEAen/32K-8 6046 202128 ns/op 162.12 MB/s +BenchmarkTEAen/32K-8 5950 202166 ns/op 162.08 MB/s +BenchmarkTEAen/32K-8 6021 202292 ns/op 161.98 MB/s +BenchmarkTEAen/32K-8 6038 201232 ns/op 162.84 MB/s +BenchmarkTEAen/32K-8 5959 201314 ns/op 162.77 MB/s +BenchmarkTEAde/16-8 5790429 208.5 ns/op 153.51 MB/s +BenchmarkTEAde/16-8 5733123 207.3 ns/op 154.37 MB/s +BenchmarkTEAde/16-8 5722306 208.4 ns/op 153.56 MB/s +BenchmarkTEAde/16-8 5730838 207.9 ns/op 153.94 MB/s +BenchmarkTEAde/16-8 5743090 208.7 ns/op 153.36 MB/s 
+BenchmarkTEAde/16-8 5640714 208.2 ns/op 153.70 MB/s +BenchmarkTEAde/16-8 5740749 208.5 ns/op 153.47 MB/s +BenchmarkTEAde/16-8 5775631 208.0 ns/op 153.82 MB/s +BenchmarkTEAde/16-8 5743650 208.3 ns/op 153.62 MB/s +BenchmarkTEAde/16-8 5763579 209.1 ns/op 153.02 MB/s +BenchmarkTEAde/256-8 727724 1654 ns/op 164.42 MB/s +BenchmarkTEAde/256-8 740757 1652 ns/op 164.60 MB/s +BenchmarkTEAde/256-8 724318 1637 ns/op 166.21 MB/s +BenchmarkTEAde/256-8 751288 1639 ns/op 165.92 MB/s +BenchmarkTEAde/256-8 727666 1660 ns/op 163.89 MB/s +BenchmarkTEAde/256-8 728272 1646 ns/op 165.24 MB/s +BenchmarkTEAde/256-8 729640 1645 ns/op 165.37 MB/s +BenchmarkTEAde/256-8 743509 1650 ns/op 164.87 MB/s +BenchmarkTEAde/256-8 726325 1641 ns/op 165.75 MB/s +BenchmarkTEAde/256-8 721312 1653 ns/op 164.52 MB/s +BenchmarkTEAde/4K-8 48522 24454 ns/op 168.16 MB/s +BenchmarkTEAde/4K-8 48518 24610 ns/op 167.09 MB/s +BenchmarkTEAde/4K-8 49022 24653 ns/op 166.79 MB/s +BenchmarkTEAde/4K-8 48313 24545 ns/op 167.53 MB/s +BenchmarkTEAde/4K-8 48856 24589 ns/op 167.23 MB/s +BenchmarkTEAde/4K-8 48660 24529 ns/op 167.64 MB/s +BenchmarkTEAde/4K-8 49248 24667 ns/op 166.70 MB/s +BenchmarkTEAde/4K-8 48969 24485 ns/op 167.94 MB/s +BenchmarkTEAde/4K-8 48952 24658 ns/op 166.76 MB/s +BenchmarkTEAde/4K-8 48680 24426 ns/op 168.35 MB/s +BenchmarkTEAde/32K-8 5991 199033 ns/op 164.72 MB/s +BenchmarkTEAde/32K-8 6022 199173 ns/op 164.60 MB/s +BenchmarkTEAde/32K-8 6037 199301 ns/op 164.49 MB/s +BenchmarkTEAde/32K-8 6009 198696 ns/op 165.00 MB/s +BenchmarkTEAde/32K-8 5984 199928 ns/op 163.98 MB/s +BenchmarkTEAde/32K-8 6020 198890 ns/op 164.83 MB/s +BenchmarkTEAde/32K-8 6001 199967 ns/op 163.95 MB/s +BenchmarkTEAde/32K-8 6290 199553 ns/op 164.29 MB/s +BenchmarkTEAde/32K-8 6159 198473 ns/op 165.18 MB/s +BenchmarkTEAde/32K-8 5992 199065 ns/op 164.69 MB/s PASS -ok github.com/fumiama/gofastTEA 106.301s +ok github.com/fumiama/gofastTEA 107.274s diff --git a/old.txt b/old16.txt similarity index 100% rename from old.txt rename to old16.txt 
diff --git a/old17.txt b/old17.txt new file mode 100644 index 0000000..b6304b2 --- /dev/null +++ b/old17.txt @@ -0,0 +1,86 @@ +goos: darwin +goarch: amd64 +pkg: github.com/fumiama/gofastTEA +cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz +BenchmarkTEAen/16-8 4941398 244.0 ns/op 65.56 MB/s +BenchmarkTEAen/16-8 4966077 240.8 ns/op 66.44 MB/s +BenchmarkTEAen/16-8 4993192 241.3 ns/op 66.30 MB/s +BenchmarkTEAen/16-8 4943002 240.7 ns/op 66.47 MB/s +BenchmarkTEAen/16-8 4921885 241.9 ns/op 66.14 MB/s +BenchmarkTEAen/16-8 4984929 242.7 ns/op 65.93 MB/s +BenchmarkTEAen/16-8 4977856 241.5 ns/op 66.25 MB/s +BenchmarkTEAen/16-8 4965243 241.4 ns/op 66.27 MB/s +BenchmarkTEAen/16-8 4951600 241.1 ns/op 66.36 MB/s +BenchmarkTEAen/16-8 4933670 240.8 ns/op 66.45 MB/s +BenchmarkTEAen/256-8 706099 1702 ns/op 150.38 MB/s +BenchmarkTEAen/256-8 710599 1713 ns/op 149.45 MB/s +BenchmarkTEAen/256-8 700268 1696 ns/op 150.90 MB/s +BenchmarkTEAen/256-8 677850 1708 ns/op 149.86 MB/s +BenchmarkTEAen/256-8 681873 1703 ns/op 150.29 MB/s +BenchmarkTEAen/256-8 703291 1712 ns/op 149.57 MB/s +BenchmarkTEAen/256-8 708225 1708 ns/op 149.92 MB/s +BenchmarkTEAen/256-8 703201 1700 ns/op 150.63 MB/s +BenchmarkTEAen/256-8 702416 1719 ns/op 148.94 MB/s +BenchmarkTEAen/256-8 698091 1701 ns/op 150.48 MB/s +BenchmarkTEAen/4K-8 47703 25038 ns/op 163.59 MB/s +BenchmarkTEAen/4K-8 47608 25151 ns/op 162.86 MB/s +BenchmarkTEAen/4K-8 47996 25159 ns/op 162.81 MB/s +BenchmarkTEAen/4K-8 47966 24959 ns/op 164.11 MB/s +BenchmarkTEAen/4K-8 48046 24979 ns/op 163.98 MB/s +BenchmarkTEAen/4K-8 48106 24975 ns/op 164.01 MB/s +BenchmarkTEAen/4K-8 48369 24955 ns/op 164.13 MB/s +BenchmarkTEAen/4K-8 47421 24988 ns/op 163.92 MB/s +BenchmarkTEAen/4K-8 47799 25008 ns/op 163.79 MB/s +BenchmarkTEAen/4K-8 47964 24847 ns/op 164.85 MB/s +BenchmarkTEAen/32K-8 5937 201785 ns/op 162.39 MB/s +BenchmarkTEAen/32K-8 5846 202508 ns/op 161.81 MB/s +BenchmarkTEAen/32K-8 6010 202115 ns/op 162.13 MB/s +BenchmarkTEAen/32K-8 5929 202583 ns/op 161.75 MB/s 
+BenchmarkTEAen/32K-8 5823 202915 ns/op 161.49 MB/s +BenchmarkTEAen/32K-8 5829 202080 ns/op 162.15 MB/s +BenchmarkTEAen/32K-8 5851 203264 ns/op 161.21 MB/s +BenchmarkTEAen/32K-8 5935 202659 ns/op 161.69 MB/s +BenchmarkTEAen/32K-8 5924 202492 ns/op 161.82 MB/s +BenchmarkTEAen/32K-8 5949 202735 ns/op 161.63 MB/s +BenchmarkTEAde/16-8 5762792 207.8 ns/op 154.00 MB/s +BenchmarkTEAde/16-8 5705499 208.3 ns/op 153.60 MB/s +BenchmarkTEAde/16-8 5744668 213.5 ns/op 149.86 MB/s +BenchmarkTEAde/16-8 5727178 210.7 ns/op 151.87 MB/s +BenchmarkTEAde/16-8 5783122 207.6 ns/op 154.14 MB/s +BenchmarkTEAde/16-8 5754253 209.6 ns/op 152.69 MB/s +BenchmarkTEAde/16-8 5773371 208.4 ns/op 153.55 MB/s +BenchmarkTEAde/16-8 5498738 209.1 ns/op 153.03 MB/s +BenchmarkTEAde/16-8 5784189 207.7 ns/op 154.08 MB/s +BenchmarkTEAde/16-8 5727909 206.6 ns/op 154.87 MB/s +BenchmarkTEAde/256-8 739407 1647 ns/op 165.12 MB/s +BenchmarkTEAde/256-8 737415 1650 ns/op 164.85 MB/s +BenchmarkTEAde/256-8 708351 1650 ns/op 164.81 MB/s +BenchmarkTEAde/256-8 719713 1641 ns/op 165.71 MB/s +BenchmarkTEAde/256-8 727462 1641 ns/op 165.79 MB/s +BenchmarkTEAde/256-8 699610 1669 ns/op 162.99 MB/s +BenchmarkTEAde/256-8 739270 1656 ns/op 164.23 MB/s +BenchmarkTEAde/256-8 721683 1646 ns/op 165.26 MB/s +BenchmarkTEAde/256-8 728337 1644 ns/op 165.43 MB/s +BenchmarkTEAde/256-8 727545 1647 ns/op 165.17 MB/s +BenchmarkTEAde/4K-8 49003 24734 ns/op 166.25 MB/s +BenchmarkTEAde/4K-8 48774 24626 ns/op 166.98 MB/s +BenchmarkTEAde/4K-8 46922 25193 ns/op 163.22 MB/s +BenchmarkTEAde/4K-8 49129 24547 ns/op 167.51 MB/s +BenchmarkTEAde/4K-8 48273 24796 ns/op 165.83 MB/s +BenchmarkTEAde/4K-8 47547 24607 ns/op 167.11 MB/s +BenchmarkTEAde/4K-8 48889 24526 ns/op 167.66 MB/s +BenchmarkTEAde/4K-8 48098 24599 ns/op 167.16 MB/s +BenchmarkTEAde/4K-8 49819 24612 ns/op 167.07 MB/s +BenchmarkTEAde/4K-8 46545 24814 ns/op 165.71 MB/s +BenchmarkTEAde/32K-8 5905 198687 ns/op 165.00 MB/s +BenchmarkTEAde/32K-8 6044 199398 ns/op 164.41 MB/s +BenchmarkTEAde/32K-8 
6014 199216 ns/op 164.57 MB/s +BenchmarkTEAde/32K-8 6020 200146 ns/op 163.80 MB/s +BenchmarkTEAde/32K-8 6036 199664 ns/op 164.20 MB/s +BenchmarkTEAde/32K-8 6211 199332 ns/op 164.47 MB/s +BenchmarkTEAde/32K-8 6064 199054 ns/op 164.70 MB/s +BenchmarkTEAde/32K-8 5908 199390 ns/op 164.42 MB/s +BenchmarkTEAde/32K-8 5973 204028 ns/op 160.68 MB/s +BenchmarkTEAde/32K-8 6049 200894 ns/op 163.19 MB/s +PASS +ok github.com/fumiama/gofastTEA 108.676s diff --git a/tea_1.16.go b/tea_1.16.go index 67e627a..45b05b1 100644 --- a/tea_1.16.go +++ b/tea_1.16.go @@ -5,8 +5,13 @@ package tea import ( "encoding/binary" + _ "unsafe" // required by go:linkname ) +// Uint32 returns a pseudo-random uint32 from the lock-free runtime.fastrand; not for cryptographic use. +//go:linkname Uint32 runtime.fastrand +func Uint32() uint32 + // Encrypt tea 加密 // http://bbs.chinaunix.net/thread-583468-1-1.html // 感谢xichen大佬对TEA的解释 diff --git a/tea_1.16_asm.go b/tea_1.16_asm.go index 312aab6..60c469c 100644 --- a/tea_1.16_asm.go +++ b/tea_1.16_asm.go @@ -4,7 +4,7 @@ package tea import ( - "math/rand" + "encoding/binary" "unsafe" ) @@ -17,7 +17,9 @@ func (t TEA) Encrypt(src []byte) (dst []byte) { lens := len(src) fill := 10 - (lens+1)&7 dst = make([]byte, fill+lens+7) - _, _ = rand.Read(dst[0:fill]) + binary.LittleEndian.PutUint32(dst, Uint32()) + binary.LittleEndian.PutUint32(dst[4:], Uint32()) + binary.LittleEndian.PutUint32(dst[8:], Uint32()) dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 copy(dst[fill:], src) encrypt(uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&dst)))|uintptr(len(dst)<<40), uintptr(unsafe.Pointer(&t))|(uintptr(len(dst))&0xffffff00_00000000)) diff --git a/tea_1.16_pure.go b/tea_1.16_pure.go index 2096704..2c7542e 100644 --- a/tea_1.16_pure.go +++ b/tea_1.16_pure.go @@ -5,8 +5,6 @@ package tea import ( "encoding/binary" - "math/rand" - "unsafe" ) // Encrypt tea 加密 @@ -17,7 +15,9 @@ func (t TEA) Encrypt(src []byte) (dst []byte) { lens := len(src) fill := 10 - (lens+1)&7 dst = make([]byte, fill+lens+7) - _, _ = rand.Read(dst[0:fill]) +
binary.LittleEndian.PutUint32(dst, Uint32()) + binary.LittleEndian.PutUint32(dst[4:], Uint32()) + binary.LittleEndian.PutUint32(dst[8:], Uint32()) dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 copy(dst[fill:], src) diff --git a/tea_1.17.go b/tea_1.17.go index 0e556aa..aa5d287 100644 --- a/tea_1.17.go +++ b/tea_1.17.go @@ -5,9 +5,13 @@ package tea import ( "encoding/binary" - "math/rand" + _ "unsafe" // required by go:linkname ) +// Uint32 returns a pseudo-random uint32 from the lock-free runtime.fastrand; not for cryptographic use. +//go:linkname Uint32 runtime.fastrand +func Uint32() uint32 + // Encrypt tea 加密 // http://bbs.chinaunix.net/thread-583468-1-1.html // 感谢xichen大佬对TEA的解释 @@ -15,7 +19,9 @@ func (t TEA) Encrypt(src []byte) (dst []byte) { lens := len(src) fill := 10 - (lens+1)%8 dst = make([]byte, fill+lens+7) - _, _ = rand.Read(dst[0:fill]) + binary.LittleEndian.PutUint32(dst, Uint32()) + binary.LittleEndian.PutUint32(dst[4:], Uint32()) + binary.LittleEndian.PutUint32(dst[8:], Uint32()) dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 copy(dst[fill:], src)