From d5af7114dc626e2dfb96346a2d4a9af063e422df Mon Sep 17 00:00:00 2001 From: fumiama Date: Mon, 22 Nov 2021 21:43:32 +0800 Subject: [PATCH] perf: adjust command sequence --- README.md | 66 +++++++++---------- new16.txt | 162 +++++++++++++++++++++++----------------------- new17.txt | 162 +++++++++++++++++++++++----------------------- tea_1.16.go | 53 --------------- tea_1.16_amd64.go | 92 ++++++++++++++++++++++++++ tea_1.16_amd64.s | 32 ++++----- tea_1.16_asm.go | 27 -------- tea_1.16_pure.go | 53 +++++++++++++++ tea_1.17.go | 13 ++-- 9 files changed, 362 insertions(+), 298 deletions(-) create mode 100644 tea_1.16_amd64.go delete mode 100644 tea_1.16_asm.go diff --git a/README.md b/README.md index 423f671..b321734 100644 --- a/README.md +++ b/README.md @@ -2,51 +2,51 @@ TEA 编码算法的 PLAN9 汇编优化实现 ## 1.17 版本及以上 -代码与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)相比,替换了加密算法为`runtime.fastrand`,提升速度如下。 +代码与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)相比,替换了随机算法为`runtime.fastrand`,且简化了`Decrypt`,提升速度如下。 ```css name old time/op new time/op delta -TEAen/16-8 241ns ± 1% 224ns ± 1% -7.05% (p=0.000 n=9+9) -TEAen/256-8 1.71µs ± 1% 1.69µs ± 1% -0.96% (p=0.000 n=10+10) -TEAen/4K-8 25.0µs ± 1% 24.9µs ± 1% ~ (p=0.052 n=10+10) -TEAen/32K-8 203µs ± 0% 202µs ± 0% -0.44% (p=0.003 n=10+10) -TEAde/16-8 208ns ± 1% 208ns ± 0% ~ (p=0.857 n=9+10) -TEAde/256-8 1.65µs ± 1% 1.65µs ± 1% ~ (p=0.859 n=9+10) -TEAde/4K-8 24.7µs ± 1% 24.6µs ± 1% ~ (p=0.133 n=9+10) -TEAde/32K-8 200µs ± 1% 199µs ± 0% ~ (p=0.243 n=9+10) +TEAen/16-8 241ns ± 1% 225ns ± 1% -6.97% (p=0.000 n=9+10) +TEAen/256-8 1.71µs ± 1% 1.68µs ± 1% -1.31% (p=0.000 n=10+9) +TEAen/4K-8 25.0µs ± 1% 25.0µs ± 1% ~ (p=0.497 n=10+9) +TEAen/32K-8 203µs ± 0% 202µs ± 0% ~ (p=0.079 n=10+9) +TEAde/16-8 208ns ± 1% 208ns ± 0% ~ (p=0.914 n=9+9) +TEAde/256-8 1.65µs ± 1% 1.65µs ± 1% ~ (p=0.429 n=9+10) +TEAde/4K-8 24.7µs ± 1% 24.5µs ± 1% -0.44% (p=0.026 n=9+10) +TEAde/32K-8 200µs ± 1% 199µs ± 0% -0.37% (p=0.001 n=9+9) name old speed new speed delta -TEAen/16-8 66.3MB/s ± 1% 71.3MB/s ± 1% +7.59% (p=0.000 n=9+9) -TEAen/256-8 150MB/s ± 1% 151MB/s ± 1% +0.97% (p=0.000 n=10+10) -TEAen/4K-8 164MB/s ± 1% 164MB/s ± 1% ~ (p=0.052 n=10+10) -TEAen/32K-8 162MB/s ± 0% 163MB/s ± 0% +0.44% (p=0.003 n=10+10) -TEAde/16-8 154MB/s ± 1% 154MB/s ± 0% ~ (p=0.905 n=9+10) -TEAde/256-8 165MB/s ± 1% 165MB/s ± 1% ~ (p=0.905 n=9+10) -TEAde/4K-8 167MB/s ± 1% 167MB/s ± 1% ~ (p=0.133 n=9+10) -TEAde/32K-8 164MB/s ± 1% 165MB/s ± 0% ~ (p=0.218 n=9+10) +TEAen/16-8 66.3MB/s ± 1% 71.3MB/s ± 1% +7.50% (p=0.000 n=9+10) +TEAen/256-8 150MB/s ± 1% 152MB/s ± 1% +1.31% (p=0.000 n=10+9) +TEAen/4K-8 164MB/s ± 1% 164MB/s ± 1% ~ (p=0.497 n=10+9) +TEAen/32K-8 162MB/s ± 0% 162MB/s ± 0% ~ (p=0.075 n=10+9) +TEAde/16-8 154MB/s ± 1% 154MB/s ± 0% ~ (p=0.982 n=9+9) +TEAde/256-8 165MB/s ± 1% 165MB/s ± 1% ~ (p=0.399 n=9+10) +TEAde/4K-8 167MB/s ± 1% 168MB/s ± 1% +0.44% (p=0.026 n=9+10) +TEAde/32K-8 164MB/s ± 1% 165MB/s ± 0% +0.37% (p=0.001 n=9+9) ``` ## 1.16 版本及以下 使用 PLAN9 汇编编写`Encrypt`,内联编写`Decrypt`,替换了加密算法为`runtime.fastrand`,与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)代码同在`go1.16`版本下编译相比,提升速度如下(new16.txt)。 ```css name old time/op new time/op delta -TEAen/16-8 252ns ± 0% 224ns ± 0% -10.85% (p=0.000 n=9+8) -TEAen/256-8 1.77µs ± 1% 1.67µs ± 1% -5.92% (p=0.000 n=9+9) -TEAen/4K-8 25.9µs ± 0% 24.9µs ± 0% -3.83% (p=0.000 n=10+9) -TEAen/32K-8 208µs ± 1% 201µs ± 0% -3.20% (p=0.000 n=10+9) -TEAde/16-8 216ns ± 1% 211ns ± 1% -2.41% (p=0.000 n=10+10) -TEAde/256-8 1.71µs ± 1% 1.66µs ± 1% -2.40% (p=0.000 n=10+10) -TEAde/4K-8 25.4µs ± 1% 24.8µs ± 1% -2.16% (p=0.000 n=10+10) -TEAde/32K-8 206µs ± 0% 201µs ± 0% -2.35% (p=0.000 n=9+9) +TEAen/16-8 252ns ± 0% 227ns ± 0% -10.00% (p=0.000 n=9+10) +TEAen/256-8 1.77µs ± 1% 1.66µs ± 0% -6.28% (p=0.000 n=9+10) +TEAen/4K-8 25.9µs ± 0% 24.9µs ± 1% -3.65% (p=0.000 n=10+10) +TEAen/32K-8 208µs ± 1% 200µs ± 0% -3.70% (p=0.000 n=10+9) +TEAde/16-8 216ns ± 1% 210ns ± 1% -3.04% (p=0.000 n=10+10) +TEAde/256-8 1.71µs ± 1% 1.66µs ± 1% -2.93% (p=0.000 n=10+10) +TEAde/4K-8 25.4µs ± 1% 24.8µs ± 0% -2.36% (p=0.000 n=10+9) +TEAde/32K-8 206µs ± 0% 200µs ± 0% -2.53% (p=0.000 n=9+9) name old speed new speed delta -TEAen/16-8 63.5MB/s ± 0% 71.3MB/s ± 0% +12.18% (p=0.000 n=9+8) -TEAen/256-8 145MB/s ± 1% 154MB/s ± 1% +6.28% (p=0.000 n=9+9) -TEAen/4K-8 158MB/s ± 0% 165MB/s ± 0% +3.98% (p=0.000 n=10+9) -TEAen/32K-8 158MB/s ± 1% 163MB/s ± 0% +3.31% (p=0.000 n=10+9) -TEAde/16-8 148MB/s ± 1% 152MB/s ± 1% +2.46% (p=0.000 n=10+10) -TEAde/256-8 160MB/s ± 1% 163MB/s ± 1% +2.46% (p=0.000 n=10+10) -TEAde/4K-8 162MB/s ± 1% 166MB/s ± 1% +2.21% (p=0.000 n=10+10) -TEAde/32K-8 159MB/s ± 0% 163MB/s ± 0% +2.41% (p=0.000 n=9+9) +TEAen/16-8 63.5MB/s ± 0% 70.6MB/s ± 0% +11.12% (p=0.000 n=9+10) +TEAen/256-8 145MB/s ± 1% 154MB/s ± 0% +6.69% (p=0.000 n=9+10) +TEAen/4K-8 158MB/s ± 0% 164MB/s ± 1% +3.79% (p=0.000 n=10+10) +TEAen/32K-8 158MB/s ± 1% 164MB/s ± 0% +3.84% (p=0.000 n=10+9) +TEAde/16-8 148MB/s ± 1% 152MB/s ± 1% +3.12% (p=0.000 n=10+10) +TEAde/256-8 160MB/s ± 1% 164MB/s ± 1% +3.01% (p=0.000 n=10+10) +TEAde/4K-8 162MB/s ± 1% 166MB/s ± 0% +2.41% (p=0.000 n=10+9) +TEAde/32K-8 159MB/s ± 0% 164MB/s ± 0% +2.60% (p=0.000 n=9+9) ``` 另外[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)本身在`go1.16`版本与在`go1.17`版本下编译相比,提升速度如下(new17.txt)。 ```css diff --git a/new16.txt b/new16.txt index 63b4d09..a3cb954 100644 --- a/new16.txt +++ b/new16.txt @@ -2,85 +2,85 @@ goos: darwin goarch: amd64 pkg: github.com/fumiama/gofastTEA cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz -BenchmarkTEAen/16-8 5297209 226.7 ns/op 70.57 MB/s -BenchmarkTEAen/16-8 5350597 224.8 ns/op 71.18 MB/s -BenchmarkTEAen/16-8 5320868 224.4 ns/op 71.32 MB/s -BenchmarkTEAen/16-8 5365137 224.9 ns/op 71.15 MB/s -BenchmarkTEAen/16-8 5331117 223.8 ns/op 71.51 MB/s -BenchmarkTEAen/16-8 5348852 224.9 ns/op 71.14 MB/s -BenchmarkTEAen/16-8 5377758 225.0 ns/op 71.11 MB/s -BenchmarkTEAen/16-8 5290276 224.3 ns/op 71.35 MB/s -BenchmarkTEAen/16-8 5344434 228.3 ns/op 70.09 MB/s -BenchmarkTEAen/16-8 5340181 223.6 ns/op 71.55 MB/s -BenchmarkTEAen/256-8 721362 1656 ns/op 154.62 MB/s -BenchmarkTEAen/256-8 739749 1665 ns/op 153.80 MB/s -BenchmarkTEAen/256-8 737851 1671 ns/op 153.17 MB/s -BenchmarkTEAen/256-8 717496 1664 ns/op 153.81 MB/s -BenchmarkTEAen/256-8 712500 1686 ns/op 151.87 MB/s -BenchmarkTEAen/256-8 719016 1665 ns/op 153.77 MB/s -BenchmarkTEAen/256-8 724338 1670 ns/op 153.25 MB/s -BenchmarkTEAen/256-8 719326 1674 ns/op 152.96 MB/s -BenchmarkTEAen/256-8 708326 1665 ns/op 153.76 MB/s -BenchmarkTEAen/256-8 715416 1665 ns/op 153.74 MB/s -BenchmarkTEAen/4K-8 47715 24790 ns/op 165.23 MB/s -BenchmarkTEAen/4K-8 48402 24894 ns/op 164.54 MB/s -BenchmarkTEAen/4K-8 47937 24877 ns/op 164.65 MB/s -BenchmarkTEAen/4K-8 48338 24861 ns/op 164.75 MB/s -BenchmarkTEAen/4K-8 48440 24803 ns/op 165.14 MB/s -BenchmarkTEAen/4K-8 48217 24879 ns/op 164.64 MB/s -BenchmarkTEAen/4K-8 48192 25234 ns/op 162.32 MB/s -BenchmarkTEAen/4K-8 48388 24967 ns/op 164.06 MB/s -BenchmarkTEAen/4K-8 48415 24861 ns/op 164.76 MB/s -BenchmarkTEAen/4K-8 48234 24984 ns/op 163.95 MB/s -BenchmarkTEAen/32K-8 5944 200978 ns/op 163.04 MB/s -BenchmarkTEAen/32K-8 6070 200065 ns/op 163.79 MB/s -BenchmarkTEAen/32K-8 6025 201225 ns/op 162.84 MB/s -BenchmarkTEAen/32K-8 6105 201669 ns/op 162.48 MB/s -BenchmarkTEAen/32K-8 5970 201261 ns/op 162.81 MB/s -BenchmarkTEAen/32K-8 5998 201009 ns/op 163.02 MB/s -BenchmarkTEAen/32K-8 5872 200889 ns/op 163.12 MB/s -BenchmarkTEAen/32K-8 5952 201904 ns/op 162.29 MB/s -BenchmarkTEAen/32K-8 5848 200970 ns/op 163.05 MB/s -BenchmarkTEAen/32K-8 5952 200880 ns/op 163.12 MB/s -BenchmarkTEAde/16-8 5703844 212.6 ns/op 150.48 MB/s -BenchmarkTEAde/16-8 5639527 212.1 ns/op 150.88 MB/s -BenchmarkTEAde/16-8 5680191 211.9 ns/op 151.00 MB/s -BenchmarkTEAde/16-8 5670024 210.9 ns/op 151.74 MB/s -BenchmarkTEAde/16-8 5748844 210.7 ns/op 151.85 MB/s -BenchmarkTEAde/16-8 5661036 210.1 ns/op 152.32 MB/s -BenchmarkTEAde/16-8 5634832 210.6 ns/op 151.97 MB/s -BenchmarkTEAde/16-8 5660986 211.0 ns/op 151.64 MB/s -BenchmarkTEAde/16-8 5702166 211.0 ns/op 151.68 MB/s -BenchmarkTEAde/16-8 5718448 211.0 ns/op 151.65 MB/s -BenchmarkTEAde/256-8 671943 1664 ns/op 163.45 MB/s -BenchmarkTEAde/256-8 709024 1657 ns/op 164.13 MB/s -BenchmarkTEAde/256-8 721207 1671 ns/op 162.82 MB/s -BenchmarkTEAde/256-8 725904 1656 ns/op 164.24 MB/s -BenchmarkTEAde/256-8 723692 1660 ns/op 163.86 MB/s -BenchmarkTEAde/256-8 719965 1671 ns/op 162.74 MB/s -BenchmarkTEAde/256-8 711105 1674 ns/op 162.48 MB/s -BenchmarkTEAde/256-8 738729 1662 ns/op 163.65 MB/s -BenchmarkTEAde/256-8 728722 1670 ns/op 162.92 MB/s -BenchmarkTEAde/256-8 709340 1656 ns/op 164.24 MB/s -BenchmarkTEAde/4K-8 48242 24857 ns/op 165.43 MB/s -BenchmarkTEAde/4K-8 48841 24836 ns/op 165.57 MB/s -BenchmarkTEAde/4K-8 47600 24782 ns/op 165.93 MB/s -BenchmarkTEAde/4K-8 48906 24954 ns/op 164.78 MB/s -BenchmarkTEAde/4K-8 48112 24860 ns/op 165.40 MB/s -BenchmarkTEAde/4K-8 48838 24831 ns/op 165.60 MB/s -BenchmarkTEAde/4K-8 48406 24797 ns/op 165.83 MB/s -BenchmarkTEAde/4K-8 48187 24672 ns/op 166.67 MB/s -BenchmarkTEAde/4K-8 48662 24761 ns/op 166.06 MB/s -BenchmarkTEAde/4K-8 47848 24832 ns/op 165.59 MB/s -BenchmarkTEAde/32K-8 5923 201091 ns/op 163.03 MB/s -BenchmarkTEAde/32K-8 5884 200839 ns/op 163.23 MB/s -BenchmarkTEAde/32K-8 5984 200686 ns/op 163.36 MB/s -BenchmarkTEAde/32K-8 5977 200524 ns/op 163.49 MB/s -BenchmarkTEAde/32K-8 5995 202499 ns/op 161.90 MB/s -BenchmarkTEAde/32K-8 5960 201497 ns/op 162.70 MB/s -BenchmarkTEAde/32K-8 6036 200497 ns/op 163.51 MB/s -BenchmarkTEAde/32K-8 5977 200972 ns/op 163.13 MB/s -BenchmarkTEAde/32K-8 5907 200726 ns/op 163.33 MB/s -BenchmarkTEAde/32K-8 5985 200288 ns/op 163.68 MB/s +BenchmarkTEAen/16-8 5205363 227.4 ns/op 70.36 MB/s +BenchmarkTEAen/16-8 5295032 225.7 ns/op 70.88 MB/s +BenchmarkTEAen/16-8 5301612 227.1 ns/op 70.46 MB/s +BenchmarkTEAen/16-8 5250404 226.1 ns/op 70.77 MB/s +BenchmarkTEAen/16-8 5268896 226.7 ns/op 70.57 MB/s +BenchmarkTEAen/16-8 5368207 226.5 ns/op 70.65 MB/s +BenchmarkTEAen/16-8 5289012 226.7 ns/op 70.58 MB/s +BenchmarkTEAen/16-8 5208618 226.4 ns/op 70.67 MB/s +BenchmarkTEAen/16-8 5294535 227.0 ns/op 70.48 MB/s +BenchmarkTEAen/16-8 5338717 226.3 ns/op 70.71 MB/s +BenchmarkTEAen/256-8 724610 1656 ns/op 154.56 MB/s +BenchmarkTEAen/256-8 713066 1659 ns/op 154.29 MB/s +BenchmarkTEAen/256-8 731902 1658 ns/op 154.44 MB/s +BenchmarkTEAen/256-8 719208 1654 ns/op 154.82 MB/s +BenchmarkTEAen/256-8 701692 1667 ns/op 153.57 MB/s +BenchmarkTEAen/256-8 718228 1660 ns/op 154.21 MB/s +BenchmarkTEAen/256-8 718328 1665 ns/op 153.73 MB/s +BenchmarkTEAen/256-8 719937 1655 ns/op 154.66 MB/s +BenchmarkTEAen/256-8 723042 1658 ns/op 154.44 MB/s +BenchmarkTEAen/256-8 715159 1665 ns/op 153.74 MB/s +BenchmarkTEAen/4K-8 47251 24868 ns/op 164.71 MB/s +BenchmarkTEAen/4K-8 47812 24951 ns/op 164.16 MB/s +BenchmarkTEAen/4K-8 47818 24970 ns/op 164.03 MB/s +BenchmarkTEAen/4K-8 48086 24773 ns/op 165.34 MB/s +BenchmarkTEAen/4K-8 48152 24906 ns/op 164.46 MB/s +BenchmarkTEAen/4K-8 48198 24811 ns/op 165.09 MB/s +BenchmarkTEAen/4K-8 48510 24804 ns/op 165.14 MB/s +BenchmarkTEAen/4K-8 47932 25009 ns/op 163.78 MB/s +BenchmarkTEAen/4K-8 47920 25190 ns/op 162.60 MB/s +BenchmarkTEAen/4K-8 47616 24972 ns/op 164.02 MB/s +BenchmarkTEAen/32K-8 6052 199998 ns/op 163.84 MB/s +BenchmarkTEAen/32K-8 6024 200465 ns/op 163.46 MB/s +BenchmarkTEAen/32K-8 5832 200107 ns/op 163.75 MB/s +BenchmarkTEAen/32K-8 5870 199989 ns/op 163.85 MB/s +BenchmarkTEAen/32K-8 6027 204637 ns/op 160.13 MB/s +BenchmarkTEAen/32K-8 6052 200703 ns/op 163.27 MB/s +BenchmarkTEAen/32K-8 5940 199921 ns/op 163.90 MB/s +BenchmarkTEAen/32K-8 5989 199988 ns/op 163.85 MB/s +BenchmarkTEAen/32K-8 5954 199747 ns/op 164.05 MB/s +BenchmarkTEAen/32K-8 6086 200549 ns/op 163.39 MB/s +BenchmarkTEAde/16-8 5744001 209.5 ns/op 152.72 MB/s +BenchmarkTEAde/16-8 5672097 209.2 ns/op 152.93 MB/s +BenchmarkTEAde/16-8 5749861 208.7 ns/op 153.33 MB/s +BenchmarkTEAde/16-8 5680198 210.0 ns/op 152.38 MB/s +BenchmarkTEAde/16-8 5710728 209.3 ns/op 152.88 MB/s +BenchmarkTEAde/16-8 5701707 209.0 ns/op 153.08 MB/s +BenchmarkTEAde/16-8 5725279 211.0 ns/op 151.63 MB/s +BenchmarkTEAde/16-8 5652127 211.0 ns/op 151.63 MB/s +BenchmarkTEAde/16-8 5673274 210.3 ns/op 152.19 MB/s +BenchmarkTEAde/16-8 5717761 210.4 ns/op 152.12 MB/s +BenchmarkTEAde/256-8 745363 1652 ns/op 164.65 MB/s +BenchmarkTEAde/256-8 687306 1654 ns/op 164.47 MB/s +BenchmarkTEAde/256-8 728041 1658 ns/op 164.09 MB/s +BenchmarkTEAde/256-8 721563 1652 ns/op 164.69 MB/s +BenchmarkTEAde/256-8 722287 1648 ns/op 165.08 MB/s +BenchmarkTEAde/256-8 761458 1668 ns/op 163.05 MB/s +BenchmarkTEAde/256-8 721782 1649 ns/op 164.91 MB/s +BenchmarkTEAde/256-8 723600 1665 ns/op 163.34 MB/s +BenchmarkTEAde/256-8 726710 1652 ns/op 164.61 MB/s +BenchmarkTEAde/256-8 724033 1654 ns/op 164.48 MB/s +BenchmarkTEAde/4K-8 47727 24722 ns/op 166.33 MB/s +BenchmarkTEAde/4K-8 48508 24739 ns/op 166.21 MB/s +BenchmarkTEAde/4K-8 48212 24676 ns/op 166.64 MB/s +BenchmarkTEAde/4K-8 48688 24764 ns/op 166.05 MB/s +BenchmarkTEAde/4K-8 48141 24870 ns/op 165.34 MB/s +BenchmarkTEAde/4K-8 48632 24742 ns/op 166.20 MB/s +BenchmarkTEAde/4K-8 48469 24867 ns/op 165.36 MB/s +BenchmarkTEAde/4K-8 48142 24697 ns/op 166.50 MB/s +BenchmarkTEAde/4K-8 48943 24840 ns/op 165.54 MB/s +BenchmarkTEAde/4K-8 48226 25117 ns/op 163.71 MB/s +BenchmarkTEAde/32K-8 5964 200443 ns/op 163.56 MB/s +BenchmarkTEAde/32K-8 5994 201887 ns/op 162.39 MB/s +BenchmarkTEAde/32K-8 5044 200369 ns/op 163.62 MB/s +BenchmarkTEAde/32K-8 6028 200666 ns/op 163.38 MB/s +BenchmarkTEAde/32K-8 6115 200806 ns/op 163.26 MB/s +BenchmarkTEAde/32K-8 5979 200116 ns/op 163.83 MB/s +BenchmarkTEAde/32K-8 5872 199903 ns/op 164.00 MB/s +BenchmarkTEAde/32K-8 5628 200546 ns/op 163.47 MB/s +BenchmarkTEAde/32K-8 5968 200698 ns/op 163.35 MB/s +BenchmarkTEAde/32K-8 6007 200201 ns/op 163.76 MB/s PASS -ok github.com/fumiama/gofastTEA 108.563s +ok github.com/fumiama/gofastTEA 105.976s diff --git a/new17.txt b/new17.txt index 5bdcf5a..dac5d5d 100644 --- a/new17.txt +++ b/new17.txt @@ -2,85 +2,85 @@ goos: darwin goarch: amd64 pkg: github.com/fumiama/gofastTEA cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz -BenchmarkTEAen/16-8 5269038 229.5 ns/op 69.72 MB/s -BenchmarkTEAen/16-8 5148352 225.4 ns/op 70.99 MB/s -BenchmarkTEAen/16-8 5393808 224.1 ns/op 71.39 MB/s -BenchmarkTEAen/16-8 5355736 224.8 ns/op 71.19 MB/s -BenchmarkTEAen/16-8 5335281 225.3 ns/op 71.01 MB/s -BenchmarkTEAen/16-8 5354547 223.7 ns/op 71.53 MB/s -BenchmarkTEAen/16-8 5155089 225.1 ns/op 71.07 MB/s -BenchmarkTEAen/16-8 5364086 222.8 ns/op 71.81 MB/s -BenchmarkTEAen/16-8 5354697 224.6 ns/op 71.23 MB/s -BenchmarkTEAen/16-8 5394775 223.2 ns/op 71.67 MB/s -BenchmarkTEAen/256-8 721279 1695 ns/op 150.99 MB/s -BenchmarkTEAen/256-8 719157 1686 ns/op 151.86 MB/s -BenchmarkTEAen/256-8 712452 1691 ns/op 151.38 MB/s -BenchmarkTEAen/256-8 710757 1693 ns/op 151.21 MB/s -BenchmarkTEAen/256-8 726054 1684 ns/op 152.03 MB/s -BenchmarkTEAen/256-8 719670 1688 ns/op 151.64 MB/s -BenchmarkTEAen/256-8 710749 1700 ns/op 150.61 MB/s -BenchmarkTEAen/256-8 710406 1681 ns/op 152.27 MB/s -BenchmarkTEAen/256-8 711072 1690 ns/op 151.48 MB/s -BenchmarkTEAen/256-8 705573 1691 ns/op 151.43 MB/s -BenchmarkTEAen/4K-8 47502 24856 ns/op 164.79 MB/s -BenchmarkTEAen/4K-8 47906 24808 ns/op 165.11 MB/s -BenchmarkTEAen/4K-8 47121 24822 ns/op 165.01 MB/s -BenchmarkTEAen/4K-8 48050 24900 ns/op 164.50 MB/s -BenchmarkTEAen/4K-8 47642 24996 ns/op 163.86 MB/s -BenchmarkTEAen/4K-8 48513 24923 ns/op 164.34 MB/s -BenchmarkTEAen/4K-8 48159 24987 ns/op 163.93 MB/s -BenchmarkTEAen/4K-8 48250 24948 ns/op 164.18 MB/s -BenchmarkTEAen/4K-8 47930 25095 ns/op 163.22 MB/s -BenchmarkTEAen/4K-8 47818 24880 ns/op 164.63 MB/s -BenchmarkTEAen/32K-8 6008 201270 ns/op 162.81 MB/s -BenchmarkTEAen/32K-8 5946 202390 ns/op 161.90 MB/s -BenchmarkTEAen/32K-8 4990 200935 ns/op 163.08 MB/s -BenchmarkTEAen/32K-8 5710 201290 ns/op 162.79 MB/s -BenchmarkTEAen/32K-8 5949 201151 ns/op 162.90 MB/s -BenchmarkTEAen/32K-8 6046 202128 ns/op 162.12 MB/s -BenchmarkTEAen/32K-8 5950 202166 ns/op 162.08 MB/s -BenchmarkTEAen/32K-8 6021 202292 ns/op 161.98 MB/s -BenchmarkTEAen/32K-8 6038 201232 ns/op 162.84 MB/s -BenchmarkTEAen/32K-8 5959 201314 ns/op 162.77 MB/s -BenchmarkTEAde/16-8 5790429 208.5 ns/op 153.51 MB/s -BenchmarkTEAde/16-8 5733123 207.3 ns/op 154.37 MB/s -BenchmarkTEAde/16-8 5722306 208.4 ns/op 153.56 MB/s -BenchmarkTEAde/16-8 5730838 207.9 ns/op 153.94 MB/s -BenchmarkTEAde/16-8 5743090 208.7 ns/op 153.36 MB/s -BenchmarkTEAde/16-8 5640714 208.2 ns/op 153.70 MB/s -BenchmarkTEAde/16-8 5740749 208.5 ns/op 153.47 MB/s -BenchmarkTEAde/16-8 5775631 208.0 ns/op 153.82 MB/s -BenchmarkTEAde/16-8 5743650 208.3 ns/op 153.62 MB/s -BenchmarkTEAde/16-8 5763579 209.1 ns/op 153.02 MB/s -BenchmarkTEAde/256-8 727724 1654 ns/op 164.42 MB/s -BenchmarkTEAde/256-8 740757 1652 ns/op 164.60 MB/s -BenchmarkTEAde/256-8 724318 1637 ns/op 166.21 MB/s -BenchmarkTEAde/256-8 751288 1639 ns/op 165.92 MB/s -BenchmarkTEAde/256-8 727666 1660 ns/op 163.89 MB/s -BenchmarkTEAde/256-8 728272 1646 ns/op 165.24 MB/s -BenchmarkTEAde/256-8 729640 1645 ns/op 165.37 MB/s -BenchmarkTEAde/256-8 743509 1650 ns/op 164.87 MB/s -BenchmarkTEAde/256-8 726325 1641 ns/op 165.75 MB/s -BenchmarkTEAde/256-8 721312 1653 ns/op 164.52 MB/s -BenchmarkTEAde/4K-8 48522 24454 ns/op 168.16 MB/s -BenchmarkTEAde/4K-8 48518 24610 ns/op 167.09 MB/s -BenchmarkTEAde/4K-8 49022 24653 ns/op 166.79 MB/s -BenchmarkTEAde/4K-8 48313 24545 ns/op 167.53 MB/s -BenchmarkTEAde/4K-8 48856 24589 ns/op 167.23 MB/s -BenchmarkTEAde/4K-8 48660 24529 ns/op 167.64 MB/s -BenchmarkTEAde/4K-8 49248 24667 ns/op 166.70 MB/s -BenchmarkTEAde/4K-8 48969 24485 ns/op 167.94 MB/s -BenchmarkTEAde/4K-8 48952 24658 ns/op 166.76 MB/s -BenchmarkTEAde/4K-8 48680 24426 ns/op 168.35 MB/s -BenchmarkTEAde/32K-8 5991 199033 ns/op 164.72 MB/s -BenchmarkTEAde/32K-8 6022 199173 ns/op 164.60 MB/s -BenchmarkTEAde/32K-8 6037 199301 ns/op 164.49 MB/s -BenchmarkTEAde/32K-8 6009 198696 ns/op 165.00 MB/s -BenchmarkTEAde/32K-8 5984 199928 ns/op 163.98 MB/s -BenchmarkTEAde/32K-8 6020 198890 ns/op 164.83 MB/s -BenchmarkTEAde/32K-8 6001 199967 ns/op 163.95 MB/s -BenchmarkTEAde/32K-8 6290 199553 ns/op 164.29 MB/s -BenchmarkTEAde/32K-8 6159 198473 ns/op 165.18 MB/s -BenchmarkTEAde/32K-8 5992 199065 ns/op 164.69 MB/s +BenchmarkTEAen/16-8 5253067 226.1 ns/op 70.75 MB/s +BenchmarkTEAen/16-8 5327096 224.1 ns/op 71.39 MB/s +BenchmarkTEAen/16-8 5384041 223.5 ns/op 71.60 MB/s +BenchmarkTEAen/16-8 5369714 223.8 ns/op 71.51 MB/s +BenchmarkTEAen/16-8 5364848 226.4 ns/op 70.68 MB/s +BenchmarkTEAen/16-8 5362302 224.2 ns/op 71.38 MB/s +BenchmarkTEAen/16-8 5348228 224.6 ns/op 71.24 MB/s +BenchmarkTEAen/16-8 5368670 223.5 ns/op 71.59 MB/s +BenchmarkTEAen/16-8 5313108 224.4 ns/op 71.29 MB/s +BenchmarkTEAen/16-8 5368104 224.8 ns/op 71.18 MB/s +BenchmarkTEAen/256-8 712567 1685 ns/op 151.90 MB/s +BenchmarkTEAen/256-8 704726 1688 ns/op 151.63 MB/s +BenchmarkTEAen/256-8 704440 1676 ns/op 152.75 MB/s +BenchmarkTEAen/256-8 712618 1665 ns/op 153.77 MB/s +BenchmarkTEAen/256-8 708595 1685 ns/op 151.91 MB/s +BenchmarkTEAen/256-8 726122 1681 ns/op 152.25 MB/s +BenchmarkTEAen/256-8 715941 1681 ns/op 152.29 MB/s +BenchmarkTEAen/256-8 702835 1686 ns/op 151.84 MB/s +BenchmarkTEAen/256-8 709210 1678 ns/op 152.52 MB/s +BenchmarkTEAen/256-8 698694 1695 ns/op 151.04 MB/s +BenchmarkTEAen/4K-8 47757 25052 ns/op 163.50 MB/s +BenchmarkTEAen/4K-8 48322 25378 ns/op 161.40 MB/s +BenchmarkTEAen/4K-8 47236 24942 ns/op 164.22 MB/s +BenchmarkTEAen/4K-8 48355 25131 ns/op 162.99 MB/s +BenchmarkTEAen/4K-8 47764 24951 ns/op 164.16 MB/s +BenchmarkTEAen/4K-8 48530 24920 ns/op 164.37 MB/s +BenchmarkTEAen/4K-8 48042 25000 ns/op 163.84 MB/s +BenchmarkTEAen/4K-8 47800 24991 ns/op 163.90 MB/s +BenchmarkTEAen/4K-8 48241 24984 ns/op 163.94 MB/s +BenchmarkTEAen/4K-8 47818 24766 ns/op 165.39 MB/s +BenchmarkTEAen/32K-8 5998 202201 ns/op 162.06 MB/s +BenchmarkTEAen/32K-8 5854 202139 ns/op 162.11 MB/s +BenchmarkTEAen/32K-8 5881 202198 ns/op 162.06 MB/s +BenchmarkTEAen/32K-8 5940 202439 ns/op 161.87 MB/s +BenchmarkTEAen/32K-8 6034 202012 ns/op 162.21 MB/s +BenchmarkTEAen/32K-8 5605 202632 ns/op 161.71 MB/s +BenchmarkTEAen/32K-8 5772 201185 ns/op 162.87 MB/s +BenchmarkTEAen/32K-8 5997 202039 ns/op 162.19 MB/s +BenchmarkTEAen/32K-8 5962 201546 ns/op 162.58 MB/s +BenchmarkTEAen/32K-8 5978 202300 ns/op 161.98 MB/s +BenchmarkTEAde/16-8 5738095 208.0 ns/op 153.82 MB/s +BenchmarkTEAde/16-8 5699319 208.0 ns/op 153.82 MB/s +BenchmarkTEAde/16-8 5749633 208.3 ns/op 153.65 MB/s +BenchmarkTEAde/16-8 5736562 216.8 ns/op 147.58 MB/s +BenchmarkTEAde/16-8 5649189 208.5 ns/op 153.48 MB/s +BenchmarkTEAde/16-8 5718224 208.4 ns/op 153.58 MB/s +BenchmarkTEAde/16-8 5751618 208.1 ns/op 153.81 MB/s +BenchmarkTEAde/16-8 5731822 208.2 ns/op 153.71 MB/s +BenchmarkTEAde/16-8 5705347 208.7 ns/op 153.30 MB/s +BenchmarkTEAde/16-8 5763650 207.9 ns/op 153.91 MB/s +BenchmarkTEAde/256-8 720400 1650 ns/op 164.82 MB/s +BenchmarkTEAde/256-8 733315 1651 ns/op 164.75 MB/s +BenchmarkTEAde/256-8 739126 1642 ns/op 165.69 MB/s +BenchmarkTEAde/256-8 724171 1659 ns/op 163.95 MB/s +BenchmarkTEAde/256-8 757248 1649 ns/op 164.97 MB/s +BenchmarkTEAde/256-8 712720 1650 ns/op 164.81 MB/s +BenchmarkTEAde/256-8 727759 1650 ns/op 164.81 MB/s +BenchmarkTEAde/256-8 739537 1651 ns/op 164.73 MB/s +BenchmarkTEAde/256-8 724605 1641 ns/op 165.73 MB/s +BenchmarkTEAde/256-8 722370 1635 ns/op 166.34 MB/s +BenchmarkTEAde/4K-8 48295 24497 ns/op 167.86 MB/s +BenchmarkTEAde/4K-8 48234 24488 ns/op 167.92 MB/s +BenchmarkTEAde/4K-8 48973 24632 ns/op 166.93 MB/s +BenchmarkTEAde/4K-8 48901 24486 ns/op 167.93 MB/s +BenchmarkTEAde/4K-8 48824 24558 ns/op 167.44 MB/s +BenchmarkTEAde/4K-8 48974 24465 ns/op 168.08 MB/s +BenchmarkTEAde/4K-8 48528 24500 ns/op 167.83 MB/s +BenchmarkTEAde/4K-8 48592 24672 ns/op 166.66 MB/s +BenchmarkTEAde/4K-8 48390 24559 ns/op 167.43 MB/s +BenchmarkTEAde/4K-8 48537 24559 ns/op 167.43 MB/s +BenchmarkTEAde/32K-8 6360 198969 ns/op 164.77 MB/s +BenchmarkTEAde/32K-8 6063 198627 ns/op 165.05 MB/s +BenchmarkTEAde/32K-8 6090 199098 ns/op 164.66 MB/s +BenchmarkTEAde/32K-8 6097 198747 ns/op 164.95 MB/s +BenchmarkTEAde/32K-8 6039 198540 ns/op 165.13 MB/s +BenchmarkTEAde/32K-8 6078 199822 ns/op 164.07 MB/s +BenchmarkTEAde/32K-8 6025 199044 ns/op 164.71 MB/s +BenchmarkTEAde/32K-8 5971 198685 ns/op 165.01 MB/s +BenchmarkTEAde/32K-8 6048 198973 ns/op 164.77 MB/s +BenchmarkTEAde/32K-8 6067 198543 ns/op 165.12 MB/s PASS -ok github.com/fumiama/gofastTEA 107.274s +ok github.com/fumiama/gofastTEA 107.196s diff --git a/tea_1.16.go b/tea_1.16.go index 45b05b1..f7e6759 100644 --- a/tea_1.16.go +++ b/tea_1.16.go @@ -16,59 +16,6 @@ func Uint32() uint32 // http://bbs.chinaunix.net/thread-583468-1-1.html // 感谢xichen大佬对TEA的解释 -//go:nosplit -func (t TEA) Decrypt(data []byte) []byte { - if len(data) < 16 || len(data)&7 != 0 { - return nil - } - dst := make([]byte, len(data)) - - var iv1, iv2, holder uint64 - var v0, v1 uint32 - for i := 0; i < len(dst); i += 8 { - holder = iv1 - iv1 = binary.BigEndian.Uint64(data[i:]) - iv2 ^= iv1 - v0, v1 = uint32(iv2>>32), uint32(iv2) - v1 -= (v0 + 0xe3779b90) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0xe3779b90) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0x454021d7) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0x454021d7) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0xa708a81e) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0xa708a81e) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0x08d12e65) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0x08d12e65) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0xcc623af3) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0xcc623af3) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0x8ff34781) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0x8ff34781) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0x5384540f) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0x5384540f) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0xb54cda56) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0xb54cda56) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0x1715609d) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0x1715609d) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0x78dde6e4) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0x78dde6e4) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0x3c6ef372) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0x3c6ef372) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 -= (v0 + 0x9e3779b9) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 -= (v1 + 0x9e3779b9) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - iv2 = uint64(v0)<<32 | uint64(v1) - binary.BigEndian.PutUint64(dst[i:], iv2^holder) - } - - return dst[dst[0]&7+3 : len(data)-7] -} - //go:nosplit func NewTeaCipher(key []byte) (t TEA) { if len(key) == 16 { diff --git a/tea_1.16_amd64.go b/tea_1.16_amd64.go new file mode 100644 index 0000000..b3d2836 --- /dev/null +++ b/tea_1.16_amd64.go @@ -0,0 +1,92 @@ +//go:build !go1.17 && amd64 +// +build !go1.17,amd64 + +package tea + +import ( + "encoding/binary" + "unsafe" +) + +// implemented in tea_$GOARCH.s +func encrypt(dstlen uintptr, tlen uintptr) +func decrypt(datalen uintptr, dstlen uintptr, t *TEA) + +//go:nosplit +func (t TEA) Encrypt(src []byte) (dst []byte) { + lens := len(src) + fill := 10 - (lens+1)&7 + dst = make([]byte, fill+lens+7) + binary.LittleEndian.PutUint32(dst, Uint32()) + binary.LittleEndian.PutUint32(dst[4:], Uint32()) + binary.LittleEndian.PutUint32(dst[8:], Uint32()) + dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 + copy(dst[fill:], src) + encrypt(uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&dst)))|uintptr(len(dst)<<40), uintptr(unsafe.Pointer(&t))|(uintptr(len(dst)<<16)&0xffffff00_00000000)) + return dst +} + +/* +//go:nosplit +func (t TEA) Decrypt(data []byte) []byte { + if len(data) < 16 || len(data)&7 != 0 { + return nil + } + dst := make([]byte, len(data)) + decrypt(uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&data)))|uintptr(len(data)<<40), uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&dst)))|(uintptr(len(data)<<16)&0xffffff00_00000000), &t) + return dst[dst[0]&7+3 : len(dst)-7] +} +*/ + +//go:nosplit +func (t TEA) Decrypt(data []byte) []byte { + if len(data) < 16 || len(data)&7 != 0 { + return nil + } + dst := make([]byte, len(data)) + + var iv1, iv2, holder uint64 + var v0, v1 uint32 + for i := 0; i < len(dst); i += 8 { + iv1 = binary.BigEndian.Uint64(data[i:]) + iv2 ^= iv1 + v0, v1 = uint32(iv2>>32), uint32(iv2) + v1 -= (v0 + 0xe3779b90) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xe3779b90) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x454021d7) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x454021d7) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xa708a81e) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xa708a81e) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x08d12e65) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x08d12e65) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xcc623af3) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xcc623af3) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x8ff34781) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x8ff34781) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x5384540f) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x5384540f) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xb54cda56) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xb54cda56) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x1715609d) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x1715609d) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x78dde6e4) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x78dde6e4) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x3c6ef372) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x3c6ef372) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x9e3779b9) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x9e3779b9) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + iv2 = uint64(v0)<<32 | uint64(v1) + binary.BigEndian.PutUint64(dst[i:], iv2^holder) + holder = iv1 + } + + return dst[dst[0]&7+3 : len(data)-7] +} diff --git a/tea_1.16_amd64.s b/tea_1.16_amd64.s index 3a5e80e..f0958a1 100644 --- a/tea_1.16_amd64.s +++ b/tea_1.16_amd64.s @@ -7,23 +7,23 @@ TEXT ·encrypt(SB), NOSPLIT, $0-16 MOVQ ·dstlen+0(FP), AX // go:<1.17 dst MOVQ ·teaptr+8(FP), DI // go:<1.17 t - MOVQ AX, BX // len(dst) low 40 bits + MOVQ AX, BX // len(dst) low 24 bits + MOVQ DI, R8 // len(dst) middle 24 bits SHRQ $40, BX // unpack len SHLQ $24, AX SHRQ $24, AX - MOVQ DI, R8 // len(dst) high 24 bits SHLQ $24, DI SHRQ $24, DI - SHRQ $40, R8 - SHLQ $40, R8 - ORQ R8, BX - ADDQ BX, AX // dst += len(dst) - NOTQ BX // i = -i - 1 - INCQ BX // i++ MOVQ (DI), DX // t0 MOVQ 4(DI), R12 // t1 MOVQ 8(DI), R10 // t2 MOVQ 12(DI), SI // t3 + SHRQ $40, R8 + SHLQ $24, R8 + ORQ R8, BX // len(dst) has 48 bits + ADDQ BX, AX // dst += len(dst) + NOTQ BX // i = -i - 1 + INCQ BX // i++ // XORQ R11, R11 // holder XORQ R13, R13 // iv1 XORQ DI, DI // iv2 @@ -391,23 +391,23 @@ TEXT ·decrypt(SB), NOSPLIT, $0-24 MOVQ ·dst+8(FP), DI // go:<1.17 dst MOVQ ·teaptr+16(FP), SI // go:<1.17 t MOVQ AX, BX // len(data) low 24 bits + MOVQ DI, R8 // dst middle 24 bits SHRQ $40, BX // unpack len SHLQ $24, AX SHRQ $24, AX - MOVQ DI, R8 // dst high 24 bits SHLQ $24, DI SHRQ $24, DI - SHRQ $40, R8 - SHLQ $40, R8 - ORQ R8, BX - ADDQ BX, AX // data += len(data) - ADDQ BX, DI // dst += len(data) - NOTQ BX // i = -len - 1 - INCQ BX // i++ MOVQ (SI), DX // t0 MOVQ 4(SI), R12 // t1 MOVQ 8(SI), R10 // t2 MOVQ 12(SI), R13 // t3 + SHRQ $40, R8 + SHLQ $24, R8 + ORQ R8, BX // len(data) has 48 bits + ADDQ BX, AX // data += len(data) + ADDQ BX, DI // dst += len(data) + NOTQ BX // i = -len - 1 + INCQ BX // i++ XORQ SI, SI // iv1 XORQ R15, R15 // iv2 XORQ R11, R11 // holder diff --git a/tea_1.16_asm.go b/tea_1.16_asm.go deleted file mode 100644 index 60c469c..0000000 --- a/tea_1.16_asm.go +++ /dev/null @@ -1,27 +0,0 @@ -//go:build !go1.17 && amd64 -// +build !go1.17,amd64 - -package tea - -import ( - "encoding/binary" - "unsafe" -) - -// implemented in tea_$GOARCH.s -func encrypt(dstlen uintptr, tlen uintptr) -func decrypt(datalen uintptr, dstlen uintptr, t *TEA) - -//go:nosplit -func (t TEA) Encrypt(src []byte) (dst []byte) { - lens := len(src) - fill := 10 - (lens+1)&7 - dst = make([]byte, fill+lens+7) - binary.LittleEndian.PutUint32(dst, Uint32()) - binary.LittleEndian.PutUint32(dst[4:], Uint32()) - binary.LittleEndian.PutUint32(dst[8:], Uint32()) - dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 - copy(dst[fill:], src) - encrypt(uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&dst)))|uintptr(len(dst)<<40), uintptr(unsafe.Pointer(&t))|(uintptr(len(dst))&0xffffff00_00000000)) - return dst -} diff --git a/tea_1.16_pure.go b/tea_1.16_pure.go index 2c7542e..9956bd4 100644 --- a/tea_1.16_pure.go +++ b/tea_1.16_pure.go @@ -65,3 +65,56 @@ func (t TEA) Encrypt(src []byte) (dst []byte) { return dst } + +//go:nosplit +func (t TEA) Decrypt(data []byte) []byte { + if len(data) < 16 || len(data)&7 != 0 { + return nil + } + dst := make([]byte, len(data)) + + var iv1, iv2, holder uint64 + var v0, v1 uint32 + for i := 0; i < len(dst); i += 8 { + iv1 = binary.BigEndian.Uint64(data[i:]) + iv2 ^= iv1 + v0, v1 = uint32(iv2>>32), uint32(iv2) + v1 -= (v0 + 0xe3779b90) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xe3779b90) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x454021d7) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x454021d7) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xa708a81e) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xa708a81e) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x08d12e65) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x08d12e65) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xcc623af3) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xcc623af3) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x8ff34781) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x8ff34781) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x5384540f) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x5384540f) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xb54cda56) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xb54cda56) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x1715609d) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x1715609d) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x78dde6e4) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x78dde6e4) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x3c6ef372) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x3c6ef372) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x9e3779b9) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x9e3779b9) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + iv2 = uint64(v0)<<32 | uint64(v1) + binary.BigEndian.PutUint64(dst[i:], iv2^holder) + holder = iv1 + } + + return dst[dst[0]&7+3 : len(data)-7] +} diff --git a/tea_1.17.go b/tea_1.17.go index aa5d287..528be14 100644 --- a/tea_1.17.go +++ b/tea_1.17.go @@ -43,14 +43,13 @@ func (t TEA) Decrypt(data []byte) []byte { return nil } dst := make([]byte, len(data)) - var iv1, iv2, holder, tmp uint64 + var iv1, iv2, holder uint64 for i := 0; i < len(dst); i += 8 { - block := binary.BigEndian.Uint64(data[i:]) - tmp = t.decode(block ^ iv2) - iv2 = tmp - holder = tmp ^ iv1 - iv1 = block - binary.BigEndian.PutUint64(dst[i:], holder) + iv1 = binary.BigEndian.Uint64(data[i:]) + iv2 ^= iv1 + iv2 = t.decode(iv2) + binary.BigEndian.PutUint64(dst[i:], iv2^holder) + holder = iv1 } return dst[dst[0]&7+3 : len(data)-7] }