diff --git a/README.md b/README.md index d2dcd3f..6126684 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,5 @@ # gofastTEA -TEA 编码算法的 PLAN9 汇编优化实现 - -## 编译逻辑 -- 大于等于 1.17 版本且是 amd64,使用非`asm`非内联算法 -- 大于等于 1.17 版本且不是 amd64,使用非`asm`内联算法 -- 小于 1.17 版本且是 amd64,使用`asm`内联算法 -- 小于 1.17 版本且不是 amd64,使用非`asm`内联算法 +TEA 编码算法的优化实现 ## 1.17 版本及以上 代码与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)相比,替换了随机算法为`runtime.fastrand`,且简化了`Decrypt`,提升速度如下。 @@ -32,29 +26,7 @@ TEAde/32K-8 164MB/s ± 1% 165MB/s ± 0% +0.37% (p=0.001 n=9+9) ``` ## 1.16 版本及以下 -使用 PLAN9 汇编编写`Encrypt`,内联编写`Decrypt`,替换了加密算法为`runtime.fastrand`,与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)代码同在`go1.16`版本下编译相比,提升速度如下(new16.txt)。 -```css -name old time/op new time/op delta -TEAen/16-8 252ns ± 0% 227ns ± 0% -10.00% (p=0.000 n=9+10) -TEAen/256-8 1.77µs ± 1% 1.66µs ± 0% -6.28% (p=0.000 n=9+10) -TEAen/4K-8 25.9µs ± 0% 24.9µs ± 1% -3.65% (p=0.000 n=10+10) -TEAen/32K-8 208µs ± 1% 200µs ± 0% -3.70% (p=0.000 n=10+9) -TEAde/16-8 216ns ± 1% 210ns ± 1% -3.04% (p=0.000 n=10+10) -TEAde/256-8 1.71µs ± 1% 1.66µs ± 1% -2.93% (p=0.000 n=10+10) -TEAde/4K-8 25.4µs ± 1% 24.8µs ± 0% -2.36% (p=0.000 n=10+9) -TEAde/32K-8 206µs ± 0% 200µs ± 0% -2.53% (p=0.000 n=9+9) - -name old speed new speed delta -TEAen/16-8 63.5MB/s ± 0% 70.6MB/s ± 0% +11.12% (p=0.000 n=9+10) -TEAen/256-8 145MB/s ± 1% 154MB/s ± 0% +6.69% (p=0.000 n=9+10) -TEAen/4K-8 158MB/s ± 0% 164MB/s ± 1% +3.79% (p=0.000 n=10+10) -TEAen/32K-8 158MB/s ± 1% 164MB/s ± 0% +3.84% (p=0.000 n=10+9) -TEAde/16-8 148MB/s ± 1% 152MB/s ± 1% +3.12% (p=0.000 n=10+10) -TEAde/256-8 160MB/s ± 1% 164MB/s ± 1% +3.01% (p=0.000 n=10+10) -TEAde/4K-8 162MB/s ± 1% 166MB/s ± 0% +2.41% (p=0.000 n=10+9) -TEAde/32K-8 159MB/s ± 0% 164MB/s ± 0% +2.60% (p=0.000 n=9+9) -``` -另外[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)本身在`go1.16`版本与在`go1.17`版本下编译相比,提升速度如下(new17.txt)。 +[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)本身在`go1.16`版本与在`go1.17`版本下编译相比,提升速度如下。 ```css name old time/op new time/op delta TEAen/16-8 252ns ± 0% 241ns ± 1% -4.09% (p=0.000 n=9+10) @@ -76,4 +48,3 @@ TEAde/256-8 160MB/s ± 1% 165MB/s ± 1% +3.55% (p=0.000 n=10+10) TEAde/4K-8 162MB/s ± 1% 168MB/s ± 0% +3.52% (p=0.000 n=10+10) TEAde/32K-8 159MB/s ± 0% 165MB/s ± 0% +3.45% (p=0.000 n=9+9) ``` -可见在编码时,在`go1.16`版本下的某些时候(编码大小在`0-16kb`之间),`gofastTEA`比`go1.17`版本的`MiraiGo`实现更快,且整体来看,`gofastTEA`在`go1.16`版本下的执行效率已经可以与`MiraiGo`实现的`go1.17`版本基本持平。 \ No newline at end of file diff --git a/new16.txt b/new16.txt deleted file mode 100644 index a3cb954..0000000 --- a/new16.txt +++ /dev/null @@ -1,86 +0,0 @@ -goos: darwin -goarch: amd64 -pkg: github.com/fumiama/gofastTEA -cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz -BenchmarkTEAen/16-8 5205363 227.4 ns/op 70.36 MB/s -BenchmarkTEAen/16-8 5295032 225.7 ns/op 70.88 MB/s -BenchmarkTEAen/16-8 5301612 227.1 ns/op 70.46 MB/s -BenchmarkTEAen/16-8 5250404 226.1 ns/op 70.77 MB/s -BenchmarkTEAen/16-8 5268896 226.7 ns/op 70.57 MB/s -BenchmarkTEAen/16-8 5368207 226.5 ns/op 70.65 MB/s -BenchmarkTEAen/16-8 5289012 226.7 ns/op 70.58 MB/s -BenchmarkTEAen/16-8 5208618 226.4 ns/op 70.67 MB/s -BenchmarkTEAen/16-8 5294535 227.0 ns/op 70.48 MB/s -BenchmarkTEAen/16-8 5338717 226.3 ns/op 70.71 MB/s -BenchmarkTEAen/256-8 724610 1656 ns/op 154.56 MB/s -BenchmarkTEAen/256-8 713066 1659 ns/op 154.29 MB/s -BenchmarkTEAen/256-8 731902 1658 ns/op 154.44 MB/s -BenchmarkTEAen/256-8 719208 1654 ns/op 154.82 MB/s -BenchmarkTEAen/256-8 701692 1667 ns/op 153.57 MB/s -BenchmarkTEAen/256-8 718228 1660 ns/op 154.21 MB/s -BenchmarkTEAen/256-8 718328 1665 ns/op 153.73 MB/s -BenchmarkTEAen/256-8 719937 1655 ns/op 154.66 MB/s -BenchmarkTEAen/256-8 723042 1658 ns/op 154.44 MB/s -BenchmarkTEAen/256-8 715159 1665 ns/op 153.74 MB/s -BenchmarkTEAen/4K-8 47251 24868 ns/op 164.71 MB/s -BenchmarkTEAen/4K-8 47812 24951 ns/op 164.16 MB/s -BenchmarkTEAen/4K-8 47818 24970 ns/op 164.03 MB/s -BenchmarkTEAen/4K-8 48086 24773 ns/op 165.34 MB/s -BenchmarkTEAen/4K-8 48152 24906 ns/op 164.46 MB/s -BenchmarkTEAen/4K-8 48198 24811 ns/op 165.09 MB/s -BenchmarkTEAen/4K-8 48510 24804 ns/op 165.14 MB/s -BenchmarkTEAen/4K-8 47932 25009 ns/op 163.78 MB/s -BenchmarkTEAen/4K-8 47920 25190 ns/op 162.60 MB/s -BenchmarkTEAen/4K-8 47616 24972 ns/op 164.02 MB/s -BenchmarkTEAen/32K-8 6052 199998 ns/op 163.84 MB/s -BenchmarkTEAen/32K-8 6024 200465 ns/op 163.46 MB/s -BenchmarkTEAen/32K-8 5832 200107 ns/op 163.75 MB/s -BenchmarkTEAen/32K-8 5870 199989 ns/op 163.85 MB/s -BenchmarkTEAen/32K-8 6027 204637 ns/op 160.13 MB/s -BenchmarkTEAen/32K-8 6052 200703 ns/op 163.27 MB/s -BenchmarkTEAen/32K-8 5940 199921 ns/op 163.90 MB/s -BenchmarkTEAen/32K-8 5989 199988 ns/op 163.85 MB/s -BenchmarkTEAen/32K-8 5954 199747 ns/op 164.05 MB/s -BenchmarkTEAen/32K-8 6086 200549 ns/op 163.39 MB/s -BenchmarkTEAde/16-8 5744001 209.5 ns/op 152.72 MB/s -BenchmarkTEAde/16-8 5672097 209.2 ns/op 152.93 MB/s -BenchmarkTEAde/16-8 5749861 208.7 ns/op 153.33 MB/s -BenchmarkTEAde/16-8 5680198 210.0 ns/op 152.38 MB/s -BenchmarkTEAde/16-8 5710728 209.3 ns/op 152.88 MB/s -BenchmarkTEAde/16-8 5701707 209.0 ns/op 153.08 MB/s -BenchmarkTEAde/16-8 5725279 211.0 ns/op 151.63 MB/s -BenchmarkTEAde/16-8 5652127 211.0 ns/op 151.63 MB/s -BenchmarkTEAde/16-8 5673274 210.3 ns/op 152.19 MB/s -BenchmarkTEAde/16-8 5717761 210.4 ns/op 152.12 MB/s -BenchmarkTEAde/256-8 745363 1652 ns/op 164.65 MB/s -BenchmarkTEAde/256-8 687306 1654 ns/op 164.47 MB/s -BenchmarkTEAde/256-8 728041 1658 ns/op 164.09 MB/s -BenchmarkTEAde/256-8 721563 1652 ns/op 164.69 MB/s -BenchmarkTEAde/256-8 722287 1648 ns/op 165.08 MB/s -BenchmarkTEAde/256-8 761458 1668 ns/op 163.05 MB/s -BenchmarkTEAde/256-8 721782 1649 ns/op 164.91 MB/s -BenchmarkTEAde/256-8 723600 1665 ns/op 163.34 MB/s -BenchmarkTEAde/256-8 726710 1652 ns/op 164.61 MB/s -BenchmarkTEAde/256-8 724033 1654 ns/op 164.48 MB/s -BenchmarkTEAde/4K-8 47727 24722 ns/op 166.33 MB/s -BenchmarkTEAde/4K-8 48508 24739 ns/op 166.21 MB/s -BenchmarkTEAde/4K-8 48212 24676 ns/op 166.64 MB/s -BenchmarkTEAde/4K-8 48688 24764 ns/op 166.05 MB/s -BenchmarkTEAde/4K-8 48141 24870 ns/op 165.34 MB/s -BenchmarkTEAde/4K-8 48632 24742 ns/op 166.20 MB/s -BenchmarkTEAde/4K-8 48469 24867 ns/op 165.36 MB/s -BenchmarkTEAde/4K-8 48142 24697 ns/op 166.50 MB/s -BenchmarkTEAde/4K-8 48943 24840 ns/op 165.54 MB/s -BenchmarkTEAde/4K-8 48226 25117 ns/op 163.71 MB/s -BenchmarkTEAde/32K-8 5964 200443 ns/op 163.56 MB/s -BenchmarkTEAde/32K-8 5994 201887 ns/op 162.39 MB/s -BenchmarkTEAde/32K-8 5044 200369 ns/op 163.62 MB/s -BenchmarkTEAde/32K-8 6028 200666 ns/op 163.38 MB/s -BenchmarkTEAde/32K-8 6115 200806 ns/op 163.26 MB/s -BenchmarkTEAde/32K-8 5979 200116 ns/op 163.83 MB/s -BenchmarkTEAde/32K-8 5872 199903 ns/op 164.00 MB/s -BenchmarkTEAde/32K-8 5628 200546 ns/op 163.47 MB/s -BenchmarkTEAde/32K-8 5968 200698 ns/op 163.35 MB/s -BenchmarkTEAde/32K-8 6007 200201 ns/op 163.76 MB/s -PASS -ok github.com/fumiama/gofastTEA 105.976s diff --git a/new17.txt b/new17.txt deleted file mode 100644 index dac5d5d..0000000 --- a/new17.txt +++ /dev/null @@ -1,86 +0,0 @@ -goos: darwin -goarch: amd64 -pkg: github.com/fumiama/gofastTEA -cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz -BenchmarkTEAen/16-8 5253067 226.1 ns/op 70.75 MB/s -BenchmarkTEAen/16-8 5327096 224.1 ns/op 71.39 MB/s -BenchmarkTEAen/16-8 5384041 223.5 ns/op 71.60 MB/s -BenchmarkTEAen/16-8 5369714 223.8 ns/op 71.51 MB/s -BenchmarkTEAen/16-8 5364848 226.4 ns/op 70.68 MB/s -BenchmarkTEAen/16-8 5362302 224.2 ns/op 71.38 MB/s -BenchmarkTEAen/16-8 5348228 224.6 ns/op 71.24 MB/s -BenchmarkTEAen/16-8 5368670 223.5 ns/op 71.59 MB/s -BenchmarkTEAen/16-8 5313108 224.4 ns/op 71.29 MB/s -BenchmarkTEAen/16-8 5368104 224.8 ns/op 71.18 MB/s -BenchmarkTEAen/256-8 712567 1685 ns/op 151.90 MB/s -BenchmarkTEAen/256-8 704726 1688 ns/op 151.63 MB/s -BenchmarkTEAen/256-8 704440 1676 ns/op 152.75 MB/s -BenchmarkTEAen/256-8 712618 1665 ns/op 153.77 MB/s -BenchmarkTEAen/256-8 708595 1685 ns/op 151.91 MB/s -BenchmarkTEAen/256-8 726122 1681 ns/op 152.25 MB/s -BenchmarkTEAen/256-8 715941 1681 ns/op 152.29 MB/s -BenchmarkTEAen/256-8 702835 1686 ns/op 151.84 MB/s -BenchmarkTEAen/256-8 709210 1678 ns/op 152.52 MB/s -BenchmarkTEAen/256-8 698694 1695 ns/op 151.04 MB/s -BenchmarkTEAen/4K-8 47757 25052 ns/op 163.50 MB/s -BenchmarkTEAen/4K-8 48322 25378 ns/op 161.40 MB/s -BenchmarkTEAen/4K-8 47236 24942 ns/op 164.22 MB/s -BenchmarkTEAen/4K-8 48355 25131 ns/op 162.99 MB/s -BenchmarkTEAen/4K-8 47764 24951 ns/op 164.16 MB/s -BenchmarkTEAen/4K-8 48530 24920 ns/op 164.37 MB/s -BenchmarkTEAen/4K-8 48042 25000 ns/op 163.84 MB/s -BenchmarkTEAen/4K-8 47800 24991 ns/op 163.90 MB/s -BenchmarkTEAen/4K-8 48241 24984 ns/op 163.94 MB/s -BenchmarkTEAen/4K-8 47818 24766 ns/op 165.39 MB/s -BenchmarkTEAen/32K-8 5998 202201 ns/op 162.06 MB/s -BenchmarkTEAen/32K-8 5854 202139 ns/op 162.11 MB/s -BenchmarkTEAen/32K-8 5881 202198 ns/op 162.06 MB/s -BenchmarkTEAen/32K-8 5940 202439 ns/op 161.87 MB/s -BenchmarkTEAen/32K-8 6034 202012 ns/op 162.21 MB/s -BenchmarkTEAen/32K-8 5605 202632 ns/op 161.71 MB/s -BenchmarkTEAen/32K-8 5772 201185 ns/op 162.87 MB/s -BenchmarkTEAen/32K-8 5997 202039 ns/op 162.19 MB/s -BenchmarkTEAen/32K-8 5962 201546 ns/op 162.58 MB/s -BenchmarkTEAen/32K-8 5978 202300 ns/op 161.98 MB/s -BenchmarkTEAde/16-8 5738095 208.0 ns/op 153.82 MB/s -BenchmarkTEAde/16-8 5699319 208.0 ns/op 153.82 MB/s -BenchmarkTEAde/16-8 5749633 208.3 ns/op 153.65 MB/s -BenchmarkTEAde/16-8 5736562 216.8 ns/op 147.58 MB/s -BenchmarkTEAde/16-8 5649189 208.5 ns/op 153.48 MB/s -BenchmarkTEAde/16-8 5718224 208.4 ns/op 153.58 MB/s -BenchmarkTEAde/16-8 5751618 208.1 ns/op 153.81 MB/s -BenchmarkTEAde/16-8 5731822 208.2 ns/op 153.71 MB/s -BenchmarkTEAde/16-8 5705347 208.7 ns/op 153.30 MB/s -BenchmarkTEAde/16-8 5763650 207.9 ns/op 153.91 MB/s -BenchmarkTEAde/256-8 720400 1650 ns/op 164.82 MB/s -BenchmarkTEAde/256-8 733315 1651 ns/op 164.75 MB/s -BenchmarkTEAde/256-8 739126 1642 ns/op 165.69 MB/s -BenchmarkTEAde/256-8 724171 1659 ns/op 163.95 MB/s -BenchmarkTEAde/256-8 757248 1649 ns/op 164.97 MB/s -BenchmarkTEAde/256-8 712720 1650 ns/op 164.81 MB/s -BenchmarkTEAde/256-8 727759 1650 ns/op 164.81 MB/s -BenchmarkTEAde/256-8 739537 1651 ns/op 164.73 MB/s -BenchmarkTEAde/256-8 724605 1641 ns/op 165.73 MB/s -BenchmarkTEAde/256-8 722370 1635 ns/op 166.34 MB/s -BenchmarkTEAde/4K-8 48295 24497 ns/op 167.86 MB/s -BenchmarkTEAde/4K-8 48234 24488 ns/op 167.92 MB/s -BenchmarkTEAde/4K-8 48973 24632 ns/op 166.93 MB/s -BenchmarkTEAde/4K-8 48901 24486 ns/op 167.93 MB/s -BenchmarkTEAde/4K-8 48824 24558 ns/op 167.44 MB/s -BenchmarkTEAde/4K-8 48974 24465 ns/op 168.08 MB/s -BenchmarkTEAde/4K-8 48528 24500 ns/op 167.83 MB/s -BenchmarkTEAde/4K-8 48592 24672 ns/op 166.66 MB/s -BenchmarkTEAde/4K-8 48390 24559 ns/op 167.43 MB/s -BenchmarkTEAde/4K-8 48537 24559 ns/op 167.43 MB/s -BenchmarkTEAde/32K-8 6360 198969 ns/op 164.77 MB/s -BenchmarkTEAde/32K-8 6063 198627 ns/op 165.05 MB/s -BenchmarkTEAde/32K-8 6090 199098 ns/op 164.66 MB/s -BenchmarkTEAde/32K-8 6097 198747 ns/op 164.95 MB/s -BenchmarkTEAde/32K-8 6039 198540 ns/op 165.13 MB/s -BenchmarkTEAde/32K-8 6078 199822 ns/op 164.07 MB/s -BenchmarkTEAde/32K-8 6025 199044 ns/op 164.71 MB/s -BenchmarkTEAde/32K-8 5971 198685 ns/op 165.01 MB/s -BenchmarkTEAde/32K-8 6048 198973 ns/op 164.77 MB/s -BenchmarkTEAde/32K-8 6067 198543 ns/op 165.12 MB/s -PASS -ok github.com/fumiama/gofastTEA 107.196s diff --git a/old16.txt b/old16.txt deleted file mode 100644 index 865f953..0000000 --- a/old16.txt +++ /dev/null @@ -1,86 +0,0 @@ -goos: darwin -goarch: amd64 -pkg: github.com/fumiama/gofastTEA -cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz -BenchmarkTEAen/16-8 4685186 255.1 ns/op 62.71 MB/s -BenchmarkTEAen/16-8 4744274 250.6 ns/op 63.85 MB/s -BenchmarkTEAen/16-8 4757278 251.0 ns/op 63.74 MB/s -BenchmarkTEAen/16-8 4728451 251.5 ns/op 63.63 MB/s -BenchmarkTEAen/16-8 4771138 252.3 ns/op 63.41 MB/s -BenchmarkTEAen/16-8 4715673 251.4 ns/op 63.64 MB/s -BenchmarkTEAen/16-8 4722417 252.9 ns/op 63.26 MB/s -BenchmarkTEAen/16-8 4788355 251.4 ns/op 63.65 MB/s -BenchmarkTEAen/16-8 4719381 252.5 ns/op 63.36 MB/s -BenchmarkTEAen/16-8 4748629 252.4 ns/op 63.40 MB/s -BenchmarkTEAen/256-8 678688 1760 ns/op 145.49 MB/s -BenchmarkTEAen/256-8 689025 1759 ns/op 145.54 MB/s -BenchmarkTEAen/256-8 673020 1768 ns/op 144.76 MB/s -BenchmarkTEAen/256-8 682868 1773 ns/op 144.41 MB/s -BenchmarkTEAen/256-8 670245 1792 ns/op 142.85 MB/s -BenchmarkTEAen/256-8 679348 1770 ns/op 144.65 MB/s -BenchmarkTEAen/256-8 679471 1773 ns/op 144.38 MB/s -BenchmarkTEAen/256-8 659677 1771 ns/op 144.57 MB/s -BenchmarkTEAen/256-8 677760 1776 ns/op 144.17 MB/s -BenchmarkTEAen/256-8 670436 1788 ns/op 143.18 MB/s -BenchmarkTEAen/4K-8 45494 25910 ns/op 158.08 MB/s -BenchmarkTEAen/4K-8 45812 25838 ns/op 158.52 MB/s -BenchmarkTEAen/4K-8 46861 25802 ns/op 158.75 MB/s -BenchmarkTEAen/4K-8 46503 25991 ns/op 157.59 MB/s -BenchmarkTEAen/4K-8 46102 25813 ns/op 158.68 MB/s -BenchmarkTEAen/4K-8 46298 25954 ns/op 157.82 MB/s -BenchmarkTEAen/4K-8 46710 25750 ns/op 159.07 MB/s -BenchmarkTEAen/4K-8 46239 25836 ns/op 158.54 MB/s -BenchmarkTEAen/4K-8 46072 25860 ns/op 158.39 MB/s -BenchmarkTEAen/4K-8 46370 25938 ns/op 157.92 MB/s -BenchmarkTEAen/32K-8 5605 208498 ns/op 157.16 MB/s -BenchmarkTEAen/32K-8 5654 208256 ns/op 157.34 MB/s -BenchmarkTEAen/32K-8 5671 207461 ns/op 157.95 MB/s -BenchmarkTEAen/32K-8 5726 208031 ns/op 157.51 MB/s -BenchmarkTEAen/32K-8 5886 208996 ns/op 156.79 MB/s -BenchmarkTEAen/32K-8 5594 207445 ns/op 157.96 MB/s -BenchmarkTEAen/32K-8 5684 207217 ns/op 158.13 MB/s -BenchmarkTEAen/32K-8 5754 207360 ns/op 158.02 MB/s -BenchmarkTEAen/32K-8 5595 207484 ns/op 157.93 MB/s -BenchmarkTEAen/32K-8 5692 207754 ns/op 157.72 MB/s -BenchmarkTEAde/16-8 5531444 217.2 ns/op 147.35 MB/s -BenchmarkTEAde/16-8 5521533 215.2 ns/op 148.72 MB/s -BenchmarkTEAde/16-8 5537046 215.2 ns/op 148.69 MB/s -BenchmarkTEAde/16-8 5607153 217.0 ns/op 147.48 MB/s -BenchmarkTEAde/16-8 5534305 218.8 ns/op 146.24 MB/s -BenchmarkTEAde/16-8 5561917 215.7 ns/op 148.35 MB/s -BenchmarkTEAde/16-8 5535909 216.4 ns/op 147.89 MB/s -BenchmarkTEAde/16-8 5519742 215.5 ns/op 148.48 MB/s -BenchmarkTEAde/16-8 5556531 216.5 ns/op 147.82 MB/s -BenchmarkTEAde/16-8 5589644 216.6 ns/op 147.76 MB/s -BenchmarkTEAde/256-8 700608 1697 ns/op 160.28 MB/s -BenchmarkTEAde/256-8 696637 1703 ns/op 159.73 MB/s -BenchmarkTEAde/256-8 697063 1702 ns/op 159.80 MB/s -BenchmarkTEAde/256-8 709950 1710 ns/op 159.08 MB/s -BenchmarkTEAde/256-8 697386 1719 ns/op 158.28 MB/s -BenchmarkTEAde/256-8 700438 1697 ns/op 160.29 MB/s -BenchmarkTEAde/256-8 701476 1710 ns/op 159.09 MB/s -BenchmarkTEAde/256-8 704905 1709 ns/op 159.20 MB/s -BenchmarkTEAde/256-8 702578 1697 ns/op 160.24 MB/s -BenchmarkTEAde/256-8 696729 1707 ns/op 159.36 MB/s -BenchmarkTEAde/4K-8 46996 25395 ns/op 161.92 MB/s -BenchmarkTEAde/4K-8 47506 25322 ns/op 162.39 MB/s -BenchmarkTEAde/4K-8 46075 25309 ns/op 162.48 MB/s -BenchmarkTEAde/4K-8 47414 25445 ns/op 161.60 MB/s -BenchmarkTEAde/4K-8 47269 25409 ns/op 161.83 MB/s -BenchmarkTEAde/4K-8 47254 25543 ns/op 160.99 MB/s -BenchmarkTEAde/4K-8 47268 25260 ns/op 162.79 MB/s -BenchmarkTEAde/4K-8 47424 25376 ns/op 162.04 MB/s -BenchmarkTEAde/4K-8 46938 25254 ns/op 162.83 MB/s -BenchmarkTEAde/4K-8 47344 25352 ns/op 162.20 MB/s -BenchmarkTEAde/32K-8 5851 205595 ns/op 159.46 MB/s -BenchmarkTEAde/32K-8 5856 205832 ns/op 159.28 MB/s -BenchmarkTEAde/32K-8 5830 205681 ns/op 159.39 MB/s -BenchmarkTEAde/32K-8 5796 205488 ns/op 159.54 MB/s -BenchmarkTEAde/32K-8 5841 205767 ns/op 159.33 MB/s -BenchmarkTEAde/32K-8 5691 205681 ns/op 159.39 MB/s -BenchmarkTEAde/32K-8 5886 205331 ns/op 159.66 MB/s -BenchmarkTEAde/32K-8 5842 205587 ns/op 159.47 MB/s -BenchmarkTEAde/32K-8 5809 205667 ns/op 159.40 MB/s -BenchmarkTEAde/32K-8 5941 206341 ns/op 158.88 MB/s -PASS -ok github.com/fumiama/gofastTEA 108.464s diff --git a/old17.txt b/old17.txt deleted file mode 100644 index b6304b2..0000000 --- a/old17.txt +++ /dev/null @@ -1,86 +0,0 @@ -goos: darwin -goarch: amd64 -pkg: github.com/fumiama/gofastTEA -cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz -BenchmarkTEAen/16-8 4941398 244.0 ns/op 65.56 MB/s -BenchmarkTEAen/16-8 4966077 240.8 ns/op 66.44 MB/s -BenchmarkTEAen/16-8 4993192 241.3 ns/op 66.30 MB/s -BenchmarkTEAen/16-8 4943002 240.7 ns/op 66.47 MB/s -BenchmarkTEAen/16-8 4921885 241.9 ns/op 66.14 MB/s -BenchmarkTEAen/16-8 4984929 242.7 ns/op 65.93 MB/s -BenchmarkTEAen/16-8 4977856 241.5 ns/op 66.25 MB/s -BenchmarkTEAen/16-8 4965243 241.4 ns/op 66.27 MB/s -BenchmarkTEAen/16-8 4951600 241.1 ns/op 66.36 MB/s -BenchmarkTEAen/16-8 4933670 240.8 ns/op 66.45 MB/s -BenchmarkTEAen/256-8 706099 1702 ns/op 150.38 MB/s -BenchmarkTEAen/256-8 710599 1713 ns/op 149.45 MB/s -BenchmarkTEAen/256-8 700268 1696 ns/op 150.90 MB/s -BenchmarkTEAen/256-8 677850 1708 ns/op 149.86 MB/s -BenchmarkTEAen/256-8 681873 1703 ns/op 150.29 MB/s -BenchmarkTEAen/256-8 703291 1712 ns/op 149.57 MB/s -BenchmarkTEAen/256-8 708225 1708 ns/op 149.92 MB/s -BenchmarkTEAen/256-8 703201 1700 ns/op 150.63 MB/s -BenchmarkTEAen/256-8 702416 1719 ns/op 148.94 MB/s -BenchmarkTEAen/256-8 698091 1701 ns/op 150.48 MB/s -BenchmarkTEAen/4K-8 47703 25038 ns/op 163.59 MB/s -BenchmarkTEAen/4K-8 47608 25151 ns/op 162.86 MB/s -BenchmarkTEAen/4K-8 47996 25159 ns/op 162.81 MB/s -BenchmarkTEAen/4K-8 47966 24959 ns/op 164.11 MB/s -BenchmarkTEAen/4K-8 48046 24979 ns/op 163.98 MB/s -BenchmarkTEAen/4K-8 48106 24975 ns/op 164.01 MB/s -BenchmarkTEAen/4K-8 48369 24955 ns/op 164.13 MB/s -BenchmarkTEAen/4K-8 47421 24988 ns/op 163.92 MB/s -BenchmarkTEAen/4K-8 47799 25008 ns/op 163.79 MB/s -BenchmarkTEAen/4K-8 47964 24847 ns/op 164.85 MB/s -BenchmarkTEAen/32K-8 5937 201785 ns/op 162.39 MB/s -BenchmarkTEAen/32K-8 5846 202508 ns/op 161.81 MB/s -BenchmarkTEAen/32K-8 6010 202115 ns/op 162.13 MB/s -BenchmarkTEAen/32K-8 5929 202583 ns/op 161.75 MB/s -BenchmarkTEAen/32K-8 5823 202915 ns/op 161.49 MB/s -BenchmarkTEAen/32K-8 5829 202080 ns/op 162.15 MB/s -BenchmarkTEAen/32K-8 5851 203264 ns/op 161.21 MB/s -BenchmarkTEAen/32K-8 5935 202659 ns/op 161.69 MB/s -BenchmarkTEAen/32K-8 5924 202492 ns/op 161.82 MB/s -BenchmarkTEAen/32K-8 5949 202735 ns/op 161.63 MB/s -BenchmarkTEAde/16-8 5762792 207.8 ns/op 154.00 MB/s -BenchmarkTEAde/16-8 5705499 208.3 ns/op 153.60 MB/s -BenchmarkTEAde/16-8 5744668 213.5 ns/op 149.86 MB/s -BenchmarkTEAde/16-8 5727178 210.7 ns/op 151.87 MB/s -BenchmarkTEAde/16-8 5783122 207.6 ns/op 154.14 MB/s -BenchmarkTEAde/16-8 5754253 209.6 ns/op 152.69 MB/s -BenchmarkTEAde/16-8 5773371 208.4 ns/op 153.55 MB/s -BenchmarkTEAde/16-8 5498738 209.1 ns/op 153.03 MB/s -BenchmarkTEAde/16-8 5784189 207.7 ns/op 154.08 MB/s -BenchmarkTEAde/16-8 5727909 206.6 ns/op 154.87 MB/s -BenchmarkTEAde/256-8 739407 1647 ns/op 165.12 MB/s -BenchmarkTEAde/256-8 737415 1650 ns/op 164.85 MB/s -BenchmarkTEAde/256-8 708351 1650 ns/op 164.81 MB/s -BenchmarkTEAde/256-8 719713 1641 ns/op 165.71 MB/s -BenchmarkTEAde/256-8 727462 1641 ns/op 165.79 MB/s -BenchmarkTEAde/256-8 699610 1669 ns/op 162.99 MB/s -BenchmarkTEAde/256-8 739270 1656 ns/op 164.23 MB/s -BenchmarkTEAde/256-8 721683 1646 ns/op 165.26 MB/s -BenchmarkTEAde/256-8 728337 1644 ns/op 165.43 MB/s -BenchmarkTEAde/256-8 727545 1647 ns/op 165.17 MB/s -BenchmarkTEAde/4K-8 49003 24734 ns/op 166.25 MB/s -BenchmarkTEAde/4K-8 48774 24626 ns/op 166.98 MB/s -BenchmarkTEAde/4K-8 46922 25193 ns/op 163.22 MB/s -BenchmarkTEAde/4K-8 49129 24547 ns/op 167.51 MB/s -BenchmarkTEAde/4K-8 48273 24796 ns/op 165.83 MB/s -BenchmarkTEAde/4K-8 47547 24607 ns/op 167.11 MB/s -BenchmarkTEAde/4K-8 48889 24526 ns/op 167.66 MB/s -BenchmarkTEAde/4K-8 48098 24599 ns/op 167.16 MB/s -BenchmarkTEAde/4K-8 49819 24612 ns/op 167.07 MB/s -BenchmarkTEAde/4K-8 46545 24814 ns/op 165.71 MB/s -BenchmarkTEAde/32K-8 5905 198687 ns/op 165.00 MB/s -BenchmarkTEAde/32K-8 6044 199398 ns/op 164.41 MB/s -BenchmarkTEAde/32K-8 6014 199216 ns/op 164.57 MB/s -BenchmarkTEAde/32K-8 6020 200146 ns/op 163.80 MB/s -BenchmarkTEAde/32K-8 6036 199664 ns/op 164.20 MB/s -BenchmarkTEAde/32K-8 6211 199332 ns/op 164.47 MB/s -BenchmarkTEAde/32K-8 6064 199054 ns/op 164.70 MB/s -BenchmarkTEAde/32K-8 5908 199390 ns/op 164.42 MB/s -BenchmarkTEAde/32K-8 5973 204028 ns/op 160.68 MB/s -BenchmarkTEAde/32K-8 6049 200894 ns/op 163.19 MB/s -PASS -ok github.com/fumiama/gofastTEA 108.676s diff --git a/tea_1.16.go b/tea_1.16.go index 2c7cf56..db2da67 100644 --- a/tea_1.16.go +++ b/tea_1.16.go @@ -1,5 +1,5 @@ -//go:build (!go1.17 && amd64) || !amd64 -// +build !go1.17,amd64 !amd64 +//go:build !go1.17 +// +build !go1.17 package tea @@ -8,6 +8,7 @@ import "encoding/binary" // Encrypt tea 加密 // http://bbs.chinaunix.net/thread-583468-1-1.html // 感谢xichen大佬对TEA的解释 +// //go:nosplit func (t TEA) EncryptLittleEndian(src []byte, sumtable [0x10]uint32) (dst []byte) { lens := len(src) @@ -113,7 +114,11 @@ func (t TEA) Decrypt(data []byte) []byte { holder = iv1 } - return dst[dst[0]&7+3 : len(data)-7] + a, b := int(dst[0]&7+3), len(data)-7 + if a >= b { + return nil + } + return dst[a:b] } //go:nosplit @@ -165,7 +170,11 @@ func (t TEA) DecryptTo(data []byte, dst []byte) (from, to int) { holder = iv1 } - return int(dst[0]&7 + 3), len(data) - 7 + from, to = int(dst[0]&7+3), len(data)-7 + if from >= to { + return -1, -1 + } + return } //go:nosplit @@ -190,7 +199,11 @@ func (t TEA) DecryptLittleEndian(data []byte, sumtable [0x10]uint32) []byte { holder = iv1 } - return dst[dst[0]&7+3 : len(data)-7] + a, b := int(dst[0]&7+3), len(data)-7 + if a >= b { + return nil + } + return dst[a:b] } //go:nosplit @@ -214,5 +227,9 @@ func (t TEA) DecryptLittleEndianTo(data []byte, sumtable [0x10]uint32, dst []byt holder = iv1 } - return int(dst[0]&7 + 3), len(data) - 7 + from, to = int(dst[0]&7+3), len(data)-7 + if from >= to { + return -1, -1 + } + return } diff --git a/tea_1.16_amd64.go b/tea_1.16_amd64.go deleted file mode 100644 index 1010468..0000000 --- a/tea_1.16_amd64.go +++ /dev/null @@ -1,53 +0,0 @@ -//go:build !go1.17 && amd64 -// +build !go1.17,amd64 - -package tea - -import ( - "encoding/binary" - "unsafe" -) - -// implemented in tea_$GOARCH.s -func encrypt(dstlen uintptr, tlen uintptr) -func decrypt(datalen uintptr, dstlen uintptr, t *TEA) - -//go:nosplit -func (t TEA) Encrypt(src []byte) (dst []byte) { - lens := len(src) - fill := 10 - (lens+1)&7 - dst = make([]byte, fill+lens+7) - binary.LittleEndian.PutUint32(dst, randuint32()) - binary.LittleEndian.PutUint32(dst[4:], randuint32()) - binary.LittleEndian.PutUint32(dst[8:], randuint32()) - dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 - copy(dst[fill:], src) - encrypt(uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&dst)))|uintptr(len(dst)<<40), uintptr(unsafe.Pointer(&t))|(uintptr(len(dst)<<16)&0xffffff00_00000000)) - return dst -} - -//go:nosplit -func (t TEA) EncryptTo(src []byte, dst []byte) int { - lens := len(src) - fill := 10 - (lens+1)&7 - binary.LittleEndian.PutUint32(dst, randuint32()) - binary.LittleEndian.PutUint32(dst[4:], randuint32()) - binary.LittleEndian.PutUint32(dst[8:], randuint32()) - dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 - copy(dst[fill:], src) - dstlen := fill + lens + 7 - encrypt(uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&dst)))|uintptr(dstlen<<40), uintptr(unsafe.Pointer(&t))|(uintptr(dstlen<<16)&0xffffff00_00000000)) - return dstlen -} - -/* -//go:nosplit -func (t TEA) Decrypt(data []byte) []byte { - if len(data) < 16 || len(data)&7 != 0 { - return nil - } - dst := make([]byte, len(data)) - decrypt(uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&data)))|uintptr(len(data)<<40), uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&dst)))|(uintptr(len(data)<<16)&0xffffff00_00000000), &t) - return dst[dst[0]&7+3 : len(dst)-7] -} -*/ diff --git a/tea_1.16_amd64.s b/tea_1.16_amd64.s deleted file mode 100644 index f0958a1..0000000 --- a/tea_1.16_amd64.s +++ /dev/null @@ -1,768 +0,0 @@ -//go:build !go1.17 && amd64 -// +build !go1.17,amd64 - -#include "textflag.h" - -// func encrypt(dstlen uintptr, t uintptr) -TEXT ·encrypt(SB), NOSPLIT, $0-16 - MOVQ ·dstlen+0(FP), AX // go:<1.17 dst - MOVQ ·teaptr+8(FP), DI // go:<1.17 t - MOVQ AX, BX // len(dst) low 24 bits - MOVQ DI, R8 // len(dst) middle 24 bits - SHRQ $40, BX // unpack len - SHLQ $24, AX - SHRQ $24, AX - SHLQ $24, DI - SHRQ $24, DI - MOVQ (DI), DX // t0 - MOVQ 4(DI), R12 // t1 - MOVQ 8(DI), R10 // t2 - MOVQ 12(DI), SI // t3 - SHRQ $40, R8 - SHLQ $24, R8 - ORQ R8, BX // len(dst) has 48 bits - ADDQ BX, AX // dst += len(dst) - NOTQ BX // i = -i - 1 - INCQ BX // i++ - // XORQ R11, R11 // holder - XORQ R13, R13 // iv1 - XORQ DI, DI // iv2 -enclop: - MOVQ (AX)(BX*1), R11 // holder = Uint64(dst[i:]) - BSWAPQ R11 // holder = BE(block) - XORQ R13, R11 // holder ^= iv1 - MOVQ R11, R13 // iv1 = holder - // Use Register CX(v1), DX(t0), SI(t3), R8(tmp), R10(t2), R12(t1), R13(v0/ret) - ////////////////iv1 = encrypt(iv1)//////////////// - MOVQ R11, CX // v1 - SHRQ $32, R13 // v0 - - LEAQ -1640531527(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ -1640531527(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ 1013904242(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ 1013904242(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ -626627285(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ -626627285(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ 2027808484(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ 2027808484(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ 387276957(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ 387276957(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ -1253254570(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ -1253254570(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ 1401181199(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ 1401181199(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ -239350328(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ -239350328(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ -1879881855(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ -1879881855(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ 774553914(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ 774553914(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ -865977613(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ -865977613(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ 1788458156(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, R13 // v0 += R8 - LEAQ 1788458156(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ 147926629(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ 147926629(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ -1492604898(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ -1492604898(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ 1161830871(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ 1161830871(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - LEAQ -478700656(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRL $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - ADDQ R8, R13 // v0 += R8 - LEAQ -478700656(R13), R8 // R8 = v0 + 0x... - MOVQ R13, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R13, R9 // R9 = v0 - SHRL $5, R9 // R9 >>= 5 - ADDQ SI, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - ADDL R8, CX // v0 += R8 - - SHLQ $32, R13 // v0 <<= 32 - ORQ CX, R13 // v0 |= v1 - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - XORQ DI, R13 // iv1 ^= iv2 - MOVQ R11, DI // iv2 = holder - MOVQ R13, R11 // holder = iv1 - BSWAPQ R11 // holder = BE(holder) - MOVQ R11, (AX)(BX*1) // PutUint64(dst[i:], holder) - ADDQ $8, BX // i += 8 - JNZ enclop - RET - -// func decrypt(datalen uintptr, dst uintptr, t *TEA) -TEXT ·decrypt(SB), NOSPLIT, $0-24 - MOVQ ·data+0(FP), AX // go:<1.17 data - MOVQ ·dst+8(FP), DI // go:<1.17 dst - MOVQ ·teaptr+16(FP), SI // go:<1.17 t - MOVQ AX, BX // len(data) low 24 bits - MOVQ DI, R8 // dst middle 24 bits - SHRQ $40, BX // unpack len - SHLQ $24, AX - SHRQ $24, AX - SHLQ $24, DI - SHRQ $24, DI - MOVQ (SI), DX // t0 - MOVQ 4(SI), R12 // t1 - MOVQ 8(SI), R10 // t2 - MOVQ 12(SI), R13 // t3 - SHRQ $40, R8 - SHLQ $24, R8 - ORQ R8, BX // len(data) has 48 bits - ADDQ BX, AX // data += len(data) - ADDQ BX, DI // dst += len(data) - NOTQ BX // i = -len - 1 - INCQ BX // i++ - XORQ SI, SI // iv1 - XORQ R15, R15 // iv2 - XORQ R11, R11 // holder -declop: - MOVQ (AX)(BX*1), SI // iv1 = Uint64(data[i:]) - BSWAPQ SI // iv1 = BE(block) - XORQ SI, R15 // iv2 ^= iv1 - // Use Register R15(v0/ret), R12(t1), CX(v1), DX(t0), R13(t3), R8, R9, R10(t2) - ///////////////iv2 = decrypt(iv2)/////////////// - MOVQ R15, CX // v1 - SHRQ $32, R15 // v0 - - LEAQ -478700656(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ -478700656(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ 1161830871(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ 1161830871(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ -1492604898(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ -1492604898(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ 147926629(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ 147926629(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ 1788458156(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ 1788458156(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ -865977613(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ -865977613(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ 774553914(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ 774553914(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ -1879881855(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ -1879881855(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ -239350328(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ -239350328(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ 1401181199(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ 1401181199(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ -1253254570(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ -1253254570(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ 387276957(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ 387276957(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ 2027808484(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ 2027808484(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ -626627285(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ -626627285(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ 1013904242(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ 1013904242(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - LEAQ -1640531527(R15), R8 // R8 = v0 + 0x... - MOVQ R15, R9 // R9 = v0 - SHLQ $4, R9 // R9 <<= 4 - ADDQ R10, R9 // R9 += t2 - XORQ R9, R8 // R8 ^= R9 - MOVQ R15, R9 // R9 = v0 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R13, R9 // R9 += t3 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, CX // v1 -= R8 - LEAQ -1640531527(CX), R8 // R8 = v1 + 0x... - MOVQ CX, R9 // R9 = v1 - SHLQ $4, R9 // R9 <<= 4 - ADDQ DX, R9 // R9 += t0 - XORQ R9, R8 // R8 ^= R9 - MOVQ CX, R9 // R9 = v1 - SHRQ $5, R9 // R9 >>= 5 - ADDQ R12, R9 // R9 += t1 - XORQ R9, R8 // R8 ^= R9 - SUBL R8, R15 // v0 -= R8 - - SHLQ $32, R15 - ORQ CX, R15 - /////////////////////////////////////////////// - XORQ R15, R11 // holder ^= iv2 - BSWAPQ R11 // holder = BE(holder) - MOVQ R11, (DI)(BX*1) // PutUint64(dst[i:], holder) - MOVQ SI, R11 // holder = iv1 - ADDQ $8, BX // i += 8 - JNZ declop - RET diff --git a/tea_1.16_pure.go b/tea_1.16_pure.go deleted file mode 100644 index 4176d25..0000000 --- a/tea_1.16_pure.go +++ /dev/null @@ -1,122 +0,0 @@ -//go:build (!go1.17 && !amd64) || !amd64 -// +build !go1.17,!amd64 !amd64 - -package tea - -import ( - "encoding/binary" -) - -// Encrypt tea 加密 -// http://bbs.chinaunix.net/thread-583468-1-1.html -// 感谢xichen大佬对TEA的解释 -//go:nosplit -func (t TEA) Encrypt(src []byte) (dst []byte) { - lens := len(src) - fill := 10 - (lens+1)&7 - dst = make([]byte, fill+lens+7) - binary.LittleEndian.PutUint32(dst, randuint32()) - binary.LittleEndian.PutUint32(dst[4:], randuint32()) - binary.LittleEndian.PutUint32(dst[8:], randuint32()) - dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 - copy(dst[fill:], src) - - var iv1, iv2, holder uint64 - var v0, v1 uint32 - for i := 0; i < len(dst); i += 8 { - holder = binary.BigEndian.Uint64(dst[i:]) ^ iv1 - v0, v1 = uint32(holder>>32), uint32(holder) - v0 += (v1 + 0x9e3779b9) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x9e3779b9) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x3c6ef372) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x3c6ef372) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x78dde6e4) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x78dde6e4) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x1715609d) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x1715609d) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xb54cda56) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xb54cda56) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x5384540f) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x5384540f) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x8ff34781) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x8ff34781) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xcc623af3) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xcc623af3) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x08d12e65) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x08d12e65) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xa708a81e) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xa708a81e) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x454021d7) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x454021d7) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xe3779b90) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xe3779b90) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - iv1 = (uint64(v0)<<32 | uint64(v1)) ^ iv2 - iv2 = holder - binary.BigEndian.PutUint64(dst[i:], iv1) - } - - return dst -} - -//go:nosplit -func (t TEA) EncryptTo(src []byte, dst []byte) int { - lens := len(src) - fill := 10 - (lens+1)&7 - binary.LittleEndian.PutUint32(dst, randuint32()) - binary.LittleEndian.PutUint32(dst[4:], randuint32()) - binary.LittleEndian.PutUint32(dst[8:], randuint32()) - dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 - copy(dst[fill:], src) - - var iv1, iv2, holder uint64 - var v0, v1 uint32 - for i := 0; i < fill+lens+7; i += 8 { - holder = binary.BigEndian.Uint64(dst[i:]) ^ iv1 - v0, v1 = uint32(holder>>32), uint32(holder) - v0 += (v1 + 0x9e3779b9) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x9e3779b9) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x3c6ef372) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x3c6ef372) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x78dde6e4) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x78dde6e4) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x1715609d) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x1715609d) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xb54cda56) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xb54cda56) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x5384540f) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x5384540f) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x8ff34781) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x8ff34781) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xcc623af3) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xcc623af3) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x08d12e65) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x08d12e65) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xa708a81e) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xa708a81e) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0x454021d7) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0x454021d7) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - v0 += (v1 + 0xe3779b90) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) - v1 += (v0 + 0xe3779b90) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) - iv1 = (uint64(v0)<<32 | uint64(v1)) ^ iv2 - iv2 = holder - binary.BigEndian.PutUint64(dst[i:], iv1) - } - - return fill + lens + 7 -} diff --git a/tea_1.17.go b/tea_1.17.go index 59973c7..6e8e7fd 100644 --- a/tea_1.17.go +++ b/tea_1.17.go @@ -1,5 +1,5 @@ -//go:build go1.17 && amd64 -// +build go1.17,amd64 +//go:build go1.17 +// +build go1.17 package tea @@ -111,7 +111,11 @@ func (t TEA) Decrypt(data []byte) []byte { binary.BigEndian.PutUint64(dst[i:], iv2^holder) holder = iv1 } - return dst[dst[0]&7+3 : len(data)-7] + a, b := int(dst[0]&7+3), len(data)-7 + if a >= b { + return nil + } + return dst[a:b] } func (t TEA) DecryptTo(data []byte, dst []byte) (from, to int) { @@ -126,7 +130,11 @@ func (t TEA) DecryptTo(data []byte, dst []byte) (from, to int) { binary.BigEndian.PutUint64(dst[i:], iv2^holder) holder = iv1 } - return int(dst[0]&7 + 3), len(data) - 7 + from, to = int(dst[0]&7+3), len(data)-7 + if from >= to { + return -1, -1 + } + return } func (t TEA) DecryptLittleEndian(data []byte, sumtable [0x10]uint32) []byte { @@ -142,7 +150,11 @@ func (t TEA) DecryptLittleEndian(data []byte, sumtable [0x10]uint32) []byte { binary.LittleEndian.PutUint64(dst[i:], iv2^holder) holder = iv1 } - return dst[dst[0]&7+3 : len(data)-7] + a, b := int(dst[0]&7+3), len(data)-7 + if a >= b { + return nil + } + return dst[a:b] } func (t TEA) DecryptLittleEndianTo(data []byte, sumtable [0x10]uint32, dst []byte) (from, to int) { @@ -157,7 +169,11 @@ func (t TEA) DecryptLittleEndianTo(data []byte, sumtable [0x10]uint32, dst []byt binary.LittleEndian.PutUint64(dst[i:], iv2^holder) holder = iv1 } - return int(dst[0]&7 + 3), len(data) - 7 + from, to = int(dst[0]&7+3), len(data)-7 + if from >= to { + return -1, -1 + } + return } //go:nosplit @@ -215,6 +231,7 @@ func (t *TEA) encodeTable(n uint64, s [0x10]uint32) uint64 { } // 每次8字节 +// //go:nosplit func (t *TEA) decode(n uint64) uint64 { v0, v1 := uint32(n>>32), uint32(n) @@ -257,6 +274,7 @@ func (t *TEA) decode(n uint64) uint64 { } // 每次8字节 +// //go:nosplit func (t *TEA) decodeTable(n uint64, s [0x10]uint32) uint64 { v0, v1 := uint32(n>>32), uint32(n)