1
0
mirror of https://github.com/fumiama/gofastTEA.git synced 2026-06-17 15:30:34 +08:00
This commit is contained in:
fumiama
2021-11-21 22:15:35 +08:00
parent d94b730a4e
commit 396bb12a7f
13 changed files with 1571 additions and 1 deletions

View File

@@ -1,2 +1,52 @@
# gofastTEA
TEA 编码算法的 SIMD 实现
TEA 编码算法的 PLAN9 汇编优化实现
## 1.17 版本及以上
速度已经达到最优,代码与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)完全相同。
## 1.16 版本及以下
使用 PLAN9 汇编编写`Encrypt`,内联编写`Decrypt`,与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)代码相比,在`1.16`版本下编译后的速度提升如下(原始数据见 new16.txt):
```css
name old time/op new time/op delta
TEAen/16-8 252ns ± 0% 240ns ± 1% -4.50% (p=0.000 n=9+10)
TEAen/256-8 1.77µs ± 1% 1.67µs ± 1% -5.66% (p=0.000 n=9+10)
TEAen/4K-8 25.9µs ± 0% 24.8µs ± 0% -4.00% (p=0.000 n=10+9)
TEAen/32K-8 208µs ± 1% 201µs ± 0% -3.34% (p=0.000 n=10+10)
TEAde/16-8 216ns ± 1% 211ns ± 1% -2.68% (p=0.000 n=10+10)
TEAde/256-8 1.71µs ± 1% 1.66µs ± 1% -2.69% (p=0.000 n=10+10)
TEAde/4K-8 25.4µs ± 1% 24.7µs ± 1% -2.73% (p=0.000 n=10+10)
TEAde/32K-8 206µs ± 0% 200µs ± 0% -2.59% (p=0.000 n=9+10)
name old speed new speed delta
TEAen/16-8 63.5MB/s ± 0% 66.5MB/s ± 1% +4.70% (p=0.000 n=9+10)
TEAen/256-8 145MB/s ± 1% 153MB/s ± 1% +5.98% (p=0.000 n=9+10)
TEAen/4K-8 158MB/s ± 0% 165MB/s ± 0% +4.16% (p=0.000 n=10+9)
TEAen/32K-8 158MB/s ± 1% 163MB/s ± 0% +3.45% (p=0.000 n=10+10)
TEAde/16-8 148MB/s ± 1% 152MB/s ± 1% +2.75% (p=0.000 n=10+10)
TEAde/256-8 160MB/s ± 1% 164MB/s ± 1% +2.77% (p=0.000 n=10+10)
TEAde/4K-8 162MB/s ± 1% 167MB/s ± 1% +2.80% (p=0.000 n=10+10)
TEAde/32K-8 159MB/s ± 0% 164MB/s ± 0% +2.66% (p=0.000 n=9+10)
```
另外升级到`go1.17`后,即与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)代码相同时,在`1.17`版本下编译后的速度提升如下(原始数据见 new17.txt):
```css
name old time/op new time/op delta
TEAen/16-8 252ns ± 0% 241ns ± 1% -4.09% (p=0.000 n=9+10)
TEAen/256-8 1.77µs ± 1% 1.70µs ± 0% -3.85% (p=0.000 n=9+10)
TEAen/4K-8 25.9µs ± 0% 24.9µs ± 1% -3.59% (p=0.000 n=10+10)
TEAen/32K-8 208µs ± 1% 200µs ± 1% -3.78% (p=0.000 n=10+10)
TEAde/16-8 216ns ± 1% 208ns ± 1% -3.80% (p=0.000 n=10+10)
TEAde/256-8 1.71µs ± 1% 1.65µs ± 1% -3.44% (p=0.000 n=10+10)
TEAde/4K-8 25.4µs ± 1% 24.5µs ± 0% -3.40% (p=0.000 n=10+10)
TEAde/32K-8 206µs ± 0% 199µs ± 0% -3.36% (p=0.000 n=9+10)
name old speed new speed delta
TEAen/16-8 63.5MB/s ± 0% 66.3MB/s ± 1% +4.27% (p=0.000 n=9+10)
TEAen/256-8 145MB/s ± 1% 150MB/s ± 0% +4.01% (p=0.000 n=9+10)
TEAen/4K-8 158MB/s ± 0% 164MB/s ± 1% +3.73% (p=0.000 n=10+10)
TEAen/32K-8 158MB/s ± 1% 164MB/s ± 1% +3.93% (p=0.000 n=10+10)
TEAde/16-8 148MB/s ± 1% 154MB/s ± 1% +3.95% (p=0.000 n=10+10)
TEAde/256-8 160MB/s ± 1% 165MB/s ± 1% +3.55% (p=0.000 n=10+10)
TEAde/4K-8 162MB/s ± 1% 168MB/s ± 0% +3.52% (p=0.000 n=10+10)
TEAde/32K-8 159MB/s ± 0% 165MB/s ± 0% +3.45% (p=0.000 n=9+9)
```
可见在编码时,在某些时候`go1.16`版本下速度比`go1.17`版本更快,而整体来看,在优化后`go1.16`版本下的执行效率已经可以与`go1.17`版本持平。

11
go.mod Normal file
View File

@@ -0,0 +1,11 @@
module github.com/fumiama/gofastTEA
go 1.17
require github.com/Mrs4s/MiraiGo v0.0.0-20211120033824-43b23f4e6fcb
require (
github.com/pkg/errors v0.9.1 // indirect
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f // indirect
golang.org/x/sys v0.0.0-20210423082822-04245dca01da // indirect
)

26
go.sum Normal file
View File

@@ -0,0 +1,26 @@
github.com/Mrs4s/MiraiGo v0.0.0-20211120033824-43b23f4e6fcb h1:Rkj28fqIwGx/EgBzRYtpmJRfH6wqVn7cNdc7aJ0QE4M=
github.com/Mrs4s/MiraiGo v0.0.0-20211120033824-43b23f4e6fcb/go.mod h1:imVKbfKqqeit+C/eaWGb4MKQ3z3gN6pRpBU5RMtp5so=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/tidwall/gjson v1.11.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f h1:OfiFi4JbukWwe3lzw+xunroH1mnC1e2Gy5cxNJApiSY=
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=

86
new16.txt Normal file
View File

@@ -0,0 +1,86 @@
goos: darwin
goarch: amd64
pkg: github.com/fumiama/gofastTEA
cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz
BenchmarkTEAen/16-8 4879810 242.4 ns/op 66.01 MB/s
BenchmarkTEAen/16-8 4984504 241.2 ns/op 66.33 MB/s
BenchmarkTEAen/16-8 5013127 240.0 ns/op 66.65 MB/s
BenchmarkTEAen/16-8 4946341 240.4 ns/op 66.55 MB/s
BenchmarkTEAen/16-8 5010094 239.6 ns/op 66.77 MB/s
BenchmarkTEAen/16-8 4979793 240.7 ns/op 66.46 MB/s
BenchmarkTEAen/16-8 4987821 240.0 ns/op 66.66 MB/s
BenchmarkTEAen/16-8 4990870 240.9 ns/op 66.42 MB/s
BenchmarkTEAen/16-8 4945104 239.6 ns/op 66.79 MB/s
BenchmarkTEAen/16-8 5003238 239.8 ns/op 66.73 MB/s
BenchmarkTEAen/256-8 712588 1680 ns/op 152.37 MB/s
BenchmarkTEAen/256-8 716322 1680 ns/op 152.39 MB/s
BenchmarkTEAen/256-8 717398 1672 ns/op 153.10 MB/s
BenchmarkTEAen/256-8 703051 1669 ns/op 153.34 MB/s
BenchmarkTEAen/256-8 739008 1663 ns/op 153.91 MB/s
BenchmarkTEAen/256-8 720760 1658 ns/op 154.39 MB/s
BenchmarkTEAen/256-8 735790 1675 ns/op 152.80 MB/s
BenchmarkTEAen/256-8 720423 1668 ns/op 153.47 MB/s
BenchmarkTEAen/256-8 721131 1672 ns/op 153.09 MB/s
BenchmarkTEAen/256-8 717520 1669 ns/op 153.37 MB/s
BenchmarkTEAen/4K-8 47542 24859 ns/op 164.77 MB/s
BenchmarkTEAen/4K-8 48340 24879 ns/op 164.63 MB/s
BenchmarkTEAen/4K-8 48124 24807 ns/op 165.11 MB/s
BenchmarkTEAen/4K-8 48874 24841 ns/op 164.89 MB/s
BenchmarkTEAen/4K-8 48474 24819 ns/op 165.03 MB/s
BenchmarkTEAen/4K-8 48309 25202 ns/op 162.52 MB/s
BenchmarkTEAen/4K-8 47352 24930 ns/op 164.30 MB/s
BenchmarkTEAen/4K-8 47407 24760 ns/op 165.43 MB/s
BenchmarkTEAen/4K-8 47619 24841 ns/op 164.89 MB/s
BenchmarkTEAen/4K-8 48085 24779 ns/op 165.30 MB/s
BenchmarkTEAen/32K-8 5924 200510 ns/op 163.42 MB/s
BenchmarkTEAen/32K-8 5964 200683 ns/op 163.28 MB/s
BenchmarkTEAen/32K-8 5904 200746 ns/op 163.23 MB/s
BenchmarkTEAen/32K-8 5907 200587 ns/op 163.36 MB/s
BenchmarkTEAen/32K-8 6031 200538 ns/op 163.40 MB/s
BenchmarkTEAen/32K-8 6249 201163 ns/op 162.89 MB/s
BenchmarkTEAen/32K-8 5974 201777 ns/op 162.40 MB/s
BenchmarkTEAen/32K-8 6063 201619 ns/op 162.52 MB/s
BenchmarkTEAen/32K-8 6001 200751 ns/op 163.23 MB/s
BenchmarkTEAen/32K-8 5971 200775 ns/op 163.21 MB/s
BenchmarkTEAde/16-8 5743614 210.3 ns/op 152.17 MB/s
BenchmarkTEAde/16-8 5685754 210.2 ns/op 152.23 MB/s
BenchmarkTEAde/16-8 5635918 209.7 ns/op 152.56 MB/s
BenchmarkTEAde/16-8 5680320 210.1 ns/op 152.34 MB/s
BenchmarkTEAde/16-8 5719232 212.0 ns/op 150.94 MB/s
BenchmarkTEAde/16-8 5745488 210.4 ns/op 152.09 MB/s
BenchmarkTEAde/16-8 5765454 211.0 ns/op 151.69 MB/s
BenchmarkTEAde/16-8 5644918 210.3 ns/op 152.17 MB/s
BenchmarkTEAde/16-8 5681248 210.1 ns/op 152.33 MB/s
BenchmarkTEAde/16-8 5672364 211.9 ns/op 151.00 MB/s
BenchmarkTEAde/256-8 712252 1663 ns/op 163.58 MB/s
BenchmarkTEAde/256-8 724006 1668 ns/op 163.10 MB/s
BenchmarkTEAde/256-8 734743 1657 ns/op 164.15 MB/s
BenchmarkTEAde/256-8 726320 1659 ns/op 163.97 MB/s
BenchmarkTEAde/256-8 725227 1650 ns/op 164.88 MB/s
BenchmarkTEAde/256-8 726027 1663 ns/op 163.60 MB/s
BenchmarkTEAde/256-8 723242 1652 ns/op 164.61 MB/s
BenchmarkTEAde/256-8 723056 1654 ns/op 164.47 MB/s
BenchmarkTEAde/256-8 719800 1659 ns/op 163.93 MB/s
BenchmarkTEAde/256-8 727068 1667 ns/op 163.20 MB/s
BenchmarkTEAde/4K-8 48092 24702 ns/op 166.46 MB/s
BenchmarkTEAde/4K-8 48133 24749 ns/op 166.15 MB/s
BenchmarkTEAde/4K-8 48650 24597 ns/op 167.18 MB/s
BenchmarkTEAde/4K-8 48375 24642 ns/op 166.87 MB/s
BenchmarkTEAde/4K-8 48902 24570 ns/op 167.36 MB/s
BenchmarkTEAde/4K-8 48852 24599 ns/op 167.16 MB/s
BenchmarkTEAde/4K-8 48820 24868 ns/op 165.35 MB/s
BenchmarkTEAde/4K-8 49100 24729 ns/op 166.28 MB/s
BenchmarkTEAde/4K-8 48906 24708 ns/op 166.43 MB/s
BenchmarkTEAde/4K-8 48818 24583 ns/op 167.27 MB/s
BenchmarkTEAde/32K-8 5941 200449 ns/op 163.55 MB/s
BenchmarkTEAde/32K-8 5901 200616 ns/op 163.42 MB/s
BenchmarkTEAde/32K-8 6033 200566 ns/op 163.46 MB/s
BenchmarkTEAde/32K-8 6034 199527 ns/op 164.31 MB/s
BenchmarkTEAde/32K-8 6068 200952 ns/op 163.14 MB/s
BenchmarkTEAde/32K-8 5953 200107 ns/op 163.83 MB/s
BenchmarkTEAde/32K-8 5966 200340 ns/op 163.64 MB/s
BenchmarkTEAde/32K-8 5971 199451 ns/op 164.37 MB/s
BenchmarkTEAde/32K-8 5972 200547 ns/op 163.47 MB/s
BenchmarkTEAde/32K-8 5932 200476 ns/op 163.53 MB/s
PASS
ok github.com/fumiama/gofastTEA 107.828s

86
new17.txt Normal file
View File

@@ -0,0 +1,86 @@
goos: darwin
goarch: amd64
pkg: github.com/fumiama/gofastTEA
cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz
BenchmarkTEAen/16-8 4937554 243.6 ns/op 65.67 MB/s
BenchmarkTEAen/16-8 4969573 242.3 ns/op 66.04 MB/s
BenchmarkTEAen/16-8 4937174 241.3 ns/op 66.30 MB/s
BenchmarkTEAen/16-8 4994734 240.0 ns/op 66.67 MB/s
BenchmarkTEAen/16-8 4988541 241.9 ns/op 66.16 MB/s
BenchmarkTEAen/16-8 4981683 240.2 ns/op 66.60 MB/s
BenchmarkTEAen/16-8 4967822 242.5 ns/op 65.99 MB/s
BenchmarkTEAen/16-8 5013471 240.8 ns/op 66.43 MB/s
BenchmarkTEAen/16-8 4906420 241.8 ns/op 66.16 MB/s
BenchmarkTEAen/16-8 4963357 240.3 ns/op 66.58 MB/s
BenchmarkTEAen/256-8 704292 1703 ns/op 150.32 MB/s
BenchmarkTEAen/256-8 712244 1710 ns/op 149.72 MB/s
BenchmarkTEAen/256-8 687753 1698 ns/op 150.76 MB/s
BenchmarkTEAen/256-8 698522 1709 ns/op 149.78 MB/s
BenchmarkTEAen/256-8 711114 1708 ns/op 149.92 MB/s
BenchmarkTEAen/256-8 712760 1701 ns/op 150.48 MB/s
BenchmarkTEAen/256-8 705231 1701 ns/op 150.54 MB/s
BenchmarkTEAen/256-8 699147 1699 ns/op 150.72 MB/s
BenchmarkTEAen/256-8 702382 1699 ns/op 150.69 MB/s
BenchmarkTEAen/256-8 707511 1699 ns/op 150.71 MB/s
BenchmarkTEAen/4K-8 47986 24923 ns/op 164.35 MB/s
BenchmarkTEAen/4K-8 47972 24808 ns/op 165.11 MB/s
BenchmarkTEAen/4K-8 47262 24943 ns/op 164.22 MB/s
BenchmarkTEAen/4K-8 47324 24961 ns/op 164.10 MB/s
BenchmarkTEAen/4K-8 48688 25036 ns/op 163.60 MB/s
BenchmarkTEAen/4K-8 47722 24995 ns/op 163.87 MB/s
BenchmarkTEAen/4K-8 48550 24810 ns/op 165.09 MB/s
BenchmarkTEAen/4K-8 47937 25068 ns/op 163.39 MB/s
BenchmarkTEAen/4K-8 48129 24867 ns/op 164.71 MB/s
BenchmarkTEAen/4K-8 47654 24986 ns/op 163.93 MB/s
BenchmarkTEAen/32K-8 5894 200779 ns/op 163.20 MB/s
BenchmarkTEAen/32K-8 5937 201123 ns/op 162.93 MB/s
BenchmarkTEAen/32K-8 5941 199721 ns/op 164.07 MB/s
BenchmarkTEAen/32K-8 5998 199359 ns/op 164.37 MB/s
BenchmarkTEAen/32K-8 5923 199691 ns/op 164.09 MB/s
BenchmarkTEAen/32K-8 5934 199594 ns/op 164.17 MB/s
BenchmarkTEAen/32K-8 5965 200496 ns/op 163.43 MB/s
BenchmarkTEAen/32K-8 5950 199249 ns/op 164.46 MB/s
BenchmarkTEAen/32K-8 5983 200564 ns/op 163.38 MB/s
BenchmarkTEAen/32K-8 5911 199334 ns/op 164.39 MB/s
BenchmarkTEAde/16-8 5737286 207.7 ns/op 154.08 MB/s
BenchmarkTEAde/16-8 5758159 207.1 ns/op 154.48 MB/s
BenchmarkTEAde/16-8 5808830 207.8 ns/op 154.02 MB/s
BenchmarkTEAde/16-8 5745165 207.4 ns/op 154.30 MB/s
BenchmarkTEAde/16-8 5753430 208.1 ns/op 153.79 MB/s
BenchmarkTEAde/16-8 5791928 210.9 ns/op 151.74 MB/s
BenchmarkTEAde/16-8 5664402 209.1 ns/op 153.04 MB/s
BenchmarkTEAde/16-8 5726097 207.7 ns/op 154.03 MB/s
BenchmarkTEAde/16-8 5807385 209.1 ns/op 153.04 MB/s
BenchmarkTEAde/16-8 5702360 206.9 ns/op 154.64 MB/s
BenchmarkTEAde/256-8 721252 1653 ns/op 164.53 MB/s
BenchmarkTEAde/256-8 739063 1643 ns/op 165.52 MB/s
BenchmarkTEAde/256-8 741162 1648 ns/op 165.02 MB/s
BenchmarkTEAde/256-8 735223 1642 ns/op 165.67 MB/s
BenchmarkTEAde/256-8 729406 1649 ns/op 164.91 MB/s
BenchmarkTEAde/256-8 729562 1635 ns/op 166.38 MB/s
BenchmarkTEAde/256-8 745306 1648 ns/op 165.05 MB/s
BenchmarkTEAde/256-8 726823 1650 ns/op 164.85 MB/s
BenchmarkTEAde/256-8 736669 1646 ns/op 165.22 MB/s
BenchmarkTEAde/256-8 714020 1650 ns/op 164.85 MB/s
BenchmarkTEAde/4K-8 48386 24470 ns/op 168.04 MB/s
BenchmarkTEAde/4K-8 49455 24395 ns/op 168.56 MB/s
BenchmarkTEAde/4K-8 48878 24545 ns/op 167.53 MB/s
BenchmarkTEAde/4K-8 48738 24511 ns/op 167.76 MB/s
BenchmarkTEAde/4K-8 49315 24614 ns/op 167.06 MB/s
BenchmarkTEAde/4K-8 47276 24493 ns/op 167.88 MB/s
BenchmarkTEAde/4K-8 49026 24435 ns/op 168.28 MB/s
BenchmarkTEAde/4K-8 48810 24534 ns/op 167.60 MB/s
BenchmarkTEAde/4K-8 48943 24519 ns/op 167.71 MB/s
BenchmarkTEAde/4K-8 48555 24520 ns/op 167.70 MB/s
BenchmarkTEAde/32K-8 6025 198951 ns/op 164.78 MB/s
BenchmarkTEAde/32K-8 6050 198765 ns/op 164.94 MB/s
BenchmarkTEAde/32K-8 6034 198560 ns/op 165.11 MB/s
BenchmarkTEAde/32K-8 5997 198661 ns/op 165.03 MB/s
BenchmarkTEAde/32K-8 5952 199032 ns/op 164.72 MB/s
BenchmarkTEAde/32K-8 6106 198250 ns/op 165.37 MB/s
BenchmarkTEAde/32K-8 6075 198810 ns/op 164.90 MB/s
BenchmarkTEAde/32K-8 6056 198851 ns/op 164.87 MB/s
BenchmarkTEAde/32K-8 6085 198702 ns/op 164.99 MB/s
BenchmarkTEAde/32K-8 6079 198614 ns/op 165.06 MB/s
PASS
ok github.com/fumiama/gofastTEA 106.301s

86
old.txt Normal file
View File

@@ -0,0 +1,86 @@
goos: darwin
goarch: amd64
pkg: github.com/fumiama/gofastTEA
cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz
BenchmarkTEAen/16-8 4685186 255.1 ns/op 62.71 MB/s
BenchmarkTEAen/16-8 4744274 250.6 ns/op 63.85 MB/s
BenchmarkTEAen/16-8 4757278 251.0 ns/op 63.74 MB/s
BenchmarkTEAen/16-8 4728451 251.5 ns/op 63.63 MB/s
BenchmarkTEAen/16-8 4771138 252.3 ns/op 63.41 MB/s
BenchmarkTEAen/16-8 4715673 251.4 ns/op 63.64 MB/s
BenchmarkTEAen/16-8 4722417 252.9 ns/op 63.26 MB/s
BenchmarkTEAen/16-8 4788355 251.4 ns/op 63.65 MB/s
BenchmarkTEAen/16-8 4719381 252.5 ns/op 63.36 MB/s
BenchmarkTEAen/16-8 4748629 252.4 ns/op 63.40 MB/s
BenchmarkTEAen/256-8 678688 1760 ns/op 145.49 MB/s
BenchmarkTEAen/256-8 689025 1759 ns/op 145.54 MB/s
BenchmarkTEAen/256-8 673020 1768 ns/op 144.76 MB/s
BenchmarkTEAen/256-8 682868 1773 ns/op 144.41 MB/s
BenchmarkTEAen/256-8 670245 1792 ns/op 142.85 MB/s
BenchmarkTEAen/256-8 679348 1770 ns/op 144.65 MB/s
BenchmarkTEAen/256-8 679471 1773 ns/op 144.38 MB/s
BenchmarkTEAen/256-8 659677 1771 ns/op 144.57 MB/s
BenchmarkTEAen/256-8 677760 1776 ns/op 144.17 MB/s
BenchmarkTEAen/256-8 670436 1788 ns/op 143.18 MB/s
BenchmarkTEAen/4K-8 45494 25910 ns/op 158.08 MB/s
BenchmarkTEAen/4K-8 45812 25838 ns/op 158.52 MB/s
BenchmarkTEAen/4K-8 46861 25802 ns/op 158.75 MB/s
BenchmarkTEAen/4K-8 46503 25991 ns/op 157.59 MB/s
BenchmarkTEAen/4K-8 46102 25813 ns/op 158.68 MB/s
BenchmarkTEAen/4K-8 46298 25954 ns/op 157.82 MB/s
BenchmarkTEAen/4K-8 46710 25750 ns/op 159.07 MB/s
BenchmarkTEAen/4K-8 46239 25836 ns/op 158.54 MB/s
BenchmarkTEAen/4K-8 46072 25860 ns/op 158.39 MB/s
BenchmarkTEAen/4K-8 46370 25938 ns/op 157.92 MB/s
BenchmarkTEAen/32K-8 5605 208498 ns/op 157.16 MB/s
BenchmarkTEAen/32K-8 5654 208256 ns/op 157.34 MB/s
BenchmarkTEAen/32K-8 5671 207461 ns/op 157.95 MB/s
BenchmarkTEAen/32K-8 5726 208031 ns/op 157.51 MB/s
BenchmarkTEAen/32K-8 5886 208996 ns/op 156.79 MB/s
BenchmarkTEAen/32K-8 5594 207445 ns/op 157.96 MB/s
BenchmarkTEAen/32K-8 5684 207217 ns/op 158.13 MB/s
BenchmarkTEAen/32K-8 5754 207360 ns/op 158.02 MB/s
BenchmarkTEAen/32K-8 5595 207484 ns/op 157.93 MB/s
BenchmarkTEAen/32K-8 5692 207754 ns/op 157.72 MB/s
BenchmarkTEAde/16-8 5531444 217.2 ns/op 147.35 MB/s
BenchmarkTEAde/16-8 5521533 215.2 ns/op 148.72 MB/s
BenchmarkTEAde/16-8 5537046 215.2 ns/op 148.69 MB/s
BenchmarkTEAde/16-8 5607153 217.0 ns/op 147.48 MB/s
BenchmarkTEAde/16-8 5534305 218.8 ns/op 146.24 MB/s
BenchmarkTEAde/16-8 5561917 215.7 ns/op 148.35 MB/s
BenchmarkTEAde/16-8 5535909 216.4 ns/op 147.89 MB/s
BenchmarkTEAde/16-8 5519742 215.5 ns/op 148.48 MB/s
BenchmarkTEAde/16-8 5556531 216.5 ns/op 147.82 MB/s
BenchmarkTEAde/16-8 5589644 216.6 ns/op 147.76 MB/s
BenchmarkTEAde/256-8 700608 1697 ns/op 160.28 MB/s
BenchmarkTEAde/256-8 696637 1703 ns/op 159.73 MB/s
BenchmarkTEAde/256-8 697063 1702 ns/op 159.80 MB/s
BenchmarkTEAde/256-8 709950 1710 ns/op 159.08 MB/s
BenchmarkTEAde/256-8 697386 1719 ns/op 158.28 MB/s
BenchmarkTEAde/256-8 700438 1697 ns/op 160.29 MB/s
BenchmarkTEAde/256-8 701476 1710 ns/op 159.09 MB/s
BenchmarkTEAde/256-8 704905 1709 ns/op 159.20 MB/s
BenchmarkTEAde/256-8 702578 1697 ns/op 160.24 MB/s
BenchmarkTEAde/256-8 696729 1707 ns/op 159.36 MB/s
BenchmarkTEAde/4K-8 46996 25395 ns/op 161.92 MB/s
BenchmarkTEAde/4K-8 47506 25322 ns/op 162.39 MB/s
BenchmarkTEAde/4K-8 46075 25309 ns/op 162.48 MB/s
BenchmarkTEAde/4K-8 47414 25445 ns/op 161.60 MB/s
BenchmarkTEAde/4K-8 47269 25409 ns/op 161.83 MB/s
BenchmarkTEAde/4K-8 47254 25543 ns/op 160.99 MB/s
BenchmarkTEAde/4K-8 47268 25260 ns/op 162.79 MB/s
BenchmarkTEAde/4K-8 47424 25376 ns/op 162.04 MB/s
BenchmarkTEAde/4K-8 46938 25254 ns/op 162.83 MB/s
BenchmarkTEAde/4K-8 47344 25352 ns/op 162.20 MB/s
BenchmarkTEAde/32K-8 5851 205595 ns/op 159.46 MB/s
BenchmarkTEAde/32K-8 5856 205832 ns/op 159.28 MB/s
BenchmarkTEAde/32K-8 5830 205681 ns/op 159.39 MB/s
BenchmarkTEAde/32K-8 5796 205488 ns/op 159.54 MB/s
BenchmarkTEAde/32K-8 5841 205767 ns/op 159.33 MB/s
BenchmarkTEAde/32K-8 5691 205681 ns/op 159.39 MB/s
BenchmarkTEAde/32K-8 5886 205331 ns/op 159.66 MB/s
BenchmarkTEAde/32K-8 5842 205587 ns/op 159.47 MB/s
BenchmarkTEAde/32K-8 5809 205667 ns/op 159.40 MB/s
BenchmarkTEAde/32K-8 5941 206341 ns/op 158.88 MB/s
PASS
ok github.com/fumiama/gofastTEA 108.464s

5
tea.go Normal file
View File

@@ -0,0 +1,5 @@
// Package tea provides the TEA cipher.
// Adapted from https://github.com/Mrs4s/MiraiGo/blob/master/binary/tea.go
package tea
// TEA holds a 128-bit TEA key as four 32-bit words.
type TEA [4]uint32

76
tea_1.16.go Normal file
View File

@@ -0,0 +1,76 @@
//go:build !go1.17 && amd64
// +build !go1.17,amd64
package tea
import (
"encoding/binary"
)
// Decrypt deciphers data with the TEA key t and returns the payload with the
// framing stripped.
//
// data must be at least 16 bytes long and a multiple of 8 bytes; otherwise nil
// is returned. Blocks are read and written as big-endian 64-bit words. Each
// block is XORed with the previous decoded block before the 16 unrolled TEA
// rounds, and with the previous ciphertext block afterwards.
//
// The low three bits of the first decrypted byte, plus 3, give the number of
// leading bytes to drop; the last 7 bytes are always dropped. If that header
// asks for more bytes than the buffer holds (corrupt input or a wrong key),
// nil is returned instead of panicking.
//
// Reference: http://bbs.chinaunix.net/thread-583468-1-1.html
// Thanks to xichen for the explanation of TEA.
//
//go:nosplit
func (t TEA) Decrypt(data []byte) []byte {
	if len(data) < 16 || len(data)&7 != 0 {
		return nil
	}
	dst := make([]byte, len(data))
	var iv1, iv2, holder uint64
	var v0, v1 uint32
	for i := 0; i < len(dst); i += 8 {
		// holder keeps the previous ciphertext block (zero for the first block).
		holder = iv1
		iv1 = binary.BigEndian.Uint64(data[i:])
		iv2 ^= iv1
		v0, v1 = uint32(iv2>>32), uint32(iv2)

		// 16 TEA rounds, unrolled. The constant is the running sum, counting
		// down from 16*0x9e3779b9 to 0x9e3779b9 (mod 2^32).
		v1 -= (v0 + 0xe3779b90) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0xe3779b90) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0x454021d7) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0x454021d7) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0xa708a81e) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0xa708a81e) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0x08d12e65) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0x08d12e65) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0xcc623af3) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0xcc623af3) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0x8ff34781) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0x8ff34781) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0x5384540f) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0x5384540f) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0xb54cda56) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0xb54cda56) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0x1715609d) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0x1715609d) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0x78dde6e4) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0x78dde6e4) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0x3c6ef372) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0x3c6ef372) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
		v1 -= (v0 + 0x9e3779b9) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
		v0 -= (v1 + 0x9e3779b9) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])

		iv2 = uint64(v0)<<32 | uint64(v1)
		binary.BigEndian.PutUint64(dst[i:], iv2^holder)
	}

	// start can be as large as 10 while end is only 9 for a 16-byte input, so
	// the bounds must be checked before slicing.
	start := int(dst[0]&7) + 3
	end := len(data) - 7
	if start > end {
		return nil
	}
	return dst[start:end]
}
// NewTeaCipher builds a TEA key from a 16-byte slice, reading it as four
// consecutive big-endian 32-bit words. A key of any other length yields the
// zero TEA value.
//
//go:nosplit
func NewTeaCipher(key []byte) (t TEA) {
	if len(key) != 16 {
		return
	}
	for word := range t {
		t[word] = binary.BigEndian.Uint32(key[word*4:])
	}
	return
}

768
tea_1.16_amd64.s Normal file
View File

@@ -0,0 +1,768 @@
//go:build !go1.17 && amd64
// +build !go1.17,amd64
#include "textflag.h"
// func encrypt(dstlen uintptr, t uintptr)
//
// encrypt enciphers a buffer in place, one 8-byte block per loop iteration.
//
// Both arguments are packed 64-bit words:
//   dstlen: low 40 bits = address of the buffer,
//           high 24 bits = bits 0..23 of the buffer length in bytes.
//   t:      low 40 bits = address of the four 32-bit key words (t0..t3),
//           bits 40..63 are OR'ed unchanged into the length.
//
// The length is not validated here: the loop index starts at -len, advances
// by 8 and stops when it reaches zero, so the caller must pass a non-zero
// multiple of 8.
//
// Per block: load as big-endian, XOR with the previous ciphertext block (iv1),
// run 16 TEA rounds, XOR with the previous pre-round block (iv2), store as
// big-endian.
//
// Register use: AX = end of buffer, BX = negative index, CX = v1, R13 = v0 /
// result, DX = t0, R12 = t1, R10 = t2, SI = t3, R8/R9 = scratch,
// R11 = holder, DI = iv2 (after the key has been loaded).
//
// Arithmetic is mostly done on 64-bit registers whose upper halves may hold
// garbage; only the low 32 bits are meaningful. Right shifts therefore use
// SHRL (or operate on a value that was just truncated by ADDL).
TEXT ·encrypt(SB), NOSPLIT, $0-16
	MOVQ ·dstlen+0(FP), AX // packed: buffer address | length bits 0..23 << 40
	MOVQ ·teaptr+8(FP), DI // packed: key address | length bits 40..63
	MOVQ AX, BX // extract length bits 0..23
	SHRQ $40, BX // BX = high 24 bits of the first argument
	SHLQ $24, AX // clear the high 24 bits of AX ...
	SHRQ $24, AX // ... leaving the 40-bit buffer address
	MOVQ DI, R8 // extract length bits 40..63
	SHLQ $24, DI // clear the high 24 bits of DI ...
	SHRQ $24, DI // ... leaving the 40-bit key address
	SHRQ $40, R8 // clear the low 40 bits of R8 ...
	SHLQ $40, R8 // ... keeping bits 40..63 in place
	ORQ R8, BX // BX = len
	ADDQ BX, AX // AX = dst + len (end of buffer)
	NOTQ BX // BX = -len - 1
	INCQ BX // BX = -len
	// The key words are 32 bits each, so load them with zero-extending MOVL.
	// A 64-bit load at offset 12 would read 4 bytes past the 16-byte key.
	MOVL (DI), DX // t0
	MOVL 4(DI), R12 // t1
	MOVL 8(DI), R10 // t2
	MOVL 12(DI), SI // t3
	// XORQ R11, R11 // holder
	XORQ R13, R13 // iv1 = 0
	XORQ DI, DI // iv2 = 0
enclop:
	MOVQ (AX)(BX*1), R11 // holder = raw 8 bytes at dst[i:]
	BSWAPQ R11 // holder = big-endian value of the block
	XORQ R13, R11 // holder ^= iv1
	MOVQ R11, R13 // iv1 = holder
	// Registers: CX(v1), DX(t0), SI(t3), R8/R9(tmp), R10(t2), R12(t1), R13(v0/ret)
	//////////////// iv1 = encrypt(iv1) ////////////////
	MOVQ R11, CX // v1 = low 32 bits (upper half still dirty)
	SHRQ $32, R13 // v0 = high 32 bits
	// round 1, sum = 0x9e3779b9
	LEAQ -1640531527(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ -1640531527(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8 (truncates CX to 32 bits)
	// round 2, sum = 0x3c6ef372
	LEAQ 1013904242(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ 1013904242(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 3, sum = 0xdaa66d2b
	LEAQ -626627285(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ -626627285(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 4, sum = 0x78dde6e4
	LEAQ 2027808484(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ 2027808484(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 5, sum = 0x1715609d
	LEAQ 387276957(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ 387276957(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 6, sum = 0xb54cda56
	LEAQ -1253254570(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ -1253254570(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 7, sum = 0x5384540f
	LEAQ 1401181199(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ 1401181199(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 8, sum = 0xf1bbcdc8
	LEAQ -239350328(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ -239350328(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 9, sum = 0x8ff34781
	LEAQ -1879881855(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ -1879881855(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 10, sum = 0x2e2ac13a
	LEAQ 774553914(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ 774553914(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 11, sum = 0xcc623af3
	LEAQ -865977613(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ -865977613(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 12, sum = 0x6a99b4ac
	// This round uses SHRQ and a truncating ADDL for v0. That is equivalent
	// here because CX and R13 are 32-bit clean at the point they are shifted.
	LEAQ 1788458156(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1 (clean after the previous ADDL)
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, R13 // v0 += R8 (truncates R13 to 32 bits)
	LEAQ 1788458156(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0 (clean after the ADDL above)
	SHRQ $5, R9 // R9 >>= 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 13, sum = 0x08d12e65
	LEAQ 147926629(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ 147926629(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 14, sum = 0xa708a81e
	LEAQ -1492604898(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ -1492604898(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 15, sum = 0x454021d7
	LEAQ 1161830871(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ 1161830871(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8
	// round 16, sum = 0xe3779b90
	LEAQ -478700656(CX), R8 // R8 = v1 + sum
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRL $5, R9 // R9 = uint32(v1) >> 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	ADDQ R8, R13 // v0 += R8
	LEAQ -478700656(R13), R8 // R8 = v0 + sum
	MOVQ R13, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R13, R9 // R9 = v0
	SHRL $5, R9 // R9 = uint32(v0) >> 5
	ADDQ SI, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	ADDL R8, CX // v1 += R8 (CX is 32-bit clean for the OR below)
	SHLQ $32, R13 // v0 <<= 32 (shifts out any dirty upper bits)
	ORQ CX, R13 // R13 = v0<<32 | v1
	////////////////////////////////////////////////////
	XORQ DI, R13 // iv1 ^= iv2
	MOVQ R11, DI // iv2 = holder
	MOVQ R13, R11 // holder = iv1
	BSWAPQ R11 // back to big-endian byte order
	MOVQ R11, (AX)(BX*1) // store the block at dst[i:]
	ADDQ $8, BX // i += 8
	JNZ enclop // loop until the negative index reaches zero
	RET
// func decrypt(datalen uintptr, dstlen uintptr, t *TEA)
//
// decrypt walks the input in 8-byte big-endian blocks, runs the 16-round TEA
// decode on each block and writes the de-chained result to the output buffer.
// Per block (names follow the comments in the loop):
//   iv2    = decode(block ^ iv2)
//   out    = iv2 ^ holder        (holder = previous input block, 0 at start)
//   holder = block
//
// Argument packing, as unpacked by the prologue below:
//   datalen  bits 0-39: address of the input,  bits 40-63: bits 0-23 of the length
//   dstlen   bits 0-39: address of the output, bits 40-63: bits 40-63 of the length
//   t        address of the four 32-bit key words t0..t3
// Bits 24-39 of the length are carried by neither argument.
//
// The loop tests its counter only after a block has been processed, so the
// length has to be a non-zero multiple of 8.
//
// Registers: AX/DI = one past the end of input/output, BX = negative byte
// offset counting up to zero, DX/R12/R10/R13 = t0/t1/t2/t3, SI = current input
// block, R11 = holder, R15 = iv2 (and v0 inside a block), CX = v1, R8/R9 = scratch.
TEXT ·decrypt(SB), NOSPLIT, $0-24
	MOVQ datalen+0(FP), AX // packed input pointer + length bits 0-23
	MOVQ dstlen+8(FP), DI // packed output pointer + length bits 40-63
	MOVQ t+16(FP), SI // key pointer
	MOVQ AX, BX // BX = packed first argument
	SHRQ $40, BX // BX = length bits 0-23
	SHLQ $24, AX // clear the top 24 bits of AX ...
	SHRQ $24, AX // ... leaving the 40-bit input address
	MOVQ DI, R8 // R8 = packed second argument
	SHLQ $24, DI // clear the top 24 bits of DI ...
	SHRQ $24, DI // ... leaving the 40-bit output address
	SHRQ $40, R8 // clear the low 40 bits of R8 ...
	SHLQ $40, R8 // ... leaving length bits 40-63 in place
	ORQ R8, BX // BX = length
	ADDQ BX, AX // AX = input + length
	ADDQ BX, DI // DI = output + length
	NOTQ BX // i = -length - 1
	INCQ BX // i = -length
	// The key words are 32 bits wide, so load them with MOVL: a 64-bit load of
	// t3 at offset 12 would read 4 bytes beyond the 16-byte key. The upper
	// halves never influence the result because only SUBL consumes R8.
	MOVL (SI), DX // t0
	MOVL 4(SI), R12 // t1
	MOVL 8(SI), R10 // t2
	MOVL 12(SI), R13 // t3
	XORQ SI, SI // block = 0 (overwritten at the top of the loop)
	XORQ R15, R15 // iv2 = 0
	XORQ R11, R11 // holder = 0
declop:
	MOVQ (AX)(BX*1), SI // block = raw 8 bytes at input[i:]
	BSWAPQ SI // block = big-endian value
	XORQ SI, R15 // iv2 ^= block
	// Uses R15(v0/result), CX(v1), DX(t0), R12(t1), R10(t2), R13(t3), R8, R9
	///////////////iv2 = decode(iv2)///////////////
	MOVQ R15, CX // v1 = low half (upper bits are dropped by the first SUBL)
	SHRQ $32, R15 // v0 = high half
	// sum = 0xe3779b90
	LEAQ -478700656(R15), R8 // R8 = v0 + 0xe3779b90
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ -478700656(CX), R8 // R8 = v1 + 0xe3779b90
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0x454021d7
	LEAQ 1161830871(R15), R8 // R8 = v0 + 0x454021d7
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ 1161830871(CX), R8 // R8 = v1 + 0x454021d7
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0xa708a81e
	LEAQ -1492604898(R15), R8 // R8 = v0 + 0xa708a81e
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ -1492604898(CX), R8 // R8 = v1 + 0xa708a81e
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0x08d12e65
	LEAQ 147926629(R15), R8 // R8 = v0 + 0x08d12e65
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ 147926629(CX), R8 // R8 = v1 + 0x08d12e65
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0x6a99b4ac
	LEAQ 1788458156(R15), R8 // R8 = v0 + 0x6a99b4ac
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ 1788458156(CX), R8 // R8 = v1 + 0x6a99b4ac
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0xcc623af3
	LEAQ -865977613(R15), R8 // R8 = v0 + 0xcc623af3
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ -865977613(CX), R8 // R8 = v1 + 0xcc623af3
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0x2e2ac13a
	LEAQ 774553914(R15), R8 // R8 = v0 + 0x2e2ac13a
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ 774553914(CX), R8 // R8 = v1 + 0x2e2ac13a
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0x8ff34781
	LEAQ -1879881855(R15), R8 // R8 = v0 + 0x8ff34781
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ -1879881855(CX), R8 // R8 = v1 + 0x8ff34781
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0xf1bbcdc8
	LEAQ -239350328(R15), R8 // R8 = v0 + 0xf1bbcdc8
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ -239350328(CX), R8 // R8 = v1 + 0xf1bbcdc8
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0x5384540f
	LEAQ 1401181199(R15), R8 // R8 = v0 + 0x5384540f
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ 1401181199(CX), R8 // R8 = v1 + 0x5384540f
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0xb54cda56
	LEAQ -1253254570(R15), R8 // R8 = v0 + 0xb54cda56
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ -1253254570(CX), R8 // R8 = v1 + 0xb54cda56
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0x1715609d
	LEAQ 387276957(R15), R8 // R8 = v0 + 0x1715609d
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ 387276957(CX), R8 // R8 = v1 + 0x1715609d
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0x78dde6e4
	LEAQ 2027808484(R15), R8 // R8 = v0 + 0x78dde6e4
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ 2027808484(CX), R8 // R8 = v1 + 0x78dde6e4
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0xdaa66d2b
	LEAQ -626627285(R15), R8 // R8 = v0 + 0xdaa66d2b
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ -626627285(CX), R8 // R8 = v1 + 0xdaa66d2b
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0x3c6ef372
	LEAQ 1013904242(R15), R8 // R8 = v0 + 0x3c6ef372
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ 1013904242(CX), R8 // R8 = v1 + 0x3c6ef372
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	// sum = 0x9e3779b9
	LEAQ -1640531527(R15), R8 // R8 = v0 + 0x9e3779b9
	MOVQ R15, R9 // R9 = v0
	SHLQ $4, R9 // R9 <<= 4
	ADDQ R10, R9 // R9 += t2
	XORQ R9, R8 // R8 ^= R9
	MOVQ R15, R9 // R9 = v0
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R13, R9 // R9 += t3
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, CX // v1 -= R8
	LEAQ -1640531527(CX), R8 // R8 = v1 + 0x9e3779b9
	MOVQ CX, R9 // R9 = v1
	SHLQ $4, R9 // R9 <<= 4
	ADDQ DX, R9 // R9 += t0
	XORQ R9, R8 // R8 ^= R9
	MOVQ CX, R9 // R9 = v1
	SHRQ $5, R9 // R9 >>= 5
	ADDQ R12, R9 // R9 += t1
	XORQ R9, R8 // R8 ^= R9
	SUBL R8, R15 // v0 -= R8
	SHLQ $32, R15 // iv2 = v0 << 32
	ORQ CX, R15 // iv2 |= v1
	///////////////////////////////////////////////
	XORQ R15, R11 // holder ^= iv2
	BSWAPQ R11 // holder = BE(holder)
	MOVQ R11, (DI)(BX*1) // PutUint64(dst[i:], holder)
	MOVQ SI, R11 // holder = current input block, for the next iteration
	ADDQ $8, BX // i += 8
	JNZ declop // loop until i reaches 0
	RET

25
tea_1.16_asm.go Normal file
View File

@@ -0,0 +1,25 @@
//go:build !go1.17 && amd64
// +build !go1.17,amd64
package tea
import (
"math/rand"
"unsafe"
)
// implemented in tea_$GOARCH.s
//
// Both routines take pointers and the buffer length packed into uintptr
// values rather than as separate arguments.

// encrypt encrypts a padded buffer in place. As built by Encrypt in this
// file, dstlen is the address of the buffer's first byte OR-ed with the
// buffer length shifted left by 40, and tlen is the address of the key OR-ed
// with bits 40-63 of the buffer length.
func encrypt(dstlen uintptr, tlen uintptr)

// decrypt decrypts from one buffer into another. The assembly reads the low
// 40 bits of datalen and dstlen as the input and output addresses, the top 24
// bits of datalen as bits 0-23 of the length, and the top 24 bits of dstlen as
// bits 40-63 of the length; t is used as a plain pointer to the key.
func decrypt(datalen uintptr, dstlen uintptr, t *TEA)
// Encrypt returns src encrypted with t.
//
// The plaintext is framed before encryption: a header byte 0xF8|(pad-3),
// pad-1 pseudo-random bytes, src itself and seven zero bytes, where pad is
// chosen so that the total is a multiple of 8. The framed buffer is then
// encrypted in place by the assembly routine encrypt.
//
// Only bits 0-23 and 40-63 of the buffer length are packed into the arguments
// handed to encrypt.
//
//go:nosplit
func (t TEA) Encrypt(src []byte) (dst []byte) {
	n := len(src)
	pad := 10 - (n+1)&7
	dst = make([]byte, pad+n+7)
	// math/rand's Read always returns a nil error, so it is safe to ignore.
	_, _ = rand.Read(dst[:pad])
	dst[0] = byte(pad-3) | 0xF8 // header byte records the padding length
	copy(dst[pad:], src)
	// The uintptr conversions stay inside the call expression on purpose.
	encrypt(uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&dst)))|uintptr(len(dst)<<40), uintptr(unsafe.Pointer(&t))|(uintptr(len(dst))&0xffffff00_00000000))
	return dst
}

67
tea_1.16_pure.go Normal file
View File

@@ -0,0 +1,67 @@
//go:build !go1.17 && !amd64
// +build !go1.17,!amd64
package tea
import (
"encoding/binary"
"math/rand"
"unsafe"
)
// Encrypt returns src encrypted with TEA.
// http://bbs.chinaunix.net/thread-583468-1-1.html
// Thanks to xichen for the explanation of TEA.
//
// The plaintext is framed as a header byte 0xF8|(pad-3), pad-1 pseudo-random
// bytes, src and seven zero bytes (a multiple of 8 bytes in total). Each
// 8-byte big-endian block is XORed with the previous ciphertext block, run
// through the 16 unrolled TEA rounds, and XORed with the previous pre-round
// value before being written back in place.
//
//go:nosplit
func (t TEA) Encrypt(src []byte) (dst []byte) {
	n := len(src)
	pad := 10 - (n+1)&7
	dst = make([]byte, pad+n+7)
	// math/rand's Read always returns a nil error, so it is safe to ignore.
	_, _ = rand.Read(dst[:pad])
	dst[0] = byte(pad-3) | 0xF8 // header byte records the padding length
	copy(dst[pad:], src)
	var prevCipher, prevMixed uint64
	for off := 0; off < len(dst); off += 8 {
		mixed := binary.BigEndian.Uint64(dst[off:]) ^ prevCipher
		hi, lo := uint32(mixed>>32), uint32(mixed)
		hi += (lo + 0x9e3779b9) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0x9e3779b9) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0x3c6ef372) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0x3c6ef372) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0xdaa66d2b) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0xdaa66d2b) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0x78dde6e4) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0x78dde6e4) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0x1715609d) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0x1715609d) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0xb54cda56) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0xb54cda56) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0x5384540f) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0x5384540f) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0xf1bbcdc8) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0xf1bbcdc8) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0x8ff34781) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0x8ff34781) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0x2e2ac13a) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0x2e2ac13a) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0xcc623af3) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0xcc623af3) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0x6a99b4ac) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0x6a99b4ac) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0x08d12e65) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0x08d12e65) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0xa708a81e) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0xa708a81e) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0x454021d7) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0x454021d7) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		hi += (lo + 0xe3779b90) ^ (lo<<4 + t[0]) ^ (lo>>5 + t[1])
		lo += (hi + 0xe3779b90) ^ (hi<<4 + t[2]) ^ (hi>>5 + t[3])
		prevCipher = (uint64(hi)<<32 | uint64(lo)) ^ prevMixed
		prevMixed = mixed
		binary.BigEndian.PutUint64(dst[off:], prevCipher)
	}
	return dst
}

145
tea_1.17.go Normal file
View File

@@ -0,0 +1,145 @@
//go:build go1.17
// +build go1.17
package tea
import (
"encoding/binary"
"math/rand"
)
// Encrypt returns src encrypted with TEA.
// http://bbs.chinaunix.net/thread-583468-1-1.html
// Thanks to xichen for the explanation of TEA.
//
// The plaintext is framed as a header byte 0xF8|(pad-3), pad-1 pseudo-random
// bytes, src and seven zero bytes (a multiple of 8 bytes in total). Each
// 8-byte big-endian block is XORed with the previous ciphertext block, passed
// through encode, and XORed with the previous pre-encode value before being
// written back in place.
func (t TEA) Encrypt(src []byte) (dst []byte) {
	n := len(src)
	pad := 10 - (n+1)%8
	dst = make([]byte, pad+n+7)
	// math/rand's Read always returns a nil error, so it is safe to ignore.
	_, _ = rand.Read(dst[:pad])
	dst[0] = byte(pad-3) | 0xF8 // header byte records the padding length
	copy(dst[pad:], src)
	var prevCipher, prevMixed uint64
	for off := 0; off < len(dst); off += 8 {
		mixed := binary.BigEndian.Uint64(dst[off:]) ^ prevCipher
		prevCipher = t.encode(mixed) ^ prevMixed
		prevMixed = mixed
		binary.BigEndian.PutUint64(dst[off:], prevCipher)
	}
	return dst
}
// Decrypt reverses Encrypt and returns the recovered payload.
//
// It returns nil when data is shorter than 16 bytes, when its length is not a
// multiple of 8, or when the decoded header byte announces more padding than
// the message can hold (which happens with corrupt data or the wrong key).
// The trailing seven bytes are dropped without being checked, so a non-nil
// result is not proof that data was authentic.
func (t TEA) Decrypt(data []byte) []byte {
	if len(data) < 16 || len(data)%8 != 0 {
		return nil
	}
	dst := make([]byte, len(data))
	// prevCipher is the previous ciphertext block; prevDecoded is the previous
	// output of decode. Both start at zero, mirroring Encrypt.
	var prevCipher, prevDecoded uint64
	for i := 0; i < len(dst); i += 8 {
		block := binary.BigEndian.Uint64(data[i:])
		prevDecoded = t.decode(block ^ prevDecoded)
		binary.BigEndian.PutUint64(dst[i:], prevDecoded^prevCipher)
		prevCipher = block
	}
	// The low three bits of the first byte hold padLen-3; the payload sits
	// between that padding and the seven trailing bytes.
	start, end := int(dst[0]&7)+3, len(dst)-7
	if start > end {
		// Only reachable for a 16-byte input whose header claims 10 bytes of
		// padding; slicing would panic, so report the input as invalid.
		return nil
	}
	return dst[start:end]
}
// encode encrypts one 64-bit block with the 16 unrolled rounds of TEA.
// The upper 32 bits of n are the first half-block and the lower 32 bits the
// second; the result is packed the same way. The constant in round k is
// k*0x9e3779b9 truncated to 32 bits.
//
//go:nosplit
func (t *TEA) encode(n uint64) uint64 {
	hi, lo := uint32(n>>32), uint32(n)
	k0, k1, k2, k3 := t[0], t[1], t[2], t[3]
	hi += (lo + 0x9e3779b9) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0x9e3779b9) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0x3c6ef372) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0x3c6ef372) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0xdaa66d2b) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0xdaa66d2b) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0x78dde6e4) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0x78dde6e4) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0x1715609d) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0x1715609d) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0xb54cda56) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0xb54cda56) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0x5384540f) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0x5384540f) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0xf1bbcdc8) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0xf1bbcdc8) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0x8ff34781) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0x8ff34781) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0x2e2ac13a) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0x2e2ac13a) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0xcc623af3) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0xcc623af3) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0x6a99b4ac) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0x6a99b4ac) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0x08d12e65) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0x08d12e65) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0xa708a81e) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0xa708a81e) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0x454021d7) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0x454021d7) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi += (lo + 0xe3779b90) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo += (hi + 0xe3779b90) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	return uint64(hi)<<32 | uint64(lo)
}
// decode decrypts one block; it handles 8 bytes per call.
// It undoes encode by running the same 16 rounds in reverse order, starting
// from the constant 0xe3779b90 and ending with 0x9e3779b9. The upper 32 bits
// of n are the first half-block and the lower 32 bits the second; the result
// is packed the same way.
//
//go:nosplit
func (t *TEA) decode(n uint64) uint64 {
	hi, lo := uint32(n>>32), uint32(n)
	k0, k1, k2, k3 := t[0], t[1], t[2], t[3]
	lo -= (hi + 0xe3779b90) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0xe3779b90) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0x454021d7) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0x454021d7) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0xa708a81e) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0xa708a81e) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0x08d12e65) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0x08d12e65) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0x6a99b4ac) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0x6a99b4ac) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0xcc623af3) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0xcc623af3) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0x2e2ac13a) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0x2e2ac13a) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0x8ff34781) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0x8ff34781) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0xf1bbcdc8) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0xf1bbcdc8) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0x5384540f) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0x5384540f) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0xb54cda56) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0xb54cda56) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0x1715609d) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0x1715609d) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0x78dde6e4) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0x78dde6e4) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0xdaa66d2b) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0xdaa66d2b) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0x3c6ef372) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0x3c6ef372) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	lo -= (hi + 0x9e3779b9) ^ (hi<<4 + k2) ^ (hi>>5 + k3)
	hi -= (lo + 0x9e3779b9) ^ (lo<<4 + k0) ^ (lo>>5 + k1)
	return uint64(hi)<<32 | uint64(lo)
}
// NewTeaCipher builds a TEA key schedule from a 16-byte key by reading it as
// four big-endian 32-bit words. Any other key length yields the zero TEA
// value rather than an error.
//
//go:nosplit
func NewTeaCipher(key []byte) (t TEA) {
	if len(key) == 16 {
		for word := 0; word < 4; word++ {
			t[word] = binary.BigEndian.Uint32(key[word*4:])
		}
	}
	return t
}

139
tea_test.go Normal file
View File

@@ -0,0 +1,139 @@
package tea
import (
"bytes"
"crypto/rand"
"encoding/hex"
"testing"
"github.com/Mrs4s/MiraiGo/utils"
)
// testTEA is the fixed-key cipher used by the benchmark helpers in this file.
var testTEA = NewTeaCipher([]byte("0123456789ABCDEF"))
// Column indexes into each row of sampleData.
const (
KEY = iota // key passed to NewTeaCipher
DAT // expected plaintext
ENC // ciphertext (hex in the literal, raw bytes after initialisation)
)
// sampleData holds known-answer vectors as {key, plaintext, ciphertext} rows,
// indexed by KEY, DAT and ENC. The ciphertext column is written as hex in the
// literal and converted to raw bytes once, when the package is initialised.
var sampleData = func() [][3]string {
	out := [][3]string{
		{"0123456789ABCDEF", "MiraiGO Here", "b7b2e52af7f5b1fbf37fc3d5546ac7569aecd01bbacf09bf"},
		{"0123456789ABCDEF", "LXY Testing~", "9d0ab85aa14f5434ee83cd2a6b28bf306263cdf88e01264c"},
		{"0123456789ABCDEF", "s", "528e8b5c48300b548e94262736ebb8b7"},
		{"0123456789ABCDEF", "long long long long long long long", "95715fab6efbd0fd4b76dbc80bd633ebe805849dbc242053b06557f87e748effd9f613f782749fb9fdfa3f45c0c26161"},
		{"LXY1226 Mrs4s", "LXY Testing~", "ab20caa63f3a6503a84f3cb28f9e26b6c18c051e995d1721"},
	}
	for i := range out {
		raw, err := hex.DecodeString(out[i][ENC])
		if err != nil {
			// hex.DecodeString returns the bytes decoded before the error, so
			// ignoring it would leave a silently truncated vector behind.
			panic("sampleData: invalid hex ciphertext: " + err.Error())
		}
		out[i][ENC] = string(raw)
	}
	return out
}()
// TestTEA checks Decrypt against the known-answer vectors in sampleData,
// round-trips each sample through Encrypt and Decrypt, and then round-trips
// random payloads of 1 to 254 bytes under random keys.
func TestTEA(t *testing.T) {
	// Known-answer and self tests.
	for _, sample := range sampleData {
		tea := NewTeaCipher(utils.S2B(sample[KEY]))
		dat := utils.B2S(tea.Decrypt(utils.S2B(sample[ENC])))
		if dat != sample[DAT] {
			t.Fatalf("error decrypt %v %x", sample, dat)
		}
		enc := utils.B2S(tea.Encrypt(utils.S2B(sample[DAT])))
		dat = utils.B2S(tea.Decrypt(utils.S2B(enc)))
		if dat != sample[DAT] {
			t.Fatal("error self test", sample)
		}
	}

	// Random data testing. The key buffer is refilled on every iteration, so
	// no separate initial fill is needed.
	key := make([]byte, 16)
	for i := 1; i < 0xFF; i++ {
		if _, err := rand.Read(key); err != nil {
			t.Fatal(err)
		}
		tea := NewTeaCipher(key)
		dat := make([]byte, i)
		if _, err := rand.Read(dat); err != nil {
			t.Fatal(err)
		}
		enc := tea.Encrypt(dat)
		dec := tea.Decrypt(enc)
		if !bytes.Equal(dat, dec) {
			t.Fatalf("error in %d, %x %x %x", i, key, dat, enc)
		}
	}
}
// benchEncrypt fills data with random bytes and times testTEA.Encrypt on it,
// reporting throughput in terms of len(data).
func benchEncrypt(b *testing.B, data []byte) {
	if _, err := rand.Read(data); err != nil {
		// Report through the benchmark instead of panicking the whole binary.
		b.Fatal(err)
	}
	b.SetBytes(int64(len(data)))
	b.ResetTimer() // keep the random fill out of the measurement
	for i := 0; i < b.N; i++ {
		testTEA.Encrypt(data)
	}
}
// benchDecrypt fills data with random bytes, encrypts it once with testTEA
// and times testTEA.Decrypt on the resulting ciphertext. Throughput is
// reported in terms of the ciphertext length.
func benchDecrypt(b *testing.B, data []byte) {
	if _, err := rand.Read(data); err != nil {
		// Report through the benchmark instead of panicking the whole binary.
		b.Fatal(err)
	}
	data = testTEA.Encrypt(data)
	b.SetBytes(int64(len(data)))
	b.ResetTimer() // keep the fill and the encryption out of the measurement
	for i := 0; i < b.N; i++ {
		testTEA.Decrypt(data)
	}
}
// BenchmarkTEAen measures Encrypt on payloads of 16 B, 256 B, 4 KiB and
// 32 KiB, one sub-benchmark per size.
func BenchmarkTEAen(b *testing.B) {
	cases := []struct {
		name string
		size int
	}{
		{"16", 16},
		{"256", 256},
		{"4K", 1024 * 4},
		{"32K", 1024 * 32},
	}
	for _, c := range cases {
		size := c.size
		b.Run(c.name, func(b *testing.B) {
			benchEncrypt(b, make([]byte, size))
		})
	}
}
// BenchmarkTEAde measures Decrypt on ciphertexts produced from payloads of
// 16 B, 256 B, 4 KiB and 32 KiB, one sub-benchmark per size.
func BenchmarkTEAde(b *testing.B) {
	cases := []struct {
		name string
		size int
	}{
		{"16", 16},
		{"256", 256},
		{"4K", 4096},
		{"32K", 1024 * 32},
	}
	for _, c := range cases {
		size := c.size
		b.Run(c.name, func(b *testing.B) {
			benchDecrypt(b, make([]byte, size))
		})
	}
}