From 396bb12a7f62995a04408371fb477838f5fabf26 Mon Sep 17 00:00:00 2001 From: fumiama Date: Sun, 21 Nov 2021 22:15:35 +0800 Subject: [PATCH] init --- README.md | 52 +++- go.mod | 11 + go.sum | 26 ++ new16.txt | 86 ++++++ new17.txt | 86 ++++++ old.txt | 86 ++++++ tea.go | 5 + tea_1.16.go | 76 +++++ tea_1.16_amd64.s | 768 +++++++++++++++++++++++++++++++++++++++++++++++ tea_1.16_asm.go | 25 ++ tea_1.16_pure.go | 67 +++++ tea_1.17.go | 145 +++++++++ tea_test.go | 139 +++++++++ 13 files changed, 1571 insertions(+), 1 deletion(-) create mode 100644 go.mod create mode 100644 go.sum create mode 100644 new16.txt create mode 100644 new17.txt create mode 100644 old.txt create mode 100644 tea.go create mode 100644 tea_1.16.go create mode 100644 tea_1.16_amd64.s create mode 100644 tea_1.16_asm.go create mode 100644 tea_1.16_pure.go create mode 100644 tea_1.17.go create mode 100644 tea_test.go diff --git a/README.md b/README.md index 4960963..ff9c7d7 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,52 @@ # gofastTEA -TEA 编码算法的 SIMD 实现 +TEA 编码算法的 PLAN9 汇编优化实现 + +## 1.17 版本及以上 +速度已经达到最优,代码与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)完全相同。 + +## 1.16 版本及以下 +使用 PLAN9 汇编编写`Encrypt`,内联编写`Decrypt`,与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)代码相比,在`1.16`版本下编译提升速度如下(new16.txt)。 +```css +name old time/op new time/op delta +TEAen/16-8 252ns ± 0% 240ns ± 1% -4.50% (p=0.000 n=9+10) +TEAen/256-8 1.77µs ± 1% 1.67µs ± 1% -5.66% (p=0.000 n=9+10) +TEAen/4K-8 25.9µs ± 0% 24.8µs ± 0% -4.00% (p=0.000 n=10+9) +TEAen/32K-8 208µs ± 1% 201µs ± 0% -3.34% (p=0.000 n=10+10) +TEAde/16-8 216ns ± 1% 211ns ± 1% -2.68% (p=0.000 n=10+10) +TEAde/256-8 1.71µs ± 1% 1.66µs ± 1% -2.69% (p=0.000 n=10+10) +TEAde/4K-8 25.4µs ± 1% 24.7µs ± 1% -2.73% (p=0.000 n=10+10) +TEAde/32K-8 206µs ± 0% 200µs ± 0% -2.59% (p=0.000 n=9+10) + +name old speed new speed delta +TEAen/16-8 63.5MB/s ± 0% 66.5MB/s ± 1% +4.70% 
(p=0.000 n=9+10) +TEAen/256-8 145MB/s ± 1% 153MB/s ± 1% +5.98% (p=0.000 n=9+10) +TEAen/4K-8 158MB/s ± 0% 165MB/s ± 0% +4.16% (p=0.000 n=10+9) +TEAen/32K-8 158MB/s ± 1% 163MB/s ± 0% +3.45% (p=0.000 n=10+10) +TEAde/16-8 148MB/s ± 1% 152MB/s ± 1% +2.75% (p=0.000 n=10+10) +TEAde/256-8 160MB/s ± 1% 164MB/s ± 1% +2.77% (p=0.000 n=10+10) +TEAde/4K-8 162MB/s ± 1% 167MB/s ± 1% +2.80% (p=0.000 n=10+10) +TEAde/32K-8 159MB/s ± 0% 164MB/s ± 0% +2.66% (p=0.000 n=9+10) +``` +另外升级到`go1.17`后,即与[MiraiGo](https://github.com/Mrs4s/MiraiGo/blob/574c4e57b1467225f03936342e477ee0d587a2dc/binary/tea.go)代码相同时,在`1.17`版本下编译提升速度如下(new17.txt)。 +```css +name old time/op new time/op delta +TEAen/16-8 252ns ± 0% 241ns ± 1% -4.09% (p=0.000 n=9+10) +TEAen/256-8 1.77µs ± 1% 1.70µs ± 0% -3.85% (p=0.000 n=9+10) +TEAen/4K-8 25.9µs ± 0% 24.9µs ± 1% -3.59% (p=0.000 n=10+10) +TEAen/32K-8 208µs ± 1% 200µs ± 1% -3.78% (p=0.000 n=10+10) +TEAde/16-8 216ns ± 1% 208ns ± 1% -3.80% (p=0.000 n=10+10) +TEAde/256-8 1.71µs ± 1% 1.65µs ± 1% -3.44% (p=0.000 n=10+10) +TEAde/4K-8 25.4µs ± 1% 24.5µs ± 0% -3.40% (p=0.000 n=10+10) +TEAde/32K-8 206µs ± 0% 199µs ± 0% -3.36% (p=0.000 n=9+10) + +name old speed new speed delta +TEAen/16-8 63.5MB/s ± 0% 66.3MB/s ± 1% +4.27% (p=0.000 n=9+10) +TEAen/256-8 145MB/s ± 1% 150MB/s ± 0% +4.01% (p=0.000 n=9+10) +TEAen/4K-8 158MB/s ± 0% 164MB/s ± 1% +3.73% (p=0.000 n=10+10) +TEAen/32K-8 158MB/s ± 1% 164MB/s ± 1% +3.93% (p=0.000 n=10+10) +TEAde/16-8 148MB/s ± 1% 154MB/s ± 1% +3.95% (p=0.000 n=10+10) +TEAde/256-8 160MB/s ± 1% 165MB/s ± 1% +3.55% (p=0.000 n=10+10) +TEAde/4K-8 162MB/s ± 1% 168MB/s ± 0% +3.52% (p=0.000 n=10+10) +TEAde/32K-8 159MB/s ± 0% 165MB/s ± 0% +3.45% (p=0.000 n=9+9) +``` +可见在编码时,在某些时候`go1.16`版本下速度比`go1.17`版本更快,而整体来看,在优化后`go1.16`版本下的执行效率已经可以与`go1.17`版本持平。 \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..35c6973 --- /dev/null +++ b/go.mod @@ -0,0 +1,11 @@ +module github.com/fumiama/gofastTEA + +go 1.17 + +require
github.com/Mrs4s/MiraiGo v0.0.0-20211120033824-43b23f4e6fcb + +require ( + github.com/pkg/errors v0.9.1 // indirect + golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f // indirect + golang.org/x/sys v0.0.0-20210423082822-04245dca01da // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..2092565 --- /dev/null +++ b/go.sum @@ -0,0 +1,26 @@ +github.com/Mrs4s/MiraiGo v0.0.0-20211120033824-43b23f4e6fcb h1:Rkj28fqIwGx/EgBzRYtpmJRfH6wqVn7cNdc7aJ0QE4M= +github.com/Mrs4s/MiraiGo v0.0.0-20211120033824-43b23f4e6fcb/go.mod h1:imVKbfKqqeit+C/eaWGb4MKQ3z3gN6pRpBU5RMtp5so= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/tidwall/gjson v1.11.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f h1:OfiFi4JbukWwe3lzw+xunroH1mnC1e2Gy5cxNJApiSY= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys 
v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= diff --git a/new16.txt b/new16.txt new file mode 100644 index 0000000..59069f6 --- /dev/null +++ b/new16.txt @@ -0,0 +1,86 @@ +goos: darwin +goarch: amd64 +pkg: github.com/fumiama/gofastTEA +cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz +BenchmarkTEAen/16-8 4879810 242.4 ns/op 66.01 MB/s +BenchmarkTEAen/16-8 4984504 241.2 ns/op 66.33 MB/s +BenchmarkTEAen/16-8 5013127 240.0 ns/op 66.65 MB/s +BenchmarkTEAen/16-8 4946341 240.4 ns/op 66.55 MB/s +BenchmarkTEAen/16-8 5010094 239.6 ns/op 66.77 MB/s +BenchmarkTEAen/16-8 4979793 240.7 ns/op 66.46 MB/s +BenchmarkTEAen/16-8 4987821 240.0 ns/op 66.66 MB/s +BenchmarkTEAen/16-8 4990870 240.9 ns/op 66.42 MB/s +BenchmarkTEAen/16-8 4945104 239.6 ns/op 66.79 MB/s +BenchmarkTEAen/16-8 5003238 239.8 ns/op 66.73 MB/s +BenchmarkTEAen/256-8 712588 1680 ns/op 152.37 MB/s +BenchmarkTEAen/256-8 716322 1680 ns/op 152.39 MB/s +BenchmarkTEAen/256-8 717398 1672 ns/op 153.10 MB/s +BenchmarkTEAen/256-8 703051 1669 ns/op 153.34 MB/s +BenchmarkTEAen/256-8 739008 1663 ns/op 153.91 MB/s +BenchmarkTEAen/256-8 720760 1658 ns/op 154.39 MB/s +BenchmarkTEAen/256-8 735790 
1675 ns/op 152.80 MB/s +BenchmarkTEAen/256-8 720423 1668 ns/op 153.47 MB/s +BenchmarkTEAen/256-8 721131 1672 ns/op 153.09 MB/s +BenchmarkTEAen/256-8 717520 1669 ns/op 153.37 MB/s +BenchmarkTEAen/4K-8 47542 24859 ns/op 164.77 MB/s +BenchmarkTEAen/4K-8 48340 24879 ns/op 164.63 MB/s +BenchmarkTEAen/4K-8 48124 24807 ns/op 165.11 MB/s +BenchmarkTEAen/4K-8 48874 24841 ns/op 164.89 MB/s +BenchmarkTEAen/4K-8 48474 24819 ns/op 165.03 MB/s +BenchmarkTEAen/4K-8 48309 25202 ns/op 162.52 MB/s +BenchmarkTEAen/4K-8 47352 24930 ns/op 164.30 MB/s +BenchmarkTEAen/4K-8 47407 24760 ns/op 165.43 MB/s +BenchmarkTEAen/4K-8 47619 24841 ns/op 164.89 MB/s +BenchmarkTEAen/4K-8 48085 24779 ns/op 165.30 MB/s +BenchmarkTEAen/32K-8 5924 200510 ns/op 163.42 MB/s +BenchmarkTEAen/32K-8 5964 200683 ns/op 163.28 MB/s +BenchmarkTEAen/32K-8 5904 200746 ns/op 163.23 MB/s +BenchmarkTEAen/32K-8 5907 200587 ns/op 163.36 MB/s +BenchmarkTEAen/32K-8 6031 200538 ns/op 163.40 MB/s +BenchmarkTEAen/32K-8 6249 201163 ns/op 162.89 MB/s +BenchmarkTEAen/32K-8 5974 201777 ns/op 162.40 MB/s +BenchmarkTEAen/32K-8 6063 201619 ns/op 162.52 MB/s +BenchmarkTEAen/32K-8 6001 200751 ns/op 163.23 MB/s +BenchmarkTEAen/32K-8 5971 200775 ns/op 163.21 MB/s +BenchmarkTEAde/16-8 5743614 210.3 ns/op 152.17 MB/s +BenchmarkTEAde/16-8 5685754 210.2 ns/op 152.23 MB/s +BenchmarkTEAde/16-8 5635918 209.7 ns/op 152.56 MB/s +BenchmarkTEAde/16-8 5680320 210.1 ns/op 152.34 MB/s +BenchmarkTEAde/16-8 5719232 212.0 ns/op 150.94 MB/s +BenchmarkTEAde/16-8 5745488 210.4 ns/op 152.09 MB/s +BenchmarkTEAde/16-8 5765454 211.0 ns/op 151.69 MB/s +BenchmarkTEAde/16-8 5644918 210.3 ns/op 152.17 MB/s +BenchmarkTEAde/16-8 5681248 210.1 ns/op 152.33 MB/s +BenchmarkTEAde/16-8 5672364 211.9 ns/op 151.00 MB/s +BenchmarkTEAde/256-8 712252 1663 ns/op 163.58 MB/s +BenchmarkTEAde/256-8 724006 1668 ns/op 163.10 MB/s +BenchmarkTEAde/256-8 734743 1657 ns/op 164.15 MB/s +BenchmarkTEAde/256-8 726320 1659 ns/op 163.97 MB/s +BenchmarkTEAde/256-8 725227 1650 ns/op 164.88 MB/s 
+BenchmarkTEAde/256-8 726027 1663 ns/op 163.60 MB/s +BenchmarkTEAde/256-8 723242 1652 ns/op 164.61 MB/s +BenchmarkTEAde/256-8 723056 1654 ns/op 164.47 MB/s +BenchmarkTEAde/256-8 719800 1659 ns/op 163.93 MB/s +BenchmarkTEAde/256-8 727068 1667 ns/op 163.20 MB/s +BenchmarkTEAde/4K-8 48092 24702 ns/op 166.46 MB/s +BenchmarkTEAde/4K-8 48133 24749 ns/op 166.15 MB/s +BenchmarkTEAde/4K-8 48650 24597 ns/op 167.18 MB/s +BenchmarkTEAde/4K-8 48375 24642 ns/op 166.87 MB/s +BenchmarkTEAde/4K-8 48902 24570 ns/op 167.36 MB/s +BenchmarkTEAde/4K-8 48852 24599 ns/op 167.16 MB/s +BenchmarkTEAde/4K-8 48820 24868 ns/op 165.35 MB/s +BenchmarkTEAde/4K-8 49100 24729 ns/op 166.28 MB/s +BenchmarkTEAde/4K-8 48906 24708 ns/op 166.43 MB/s +BenchmarkTEAde/4K-8 48818 24583 ns/op 167.27 MB/s +BenchmarkTEAde/32K-8 5941 200449 ns/op 163.55 MB/s +BenchmarkTEAde/32K-8 5901 200616 ns/op 163.42 MB/s +BenchmarkTEAde/32K-8 6033 200566 ns/op 163.46 MB/s +BenchmarkTEAde/32K-8 6034 199527 ns/op 164.31 MB/s +BenchmarkTEAde/32K-8 6068 200952 ns/op 163.14 MB/s +BenchmarkTEAde/32K-8 5953 200107 ns/op 163.83 MB/s +BenchmarkTEAde/32K-8 5966 200340 ns/op 163.64 MB/s +BenchmarkTEAde/32K-8 5971 199451 ns/op 164.37 MB/s +BenchmarkTEAde/32K-8 5972 200547 ns/op 163.47 MB/s +BenchmarkTEAde/32K-8 5932 200476 ns/op 163.53 MB/s +PASS +ok github.com/fumiama/gofastTEA 107.828s diff --git a/new17.txt b/new17.txt new file mode 100644 index 0000000..96324e3 --- /dev/null +++ b/new17.txt @@ -0,0 +1,86 @@ +goos: darwin +goarch: amd64 +pkg: github.com/fumiama/gofastTEA +cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz +BenchmarkTEAen/16-8 4937554 243.6 ns/op 65.67 MB/s +BenchmarkTEAen/16-8 4969573 242.3 ns/op 66.04 MB/s +BenchmarkTEAen/16-8 4937174 241.3 ns/op 66.30 MB/s +BenchmarkTEAen/16-8 4994734 240.0 ns/op 66.67 MB/s +BenchmarkTEAen/16-8 4988541 241.9 ns/op 66.16 MB/s +BenchmarkTEAen/16-8 4981683 240.2 ns/op 66.60 MB/s +BenchmarkTEAen/16-8 4967822 242.5 ns/op 65.99 MB/s +BenchmarkTEAen/16-8 5013471 240.8 ns/op 66.43 MB/s 
+BenchmarkTEAen/16-8 4906420 241.8 ns/op 66.16 MB/s +BenchmarkTEAen/16-8 4963357 240.3 ns/op 66.58 MB/s +BenchmarkTEAen/256-8 704292 1703 ns/op 150.32 MB/s +BenchmarkTEAen/256-8 712244 1710 ns/op 149.72 MB/s +BenchmarkTEAen/256-8 687753 1698 ns/op 150.76 MB/s +BenchmarkTEAen/256-8 698522 1709 ns/op 149.78 MB/s +BenchmarkTEAen/256-8 711114 1708 ns/op 149.92 MB/s +BenchmarkTEAen/256-8 712760 1701 ns/op 150.48 MB/s +BenchmarkTEAen/256-8 705231 1701 ns/op 150.54 MB/s +BenchmarkTEAen/256-8 699147 1699 ns/op 150.72 MB/s +BenchmarkTEAen/256-8 702382 1699 ns/op 150.69 MB/s +BenchmarkTEAen/256-8 707511 1699 ns/op 150.71 MB/s +BenchmarkTEAen/4K-8 47986 24923 ns/op 164.35 MB/s +BenchmarkTEAen/4K-8 47972 24808 ns/op 165.11 MB/s +BenchmarkTEAen/4K-8 47262 24943 ns/op 164.22 MB/s +BenchmarkTEAen/4K-8 47324 24961 ns/op 164.10 MB/s +BenchmarkTEAen/4K-8 48688 25036 ns/op 163.60 MB/s +BenchmarkTEAen/4K-8 47722 24995 ns/op 163.87 MB/s +BenchmarkTEAen/4K-8 48550 24810 ns/op 165.09 MB/s +BenchmarkTEAen/4K-8 47937 25068 ns/op 163.39 MB/s +BenchmarkTEAen/4K-8 48129 24867 ns/op 164.71 MB/s +BenchmarkTEAen/4K-8 47654 24986 ns/op 163.93 MB/s +BenchmarkTEAen/32K-8 5894 200779 ns/op 163.20 MB/s +BenchmarkTEAen/32K-8 5937 201123 ns/op 162.93 MB/s +BenchmarkTEAen/32K-8 5941 199721 ns/op 164.07 MB/s +BenchmarkTEAen/32K-8 5998 199359 ns/op 164.37 MB/s +BenchmarkTEAen/32K-8 5923 199691 ns/op 164.09 MB/s +BenchmarkTEAen/32K-8 5934 199594 ns/op 164.17 MB/s +BenchmarkTEAen/32K-8 5965 200496 ns/op 163.43 MB/s +BenchmarkTEAen/32K-8 5950 199249 ns/op 164.46 MB/s +BenchmarkTEAen/32K-8 5983 200564 ns/op 163.38 MB/s +BenchmarkTEAen/32K-8 5911 199334 ns/op 164.39 MB/s +BenchmarkTEAde/16-8 5737286 207.7 ns/op 154.08 MB/s +BenchmarkTEAde/16-8 5758159 207.1 ns/op 154.48 MB/s +BenchmarkTEAde/16-8 5808830 207.8 ns/op 154.02 MB/s +BenchmarkTEAde/16-8 5745165 207.4 ns/op 154.30 MB/s +BenchmarkTEAde/16-8 5753430 208.1 ns/op 153.79 MB/s +BenchmarkTEAde/16-8 5791928 210.9 ns/op 151.74 MB/s +BenchmarkTEAde/16-8 
5664402 209.1 ns/op 153.04 MB/s +BenchmarkTEAde/16-8 5726097 207.7 ns/op 154.03 MB/s +BenchmarkTEAde/16-8 5807385 209.1 ns/op 153.04 MB/s +BenchmarkTEAde/16-8 5702360 206.9 ns/op 154.64 MB/s +BenchmarkTEAde/256-8 721252 1653 ns/op 164.53 MB/s +BenchmarkTEAde/256-8 739063 1643 ns/op 165.52 MB/s +BenchmarkTEAde/256-8 741162 1648 ns/op 165.02 MB/s +BenchmarkTEAde/256-8 735223 1642 ns/op 165.67 MB/s +BenchmarkTEAde/256-8 729406 1649 ns/op 164.91 MB/s +BenchmarkTEAde/256-8 729562 1635 ns/op 166.38 MB/s +BenchmarkTEAde/256-8 745306 1648 ns/op 165.05 MB/s +BenchmarkTEAde/256-8 726823 1650 ns/op 164.85 MB/s +BenchmarkTEAde/256-8 736669 1646 ns/op 165.22 MB/s +BenchmarkTEAde/256-8 714020 1650 ns/op 164.85 MB/s +BenchmarkTEAde/4K-8 48386 24470 ns/op 168.04 MB/s +BenchmarkTEAde/4K-8 49455 24395 ns/op 168.56 MB/s +BenchmarkTEAde/4K-8 48878 24545 ns/op 167.53 MB/s +BenchmarkTEAde/4K-8 48738 24511 ns/op 167.76 MB/s +BenchmarkTEAde/4K-8 49315 24614 ns/op 167.06 MB/s +BenchmarkTEAde/4K-8 47276 24493 ns/op 167.88 MB/s +BenchmarkTEAde/4K-8 49026 24435 ns/op 168.28 MB/s +BenchmarkTEAde/4K-8 48810 24534 ns/op 167.60 MB/s +BenchmarkTEAde/4K-8 48943 24519 ns/op 167.71 MB/s +BenchmarkTEAde/4K-8 48555 24520 ns/op 167.70 MB/s +BenchmarkTEAde/32K-8 6025 198951 ns/op 164.78 MB/s +BenchmarkTEAde/32K-8 6050 198765 ns/op 164.94 MB/s +BenchmarkTEAde/32K-8 6034 198560 ns/op 165.11 MB/s +BenchmarkTEAde/32K-8 5997 198661 ns/op 165.03 MB/s +BenchmarkTEAde/32K-8 5952 199032 ns/op 164.72 MB/s +BenchmarkTEAde/32K-8 6106 198250 ns/op 165.37 MB/s +BenchmarkTEAde/32K-8 6075 198810 ns/op 164.90 MB/s +BenchmarkTEAde/32K-8 6056 198851 ns/op 164.87 MB/s +BenchmarkTEAde/32K-8 6085 198702 ns/op 164.99 MB/s +BenchmarkTEAde/32K-8 6079 198614 ns/op 165.06 MB/s +PASS +ok github.com/fumiama/gofastTEA 106.301s diff --git a/old.txt b/old.txt new file mode 100644 index 0000000..865f953 --- /dev/null +++ b/old.txt @@ -0,0 +1,86 @@ +goos: darwin +goarch: amd64 +pkg: github.com/fumiama/gofastTEA +cpu: Intel(R) Core(TM) 
i5-8265U CPU @ 1.60GHz +BenchmarkTEAen/16-8 4685186 255.1 ns/op 62.71 MB/s +BenchmarkTEAen/16-8 4744274 250.6 ns/op 63.85 MB/s +BenchmarkTEAen/16-8 4757278 251.0 ns/op 63.74 MB/s +BenchmarkTEAen/16-8 4728451 251.5 ns/op 63.63 MB/s +BenchmarkTEAen/16-8 4771138 252.3 ns/op 63.41 MB/s +BenchmarkTEAen/16-8 4715673 251.4 ns/op 63.64 MB/s +BenchmarkTEAen/16-8 4722417 252.9 ns/op 63.26 MB/s +BenchmarkTEAen/16-8 4788355 251.4 ns/op 63.65 MB/s +BenchmarkTEAen/16-8 4719381 252.5 ns/op 63.36 MB/s +BenchmarkTEAen/16-8 4748629 252.4 ns/op 63.40 MB/s +BenchmarkTEAen/256-8 678688 1760 ns/op 145.49 MB/s +BenchmarkTEAen/256-8 689025 1759 ns/op 145.54 MB/s +BenchmarkTEAen/256-8 673020 1768 ns/op 144.76 MB/s +BenchmarkTEAen/256-8 682868 1773 ns/op 144.41 MB/s +BenchmarkTEAen/256-8 670245 1792 ns/op 142.85 MB/s +BenchmarkTEAen/256-8 679348 1770 ns/op 144.65 MB/s +BenchmarkTEAen/256-8 679471 1773 ns/op 144.38 MB/s +BenchmarkTEAen/256-8 659677 1771 ns/op 144.57 MB/s +BenchmarkTEAen/256-8 677760 1776 ns/op 144.17 MB/s +BenchmarkTEAen/256-8 670436 1788 ns/op 143.18 MB/s +BenchmarkTEAen/4K-8 45494 25910 ns/op 158.08 MB/s +BenchmarkTEAen/4K-8 45812 25838 ns/op 158.52 MB/s +BenchmarkTEAen/4K-8 46861 25802 ns/op 158.75 MB/s +BenchmarkTEAen/4K-8 46503 25991 ns/op 157.59 MB/s +BenchmarkTEAen/4K-8 46102 25813 ns/op 158.68 MB/s +BenchmarkTEAen/4K-8 46298 25954 ns/op 157.82 MB/s +BenchmarkTEAen/4K-8 46710 25750 ns/op 159.07 MB/s +BenchmarkTEAen/4K-8 46239 25836 ns/op 158.54 MB/s +BenchmarkTEAen/4K-8 46072 25860 ns/op 158.39 MB/s +BenchmarkTEAen/4K-8 46370 25938 ns/op 157.92 MB/s +BenchmarkTEAen/32K-8 5605 208498 ns/op 157.16 MB/s +BenchmarkTEAen/32K-8 5654 208256 ns/op 157.34 MB/s +BenchmarkTEAen/32K-8 5671 207461 ns/op 157.95 MB/s +BenchmarkTEAen/32K-8 5726 208031 ns/op 157.51 MB/s +BenchmarkTEAen/32K-8 5886 208996 ns/op 156.79 MB/s +BenchmarkTEAen/32K-8 5594 207445 ns/op 157.96 MB/s +BenchmarkTEAen/32K-8 5684 207217 ns/op 158.13 MB/s +BenchmarkTEAen/32K-8 5754 207360 ns/op 158.02 MB/s 
+BenchmarkTEAen/32K-8 5595 207484 ns/op 157.93 MB/s +BenchmarkTEAen/32K-8 5692 207754 ns/op 157.72 MB/s +BenchmarkTEAde/16-8 5531444 217.2 ns/op 147.35 MB/s +BenchmarkTEAde/16-8 5521533 215.2 ns/op 148.72 MB/s +BenchmarkTEAde/16-8 5537046 215.2 ns/op 148.69 MB/s +BenchmarkTEAde/16-8 5607153 217.0 ns/op 147.48 MB/s +BenchmarkTEAde/16-8 5534305 218.8 ns/op 146.24 MB/s +BenchmarkTEAde/16-8 5561917 215.7 ns/op 148.35 MB/s +BenchmarkTEAde/16-8 5535909 216.4 ns/op 147.89 MB/s +BenchmarkTEAde/16-8 5519742 215.5 ns/op 148.48 MB/s +BenchmarkTEAde/16-8 5556531 216.5 ns/op 147.82 MB/s +BenchmarkTEAde/16-8 5589644 216.6 ns/op 147.76 MB/s +BenchmarkTEAde/256-8 700608 1697 ns/op 160.28 MB/s +BenchmarkTEAde/256-8 696637 1703 ns/op 159.73 MB/s +BenchmarkTEAde/256-8 697063 1702 ns/op 159.80 MB/s +BenchmarkTEAde/256-8 709950 1710 ns/op 159.08 MB/s +BenchmarkTEAde/256-8 697386 1719 ns/op 158.28 MB/s +BenchmarkTEAde/256-8 700438 1697 ns/op 160.29 MB/s +BenchmarkTEAde/256-8 701476 1710 ns/op 159.09 MB/s +BenchmarkTEAde/256-8 704905 1709 ns/op 159.20 MB/s +BenchmarkTEAde/256-8 702578 1697 ns/op 160.24 MB/s +BenchmarkTEAde/256-8 696729 1707 ns/op 159.36 MB/s +BenchmarkTEAde/4K-8 46996 25395 ns/op 161.92 MB/s +BenchmarkTEAde/4K-8 47506 25322 ns/op 162.39 MB/s +BenchmarkTEAde/4K-8 46075 25309 ns/op 162.48 MB/s +BenchmarkTEAde/4K-8 47414 25445 ns/op 161.60 MB/s +BenchmarkTEAde/4K-8 47269 25409 ns/op 161.83 MB/s +BenchmarkTEAde/4K-8 47254 25543 ns/op 160.99 MB/s +BenchmarkTEAde/4K-8 47268 25260 ns/op 162.79 MB/s +BenchmarkTEAde/4K-8 47424 25376 ns/op 162.04 MB/s +BenchmarkTEAde/4K-8 46938 25254 ns/op 162.83 MB/s +BenchmarkTEAde/4K-8 47344 25352 ns/op 162.20 MB/s +BenchmarkTEAde/32K-8 5851 205595 ns/op 159.46 MB/s +BenchmarkTEAde/32K-8 5856 205832 ns/op 159.28 MB/s +BenchmarkTEAde/32K-8 5830 205681 ns/op 159.39 MB/s +BenchmarkTEAde/32K-8 5796 205488 ns/op 159.54 MB/s +BenchmarkTEAde/32K-8 5841 205767 ns/op 159.33 MB/s +BenchmarkTEAde/32K-8 5691 205681 ns/op 159.39 MB/s +BenchmarkTEAde/32K-8 
5886 205331 ns/op 159.66 MB/s +BenchmarkTEAde/32K-8 5842 205587 ns/op 159.47 MB/s +BenchmarkTEAde/32K-8 5809 205667 ns/op 159.40 MB/s +BenchmarkTEAde/32K-8 5941 206341 ns/op 158.88 MB/s +PASS +ok github.com/fumiama/gofastTEA 108.464s diff --git a/tea.go b/tea.go new file mode 100644 index 0000000..6b487f4 --- /dev/null +++ b/tea.go @@ -0,0 +1,5 @@ +// Package tea provides TEA encryption and decryption. +// Adapted from https://github.com/Mrs4s/MiraiGo/blob/master/binary/tea.go +package tea + +type TEA [4]uint32 // the four 32-bit key words; NewTeaCipher fills them from a 16-byte key diff --git a/tea_1.16.go b/tea_1.16.go new file mode 100644 index 0000000..67e627a --- /dev/null +++ b/tea_1.16.go @@ -0,0 +1,76 @@ +//go:build !go1.17 && amd64 +// +build !go1.17,amd64 + +package tea + +import ( + "encoding/binary" +) + +// Decrypt decrypts data in 8-byte blocks and returns the unpadded payload; it returns nil when len(data) is below 16 or not a multiple of 8. +// Reference: http://bbs.chinaunix.net/thread-583468-1-1.html +// Thanks to xichen for the explanation of TEA. + +//go:nosplit +func (t TEA) Decrypt(data []byte) []byte { + if len(data) < 16 || len(data)&7 != 0 { // need at least two blocks and a whole number of 8-byte blocks + return nil + } + dst := make([]byte, len(data)) + + var iv1, iv2, holder uint64 + var v0, v1 uint32 + for i := 0; i < len(dst); i += 8 { + holder = iv1 // previous ciphertext block (0 on the first pass) + iv1 = binary.BigEndian.Uint64(data[i:]) // current ciphertext block, big-endian + iv2 ^= iv1 // iv2 still holds the previous block's raw round output + v0, v1 = uint32(iv2>>32), uint32(iv2) + v1 -= (v0 + 0xe3779b90) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) // 16 unrolled rounds; the added constant steps down by 0x9e3779b9 each round + v0 -= (v1 + 0xe3779b90) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x454021d7) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x454021d7) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xa708a81e) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xa708a81e) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x08d12e65) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x08d12e65) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xcc623af3) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xcc623af3) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x2e2ac13a) ^ ((v0
<< 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x8ff34781) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x8ff34781) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x5384540f) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x5384540f) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xb54cda56) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xb54cda56) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x1715609d) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x1715609d) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x78dde6e4) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x78dde6e4) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x3c6ef372) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x3c6ef372) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + v1 -= (v0 + 0x9e3779b9) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3]) + v0 -= (v1 + 0x9e3779b9) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1]) + iv2 = uint64(v0)<<32 | uint64(v1) + binary.BigEndian.PutUint64(dst[i:], iv2^holder) // plaintext block = round output XOR previous ciphertext block + } + + return dst[dst[0]&7+3 : len(data)-7] // drops dst[0]&7+3 leading bytes and 7 trailing bytes; NOTE(review): when len(data)==16 the start can reach 10 while the end is 9, which panics - confirm inputs rule this out +} + +//go:nosplit +func NewTeaCipher(key []byte) (t TEA) { + if len(key) == 16 { // any other key length returns the zero-valued TEA + t[3] = binary.BigEndian.Uint32(key[12:]) + t[2] = binary.BigEndian.Uint32(key[8:]) + t[1] = binary.BigEndian.Uint32(key[4:]) + t[0] = binary.BigEndian.Uint32(key[0:]) + } + return +} diff --git a/tea_1.16_amd64.s b/tea_1.16_amd64.s new file mode 100644 index 0000000..3a5e80e --- /dev/null +++ b/tea_1.16_amd64.s @@ -0,0 +1,768 @@ +//go:build !go1.17 && amd64 +// +build !go1.17,amd64 + +#include "textflag.h" + +// func encrypt(dstlen uintptr, t uintptr)
+TEXT ·encrypt(SB), NOSPLIT, $0-16 + MOVQ ·dstlen+0(FP), AX // arg0 (stack args, go<1.17): low 40 bits are used as the dst address, top 24 bits as part of the length + MOVQ ·teaptr+8(FP), DI // arg1: low 40 bits are used as the key address, top 24 bits as the rest of the length + MOVQ AX, BX // BX = arg0 + SHRQ $40, BX // BX = top 24 bits of arg0 + SHLQ $24, AX + SHRQ $24, AX + MOVQ DI, R8 // R8 = arg1 + SHLQ $24, DI + SHRQ $24, DI + SHRQ $40, R8 + SHLQ $40, R8 + ORQ R8, BX // BX = len; NOTE(review): the packing is done by the Go caller, which is not visible here - confirm the layout + ADDQ BX, AX // dst += len(dst) + NOTQ BX // i = ^len = -len - 1 + INCQ BX // i = -len, counts up to 0 + MOVQ (DI), DX // t0 in the low 32 bits (MOVQ loads 8 bytes) + MOVQ 4(DI), R12 // t1 in the low 32 bits + MOVQ 8(DI), R10 // t2 in the low 32 bits + MOVQ 12(DI), SI // t3 in the low 32 bits; NOTE(review): an 8-byte load at offset 12 reads 4 bytes past a 16-byte key - confirm this is safe + // XORQ R11, R11 // holder + XORQ R13, R13 // iv1 = 0 + XORQ DI, DI // iv2 = 0 +enclop: + MOVQ (AX)(BX*1), R11 // holder = Uint64(dst[i:]) + BSWAPQ R11 // holder = BE(block) + XORQ R13, R11 // holder ^= iv1 + MOVQ R11, R13 // iv1 = holder + // Use Register CX(v1), DX(t0), SI(t3), R8(tmp), R10(t2), R12(t1), R13(v0/ret) + ////////////////iv1 = encrypt(iv1)//////////////// + MOVQ R11, CX // v1 + SHRQ $32, R13 // v0 + + LEAQ -1640531527(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ -1640531527(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ 1013904242(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ 1013904242(R13), R8 // R8 = v0 + 0x...
+ MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ -626627285(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ -626627285(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ 2027808484(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ 2027808484(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ 387276957(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ 387276957(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ -1253254570(CX), R8 // R8 = v1 + 0x...
+ MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ -1253254570(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ 1401181199(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ 1401181199(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ -239350328(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ -239350328(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ -1879881855(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ -1879881855(R13), R8 // R8 = v0 + 0x...
+ MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ 774553914(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ 774553914(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ -865977613(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ -865977613(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ 1788458156(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, R13 // v0 += R8 + LEAQ 1788458156(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ 147926629(CX), R8 // R8 = v1 + 0x...
+ MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ 147926629(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ -1492604898(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ -1492604898(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ 1161830871(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ 1161830871(R13), R8 // R8 = v0 + 0x... + MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + LEAQ -478700656(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRL $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + ADDQ R8, R13 // v0 += R8 + LEAQ -478700656(R13), R8 // R8 = v0 + 0x...
+ MOVQ R13, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R13, R9 // R9 = v0 + SHRL $5, R9 // R9 >>= 5 + ADDQ SI, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + ADDL R8, CX // v1 += R8 + + SHLQ $32, R13 // v0 <<= 32 + ORQ CX, R13 // v0 |= v1 + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + XORQ DI, R13 // iv1 ^= iv2 + MOVQ R11, DI // iv2 = holder + MOVQ R13, R11 // holder = iv1 + BSWAPQ R11 // holder = BE(holder) + MOVQ R11, (AX)(BX*1) // PutUint64(dst[i:], holder) + ADDQ $8, BX // i += 8 + JNZ enclop // loop until i reaches 0 + RET + +// func decrypt(datalen uintptr, dst uintptr, t *TEA) +TEXT ·decrypt(SB), NOSPLIT, $0-24 + MOVQ ·data+0(FP), AX // go:<1.17 data + MOVQ ·dst+8(FP), DI // go:<1.17 dst + MOVQ ·teaptr+16(FP), SI // go:<1.17 t + MOVQ AX, BX // len(data) low 24 bits + SHRQ $40, BX // unpack len + SHLQ $24, AX + SHRQ $24, AX + MOVQ DI, R8 // dst high 24 bits + SHLQ $24, DI + SHRQ $24, DI + SHRQ $40, R8 + SHLQ $40, R8 + ORQ R8, BX + ADDQ BX, AX // data += len(data) + ADDQ BX, DI // dst += len(data) + NOTQ BX // i = -len - 1 + INCQ BX // i++ + MOVQ (SI), DX // t0 + MOVQ 4(SI), R12 // t1 + MOVQ 8(SI), R10 // t2 + MOVQ 12(SI), R13 // t3 + XORQ SI, SI // iv1 + XORQ R15, R15 // iv2 + XORQ R11, R11 // holder +declop: + MOVQ (AX)(BX*1), SI // iv1 = Uint64(data[i:]) + BSWAPQ SI // iv1 = BE(block) + XORQ SI, R15 // iv2 ^= iv1 + // Use Register R15(v0/ret), R12(t1), CX(v1), DX(t0), R13(t3), R8, R9, R10(t2) + ///////////////iv2 = decrypt(iv2)/////////////// + MOVQ R15, CX // v1 + SHRQ $32, R15 // v0 + + LEAQ -478700656(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ -478700656(CX), R8 // R8 = v1 + 0x...
+ MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ 1161830871(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ 1161830871(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ -1492604898(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ -1492604898(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ 147926629(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ 147926629(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ 1788458156(R15), R8 // R8 = v0 + 0x... 
+ MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ 1788458156(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ -865977613(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ -865977613(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ 774553914(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ 774553914(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ -1879881855(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ -1879881855(CX), R8 // R8 = v1 + 0x... 
+ MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ -239350328(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ -239350328(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ 1401181199(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ 1401181199(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ -1253254570(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ -1253254570(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ 387276957(R15), R8 // R8 = v0 + 0x... 
+ MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ 387276957(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ 2027808484(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ 2027808484(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ -626627285(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ -626627285(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ 1013904242(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ 1013904242(CX), R8 // R8 = v1 + 0x... 
+ MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + LEAQ -1640531527(R15), R8 // R8 = v0 + 0x... + MOVQ R15, R9 // R9 = v0 + SHLQ $4, R9 // R9 <<= 4 + ADDQ R10, R9 // R9 += t2 + XORQ R9, R8 // R8 ^= R9 + MOVQ R15, R9 // R9 = v0 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R13, R9 // R9 += t3 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, CX // v1 -= R8 + LEAQ -1640531527(CX), R8 // R8 = v1 + 0x... + MOVQ CX, R9 // R9 = v1 + SHLQ $4, R9 // R9 <<= 4 + ADDQ DX, R9 // R9 += t0 + XORQ R9, R8 // R8 ^= R9 + MOVQ CX, R9 // R9 = v1 + SHRQ $5, R9 // R9 >>= 5 + ADDQ R12, R9 // R9 += t1 + XORQ R9, R8 // R8 ^= R9 + SUBL R8, R15 // v0 -= R8 + + SHLQ $32, R15 + ORQ CX, R15 + /////////////////////////////////////////////// + XORQ R15, R11 // holder ^= iv2 + BSWAPQ R11 // holder = BE(holder) + MOVQ R11, (DI)(BX*1) // PutUint64(dst[i:], holder) + MOVQ SI, R11 // holder = iv1 + ADDQ $8, BX // i += 8 + JNZ declop + RET diff --git a/tea_1.16_asm.go b/tea_1.16_asm.go new file mode 100644 index 0000000..312aab6 --- /dev/null +++ b/tea_1.16_asm.go @@ -0,0 +1,25 @@ +//go:build !go1.17 && amd64 +// +build !go1.17,amd64 + +package tea + +import ( + "math/rand" + "unsafe" +) + +// implemented in tea_$GOARCH.s +func encrypt(dstlen uintptr, tlen uintptr) +func decrypt(datalen uintptr, dstlen uintptr, t *TEA) + +//go:nosplit +func (t TEA) Encrypt(src []byte) (dst []byte) { + lens := len(src) + fill := 10 - (lens+1)&7 + dst = make([]byte, fill+lens+7) + _, _ = rand.Read(dst[0:fill]) + dst[0] = byte(fill-3) | 0xF8 // 存储pad长度 + copy(dst[fill:], src) + encrypt(uintptr(*(*unsafe.Pointer)(unsafe.Pointer(&dst)))|uintptr(len(dst)<<40), uintptr(unsafe.Pointer(&t))|(uintptr(len(dst))&0xffffff00_00000000)) + return dst +} diff --git a/tea_1.16_pure.go b/tea_1.16_pure.go new file mode 100644 index 0000000..2096704 --- /dev/null +++ 
b/tea_1.16_pure.go
@@ -0,0 +1,67 @@
+//go:build !go1.17 && !amd64
+// +build !go1.17,!amd64
+
+package tea
+
+import (
+	"encoding/binary"
+	"math/rand"
+)
+
+// Encrypt pads src, then encrypts it 8 bytes at a time with the 16 TEA
+// rounds unrolled inline, chaining each block with the previous one.
+// Reference: http://bbs.chinaunix.net/thread-583468-1-1.html
+// Thanks to xichen for the explanation of TEA.
+//go:nosplit
+func (t TEA) Encrypt(src []byte) (dst []byte) {
+	lens := len(src)
+	fill := 10 - (lens+1)&7
+	dst = make([]byte, fill+lens+7)
+	_, _ = rand.Read(dst[0:fill])
+	dst[0] = byte(fill-3) | 0xF8 // store the pad length in the low 3 bits
+	copy(dst[fill:], src)
+
+	var iv1, iv2, holder uint64
+	var v0, v1 uint32
+	for i := 0; i < len(dst); i += 8 {
+		holder = binary.BigEndian.Uint64(dst[i:]) ^ iv1
+		v0, v1 = uint32(holder>>32), uint32(holder)
+		v0 += (v1 + 0x9e3779b9) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0x9e3779b9) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0x3c6ef372) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0x3c6ef372) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0x78dde6e4) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0x78dde6e4) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0x1715609d) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0x1715609d) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0xb54cda56) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0xb54cda56) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0x5384540f) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0x5384540f) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0x8ff34781) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0x8ff34781) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0xcc623af3) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0xcc623af3) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0x08d12e65) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0x08d12e65) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0xa708a81e) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0xa708a81e) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0x454021d7) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0x454021d7) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		v0 += (v1 + 0xe3779b90) ^ ((v1 << 4) + t[0]) ^ ((v1 >> 5) + t[1])
+		v1 += (v0 + 0xe3779b90) ^ ((v0 << 4) + t[2]) ^ ((v0 >> 5) + t[3])
+		iv1 = (uint64(v0)<<32 | uint64(v1)) ^ iv2
+		iv2 = holder
+		binary.BigEndian.PutUint64(dst[i:], iv1)
+	}
+
+	return dst
+}
diff --git a/tea_1.17.go b/tea_1.17.go
new file mode 100644
index 0000000..0e556aa
--- /dev/null
+++ b/tea_1.17.go
@@ -0,0 +1,159 @@
+//go:build go1.17
+// +build go1.17
+
+package tea
+
+import (
+	"encoding/binary"
+	"math/rand"
+)
+
+// Encrypt pads src, then encrypts it 8 bytes at a time with TEA, chaining
+// each block with the previous one, and returns the result as a new slice.
+// Reference: http://bbs.chinaunix.net/thread-583468-1-1.html
+// Thanks to xichen for the explanation of TEA.
+func (t TEA) Encrypt(src []byte) (dst []byte) {
+	lens := len(src)
+	fill := 10 - (lens+1)%8
+	dst = make([]byte, fill+lens+7)
+	_, _ = rand.Read(dst[0:fill])
+	dst[0] = byte(fill-3) | 0xF8 // store the pad length in the low 3 bits
+	copy(dst[fill:], src)
+
+	var iv1, iv2, holder uint64
+	for i := 0; i < len(dst); i += 8 {
+		block := binary.BigEndian.Uint64(dst[i:])
+		holder = block ^ iv1
+		iv1 = t.encode(holder)
+		iv1 = iv1 ^ iv2
+		iv2 = holder
+		binary.BigEndian.PutUint64(dst[i:], iv1)
+	}
+
+	return dst
+}
+
+// Decrypt reverses Encrypt and returns the payload without its padding.
+// It returns nil when data is shorter than 16 bytes, is not a multiple of
+// 8 bytes, or decrypts to a pad length that does not fit inside data.
+func (t TEA) Decrypt(data []byte) []byte {
+	if len(data) < 16 || len(data)%8 != 0 {
+		return nil
+	}
+	dst := make([]byte, len(data))
+	var iv1, iv2, holder, tmp uint64
+	for i := 0; i < len(dst); i += 8 {
+		block := binary.BigEndian.Uint64(data[i:])
+		tmp = t.decode(block ^ iv2)
+		iv2 = tmp
+		holder = tmp ^ iv1
+		iv1 = block
+		binary.BigEndian.PutUint64(dst[i:], holder)
+	}
+	// The pad length is read from decrypted, untrusted input: a 16-byte
+	// message whose first decrypted byte has its low three bits all set
+	// would otherwise slice dst[10:9] and panic.
+	start, end := int(dst[0]&7)+3, len(data)-7
+	if start > end {
+		return nil
+	}
+	return dst[start:end]
+}
+
+// encode encrypts one 8-byte block with the 16 TEA rounds unrolled.
+//go:nosplit
+func (t *TEA) encode(n uint64) uint64 {
+	v0, v1 := uint32(n>>32), uint32(n)
+	t0, t1, t2, t3 := t[0], t[1], t[2], t[3]
+
+	v0 += (v1 + 0x9e3779b9) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x9e3779b9) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x3c6ef372) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x3c6ef372) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x78dde6e4) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x78dde6e4) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x1715609d) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x1715609d) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xb54cda56) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xb54cda56) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x5384540f) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x5384540f) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x8ff34781) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x8ff34781) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xcc623af3) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xcc623af3) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x08d12e65) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x08d12e65) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xa708a81e) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xa708a81e) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0x454021d7) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0x454021d7) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 += (v1 + 0xe3779b90) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 += (v0 + 0xe3779b90) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+
+	return uint64(v0)<<32 | uint64(v1)
+}
+
+// decode decrypts one 8-byte block, undoing the rounds of encode in reverse.
+//go:nosplit
+func (t *TEA) decode(n uint64) uint64 {
+	v0, v1 := uint32(n>>32), uint32(n)
+	t0, t1, t2, t3 := t[0], t[1], t[2], t[3]
+
+	v1 -= (v0 + 0xe3779b90) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xe3779b90) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x454021d7) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x454021d7) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xa708a81e) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xa708a81e) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x08d12e65) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x08d12e65) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x6a99b4ac) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x6a99b4ac) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xcc623af3) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xcc623af3) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x2e2ac13a) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x2e2ac13a) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x8ff34781) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x8ff34781) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xf1bbcdc8) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xf1bbcdc8) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x5384540f) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x5384540f) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xb54cda56) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xb54cda56) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x1715609d) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x1715609d) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x78dde6e4) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x78dde6e4) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0xdaa66d2b) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0xdaa66d2b) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x3c6ef372) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x3c6ef372) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+	v1 -= (v0 + 0x9e3779b9) ^ ((v0 << 4) + t2) ^ ((v0 >> 5) + t3)
+	v0 -= (v1 + 0x9e3779b9) ^ ((v1 << 4) + t0) ^ ((v1 >> 5) + t1)
+
+	return uint64(v0)<<32 | uint64(v1)
+}
+
+// NewTeaCipher builds a TEA key schedule from a 16-byte key read as four
+// big-endian uint32 words. Any other key length yields the zero TEA.
+//go:nosplit
+func NewTeaCipher(key []byte) (t TEA) {
+	if len(key) != 16 {
+		return TEA{}
+	}
+	t[3] = binary.BigEndian.Uint32(key[12:])
+	t[2] = binary.BigEndian.Uint32(key[8:])
+	t[1] = binary.BigEndian.Uint32(key[4:])
+	t[0] = binary.BigEndian.Uint32(key[0:])
+	return t
+}
diff --git a/tea_test.go b/tea_test.go
new file mode 100644
index 0000000..63577bd
--- /dev/null
+++ b/tea_test.go
@@ -0,0 +1,141 @@
+package tea
+
+import (
+	"bytes"
+	"crypto/rand"
+	"encoding/hex"
+	"testing"
+
+	"github.com/Mrs4s/MiraiGo/utils"
+)
+
+var testTEA = NewTeaCipher([]byte("0123456789ABCDEF"))
+
+const (
+	KEY = iota
+	DAT
+	ENC
+)
+
+var sampleData = func() [][3]string {
+	out := [][3]string{
+		{"0123456789ABCDEF", "MiraiGO Here", "b7b2e52af7f5b1fbf37fc3d5546ac7569aecd01bbacf09bf"},
+		{"0123456789ABCDEF", "LXY Testing~", "9d0ab85aa14f5434ee83cd2a6b28bf306263cdf88e01264c"},
+
+		{"0123456789ABCDEF", "s", "528e8b5c48300b548e94262736ebb8b7"},
+		{"0123456789ABCDEF", "long long long long long long long", "95715fab6efbd0fd4b76dbc80bd633ebe805849dbc242053b06557f87e748effd9f613f782749fb9fdfa3f45c0c26161"},
+
+		// NOTE(review): this key is 13 bytes as written in this copy, and the go1.17
+		// NewTeaCipher returns the zero TEA unless len(key) == 16 - confirm upstream.
+		{"LXY1226 Mrs4s", "LXY Testing~", "ab20caa63f3a6503a84f3cb28f9e26b6c18c051e995d1721"},
+	}
+	for i := range out {
+		c, _ := hex.DecodeString(out[i][ENC])
+		out[i][ENC] = utils.B2S(c)
+	}
+	return out
+}()
+
+func TestTEA(t *testing.T) {
+	// Self Testing
+	for _, sample := range sampleData {
+		tea := NewTeaCipher(utils.S2B(sample[KEY]))
+		dat := utils.B2S(tea.Decrypt(utils.S2B(sample[ENC])))
+		if dat != sample[DAT] {
+			t.Fatalf("error decrypt %v %x", sample, dat)
+		}
+		enc := utils.B2S(tea.Encrypt(utils.S2B(sample[DAT])))
+		dat = utils.B2S(tea.Decrypt(utils.S2B(enc)))
+		if dat != sample[DAT] {
+			t.Fatal("error self test", sample)
+		}
+	}
+
+	key := make([]byte, 16)
+	_, err := rand.Read(key)
+	if err != nil {
+		panic(err)
+	}
+	// Random data testing
+	for i := 1; i < 0xFF; i++ {
+		_, err := rand.Read(key)
+		if err != nil {
+			panic(err)
+		}
+		tea := NewTeaCipher(key)
+
+		dat := make([]byte, i)
+		_, err = rand.Read(dat)
+		if err != nil {
+			panic(err)
+		}
+		enc := tea.Encrypt(dat)
+		dec := tea.Decrypt(enc)
+		if !bytes.Equal(dat, dec) {
+			t.Fatalf("error in %d, %x %x %x", i, key, dat, enc)
+		}
+	}
+}
+
+func benchEncrypt(b *testing.B, data []byte) {
+	_, err := rand.Read(data)
+	if err != nil {
+		panic(err)
+	}
+	b.SetBytes(int64(len(data)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		testTEA.Encrypt(data)
+	}
+}
+
+func benchDecrypt(b *testing.B, data []byte) {
+	_, err := rand.Read(data)
+	if err != nil {
+		panic(err)
+	}
+	data = testTEA.Encrypt(data)
+	b.SetBytes(int64(len(data)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		testTEA.Decrypt(data)
+	}
+}
+
+func BenchmarkTEAen(b *testing.B) {
+	b.Run("16", func(b *testing.B) {
+		data := make([]byte, 16)
+		benchEncrypt(b, data)
+	})
+	b.Run("256", func(b *testing.B) {
+		data := make([]byte, 256)
+		benchEncrypt(b, data)
+	})
+	b.Run("4K", func(b *testing.B) {
+		data := make([]byte, 1024*4)
+		benchEncrypt(b, data)
+	})
+	b.Run("32K", func(b *testing.B) {
+		data := make([]byte, 1024*32)
+		benchEncrypt(b, data)
+	})
+}
+
+func BenchmarkTEAde(b *testing.B) {
+	b.Run("16", func(b *testing.B) {
+		data := make([]byte, 16)
+		benchDecrypt(b, data)
+	})
+	b.Run("256", func(b *testing.B) {
+		data := make([]byte, 256)
+		benchDecrypt(b, data)
+	})
+	b.Run("4K", func(b *testing.B) {
+		data := make([]byte, 4096)
+		benchDecrypt(b, data)
+	})
+	b.Run("32K", func(b *testing.B) {
+		data := make([]byte, 1024*32)
+		benchDecrypt(b, data)
+	})
+}