mirror of
https://github.com/fumiama/blake2b-simd.git
synced 2026-06-05 02:00:26 +08:00
Detect SSE automatically and enable corresponding optimized code path.
With this change, the following happens:
```
if sse {
compressSSE()
return
}
compressGeneric()
```
compressGeneric is used as a fallback when SSE is not detected.
This commit is contained in:
committed by
Harshavardhana
parent
83ff4cf5f4
commit
3a46db1cb4
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
*.test
|
||||
12
blake2b.go
12
blake2b.go
@@ -143,6 +143,7 @@ func (d *digest) initialize(c *Config) {
|
||||
p[2] = 1
|
||||
p[3] = 1
|
||||
}
|
||||
|
||||
// Initialize.
|
||||
d.size = c.Size
|
||||
for i := 0; i < 8; i++ {
|
||||
@@ -151,6 +152,7 @@ func (d *digest) initialize(c *Config) {
|
||||
if c.Tree != nil && c.Tree.IsLastNode {
|
||||
d.isLastNode = true
|
||||
}
|
||||
|
||||
// Process key.
|
||||
if c.Key != nil {
|
||||
copy(d.paddedKey[:], c.Key)
|
||||
@@ -231,11 +233,11 @@ func (d *digest) Write(p []byte) (nn int, err error) {
|
||||
}
|
||||
|
||||
// Sum returns the calculated checksum.
|
||||
func (d0 *digest) Sum(in []byte) []byte {
|
||||
// Make a copy of d0 so that caller can keep writing and summing.
|
||||
d := *d0
|
||||
hash := d.checkSum()
|
||||
return append(in, hash[:d.size]...)
|
||||
func (d *digest) Sum(in []byte) []byte {
|
||||
// Make a copy of d so that caller can keep writing and summing.
|
||||
d0 := *d
|
||||
hash := d0.checkSum()
|
||||
return append(in, hash[:d0.size]...)
|
||||
}
|
||||
|
||||
func (d *digest) checkSum() [Size]byte {
|
||||
|
||||
@@ -21,6 +21,8 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestSum - tests and validates golden set of values against
|
||||
// pre-defined set of inputs and matches blake2b output.
|
||||
func TestSum(t *testing.T) {
|
||||
buf := make([]byte, len(golden))
|
||||
for i := range buf {
|
||||
@@ -76,44 +78,6 @@ func TestKeyedSum(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
var bench = New512()
|
||||
var buf = make([]byte, 8<<10)
|
||||
|
||||
func BenchmarkWrite1K(b *testing.B) {
|
||||
b.SetBytes(1024)
|
||||
for i := 0; i < b.N; i++ {
|
||||
bench.Write(buf[:1024])
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkWrite8K(b *testing.B) {
|
||||
b.SetBytes(int64(len(buf)))
|
||||
for i := 0; i < b.N; i++ {
|
||||
bench.Write(buf)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHash64(b *testing.B) {
|
||||
b.SetBytes(64)
|
||||
for i := 0; i < b.N; i++ {
|
||||
Sum512(buf[:64])
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHash128(b *testing.B) {
|
||||
b.SetBytes(128)
|
||||
for i := 0; i < b.N; i++ {
|
||||
Sum512(buf[:128])
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHash1K(b *testing.B) {
|
||||
b.SetBytes(1024)
|
||||
for i := 0; i < b.N; i++ {
|
||||
Sum512(buf[:1024])
|
||||
}
|
||||
}
|
||||
|
||||
// Test vectors taken from reference implementation in C#.
|
||||
var golden = []string{
|
||||
"786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce",
|
||||
@@ -632,3 +596,55 @@ var goldenKeyed = []string{
|
||||
"d444bfa2362a96df213d070e33fa841f51334e4e76866b8139e8af3bb3398be2dfaddcbc56b9146de9f68118dc5829e74b0c28d7711907b121f9161cb92b69a9",
|
||||
"142709d62e28fcccd0af97fad0f8465b971e82201dc51070faa0372aa43e92484be1c1e73ba10906d5d1853db6a4106e0a7bf9800d373d6dee2d46d62ef2a461",
|
||||
}
|
||||
|
||||
// Benchmark blake2b implementation.
|
||||
var bench = New512()
|
||||
var buf = make([]byte, 128*1024)
|
||||
|
||||
// Benchmark writes of 64 bytes.
|
||||
func BenchmarkHash64(b *testing.B) {
|
||||
b.SetBytes(64)
|
||||
for i := 0; i < b.N; i++ {
|
||||
Sum512(buf[:64])
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark writes of 128 bytes.
|
||||
func BenchmarkHash128(b *testing.B) {
|
||||
b.SetBytes(128)
|
||||
for i := 0; i < b.N; i++ {
|
||||
Sum512(buf[:128])
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark writes of 1KiB bytes.
|
||||
func BenchmarkWrite1K(b *testing.B) {
|
||||
b.SetBytes(1024)
|
||||
for i := 0; i < b.N; i++ {
|
||||
bench.Write(buf[:1024])
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark writes of 8KiB bytes.
|
||||
func BenchmarkWrite8K(b *testing.B) {
|
||||
b.SetBytes(int64(len(buf)))
|
||||
for i := 0; i < b.N; i++ {
|
||||
bench.Write(buf[:8192])
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark writes of 32KiB bytes.
|
||||
func BenchmarkWrite32K(b *testing.B) {
|
||||
b.SetBytes(int64(len(buf)))
|
||||
for i := 0; i < b.N; i++ {
|
||||
bench.Write(buf[:32*1024])
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark writes of 128KiB bytes.
|
||||
func BenchmarkWrite128K(b *testing.B) {
|
||||
b.SetBytes(int64(len(buf)))
|
||||
for i := 0; i < b.N; i++ {
|
||||
bench.Write(buf)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,9 +20,9 @@
|
||||
package blake2b
|
||||
|
||||
//go:noescape
|
||||
func compressSSE(p []uint8, in, iv , t, f, shffle, out []uint64)
|
||||
func blockSSE(p []uint8, in, iv, t, f, shffle, out []uint64)
|
||||
|
||||
func compress(d *digest, p []uint8) {
|
||||
func compressSSE(d *digest, p []uint8) {
|
||||
h0, h1, h2, h3, h4, h5, h6, h7 := d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]
|
||||
|
||||
in := make([]uint64, 8, 8)
|
||||
@@ -42,7 +42,7 @@ func compress(d *digest, p []uint8) {
|
||||
|
||||
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = h0, h1, h2, h3, h4, h5, h6, h7
|
||||
|
||||
compressSSE(p, in, iv[:], d.t[:], d.f[:], shffle, out)
|
||||
blockSSE(p, in, iv[:], d.t[:], d.f[:], shffle, out)
|
||||
|
||||
h0, h1, h2, h3, h4, h5, h6, h7 = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
|
||||
|
||||
@@ -51,3 +51,11 @@ func compress(d *digest, p []uint8) {
|
||||
|
||||
d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = h0, h1, h2, h3, h4, h5, h6, h7
|
||||
}
|
||||
|
||||
func compress(d *digest, p []uint8) {
|
||||
if sse {
|
||||
compressSSE(d, p)
|
||||
return
|
||||
}
|
||||
compressGeneric(d, p)
|
||||
}
|
||||
|
||||
@@ -42,9 +42,8 @@
|
||||
// rounds 2 & 12 are identical)
|
||||
//
|
||||
|
||||
|
||||
// func compressSSE(compressSSE(p []uint8, in, iv, t, f, shffle, out []uint64)
|
||||
TEXT ·compressSSE(SB), 7, $0
|
||||
// func blockSSE(p []uint8, in, iv, t, f, shffle, out []uint64)
|
||||
TEXT ·blockSSE(SB), 7, $0
|
||||
|
||||
// REGISTER USE
|
||||
// X0 - X7: v0 - v15
|
||||
@@ -1888,3 +1887,4 @@ TEXT ·compressSSE(SB), 7, $0
|
||||
MOVOU X3, 48(DX) // out[6]+out[7] = X3
|
||||
|
||||
RET
|
||||
|
||||
|
||||
1418
compress_generic.go
Normal file
1418
compress_generic.go
Normal file
File diff suppressed because it is too large
Load Diff
1408
compress_noasm.go
1408
compress_noasm.go
File diff suppressed because it is too large
Load Diff
29
cpuid.go
Normal file
29
cpuid.go
Normal file
@@ -0,0 +1,29 @@
|
||||
// +build 386,!gccgo amd64,!gccgo
|
||||
|
||||
// Copyright 2016 Frank Wessels <fwessels@xs4all.nl>
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package blake2b
|
||||
|
||||
func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
|
||||
|
||||
// True when SIMD instructions are available.
|
||||
var sse = haveSSE()
|
||||
|
||||
// haveSSE returns true if we have streaming SIMD instructions.
|
||||
func haveSSE() bool {
|
||||
_, _, _, d := cpuid(1)
|
||||
return (d & (1 << 25)) != 0
|
||||
}
|
||||
15
cpuid_386.s
Normal file
15
cpuid_386.s
Normal file
@@ -0,0 +1,15 @@
|
||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
// +build 386,!gccgo
|
||||
|
||||
// func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
//
// cpuid executes the CPUID instruction with AX loaded from op and CX zeroed,
// then stores the resulting AX, BX, CX and DX into the eax, ebx, ecx and edx
// return slots (4-byte slots following the 4-byte argument).
|
||||
TEXT ·cpuid(SB), 7, $0
|
||||
XORL CX, CX // zero CX before executing CPUID
|
||||
MOVL op+0(FP), AX // AX = op
|
||||
CPUID
|
||||
MOVL AX, eax+4(FP) // return value eax
|
||||
MOVL BX, ebx+8(FP) // return value ebx
|
||||
MOVL CX, ecx+12(FP) // return value ecx
|
||||
MOVL DX, edx+16(FP) // return value edx
|
||||
RET
|
||||
|
||||
14
cpuid_amd64.s
Normal file
14
cpuid_amd64.s
Normal file
@@ -0,0 +1,14 @@
|
||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
// +build amd64,!gccgo
|
||||
|
||||
// func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
//
// cpuid executes the CPUID instruction with AX loaded from op and CX zeroed,
// then stores the resulting AX, BX, CX and DX into the eax, ebx, ecx and edx
// return slots, which start at offset 8 from the frame pointer.
|
||||
TEXT ·cpuid(SB), 7, $0
|
||||
XORQ CX, CX // zero CX before executing CPUID
|
||||
MOVL op+0(FP), AX // AX = op
|
||||
CPUID
|
||||
MOVL AX, eax+8(FP) // return value eax
|
||||
MOVL BX, ebx+12(FP) // return value ebx
|
||||
MOVL CX, ecx+16(FP) // return value ecx
|
||||
MOVL DX, edx+20(FP) // return value edx
|
||||
RET
|
||||
Reference in New Issue
Block a user