1
0
mirror of https://github.com/fumiama/blake2b-simd.git synced 2026-06-05 02:00:26 +08:00

Detect SSE automatically and enable corresponding optimized code path.

With this change, the following happens:

```
if sse {
   compressSSE()
   return
}
compressGeneric()
```

compressGeneric is used as a fallback when SSE is not detected.
This commit is contained in:
Harshavardhana
2016-06-25 00:25:28 -07:00
committed by Harshavardhana
parent 83ff4cf5f4
commit 3a46db1cb4
10 changed files with 1553 additions and 1456 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
*.test

View File

@@ -143,6 +143,7 @@ func (d *digest) initialize(c *Config) {
p[2] = 1
p[3] = 1
}
// Initialize.
d.size = c.Size
for i := 0; i < 8; i++ {
@@ -151,6 +152,7 @@ func (d *digest) initialize(c *Config) {
if c.Tree != nil && c.Tree.IsLastNode {
d.isLastNode = true
}
// Process key.
if c.Key != nil {
copy(d.paddedKey[:], c.Key)
@@ -231,11 +233,11 @@ func (d *digest) Write(p []byte) (nn int, err error) {
}
// Sum returns the calculated checksum.
func (d0 *digest) Sum(in []byte) []byte {
// Make a copy of d0 so that caller can keep writing and summing.
d := *d0
hash := d.checkSum()
return append(in, hash[:d.size]...)
func (d *digest) Sum(in []byte) []byte {
// Make a copy of d so that caller can keep writing and summing.
d0 := *d
hash := d0.checkSum()
return append(in, hash[:d0.size]...)
}
func (d *digest) checkSum() [Size]byte {

View File

@@ -21,6 +21,8 @@ import (
"testing"
)
// TestSum - tests and validates the golden set of values against a
// pre-defined set of inputs and matches the blake2b output.
func TestSum(t *testing.T) {
buf := make([]byte, len(golden))
for i := range buf {
@@ -76,44 +78,6 @@ func TestKeyedSum(t *testing.T) {
}
}
// Shared benchmark fixtures: the digest that the Write benchmarks feed and
// the 8 KiB input buffer that every benchmark slices from.
var (
	bench = New512()
	buf   = make([]byte, 8*1024)
)
// BenchmarkWrite1K measures Write throughput on the package-level bench
// digest when each call receives the first 1024 bytes of buf.
func BenchmarkWrite1K(b *testing.B) {
	const chunkSize = 1024
	b.SetBytes(chunkSize)
	for left := b.N; left > 0; left-- {
		bench.Write(buf[:chunkSize])
	}
}
// BenchmarkWrite8K measures Write throughput on the package-level bench
// digest when each call is handed the whole of buf; the per-iteration byte
// count reported to the framework is len(buf).
func BenchmarkWrite8K(b *testing.B) {
	perIter := int64(len(buf))
	b.SetBytes(perIter)
	for left := b.N; left > 0; left-- {
		bench.Write(buf)
	}
}
// BenchmarkHash64 measures one-shot Sum512 calls over the first 64 bytes
// of buf.
func BenchmarkHash64(b *testing.B) {
	const msgLen = 64
	b.SetBytes(msgLen)
	for iter := 1; iter <= b.N; iter++ {
		Sum512(buf[:msgLen])
	}
}
// BenchmarkHash128 measures one-shot Sum512 calls over the first 128 bytes
// of buf.
func BenchmarkHash128(b *testing.B) {
	const msgLen = 128
	b.SetBytes(msgLen)
	for iter := 1; iter <= b.N; iter++ {
		Sum512(buf[:msgLen])
	}
}
// BenchmarkHash1K measures one-shot Sum512 calls over the first 1024 bytes
// of buf.
func BenchmarkHash1K(b *testing.B) {
	const msgLen = 1024
	b.SetBytes(msgLen)
	for iter := 1; iter <= b.N; iter++ {
		Sum512(buf[:msgLen])
	}
}
// Test vectors taken from reference implementation in C#.
var golden = []string{
"786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce",
@@ -632,3 +596,55 @@ var goldenKeyed = []string{
"d444bfa2362a96df213d070e33fa841f51334e4e76866b8139e8af3bb3398be2dfaddcbc56b9146de9f68118dc5829e74b0c28d7711907b121f9161cb92b69a9",
"142709d62e28fcccd0af97fad0f8465b971e82201dc51070faa0372aa43e92484be1c1e73ba10906d5d1853db6a4106e0a7bf9800d373d6dee2d46d62ef2a461",
}
// Shared fixtures for the blake2b benchmarks: the digest that the Write
// benchmarks feed and the 128 KiB input buffer every benchmark slices from.
var (
	bench = New512()
	buf   = make([]byte, 128<<10)
)
// BenchmarkHash64 measures one-shot Sum512 calls over the first 64 bytes
// of buf.
func BenchmarkHash64(b *testing.B) {
	const msgLen = 64
	b.SetBytes(msgLen)
	for iter := 1; iter <= b.N; iter++ {
		Sum512(buf[:msgLen])
	}
}
// BenchmarkHash128 measures one-shot Sum512 calls over the first 128 bytes
// of buf.
func BenchmarkHash128(b *testing.B) {
	const msgLen = 128
	b.SetBytes(msgLen)
	for iter := 1; iter <= b.N; iter++ {
		Sum512(buf[:msgLen])
	}
}
// BenchmarkWrite1K measures Write throughput on the package-level bench
// digest when each call receives the first 1 KiB of buf.
func BenchmarkWrite1K(b *testing.B) {
	const chunkSize = 1024
	b.SetBytes(chunkSize)
	for left := b.N; left > 0; left-- {
		bench.Write(buf[:chunkSize])
	}
}
// Benchmark writes of 8KiB bytes.
func BenchmarkWrite8K(b *testing.B) {
b.SetBytes(int64(len(buf)))
for i := 0; i < b.N; i++ {
bench.Write(buf[:8192])
}
}
// Benchmark writes of 32KiB bytes.
func BenchmarkWrite32K(b *testing.B) {
b.SetBytes(int64(len(buf)))
for i := 0; i < b.N; i++ {
bench.Write(buf[:32*1024])
}
}
// BenchmarkWrite128K measures Write throughput on the package-level bench
// digest when each call is handed the whole of buf; the per-iteration byte
// count reported to the framework is len(buf).
func BenchmarkWrite128K(b *testing.B) {
	perIter := int64(len(buf))
	b.SetBytes(perIter)
	for left := b.N; left > 0; left-- {
		bench.Write(buf)
	}
}

View File

@@ -20,9 +20,9 @@
package blake2b
//go:noescape
func compressSSE(p []uint8, in, iv , t, f, shffle, out []uint64)
func blockSSE(p []uint8, in, iv, t, f, shffle, out []uint64)
func compress(d *digest, p []uint8) {
func compressSSE(d *digest, p []uint8) {
h0, h1, h2, h3, h4, h5, h6, h7 := d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]
in := make([]uint64, 8, 8)
@@ -42,7 +42,7 @@ func compress(d *digest, p []uint8) {
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = h0, h1, h2, h3, h4, h5, h6, h7
compressSSE(p, in, iv[:], d.t[:], d.f[:], shffle, out)
blockSSE(p, in, iv[:], d.t[:], d.f[:], shffle, out)
h0, h1, h2, h3, h4, h5, h6, h7 = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
@@ -51,3 +51,11 @@ func compress(d *digest, p []uint8) {
d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = h0, h1, h2, h3, h4, h5, h6, h7
}
// compress runs the compression step for p on d. It dispatches to
// compressSSE when the package-level sse flag is set and falls back to
// compressGeneric otherwise.
func compress(d *digest, p []uint8) {
	if !sse {
		compressGeneric(d, p)
		return
	}
	compressSSE(d, p)
}

View File

@@ -42,9 +42,8 @@
// rounds 2 & 12 are identical)
//
// func compressSSE(compressSSE(p []uint8, in, iv, t, f, shffle, out []uint64)
TEXT ·compressSSE(SB), 7, $0
// func blockSSE(p []uint8, in, iv, t, f, shffle, out []uint64)
TEXT ·blockSSE(SB), 7, $0
// REGISTER USE
// X0 - X7: v0 - v15
@@ -1888,3 +1887,4 @@ TEXT ·compressSSE(SB), 7, $0
MOVOU X3, 48(DX) // out[6]+out[7] = X3
RET

1418
compress_generic.go Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

29
cpuid.go Normal file
View File

@@ -0,0 +1,29 @@
// +build 386,!gccgo amd64,!gccgo
// Copyright 2016 Frank Wessels <fwessels@xs4all.nl>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package blake2b
// cpuid is declared without a Go body; cpuid_386.s and cpuid_amd64.s each
// provide a TEXT ·cpuid implementation that executes the CPUID instruction
// with AX set to op and returns the resulting AX, BX, CX and DX values.
func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
// sse is true when haveSSE reports that SIMD instructions are available.
// It is evaluated once, at package initialization, and read by compress to
// choose between the SSE and the generic code path.
var sse = haveSSE()
// haveSSE reports whether bit 25 of the EDX value returned by cpuid(1) is
// set, which this package treats as "streaming SIMD instructions available".
//
// NOTE(review): only that single bit is tested. The assembly in this package
// uses MOVOU, which looks like an SSE2-class instruction; confirm that the
// SSE code path needs no feature bit beyond the one checked here.
func haveSSE() bool {
	const sseBit = 1 << 25
	_, _, _, edx := cpuid(1)
	return edx&sseBit != 0
}

15
cpuid_386.s Normal file
View File

@@ -0,0 +1,15 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build 386,!gccgo
// func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
//
// Executes CPUID with AX loaded from op and CX zeroed, then returns the
// resulting AX, BX, CX and DX register values as eax, ebx, ecx and edx.
TEXT ·cpuid(SB), 7, $0
XORL CX, CX // zero CX before CPUID; presumably the sub-leaf selector - confirm against the CPUID reference
MOVL op+0(FP), AX // AX = op, the requested CPUID function
CPUID
MOVL AX, eax+4(FP) // results go to consecutive 4-byte slots following op
MOVL BX, ebx+8(FP)
MOVL CX, ecx+12(FP)
MOVL DX, edx+16(FP)
RET

14
cpuid_amd64.s Normal file
View File

@@ -0,0 +1,14 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build amd64,!gccgo
// func cpuid(op uint32) (eax, ebx, ecx, edx uint32)
//
// Executes CPUID with AX loaded from op and CX zeroed, then returns the
// resulting AX, BX, CX and DX register values as eax, ebx, ecx and edx.
TEXT ·cpuid(SB), 7, $0
XORQ CX, CX // zero CX before CPUID; presumably the sub-leaf selector - confirm against the CPUID reference
MOVL op+0(FP), AX // AX = op, the requested CPUID function
CPUID
MOVL AX, eax+8(FP) // results start at FP offset 8 and occupy consecutive 4-byte slots
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET