mirror of
https://github.com/fumiama/blake2b-simd.git
synced 2026-06-05 18:20:29 +08:00
Skeleton for assembly and test function
This commit is contained in:
27
blake2b.go
27
blake2b.go
@@ -34,6 +34,7 @@ type digest struct {
|
||||
isKeyed bool // indicates whether hash was keyed
|
||||
size uint8 // digest size in bytes
|
||||
isLastNode bool // indicates processing of the last node in tree hashing
|
||||
sseOptimized bool // temp bool to indicate use of SSE (during dev only)
|
||||
}
|
||||
|
||||
// Initialization values.
|
||||
@@ -162,9 +163,12 @@ func (d *digest) initialize(c *Config) {
|
||||
}
|
||||
|
||||
// New512 returns a new hash.Hash computing the BLAKE2b 64-byte checksum.
|
||||
func New512() hash.Hash {
|
||||
func New512(enableSSE bool) hash.Hash {
|
||||
d := new(digest)
|
||||
d.initialize(defaultConfig)
|
||||
if enableSSE {
|
||||
d.EnableSSE()
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
@@ -206,6 +210,9 @@ func (d *digest) Size() int { return int(d.size) }
|
||||
// BlockSize returns the algorithm block size in bytes.
|
||||
func (d *digest) BlockSize() int { return BlockSize }
|
||||
|
||||
// Enable SSE
|
||||
func (d *digest) EnableSSE() { d.sseOptimized = true }
|
||||
|
||||
func (d *digest) Write(p []byte) (nn int, err error) {
|
||||
nn = len(p)
|
||||
left := BlockSize - d.nx
|
||||
@@ -213,7 +220,11 @@ func (d *digest) Write(p []byte) (nn int, err error) {
|
||||
// Process buffer.
|
||||
copy(d.x[d.nx:], p[:left])
|
||||
p = p[left:]
|
||||
blocks(d, d.x[:])
|
||||
if d.sseOptimized {
|
||||
compress(d, d.x[:])
|
||||
} else {
|
||||
blocks(d, d.x[:])
|
||||
}
|
||||
d.nx = 0
|
||||
}
|
||||
// Process full blocks except for the last one.
|
||||
@@ -222,7 +233,11 @@ func (d *digest) Write(p []byte) (nn int, err error) {
|
||||
if n == len(p) {
|
||||
n -= BlockSize
|
||||
}
|
||||
blocks(d, p[:n])
|
||||
if d.sseOptimized {
|
||||
compress(d, p[:n])
|
||||
} else {
|
||||
blocks(d, p[:n])
|
||||
}
|
||||
p = p[n:]
|
||||
}
|
||||
// Fill buffer.
|
||||
@@ -262,7 +277,11 @@ func (d *digest) checkSum() [Size]byte {
|
||||
d.f[1] = 0xffffffffffffffff
|
||||
}
|
||||
// Compress last block.
|
||||
blocks(d, d.x[:])
|
||||
if d.sseOptimized {
|
||||
compress(d, d.x[:])
|
||||
} else {
|
||||
blocks(d, d.x[:])
|
||||
}
|
||||
|
||||
var out [Size]byte
|
||||
j := 0
|
||||
|
||||
@@ -27,9 +27,17 @@ func TestCompress(t *testing.T) {
|
||||
for i := range in {
|
||||
in[i] = byte(i)
|
||||
}
|
||||
good := "2319e3789c47e2daa5fe807f61bec2a1a6537fa03f19ff32e87eecbfd64b7e0e8ccff439ac333b040f19b0c4ddd11a61e24ac1fe0f10a039806c5dcc0da3d115"
|
||||
if good != fmt.Sprintf("%x", Sum512([]byte(in))) {
|
||||
digest := fmt.Sprintf("%x", Sum512([]byte(in)))
|
||||
t.Errorf("Sum512(): \nexpected %s\ngot %s", good, digest)
|
||||
|
||||
hGo := New512(false)
|
||||
hSSE := New512(true)
|
||||
|
||||
hGo.Write(in)
|
||||
sumGo := fmt.Sprintf("%x", hGo.Sum(nil))
|
||||
|
||||
hSSE.Write(in)
|
||||
sumSSE := fmt.Sprintf("%x", hSSE.Sum(nil))
|
||||
|
||||
if sumGo != sumSSE {
|
||||
t.Errorf("expected %s\ngot %s", sumGo, sumSSE)
|
||||
}
|
||||
}
|
||||
|
||||
48
compress_amd64.go
Normal file
48
compress_amd64.go
Normal file
@@ -0,0 +1,48 @@
|
||||
//+build !noasm
|
||||
//+build !appengine
|
||||
|
||||
/*
|
||||
* Copyright 2016 Frank Wessels <fwessels@xs4all.nl>
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package blake2b
|
||||
|
||||
//go:noescape
|
||||
func compressSSE(p []uint8, in, iv, t, f, out []uint64)
|
||||
|
||||
func compress(d *digest, p []uint8) {
|
||||
h0, h1, h2, h3, h4, h5, h6, h7 := d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7]
|
||||
|
||||
in := make([]uint64, 8, 8)
|
||||
out := make([]uint64, 8, 8)
|
||||
|
||||
for len(p) >= BlockSize {
|
||||
// Increment counter.
|
||||
d.t[0] += BlockSize
|
||||
if d.t[0] < BlockSize {
|
||||
d.t[1]++
|
||||
}
|
||||
|
||||
in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7] = h0, h1, h2, h3, h4, h5, h6, h7
|
||||
|
||||
compressSSE(p, in, iv[:], d.t[:], d.f[:], out)
|
||||
|
||||
h0, h1, h2, h3, h4, h5, h6, h7 = out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]
|
||||
|
||||
p = p[BlockSize:]
|
||||
}
|
||||
|
||||
d.h[0], d.h[1], d.h[2], d.h[3], d.h[4], d.h[5], d.h[6], d.h[7] = h0, h1, h2, h3, h4, h5, h6, h7
|
||||
}
|
||||
66
compress_amd64.s
Normal file
66
compress_amd64.s
Normal file
@@ -0,0 +1,66 @@
|
||||
//+build !noasm
//+build !appengine
|
||||
|
||||
//
|
||||
// Copyright 2016 Frank Wessels <fwessels@xs4all.nl>
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
// func compressSSE(p []uint8, in, iv, t, f, out []uint64)
//
// Skeleton of the SSE compression routine: it loads the chaining value (in),
// the initialization vector (iv), the counter (t) and the finalization flags
// (f), combines them with XORs and stores eight 64-bit words to out. The
// message block p is not read yet.
//
// Argument layout on the frame (each slice header is 24 bytes on amd64):
//   p+0(FP)  in+24(FP)  iv+48(FP)  t+72(FP)  f+96(FP)  out+120(FP)
//
// All loads and stores are unaligned 16-byte moves, so in, iv and out must
// hold at least 8 words and t and f at least 2 words.
//
// Flags 7 = NOPROF|DUPOK|NOSPLIT (see textflag.h); no local frame is used.
TEXT ·compressSSE(SB), 7, $0

	// Load digest
	MOVQ  in+24(FP), SI   // SI: &in
	MOVOU 0(SI), X0       // X0 = in[0]+in[1]      /* row1l = LOAD( &S->h[0] ); */
	MOVOU 16(SI), X1      // X1 = in[2]+in[3]      /* row1h = LOAD( &S->h[2] ); */
	MOVOU 32(SI), X2      // X2 = in[4]+in[5]      /* row2l = LOAD( &S->h[4] ); */
	MOVOU 48(SI), X3      // X3 = in[6]+in[7]      /* row2h = LOAD( &S->h[6] ); */

	// Load initialization vector
	MOVQ  iv+48(FP), DX   // DX: &iv
	MOVOU 0(DX), X4       // X4 = iv[0]+iv[1]      /* row3l = LOAD( &blake2b_IV[0] ); */
	MOVOU 16(DX), X5      // X5 = iv[2]+iv[3]      /* row3h = LOAD( &blake2b_IV[2] ); */
	MOVOU 32(DX), X6      // X6 = iv[4]+iv[5]      /*                        LOAD( &blake2b_IV[4] )                      */
	MOVQ  t+72(FP), SI    // SI: &t
	MOVOU 0(SI), X7       // X7 = t[0]+t[1]        /*                                                LOAD( &S->t[0] )    */
	PXOR  X7, X6          // X6 = X6 ^ X7          /* row4l = _mm_xor_si128(                       ,                  ); */
	MOVOU 48(DX), X7      // X7 = iv[6]+iv[7]      /*                        LOAD( &blake2b_IV[6] )                      */
	MOVQ  f+96(FP), SI    // SI: &f
	MOVOU 0(SI), X8       // X8 = f[0]+f[1]        /*                                                LOAD( &S->f[0] )    */
	PXOR  X8, X7          // X7 = X7 ^ X8          /* row4h = _mm_xor_si128(                       ,                  ); */

	// Reload digest
	MOVQ  in+24(FP), SI   // SI: &in
	MOVOU 0(SI), X12      // X12 = in[0]+in[1]     /* row1l = LOAD( &S->h[0] ); */
	MOVOU 16(SI), X13     // X13 = in[2]+in[3]     /* row1h = LOAD( &S->h[2] ); */
	MOVOU 32(SI), X14     // X14 = in[4]+in[5]     /* row2l = LOAD( &S->h[4] ); */
	MOVOU 48(SI), X15     // X15 = in[6]+in[7]     /* row2h = LOAD( &S->h[6] ); */

	// Final computations and prepare for storing
	PXOR X4, X0           // X0 = X0 ^ X4          /* row1l = _mm_xor_si128( row3l, row1l ); */
	PXOR X5, X1           // X1 = X1 ^ X5          /* row1h = _mm_xor_si128( row3h, row1h ); */
	PXOR X12, X0          // X0 = X0 ^ X12         /* STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) ); */
	PXOR X13, X1          // X1 = X1 ^ X13         /* STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) ); */
	PXOR X6, X2           // X2 = X2 ^ X6          /* row2l = _mm_xor_si128( row4l, row2l ); */
	PXOR X7, X3           // X3 = X3 ^ X7          /* row2h = _mm_xor_si128( row4h, row2h ); */
	PXOR X14, X2          // X2 = X2 ^ X14         /* STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) ); */
	PXOR X15, X3          // X3 = X3 ^ X15         /* STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) ); */

	// Store digest
	MOVQ  out+120(FP), DX // DX: &out
	MOVOU X0, 0(DX)       // out[0]+out[1] = X0
	MOVOU X1, 16(DX)      // out[2]+out[3] = X1
	MOVOU X2, 32(DX)      // out[4]+out[5] = X2
	MOVOU X3, 48(DX)      // out[6]+out[7] = X3

	RET
|
||||
1420
compress_noasm.go
Normal file
1420
compress_noasm.go
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user