1
0
mirror of https://github.com/fumiama/blake2b-simd.git synced 2026-06-05 02:00:26 +08:00

UNDIAGONALIZE macro

This commit is contained in:
frankw
2016-06-25 09:04:36 +02:00
parent b0e1a3d003
commit 53bb6668a3
2 changed files with 15 additions and 2 deletions

View File

@@ -33,8 +33,6 @@ func TestCompress(t *testing.T) {
hGo.Write(in)
sumGo := fmt.Sprintf("%x", hGo.Sum(nil))
// Digest for testing generated with modified codahale/blake2 with ROUND macro that stops after DIAGONALIZE
sumGo = "2306b43fd384cba9820ad5a79c6a0f19775f205e9e13f5956b8c271cf6d5b165de31323244522c59eca5c96d943d76df4b1770b86e26dae7839042fa1875bc60"
hSSE.Write(in)
sumSSE := fmt.Sprintf("%x", hSSE.Sum(nil))

View File

@@ -146,6 +146,21 @@ TEXT ·compressSSE(SB), 7, $0
BYTE $0xc4; BYTE $0x41; BYTE $0x09; BYTE $0x6c; BYTE $0xfe // VPUNPCKLQDQ XMM15, XMM14, XMM14 /* _mm_unpacklo_epi64(t1, t1) */
BYTE $0xc4; BYTE $0xc1; BYTE $0x61; BYTE $0x6d; BYTE $0xdf // VPUNPCKHQDQ XMM3, XMM3, XMM15 /* row2h = _mm_unpackhi_epi64(row2h, ) */
// UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
//
// Register roles in this section, as given by the per-line comments:
//   X2/X3 = row2l/row2h, X4/X5 = row3l/row3h, X6/X7 = row4l/row4h,
//   X13 = t0, X14 = t1, X15 = scratch holding the inner _mm_unpacklo_epi64 result.
// Net effect on the 64-bit lanes, written {lo, hi}; right-hand sides are the values on entry:
//   row3l = row3h                   row3h = row3l
//   row2l = {row2h.hi, row2l.lo}    row2h = {row2l.hi, row2h.lo}
//   row4l = {row4l.hi, row4h.lo}    row4h = {row4h.hi, row4l.lo}
// Only X2-X7 and X13-X15 are written here.
// NOTE(review): the VPUNPCK* instructions are emitted as raw BYTE sequences, presumably because
// the assembler lacks mnemonics for these VEX forms - confirm before replacing them.
MOVOU X4, X13 /* t0 = row3l */
MOVOU X5, X4 /* row3l = row3h */
MOVOU X13, X5 /* row3h = t0 */
MOVOU X2, X13 /* t0 = row2l; saved because X2 is overwritten before its high lane is consumed */
MOVOU X6, X14 /* t1 = row4l; saved because X6 is overwritten before its low lane is consumed */
BYTE $0xc5; BYTE $0x69; BYTE $0x6c; BYTE $0xfa // VPUNPCKLQDQ XMM15, XMM2, XMM2 /* X15 = _mm_unpacklo_epi64(row2l, row2l) */
BYTE $0xc4; BYTE $0xc1; BYTE $0x61; BYTE $0x6d; BYTE $0xd7 // VPUNPCKHQDQ XMM2, XMM3, XMM15 /* row2l = _mm_unpackhi_epi64(row2h, X15) */
BYTE $0xc5; BYTE $0x61; BYTE $0x6c; BYTE $0xfb // VPUNPCKLQDQ XMM15, XMM3, XMM3 /* X15 = _mm_unpacklo_epi64(row2h, row2h); X3 still holds the entry value of row2h */
BYTE $0xc4; BYTE $0xc1; BYTE $0x11; BYTE $0x6d; BYTE $0xdf // VPUNPCKHQDQ XMM3, XMM13, XMM15 /* row2h = _mm_unpackhi_epi64(t0, X15) */
BYTE $0xc5; BYTE $0x41; BYTE $0x6c; BYTE $0xff // VPUNPCKLQDQ XMM15, XMM7, XMM7 /* X15 = _mm_unpacklo_epi64(row4h, row4h) */
BYTE $0xc4; BYTE $0xc1; BYTE $0x49; BYTE $0x6d; BYTE $0xf7 // VPUNPCKHQDQ XMM6, XMM6, XMM15 /* row4l = _mm_unpackhi_epi64(row4l, X15) */
BYTE $0xc4; BYTE $0x41; BYTE $0x09; BYTE $0x6c; BYTE $0xfe // VPUNPCKLQDQ XMM15, XMM14, XMM14 /* X15 = _mm_unpacklo_epi64(t1, t1) */
BYTE $0xc4; BYTE $0xc1; BYTE $0x41; BYTE $0x6d; BYTE $0xff // VPUNPCKHQDQ XMM7, XMM7, XMM15 /* row4h = _mm_unpackhi_epi64(row4h, X15) */
// Reload digest
MOVQ in+24(FP), SI // SI: &in
MOVOU 0(SI), X12 // X12 = in[0]+in[1] /* row1l = LOAD( &S->h[0] ); */