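// Mirror of https://github.com/superseriousbusiness/gotosocial.git
// (synced 2024-12-28 01:56:30 +00:00).
// File listing: 2792 lines, 54 KiB. The hosting page labels this file "ArmAsm",
// but the content below is Go (Plan 9 syntax) assembly for amd64.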
// Code generated by command: go run blamka_amd64.go -out ../blamka_amd64.s -pkg argon2. DO NOT EDIT.
|
|
|
|
//go:build amd64 && gc && !purego
|
|
|
|
#include "textflag.h"
|
|
|
|
// func blamkaSSE4(b *block)
|
|
// Requires: SSE2, SSSE3
|
|
TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8
|
|
MOVQ b+0(FP), AX
|
|
MOVOU ·c40<>+0(SB), X10
|
|
MOVOU ·c48<>+0(SB), X11
|
|
MOVOU (AX), X0
|
|
MOVOU 16(AX), X1
|
|
MOVOU 32(AX), X2
|
|
MOVOU 48(AX), X3
|
|
MOVOU 64(AX), X4
|
|
MOVOU 80(AX), X5
|
|
MOVOU 96(AX), X6
|
|
MOVOU 112(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, (AX)
|
|
MOVOU X1, 16(AX)
|
|
MOVOU X2, 32(AX)
|
|
MOVOU X3, 48(AX)
|
|
MOVOU X4, 64(AX)
|
|
MOVOU X5, 80(AX)
|
|
MOVOU X6, 96(AX)
|
|
MOVOU X7, 112(AX)
|
|
MOVOU 128(AX), X0
|
|
MOVOU 144(AX), X1
|
|
MOVOU 160(AX), X2
|
|
MOVOU 176(AX), X3
|
|
MOVOU 192(AX), X4
|
|
MOVOU 208(AX), X5
|
|
MOVOU 224(AX), X6
|
|
MOVOU 240(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 128(AX)
|
|
MOVOU X1, 144(AX)
|
|
MOVOU X2, 160(AX)
|
|
MOVOU X3, 176(AX)
|
|
MOVOU X4, 192(AX)
|
|
MOVOU X5, 208(AX)
|
|
MOVOU X6, 224(AX)
|
|
MOVOU X7, 240(AX)
|
|
MOVOU 256(AX), X0
|
|
MOVOU 272(AX), X1
|
|
MOVOU 288(AX), X2
|
|
MOVOU 304(AX), X3
|
|
MOVOU 320(AX), X4
|
|
MOVOU 336(AX), X5
|
|
MOVOU 352(AX), X6
|
|
MOVOU 368(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 256(AX)
|
|
MOVOU X1, 272(AX)
|
|
MOVOU X2, 288(AX)
|
|
MOVOU X3, 304(AX)
|
|
MOVOU X4, 320(AX)
|
|
MOVOU X5, 336(AX)
|
|
MOVOU X6, 352(AX)
|
|
MOVOU X7, 368(AX)
|
|
MOVOU 384(AX), X0
|
|
MOVOU 400(AX), X1
|
|
MOVOU 416(AX), X2
|
|
MOVOU 432(AX), X3
|
|
MOVOU 448(AX), X4
|
|
MOVOU 464(AX), X5
|
|
MOVOU 480(AX), X6
|
|
MOVOU 496(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 384(AX)
|
|
MOVOU X1, 400(AX)
|
|
MOVOU X2, 416(AX)
|
|
MOVOU X3, 432(AX)
|
|
MOVOU X4, 448(AX)
|
|
MOVOU X5, 464(AX)
|
|
MOVOU X6, 480(AX)
|
|
MOVOU X7, 496(AX)
|
|
MOVOU 512(AX), X0
|
|
MOVOU 528(AX), X1
|
|
MOVOU 544(AX), X2
|
|
MOVOU 560(AX), X3
|
|
MOVOU 576(AX), X4
|
|
MOVOU 592(AX), X5
|
|
MOVOU 608(AX), X6
|
|
MOVOU 624(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 512(AX)
|
|
MOVOU X1, 528(AX)
|
|
MOVOU X2, 544(AX)
|
|
MOVOU X3, 560(AX)
|
|
MOVOU X4, 576(AX)
|
|
MOVOU X5, 592(AX)
|
|
MOVOU X6, 608(AX)
|
|
MOVOU X7, 624(AX)
|
|
MOVOU 640(AX), X0
|
|
MOVOU 656(AX), X1
|
|
MOVOU 672(AX), X2
|
|
MOVOU 688(AX), X3
|
|
MOVOU 704(AX), X4
|
|
MOVOU 720(AX), X5
|
|
MOVOU 736(AX), X6
|
|
MOVOU 752(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 640(AX)
|
|
MOVOU X1, 656(AX)
|
|
MOVOU X2, 672(AX)
|
|
MOVOU X3, 688(AX)
|
|
MOVOU X4, 704(AX)
|
|
MOVOU X5, 720(AX)
|
|
MOVOU X6, 736(AX)
|
|
MOVOU X7, 752(AX)
|
|
MOVOU 768(AX), X0
|
|
MOVOU 784(AX), X1
|
|
MOVOU 800(AX), X2
|
|
MOVOU 816(AX), X3
|
|
MOVOU 832(AX), X4
|
|
MOVOU 848(AX), X5
|
|
MOVOU 864(AX), X6
|
|
MOVOU 880(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 768(AX)
|
|
MOVOU X1, 784(AX)
|
|
MOVOU X2, 800(AX)
|
|
MOVOU X3, 816(AX)
|
|
MOVOU X4, 832(AX)
|
|
MOVOU X5, 848(AX)
|
|
MOVOU X6, 864(AX)
|
|
MOVOU X7, 880(AX)
|
|
MOVOU 896(AX), X0
|
|
MOVOU 912(AX), X1
|
|
MOVOU 928(AX), X2
|
|
MOVOU 944(AX), X3
|
|
MOVOU 960(AX), X4
|
|
MOVOU 976(AX), X5
|
|
MOVOU 992(AX), X6
|
|
MOVOU 1008(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 896(AX)
|
|
MOVOU X1, 912(AX)
|
|
MOVOU X2, 928(AX)
|
|
MOVOU X3, 944(AX)
|
|
MOVOU X4, 960(AX)
|
|
MOVOU X5, 976(AX)
|
|
MOVOU X6, 992(AX)
|
|
MOVOU X7, 1008(AX)
|
|
MOVOU (AX), X0
|
|
MOVOU 128(AX), X1
|
|
MOVOU 256(AX), X2
|
|
MOVOU 384(AX), X3
|
|
MOVOU 512(AX), X4
|
|
MOVOU 640(AX), X5
|
|
MOVOU 768(AX), X6
|
|
MOVOU 896(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, (AX)
|
|
MOVOU X1, 128(AX)
|
|
MOVOU X2, 256(AX)
|
|
MOVOU X3, 384(AX)
|
|
MOVOU X4, 512(AX)
|
|
MOVOU X5, 640(AX)
|
|
MOVOU X6, 768(AX)
|
|
MOVOU X7, 896(AX)
|
|
MOVOU 16(AX), X0
|
|
MOVOU 144(AX), X1
|
|
MOVOU 272(AX), X2
|
|
MOVOU 400(AX), X3
|
|
MOVOU 528(AX), X4
|
|
MOVOU 656(AX), X5
|
|
MOVOU 784(AX), X6
|
|
MOVOU 912(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 16(AX)
|
|
MOVOU X1, 144(AX)
|
|
MOVOU X2, 272(AX)
|
|
MOVOU X3, 400(AX)
|
|
MOVOU X4, 528(AX)
|
|
MOVOU X5, 656(AX)
|
|
MOVOU X6, 784(AX)
|
|
MOVOU X7, 912(AX)
|
|
MOVOU 32(AX), X0
|
|
MOVOU 160(AX), X1
|
|
MOVOU 288(AX), X2
|
|
MOVOU 416(AX), X3
|
|
MOVOU 544(AX), X4
|
|
MOVOU 672(AX), X5
|
|
MOVOU 800(AX), X6
|
|
MOVOU 928(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 32(AX)
|
|
MOVOU X1, 160(AX)
|
|
MOVOU X2, 288(AX)
|
|
MOVOU X3, 416(AX)
|
|
MOVOU X4, 544(AX)
|
|
MOVOU X5, 672(AX)
|
|
MOVOU X6, 800(AX)
|
|
MOVOU X7, 928(AX)
|
|
MOVOU 48(AX), X0
|
|
MOVOU 176(AX), X1
|
|
MOVOU 304(AX), X2
|
|
MOVOU 432(AX), X3
|
|
MOVOU 560(AX), X4
|
|
MOVOU 688(AX), X5
|
|
MOVOU 816(AX), X6
|
|
MOVOU 944(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 48(AX)
|
|
MOVOU X1, 176(AX)
|
|
MOVOU X2, 304(AX)
|
|
MOVOU X3, 432(AX)
|
|
MOVOU X4, 560(AX)
|
|
MOVOU X5, 688(AX)
|
|
MOVOU X6, 816(AX)
|
|
MOVOU X7, 944(AX)
|
|
MOVOU 64(AX), X0
|
|
MOVOU 192(AX), X1
|
|
MOVOU 320(AX), X2
|
|
MOVOU 448(AX), X3
|
|
MOVOU 576(AX), X4
|
|
MOVOU 704(AX), X5
|
|
MOVOU 832(AX), X6
|
|
MOVOU 960(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 64(AX)
|
|
MOVOU X1, 192(AX)
|
|
MOVOU X2, 320(AX)
|
|
MOVOU X3, 448(AX)
|
|
MOVOU X4, 576(AX)
|
|
MOVOU X5, 704(AX)
|
|
MOVOU X6, 832(AX)
|
|
MOVOU X7, 960(AX)
|
|
MOVOU 80(AX), X0
|
|
MOVOU 208(AX), X1
|
|
MOVOU 336(AX), X2
|
|
MOVOU 464(AX), X3
|
|
MOVOU 592(AX), X4
|
|
MOVOU 720(AX), X5
|
|
MOVOU 848(AX), X6
|
|
MOVOU 976(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 80(AX)
|
|
MOVOU X1, 208(AX)
|
|
MOVOU X2, 336(AX)
|
|
MOVOU X3, 464(AX)
|
|
MOVOU X4, 592(AX)
|
|
MOVOU X5, 720(AX)
|
|
MOVOU X6, 848(AX)
|
|
MOVOU X7, 976(AX)
|
|
MOVOU 96(AX), X0
|
|
MOVOU 224(AX), X1
|
|
MOVOU 352(AX), X2
|
|
MOVOU 480(AX), X3
|
|
MOVOU 608(AX), X4
|
|
MOVOU 736(AX), X5
|
|
MOVOU 864(AX), X6
|
|
MOVOU 992(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 96(AX)
|
|
MOVOU X1, 224(AX)
|
|
MOVOU X2, 352(AX)
|
|
MOVOU X3, 480(AX)
|
|
MOVOU X4, 608(AX)
|
|
MOVOU X5, 736(AX)
|
|
MOVOU X6, 864(AX)
|
|
MOVOU X7, 992(AX)
|
|
MOVOU 112(AX), X0
|
|
MOVOU 240(AX), X1
|
|
MOVOU 368(AX), X2
|
|
MOVOU 496(AX), X3
|
|
MOVOU 624(AX), X4
|
|
MOVOU 752(AX), X5
|
|
MOVOU 880(AX), X6
|
|
MOVOU 1008(AX), X7
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X6, X8
|
|
PUNPCKLQDQ X6, X9
|
|
PUNPCKHQDQ X7, X6
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X7, X9
|
|
MOVO X8, X7
|
|
MOVO X2, X8
|
|
PUNPCKHQDQ X9, X7
|
|
PUNPCKLQDQ X3, X9
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X3
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFD $0xb1, X6, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
PSHUFB X10, X2
|
|
MOVO X0, X8
|
|
PMULULQ X2, X8
|
|
PADDQ X2, X0
|
|
PADDQ X8, X0
|
|
PADDQ X8, X0
|
|
PXOR X0, X6
|
|
PSHUFB X11, X6
|
|
MOVO X4, X8
|
|
PMULULQ X6, X8
|
|
PADDQ X6, X4
|
|
PADDQ X8, X4
|
|
PADDQ X8, X4
|
|
PXOR X4, X2
|
|
MOVO X2, X8
|
|
PADDQ X2, X8
|
|
PSRLQ $0x3f, X2
|
|
PXOR X8, X2
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFD $0xb1, X7, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
PSHUFB X10, X3
|
|
MOVO X1, X8
|
|
PMULULQ X3, X8
|
|
PADDQ X3, X1
|
|
PADDQ X8, X1
|
|
PADDQ X8, X1
|
|
PXOR X1, X7
|
|
PSHUFB X11, X7
|
|
MOVO X5, X8
|
|
PMULULQ X7, X8
|
|
PADDQ X7, X5
|
|
PADDQ X8, X5
|
|
PADDQ X8, X5
|
|
PXOR X5, X3
|
|
MOVO X3, X8
|
|
PADDQ X3, X8
|
|
PSRLQ $0x3f, X3
|
|
PXOR X8, X3
|
|
MOVO X4, X8
|
|
MOVO X5, X4
|
|
MOVO X8, X5
|
|
MOVO X2, X8
|
|
PUNPCKLQDQ X2, X9
|
|
PUNPCKHQDQ X3, X2
|
|
PUNPCKHQDQ X9, X2
|
|
PUNPCKLQDQ X3, X9
|
|
MOVO X8, X3
|
|
MOVO X6, X8
|
|
PUNPCKHQDQ X9, X3
|
|
PUNPCKLQDQ X7, X9
|
|
PUNPCKHQDQ X9, X6
|
|
PUNPCKLQDQ X8, X9
|
|
PUNPCKHQDQ X9, X7
|
|
MOVOU X0, 112(AX)
|
|
MOVOU X1, 240(AX)
|
|
MOVOU X2, 368(AX)
|
|
MOVOU X3, 496(AX)
|
|
MOVOU X4, 624(AX)
|
|
MOVOU X5, 752(AX)
|
|
MOVOU X6, 880(AX)
|
|
MOVOU X7, 1008(AX)
|
|
RET
|
|
|
|
// c40 is a 16-byte PSHUFB control mask (blamkaSSE4 loads it into X10).
// PSHUFB sets destination byte i to source byte mask[i]. Within each 8-byte
// half the indices are 3,4,5,6,7,0,1,2, so every 64-bit lane is rotated
// right by 24 bits, which is the same as rotating it left by 40 bits.
DATA ·c40<>+0(SB)/8, $0x0201000706050403
DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·c40<>(SB), RODATA|NOPTR, $16
|
|
|
|
// c48 is a 16-byte PSHUFB control mask (blamkaSSE4 loads it into X11).
// PSHUFB sets destination byte i to source byte mask[i]. Within each 8-byte
// half the indices are 2,3,4,5,6,7,0,1, so every 64-bit lane is rotated
// right by 16 bits, which is the same as rotating it left by 48 bits.
DATA ·c48<>+0(SB)/8, $0x0100070605040302
DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·c48<>(SB), RODATA|NOPTR, $16
|
|
|
|
// func mixBlocksSSE2(out *block, a *block, b *block, c *block)
// Requires: SSE2
//
// Stores a XOR b XOR c into out, 16 bytes per iteration. The previous
// contents of out are not read. The loop runs 64 times, so exactly 1024
// bytes are read through each of a, b and c and 1024 bytes are written
// through out. All memory accesses use MOVOU, so no 16-byte alignment is
// required of any pointer.
//
// Register roles:
//   DX = out cursor, AX = a cursor, BX = b cursor, CX = c cursor
//   DI = remaining count, starts at 128 and drops by 2 per 16-byte step
//        (i.e. it counts 8-byte words)
//   X0-X2 = scratch
TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32
	MOVQ out+0(FP), DX
	MOVQ a+8(FP), AX
	MOVQ b+16(FP), BX
	MOVQ c+24(FP), CX
	MOVQ $0x00000080, DI

loop:
	// X0 = a[i] ^ b[i] ^ c[i] for the current 16-byte chunk.
	MOVOU (AX), X0
	MOVOU (BX), X1
	MOVOU (CX), X2
	PXOR  X1, X0
	PXOR  X2, X0
	MOVOU X0, (DX)

	// Advance all four cursors to the next 16-byte chunk.
	ADDQ $0x10, AX
	ADDQ $0x10, BX
	ADDQ $0x10, CX
	ADDQ $0x10, DX

	// SUBQ sets ZF when the count reaches zero; JA (CF=0 and ZF=0) loops
	// until then.
	SUBQ $0x02, DI
	JA   loop
	RET
|
|
|
|
// func xorBlocksSSE2(out *block, a *block, b *block, c *block)
// Requires: SSE2
//
// XORs a XOR b XOR c into out in place (out ^= a ^ b ^ c), 16 bytes per
// iteration. Unlike mixBlocksSSE2, the existing contents of out are loaded
// and folded into the result before it is stored back. The loop runs 64
// times, so exactly 1024 bytes are processed through each pointer. All
// memory accesses use MOVOU, so no 16-byte alignment is required of any
// pointer.
//
// Register roles:
//   DX = out cursor, AX = a cursor, BX = b cursor, CX = c cursor
//   DI = remaining count, starts at 128 and drops by 2 per 16-byte step
//        (i.e. it counts 8-byte words)
//   X0-X3 = scratch
TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32
	MOVQ out+0(FP), DX
	MOVQ a+8(FP), AX
	MOVQ b+16(FP), BX
	MOVQ c+24(FP), CX
	MOVQ $0x00000080, DI

loop:
	// X0 = a[i] ^ b[i] ^ c[i] ^ out[i] for the current 16-byte chunk.
	MOVOU (AX), X0
	MOVOU (BX), X1
	MOVOU (CX), X2
	MOVOU (DX), X3
	PXOR  X1, X0
	PXOR  X2, X0
	PXOR  X3, X0
	MOVOU X0, (DX)

	// Advance all four cursors to the next 16-byte chunk.
	ADDQ $0x10, AX
	ADDQ $0x10, BX
	ADDQ $0x10, CX
	ADDQ $0x10, DX

	// SUBQ sets ZF when the count reaches zero; JA (CF=0 and ZF=0) loops
	// until then.
	SUBQ $0x02, DI
	JA   loop
	RET
|