// Code generated by command: go run blake2s_amd64_asm.go -out ../blake2s_amd64.s -pkg blake2s. DO NOT EDIT.

//go:build amd64 && gc && !purego

#include "textflag.h"

// func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
// Requires: SSE2
//
// Processes blocks 64 bytes at a time, updating the eight state words at h
// and the 64-bit value at c in place. The length is only tested after a
// block has been processed, and only for reaching exactly zero, so the
// caller must pass a non-zero multiple of 64 bytes.
//
// Register use:
//   AX = h, BX = c, CX = flag, SI = current block pointer, DX = bytes left
//   BP = 16-byte aligned scratch area inside the local frame
//   X0, X1   = state words h[0..3], h[4..7]
//   X2, X3   = iv0, iv1 constants
//   X4..X7   = working rows for the current block
//   X8       = temporary used to build rotations out of two shifts
//   X12      = per-block counter increment (0x40)
//   X13, X14 = rol16 / rol8 byte-shuffle masks (loaded but unused here)
//   X15      = counter in the low 64 bits, flag in the high 64 bits
TEXT ·hashBlocksSSE2(SB), $672-48
	MOVQ h+0(FP), AX
	MOVQ c+8(FP), BX
	MOVL flag+16(FP), CX
	MOVQ blocks_base+24(FP), SI
	MOVQ blocks_len+32(FP), DX

	// Round SP up to the next 16-byte boundary so MOVO/PADDL may use it.
	MOVQ SP, BP
	ADDQ $0x0f, BP
	ANDQ $-16, BP

	// Build {counter, flag} at 0(BP); it is loaded into X15 below.
	MOVQ (BX), R9
	MOVQ R9, (BP)
	MOVQ CX, 8(BP)
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU iv0<>+0(SB), X2
	MOVOU iv1<>+0(SB), X3
	MOVOU counter<>+0(SB), X12
	MOVOU rol16<>+0(SB), X13
	MOVOU rol8<>+0(SB), X14
	MOVO (BP), X15

loop:
	// Start the working rows from the state and IVs, advance the counter
	// by 64 and fold counter/flag into the last row.
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7
	PADDQ X12, X15
	PXOR X15, X7

	// Load the 64-byte block as eight quadwords, then scatter each of its
	// sixteen 32-bit words to ten slots in the scratch area 16(BP)..655(BP).
	// Each 16-byte group read by the PADDL instructions below is therefore
	// a ready-made vector of four message words.
	MOVQ (SI), R8
	MOVQ 8(SI), R9
	MOVQ 16(SI), R10
	MOVQ 24(SI), R11
	MOVQ 32(SI), R12
	MOVQ 40(SI), R13
	MOVQ 48(SI), R14
	MOVQ 56(SI), R15
	MOVL R8, 16(BP)
	MOVL R8, 116(BP)
	MOVL R8, 164(BP)
	MOVL R8, 264(BP)
	MOVL R8, 288(BP)
	MOVL R8, 344(BP)
	MOVL R8, 432(BP)
	MOVL R8, 512(BP)
	MOVL R8, 540(BP)
	MOVL R8, 652(BP)
	SHRQ $0x20, R8
	MOVL R8, 32(BP)
	MOVL R8, 112(BP)
	MOVL R8, 200(BP)
	MOVL R8, 228(BP)
	MOVL R8, 320(BP)
	MOVL R8, 380(BP)
	MOVL R8, 404(BP)
	MOVL R8, 488(BP)
	MOVL R8, 568(BP)
	MOVL R8, 604(BP)
	MOVL R9, 20(BP)
	MOVL R9, 132(BP)
	MOVL R9, 168(BP)
	MOVL R9, 240(BP)
	MOVL R9, 280(BP)
	MOVL R9, 336(BP)
	MOVL R9, 456(BP)
	MOVL R9, 508(BP)
	MOVL R9, 576(BP)
	MOVL R9, 608(BP)
	SHRQ $0x20, R9
	MOVL R9, 36(BP)
	MOVL R9, 140(BP)
	MOVL R9, 180(BP)
	MOVL R9, 212(BP)
	MOVL R9, 316(BP)
	MOVL R9, 364(BP)
	MOVL R9, 452(BP)
	MOVL R9, 476(BP)
	MOVL R9, 552(BP)
	MOVL R9, 632(BP)
	MOVL R10, 24(BP)
	MOVL R10, 84(BP)
	MOVL R10, 204(BP)
	MOVL R10, 248(BP)
	MOVL R10, 296(BP)
	MOVL R10, 368(BP)
	MOVL R10, 412(BP)
	MOVL R10, 516(BP)
	MOVL R10, 584(BP)
	MOVL R10, 612(BP)
	SHRQ $0x20, R10
	MOVL R10, 40(BP)
	MOVL R10, 124(BP)
	MOVL R10, 152(BP)
	MOVL R10, 244(BP)
	MOVL R10, 276(BP)
	MOVL R10, 388(BP)
	MOVL R10, 416(BP)
	MOVL R10, 496(BP)
	MOVL R10, 588(BP)
	MOVL R10, 620(BP)
	MOVL R11, 28(BP)
	MOVL R11, 108(BP)
	MOVL R11, 196(BP)
	MOVL R11, 256(BP)
	MOVL R11, 312(BP)
	MOVL R11, 340(BP)
	MOVL R11, 436(BP)
	MOVL R11, 520(BP)
	MOVL R11, 528(BP)
	MOVL R11, 616(BP)
	SHRQ $0x20, R11
	MOVL R11, 44(BP)
	MOVL R11, 136(BP)
	MOVL R11, 184(BP)
	MOVL R11, 208(BP)
	MOVL R11, 292(BP)
	MOVL R11, 372(BP)
	MOVL R11, 448(BP)
	MOVL R11, 468(BP)
	MOVL R11, 580(BP)
	MOVL R11, 600(BP)
	MOVL R12, 48(BP)
	MOVL R12, 100(BP)
	MOVL R12, 160(BP)
	MOVL R12, 268(BP)
	MOVL R12, 328(BP)
	MOVL R12, 348(BP)
	MOVL R12, 444(BP)
	MOVL R12, 504(BP)
	MOVL R12, 556(BP)
	MOVL R12, 596(BP)
	SHRQ $0x20, R12
	MOVL R12, 64(BP)
	MOVL R12, 88(BP)
	MOVL R12, 188(BP)
	MOVL R12, 224(BP)
	MOVL R12, 272(BP)
	MOVL R12, 396(BP)
	MOVL R12, 440(BP)
	MOVL R12, 492(BP)
	MOVL R12, 548(BP)
	MOVL R12, 628(BP)
	MOVL R13, 52(BP)
	MOVL R13, 96(BP)
	MOVL R13, 176(BP)
	MOVL R13, 260(BP)
	MOVL R13, 284(BP)
	MOVL R13, 356(BP)
	MOVL R13, 428(BP)
	MOVL R13, 524(BP)
	MOVL R13, 572(BP)
	MOVL R13, 592(BP)
	SHRQ $0x20, R13
	MOVL R13, 68(BP)
	MOVL R13, 120(BP)
	MOVL R13, 144(BP)
	MOVL R13, 220(BP)
	MOVL R13, 308(BP)
	MOVL R13, 360(BP)
	MOVL R13, 460(BP)
	MOVL R13, 480(BP)
	MOVL R13, 536(BP)
	MOVL R13, 640(BP)
	MOVL R14, 56(BP)
	MOVL R14, 128(BP)
	MOVL R14, 148(BP)
	MOVL R14, 232(BP)
	MOVL R14, 324(BP)
	MOVL R14, 352(BP)
	MOVL R14, 400(BP)
	MOVL R14, 472(BP)
	MOVL R14, 560(BP)
	MOVL R14, 648(BP)
	SHRQ $0x20, R14
	MOVL R14, 72(BP)
	MOVL R14, 92(BP)
	MOVL R14, 172(BP)
	MOVL R14, 216(BP)
	MOVL R14, 332(BP)
	MOVL R14, 384(BP)
	MOVL R14, 424(BP)
	MOVL R14, 464(BP)
	MOVL R14, 564(BP)
	MOVL R14, 636(BP)
	MOVL R15, 60(BP)
	MOVL R15, 80(BP)
	MOVL R15, 192(BP)
	MOVL R15, 236(BP)
	MOVL R15, 304(BP)
	MOVL R15, 392(BP)
	MOVL R15, 408(BP)
	MOVL R15, 484(BP)
	MOVL R15, 532(BP)
	MOVL R15, 644(BP)
	SHRQ $0x20, R15
	MOVL R15, 76(BP)
	MOVL R15, 104(BP)
	MOVL R15, 156(BP)
	MOVL R15, 252(BP)
	MOVL R15, 300(BP)
	MOVL R15, 376(BP)
	MOVL R15, 420(BP)
	MOVL R15, 500(BP)
	MOVL R15, 544(BP)
	MOVL R15, 624(BP)

	// Ten rounds follow, each made of two half-rounds. Every rotation is
	// built from a left shift, a right shift and an XOR via X8: the shift
	// pairs 16/16, 20/12, 24/8 and 25/7 rotate right by 16, 12, 8 and 7.
	// The PSHUFL immediates 0x39, 0x4e and 0x93 rotate the four lanes of a
	// row by one, two and three positions; the second half-round of each
	// round applies them to X7/X6/X5 to undo the first.

	// Round 1.
	PADDL 16(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 32(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 48(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 64(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 2.
	PADDL 80(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 96(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 112(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 128(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 3.
	PADDL 144(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 160(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 176(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 192(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 4.
	PADDL 208(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 224(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 240(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 256(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 5.
	PADDL 272(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 288(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 304(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 320(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 6.
	PADDL 336(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 352(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 368(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 384(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 7.
	PADDL 400(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 416(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 432(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 448(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 8.
	PADDL 464(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 480(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 496(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 512(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 9.
	PADDL 528(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 544(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 560(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 576(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 10.
	PADDL 592(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 608(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 624(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x10, X8
	PSRLL $0x10, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 640(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	MOVO X7, X8
	PSLLL $0x18, X8
	PSRLL $0x08, X7
	PXOR X8, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Fold the four working rows back into the state, then advance to the
	// next 64-byte block.
	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X0
	PXOR X7, X1
	LEAQ 64(SI), SI
	SUBQ $0x40, DX
	JNE loop

	// Write the updated counter back to *c and the state back to *h.
	MOVO X15, (BP)
	MOVQ (BP), R9
	MOVQ R9, (BX)
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	RET

// iv0 and iv1 are the constants loaded into X2 and X3 at the start of every
// block.
DATA iv0<>+0(SB)/4, $0x6a09e667
DATA iv0<>+4(SB)/4, $0xbb67ae85
DATA iv0<>+8(SB)/4, $0x3c6ef372
DATA iv0<>+12(SB)/4, $0xa54ff53a
GLOBL iv0<>(SB), RODATA|NOPTR, $16

DATA iv1<>+0(SB)/4, $0x510e527f
DATA iv1<>+4(SB)/4, $0x9b05688c
DATA iv1<>+8(SB)/4, $0x1f83d9ab
DATA iv1<>+12(SB)/4, $0x5be0cd19
GLOBL iv1<>(SB), RODATA|NOPTR, $16

// counter is added to X15 with PADDQ once per block: 64 in the low quadword,
// zero in the high quadword (which holds the flag).
DATA counter<>+0(SB)/8, $0x0000000000000040
DATA counter<>+8(SB)/8, $0x0000000000000000
GLOBL counter<>(SB), RODATA|NOPTR, $16

// rol16 is a PSHUFB mask that swaps the two 16-bit halves of every 32-bit
// lane, i.e. rotates each lane by 16 bits.
DATA rol16<>+0(SB)/8, $0x0504070601000302
DATA rol16<>+8(SB)/8, $0x0d0c0f0e09080b0a
GLOBL rol16<>(SB), RODATA|NOPTR, $16

// rol8 is a PSHUFB mask that moves byte i+1 of every 32-bit lane to byte i
// (wrapping), i.e. rotates each lane right by 8 bits.
DATA rol8<>+0(SB)/8, $0x0407060500030201
DATA rol8<>+8(SB)/8, $0x0c0f0e0d080b0a09
GLOBL rol8<>(SB), RODATA|NOPTR, $16

// func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
// Requires: SSE2, SSSE3
//
// Same structure, register use and scratch layout as hashBlocksSSE2, except
// that the 16-bit and 8-bit rotations are done with a single PSHUFB against
// the rol16 (X13) and rol8 (X14) masks instead of shift/shift/XOR. The 12-bit
// and 7-bit rotations still use X8 as a shift temporary. The caller must pass
// a non-zero multiple of 64 bytes.
TEXT ·hashBlocksSSSE3(SB), $672-48
	MOVQ h+0(FP), AX
	MOVQ c+8(FP), BX
	MOVL flag+16(FP), CX
	MOVQ blocks_base+24(FP), SI
	MOVQ blocks_len+32(FP), DX

	// Round SP up to the next 16-byte boundary so MOVO/PADDL may use it.
	MOVQ SP, BP
	ADDQ $0x0f, BP
	ANDQ $-16, BP

	// Build {counter, flag} at 0(BP); it is loaded into X15 below.
	MOVQ (BX), R9
	MOVQ R9, (BP)
	MOVQ CX, 8(BP)
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU iv0<>+0(SB), X2
	MOVOU iv1<>+0(SB), X3
	MOVOU counter<>+0(SB), X12
	MOVOU rol16<>+0(SB), X13
	MOVOU rol8<>+0(SB), X14
	MOVO (BP), X15

loop:
	// Start the working rows from the state and IVs, advance the counter
	// by 64 and fold counter/flag into the last row.
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7
	PADDQ X12, X15
	PXOR X15, X7

	// Load the 64-byte block and scatter each 32-bit word to ten slots in
	// the scratch area 16(BP)..655(BP), so every PADDL below reads a
	// ready-made vector of four message words.
	MOVQ (SI), R8
	MOVQ 8(SI), R9
	MOVQ 16(SI), R10
	MOVQ 24(SI), R11
	MOVQ 32(SI), R12
	MOVQ 40(SI), R13
	MOVQ 48(SI), R14
	MOVQ 56(SI), R15
	MOVL R8, 16(BP)
	MOVL R8, 116(BP)
	MOVL R8, 164(BP)
	MOVL R8, 264(BP)
	MOVL R8, 288(BP)
	MOVL R8, 344(BP)
	MOVL R8, 432(BP)
	MOVL R8, 512(BP)
	MOVL R8, 540(BP)
	MOVL R8, 652(BP)
	SHRQ $0x20, R8
	MOVL R8, 32(BP)
	MOVL R8, 112(BP)
	MOVL R8, 200(BP)
	MOVL R8, 228(BP)
	MOVL R8, 320(BP)
	MOVL R8, 380(BP)
	MOVL R8, 404(BP)
	MOVL R8, 488(BP)
	MOVL R8, 568(BP)
	MOVL R8, 604(BP)
	MOVL R9, 20(BP)
	MOVL R9, 132(BP)
	MOVL R9, 168(BP)
	MOVL R9, 240(BP)
	MOVL R9, 280(BP)
	MOVL R9, 336(BP)
	MOVL R9, 456(BP)
	MOVL R9, 508(BP)
	MOVL R9, 576(BP)
	MOVL R9, 608(BP)
	SHRQ $0x20, R9
	MOVL R9, 36(BP)
	MOVL R9, 140(BP)
	MOVL R9, 180(BP)
	MOVL R9, 212(BP)
	MOVL R9, 316(BP)
	MOVL R9, 364(BP)
	MOVL R9, 452(BP)
	MOVL R9, 476(BP)
	MOVL R9, 552(BP)
	MOVL R9, 632(BP)
	MOVL R10, 24(BP)
	MOVL R10, 84(BP)
	MOVL R10, 204(BP)
	MOVL R10, 248(BP)
	MOVL R10, 296(BP)
	MOVL R10, 368(BP)
	MOVL R10, 412(BP)
	MOVL R10, 516(BP)
	MOVL R10, 584(BP)
	MOVL R10, 612(BP)
	SHRQ $0x20, R10
	MOVL R10, 40(BP)
	MOVL R10, 124(BP)
	MOVL R10, 152(BP)
	MOVL R10, 244(BP)
	MOVL R10, 276(BP)
	MOVL R10, 388(BP)
	MOVL R10, 416(BP)
	MOVL R10, 496(BP)
	MOVL R10, 588(BP)
	MOVL R10, 620(BP)
	MOVL R11, 28(BP)
	MOVL R11, 108(BP)
	MOVL R11, 196(BP)
	MOVL R11, 256(BP)
	MOVL R11, 312(BP)
	MOVL R11, 340(BP)
	MOVL R11, 436(BP)
	MOVL R11, 520(BP)
	MOVL R11, 528(BP)
	MOVL R11, 616(BP)
	SHRQ $0x20, R11
	MOVL R11, 44(BP)
	MOVL R11, 136(BP)
	MOVL R11, 184(BP)
	MOVL R11, 208(BP)
	MOVL R11, 292(BP)
	MOVL R11, 372(BP)
	MOVL R11, 448(BP)
	MOVL R11, 468(BP)
	MOVL R11, 580(BP)
	MOVL R11, 600(BP)
	MOVL R12, 48(BP)
	MOVL R12, 100(BP)
	MOVL R12, 160(BP)
	MOVL R12, 268(BP)
	MOVL R12, 328(BP)
	MOVL R12, 348(BP)
	MOVL R12, 444(BP)
	MOVL R12, 504(BP)
	MOVL R12, 556(BP)
	MOVL R12, 596(BP)
	SHRQ $0x20, R12
	MOVL R12, 64(BP)
	MOVL R12, 88(BP)
	MOVL R12, 188(BP)
	MOVL R12, 224(BP)
	MOVL R12, 272(BP)
	MOVL R12, 396(BP)
	MOVL R12, 440(BP)
	MOVL R12, 492(BP)
	MOVL R12, 548(BP)
	MOVL R12, 628(BP)
	MOVL R13, 52(BP)
	MOVL R13, 96(BP)
	MOVL R13, 176(BP)
	MOVL R13, 260(BP)
	MOVL R13, 284(BP)
	MOVL R13, 356(BP)
	MOVL R13, 428(BP)
	MOVL R13, 524(BP)
	MOVL R13, 572(BP)
	MOVL R13, 592(BP)
	SHRQ $0x20, R13
	MOVL R13, 68(BP)
	MOVL R13, 120(BP)
	MOVL R13, 144(BP)
	MOVL R13, 220(BP)
	MOVL R13, 308(BP)
	MOVL R13, 360(BP)
	MOVL R13, 460(BP)
	MOVL R13, 480(BP)
	MOVL R13, 536(BP)
	MOVL R13, 640(BP)
	MOVL R14, 56(BP)
	MOVL R14, 128(BP)
	MOVL R14, 148(BP)
	MOVL R14, 232(BP)
	MOVL R14, 324(BP)
	MOVL R14, 352(BP)
	MOVL R14, 400(BP)
	MOVL R14, 472(BP)
	MOVL R14, 560(BP)
	MOVL R14, 648(BP)
	SHRQ $0x20, R14
	MOVL R14, 72(BP)
	MOVL R14, 92(BP)
	MOVL R14, 172(BP)
	MOVL R14, 216(BP)
	MOVL R14, 332(BP)
	MOVL R14, 384(BP)
	MOVL R14, 424(BP)
	MOVL R14, 464(BP)
	MOVL R14, 564(BP)
	MOVL R14, 636(BP)
	MOVL R15, 60(BP)
	MOVL R15, 80(BP)
	MOVL R15, 192(BP)
	MOVL R15, 236(BP)
	MOVL R15, 304(BP)
	MOVL R15, 392(BP)
	MOVL R15, 408(BP)
	MOVL R15, 484(BP)
	MOVL R15, 532(BP)
	MOVL R15, 644(BP)
	SHRQ $0x20, R15
	MOVL R15, 76(BP)
	MOVL R15, 104(BP)
	MOVL R15, 156(BP)
	MOVL R15, 252(BP)
	MOVL R15, 300(BP)
	MOVL R15, 376(BP)
	MOVL R15, 420(BP)
	MOVL R15, 500(BP)
	MOVL R15, 544(BP)
	MOVL R15, 624(BP)

	// Ten rounds of two half-rounds each. PSHUFB X13 / PSHUFB X14 rotate
	// X7 by 16 and 8 bits; the 20/12 and 25/7 shift pairs rotate X5 right
	// by 12 and 7 bits. PSHUFL 0x39/0x4e/0x93 rotate the lanes of a row by
	// one, two and three positions.

	// Round 1.
	PADDL 16(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 32(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 48(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 64(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 2.
	PADDL 80(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 96(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 112(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 128(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 3.
	PADDL 144(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 160(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 176(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 192(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 4.
	PADDL 208(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 224(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 240(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 256(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 5.
	PADDL 272(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 288(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 304(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 320(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 6.
	PADDL 336(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 352(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 368(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 384(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 7.
	PADDL 400(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 416(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 432(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 448(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 8.
	PADDL 464(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 480(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 496(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 512(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 9.
	PADDL 528(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 544(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 560(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 576(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 10.
	PADDL 592(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 608(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL 624(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL 640(BP), X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Fold the four working rows back into the state, then advance to the
	// next 64-byte block.
	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X0
	PXOR X7, X1
	LEAQ 64(SI), SI
	SUBQ $0x40, DX
	JNE loop

	// Write the updated counter back to *c and the state back to *h.
	MOVO X15, (BP)
	MOVQ (BP), R9
	MOVQ R9, (BX)
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	RET

// func hashBlocksSSE4(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
// Requires: SSE2, SSE4.1, SSSE3
//
// Same state/counter handling as the other variants, but no message scratch
// table: before every round the four message vectors are gathered straight
// from the block into X8..X11 with MOVL + PINSRD, which is why the frame is
// only 32 bytes (16 for the counter/flag pair plus alignment slack). X8 is
// consumed as a message vector first and then reused as the shift temporary.
// The caller must pass a non-zero multiple of 64 bytes.
TEXT ·hashBlocksSSE4(SB), $32-48
	MOVQ h+0(FP), AX
	MOVQ c+8(FP), BX
	MOVL flag+16(FP), CX
	MOVQ blocks_base+24(FP), SI
	MOVQ blocks_len+32(FP), DX

	// Round SP up to the next 16-byte boundary so MOVO may use it.
	MOVQ SP, BP
	ADDQ $0x0f, BP
	ANDQ $-16, BP

	// Build {counter, flag} at 0(BP); it is loaded into X15 below.
	MOVQ (BX), R9
	MOVQ R9, (BP)
	MOVQ CX, 8(BP)
	MOVOU (AX), X0
	MOVOU 16(AX), X1
	MOVOU iv0<>+0(SB), X2
	MOVOU iv1<>+0(SB), X3
	MOVOU counter<>+0(SB), X12
	MOVOU rol16<>+0(SB), X13
	MOVOU rol8<>+0(SB), X14
	MOVO (BP), X15

loop:
	// Start the working rows from the state and IVs, advance the counter
	// by 64 and fold counter/flag into the last row.
	MOVO X0, X4
	MOVO X1, X5
	MOVO X2, X6
	MOVO X3, X7
	PADDQ X12, X15
	PXOR X15, X7

	// Round 1: gather message words, then two half-rounds.
	MOVL (SI), X8
	PINSRD $0x01, 8(SI), X8
	PINSRD $0x02, 16(SI), X8
	PINSRD $0x03, 24(SI), X8
	MOVL 4(SI), X9
	PINSRD $0x01, 12(SI), X9
	PINSRD $0x02, 20(SI), X9
	PINSRD $0x03, 28(SI), X9
	MOVL 32(SI), X10
	PINSRD $0x01, 40(SI), X10
	PINSRD $0x02, 48(SI), X10
	PINSRD $0x03, 56(SI), X10
	MOVL 36(SI), X11
	PINSRD $0x01, 44(SI), X11
	PINSRD $0x02, 52(SI), X11
	PINSRD $0x03, 60(SI), X11
	PADDL X8, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X9, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL X10, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X11, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 2.
	MOVL 56(SI), X8
	PINSRD $0x01, 16(SI), X8
	PINSRD $0x02, 36(SI), X8
	PINSRD $0x03, 52(SI), X8
	MOVL 40(SI), X9
	PINSRD $0x01, 32(SI), X9
	PINSRD $0x02, 60(SI), X9
	PINSRD $0x03, 24(SI), X9
	MOVL 4(SI), X10
	PINSRD $0x01, (SI), X10
	PINSRD $0x02, 44(SI), X10
	PINSRD $0x03, 20(SI), X10
	MOVL 48(SI), X11
	PINSRD $0x01, 8(SI), X11
	PINSRD $0x02, 28(SI), X11
	PINSRD $0x03, 12(SI), X11
	PADDL X8, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X9, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL X10, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X11, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 3.
	MOVL 44(SI), X8
	PINSRD $0x01, 48(SI), X8
	PINSRD $0x02, 20(SI), X8
	PINSRD $0x03, 60(SI), X8
	MOVL 32(SI), X9
	PINSRD $0x01, (SI), X9
	PINSRD $0x02, 8(SI), X9
	PINSRD $0x03, 52(SI), X9
	MOVL 40(SI), X10
	PINSRD $0x01, 12(SI), X10
	PINSRD $0x02, 28(SI), X10
	PINSRD $0x03, 36(SI), X10
	MOVL 56(SI), X11
	PINSRD $0x01, 24(SI), X11
	PINSRD $0x02, 4(SI), X11
	PINSRD $0x03, 16(SI), X11
	PADDL X8, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X9, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL X10, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X11, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 4.
	MOVL 28(SI), X8
	PINSRD $0x01, 12(SI), X8
	PINSRD $0x02, 52(SI), X8
	PINSRD $0x03, 44(SI), X8
	MOVL 36(SI), X9
	PINSRD $0x01, 4(SI), X9
	PINSRD $0x02, 48(SI), X9
	PINSRD $0x03, 56(SI), X9
	MOVL 8(SI), X10
	PINSRD $0x01, 20(SI), X10
	PINSRD $0x02, 16(SI), X10
	PINSRD $0x03, 60(SI), X10
	MOVL 24(SI), X11
	PINSRD $0x01, 40(SI), X11
	PINSRD $0x02, (SI), X11
	PINSRD $0x03, 32(SI), X11
	PADDL X8, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X9, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL X10, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X11, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 5.
	MOVL 36(SI), X8
	PINSRD $0x01, 20(SI), X8
	PINSRD $0x02, 8(SI), X8
	PINSRD $0x03, 40(SI), X8
	MOVL (SI), X9
	PINSRD $0x01, 28(SI), X9
	PINSRD $0x02, 16(SI), X9
	PINSRD $0x03, 60(SI), X9
	MOVL 56(SI), X10
	PINSRD $0x01, 44(SI), X10
	PINSRD $0x02, 24(SI), X10
	PINSRD $0x03, 12(SI), X10
	MOVL 4(SI), X11
	PINSRD $0x01, 48(SI), X11
	PINSRD $0x02, 32(SI), X11
	PINSRD $0x03, 52(SI), X11
	PADDL X8, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X9, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL X10, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X11, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 6.
	MOVL 8(SI), X8
	PINSRD $0x01, 24(SI), X8
	PINSRD $0x02, (SI), X8
	PINSRD $0x03, 32(SI), X8
	MOVL 48(SI), X9
	PINSRD $0x01, 40(SI), X9
	PINSRD $0x02, 44(SI), X9
	PINSRD $0x03, 12(SI), X9
	MOVL 16(SI), X10
	PINSRD $0x01, 28(SI), X10
	PINSRD $0x02, 60(SI), X10
	PINSRD $0x03, 4(SI), X10
	MOVL 52(SI), X11
	PINSRD $0x01, 20(SI), X11
	PINSRD $0x02, 56(SI), X11
	PINSRD $0x03, 36(SI), X11
	PADDL X8, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X9, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL X10, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X11, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 7.
	MOVL 48(SI), X8
	PINSRD $0x01, 4(SI), X8
	PINSRD $0x02, 56(SI), X8
	PINSRD $0x03, 16(SI), X8
	MOVL 20(SI), X9
	PINSRD $0x01, 60(SI), X9
	PINSRD $0x02, 52(SI), X9
	PINSRD $0x03, 40(SI), X9
	MOVL (SI), X10
	PINSRD $0x01, 24(SI), X10
	PINSRD $0x02, 36(SI), X10
	PINSRD $0x03, 32(SI), X10
	MOVL 28(SI), X11
	PINSRD $0x01, 12(SI), X11
	PINSRD $0x02, 8(SI), X11
	PINSRD $0x03, 44(SI), X11
	PADDL X8, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X9, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL X10, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X11, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 8.
	MOVL 52(SI), X8
	PINSRD $0x01, 28(SI), X8
	PINSRD $0x02, 48(SI), X8
	PINSRD $0x03, 12(SI), X8
	MOVL 44(SI), X9
	PINSRD $0x01, 56(SI), X9
	PINSRD $0x02, 4(SI), X9
	PINSRD $0x03, 36(SI), X9
	MOVL 20(SI), X10
	PINSRD $0x01, 60(SI), X10
	PINSRD $0x02, 32(SI), X10
	PINSRD $0x03, 8(SI), X10
	MOVL (SI), X11
	PINSRD $0x01, 16(SI), X11
	PINSRD $0x02, 24(SI), X11
	PINSRD $0x03, 40(SI), X11
	PADDL X8, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X9, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL X10, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X11, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 9.
	MOVL 24(SI), X8
	PINSRD $0x01, 56(SI), X8
	PINSRD $0x02, 44(SI), X8
	PINSRD $0x03, (SI), X8
	MOVL 60(SI), X9
	PINSRD $0x01, 36(SI), X9
	PINSRD $0x02, 12(SI), X9
	PINSRD $0x03, 32(SI), X9
	MOVL 48(SI), X10
	PINSRD $0x01, 52(SI), X10
	PINSRD $0x02, 4(SI), X10
	PINSRD $0x03, 40(SI), X10
	MOVL 8(SI), X11
	PINSRD $0x01, 28(SI), X11
	PINSRD $0x02, 16(SI), X11
	PINSRD $0x03, 20(SI), X11
	PADDL X8, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X9, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL X10, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X11, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Round 10.
	MOVL 40(SI), X8
	PINSRD $0x01, 32(SI), X8
	PINSRD $0x02, 28(SI), X8
	PINSRD $0x03, 4(SI), X8
	MOVL 8(SI), X9
	PINSRD $0x01, 16(SI), X9
	PINSRD $0x02, 24(SI), X9
	PINSRD $0x03, 20(SI), X9
	MOVL 60(SI), X10
	PINSRD $0x01, 36(SI), X10
	PINSRD $0x02, 12(SI), X10
	PINSRD $0x03, 52(SI), X10
	MOVL 44(SI), X11
	PINSRD $0x01, 56(SI), X11
	PINSRD $0x02, 48(SI), X11
	PINSRD $0x03, (SI), X11
	PADDL X8, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X9, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X5, X5
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X7, X7
	PADDL X10, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X13, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x14, X8
	PSRLL $0x0c, X5
	PXOR X8, X5
	PADDL X11, X4
	PADDL X5, X4
	PXOR X4, X7
	PSHUFB X14, X7
	PADDL X7, X6
	PXOR X6, X5
	MOVO X5, X8
	PSLLL $0x19, X8
	PSRLL $0x07, X5
	PXOR X8, X5
	PSHUFL $0x39, X7, X7
	PSHUFL $0x4e, X6, X6
	PSHUFL $0x93, X5, X5

	// Fold the four working rows back into the state, then advance to the
	// next 64-byte block.
	PXOR X4, X0
	PXOR X5, X1
	PXOR X6, X0
	PXOR X7, X1
	LEAQ 64(SI), SI
	SUBQ $0x40, DX
	JNE loop

	// Write the updated counter back to *c and the state back to *h.
	MOVO X15, (BP)
	MOVQ (BP), R9
	MOVQ R9, (BX)
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	RET