; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section .text code align=64

EXTERN OPENSSL_ia32cap_P

global sha1_block_data_order

ALIGN 16
sha1_block_data_order:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_sha1_block_data_order:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8

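; NOTE (editor annotation, not generated output): the arguments, remapped
; above from the Win64 registers, are ctx (five 32-bit chaining values),
; the input pointer, and the count of 64-byte blocks. The masks against
; OPENSSL_ia32cap_P appear to test, in order: SSSE3 (512 = 1<<9), the SHA
; extensions (536870912 = 1<<29), BMI1|AVX2|BMI2 (296) for the AVX2 path,
; and AVX (268435456 = 1<<28) together with OpenSSL's synthetic "Intel
; CPU" bit (1073741824 = 1<<30) for the AVX path; anything else falls
; through to the SSSE3 code.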
lea r10,[OPENSSL_ia32cap_P]
mov r9d,DWORD[r10]
mov r8d,DWORD[4+r10]
mov r10d,DWORD[8+r10]
test r8d,512
jz NEAR $L$ialu
test r10d,536870912
jnz NEAR _shaext_shortcut
and r10d,296
cmp r10d,296
je NEAR _avx2_shortcut
and r8d,268435456
and r9d,1073741824
or r8d,r9d
cmp r8d,1342177280
je NEAR _avx_shortcut
jmp NEAR _ssse3_shortcut

ALIGN 16
$L$ialu:
mov rax,rsp
push rbx
push rbp
push r12
push r13
push r14
mov r8,rdi
sub rsp,72
mov r9,rsi
and rsp,-64
mov r10,rdx
mov QWORD[64+rsp],rax
$L$prologue:

mov esi,DWORD[r8]
mov edi,DWORD[4+r8]
mov r11d,DWORD[8+r8]
mov r12d,DWORD[12+r8]
mov r13d,DWORD[16+r8]
jmp NEAR $L$loop

ALIGN 16
$L$loop:
mov edx,DWORD[r9]
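; NOTE (editor annotation): each round below computes
; e += rol(a,5) + F(b,c,d) + W[t] + K and rotates b left by 30.
; The lea constants are the standard SHA-1 round constants:
; 1518500249 = 0x5a827999 (rounds 0-19), 1859775393 = 0x6ed9eba1
; (20-39), -1894007588 = 0x8f1bbcdc (40-59), -899497514 = 0xca62c1d6
; (60-79).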
bswap edx
|
|
mov ebp,DWORD[4+r9]
|
|
mov eax,r12d
|
|
mov DWORD[rsp],edx
|
|
mov ecx,esi
|
|
bswap ebp
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
and eax,edi
|
|
lea r13d,[1518500249+r13*1+rdx]
|
|
add r13d,ecx
|
|
xor eax,r12d
|
|
rol edi,30
|
|
add r13d,eax
|
|
mov r14d,DWORD[8+r9]
|
|
mov eax,r11d
|
|
mov DWORD[4+rsp],ebp
|
|
mov ecx,r13d
|
|
bswap r14d
|
|
xor eax,edi
|
|
rol ecx,5
|
|
and eax,esi
|
|
lea r12d,[1518500249+r12*1+rbp]
|
|
add r12d,ecx
|
|
xor eax,r11d
|
|
rol esi,30
|
|
add r12d,eax
|
|
mov edx,DWORD[12+r9]
|
|
mov eax,edi
|
|
mov DWORD[8+rsp],r14d
|
|
mov ecx,r12d
|
|
bswap edx
|
|
xor eax,esi
|
|
rol ecx,5
|
|
and eax,r13d
|
|
lea r11d,[1518500249+r11*1+r14]
|
|
add r11d,ecx
|
|
xor eax,edi
|
|
rol r13d,30
|
|
add r11d,eax
|
|
mov ebp,DWORD[16+r9]
|
|
mov eax,esi
|
|
mov DWORD[12+rsp],edx
|
|
mov ecx,r11d
|
|
bswap ebp
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
and eax,r12d
|
|
lea edi,[1518500249+rdi*1+rdx]
|
|
add edi,ecx
|
|
xor eax,esi
|
|
rol r12d,30
|
|
add edi,eax
|
|
mov r14d,DWORD[20+r9]
|
|
mov eax,r13d
|
|
mov DWORD[16+rsp],ebp
|
|
mov ecx,edi
|
|
bswap r14d
|
|
xor eax,r12d
|
|
rol ecx,5
|
|
and eax,r11d
|
|
lea esi,[1518500249+rsi*1+rbp]
|
|
add esi,ecx
|
|
xor eax,r13d
|
|
rol r11d,30
|
|
add esi,eax
|
|
mov edx,DWORD[24+r9]
|
|
mov eax,r12d
|
|
mov DWORD[20+rsp],r14d
|
|
mov ecx,esi
|
|
bswap edx
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
and eax,edi
|
|
lea r13d,[1518500249+r13*1+r14]
|
|
add r13d,ecx
|
|
xor eax,r12d
|
|
rol edi,30
|
|
add r13d,eax
|
|
mov ebp,DWORD[28+r9]
|
|
mov eax,r11d
|
|
mov DWORD[24+rsp],edx
|
|
mov ecx,r13d
|
|
bswap ebp
|
|
xor eax,edi
|
|
rol ecx,5
|
|
and eax,esi
|
|
lea r12d,[1518500249+r12*1+rdx]
|
|
add r12d,ecx
|
|
xor eax,r11d
|
|
rol esi,30
|
|
add r12d,eax
|
|
mov r14d,DWORD[32+r9]
|
|
mov eax,edi
|
|
mov DWORD[28+rsp],ebp
|
|
mov ecx,r12d
|
|
bswap r14d
|
|
xor eax,esi
|
|
rol ecx,5
|
|
and eax,r13d
|
|
lea r11d,[1518500249+r11*1+rbp]
|
|
add r11d,ecx
|
|
xor eax,edi
|
|
rol r13d,30
|
|
add r11d,eax
|
|
mov edx,DWORD[36+r9]
|
|
mov eax,esi
|
|
mov DWORD[32+rsp],r14d
|
|
mov ecx,r11d
|
|
bswap edx
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
and eax,r12d
|
|
lea edi,[1518500249+rdi*1+r14]
|
|
add edi,ecx
|
|
xor eax,esi
|
|
rol r12d,30
|
|
add edi,eax
|
|
mov ebp,DWORD[40+r9]
|
|
mov eax,r13d
|
|
mov DWORD[36+rsp],edx
|
|
mov ecx,edi
|
|
bswap ebp
|
|
xor eax,r12d
|
|
rol ecx,5
|
|
and eax,r11d
|
|
lea esi,[1518500249+rsi*1+rdx]
|
|
add esi,ecx
|
|
xor eax,r13d
|
|
rol r11d,30
|
|
add esi,eax
|
|
mov r14d,DWORD[44+r9]
|
|
mov eax,r12d
|
|
mov DWORD[40+rsp],ebp
|
|
mov ecx,esi
|
|
bswap r14d
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
and eax,edi
|
|
lea r13d,[1518500249+r13*1+rbp]
|
|
add r13d,ecx
|
|
xor eax,r12d
|
|
rol edi,30
|
|
add r13d,eax
|
|
mov edx,DWORD[48+r9]
|
|
mov eax,r11d
|
|
mov DWORD[44+rsp],r14d
|
|
mov ecx,r13d
|
|
bswap edx
|
|
xor eax,edi
|
|
rol ecx,5
|
|
and eax,esi
|
|
lea r12d,[1518500249+r12*1+r14]
|
|
add r12d,ecx
|
|
xor eax,r11d
|
|
rol esi,30
|
|
add r12d,eax
|
|
mov ebp,DWORD[52+r9]
|
|
mov eax,edi
|
|
mov DWORD[48+rsp],edx
|
|
mov ecx,r12d
|
|
bswap ebp
|
|
xor eax,esi
|
|
rol ecx,5
|
|
and eax,r13d
|
|
lea r11d,[1518500249+r11*1+rdx]
|
|
add r11d,ecx
|
|
xor eax,edi
|
|
rol r13d,30
|
|
add r11d,eax
|
|
mov r14d,DWORD[56+r9]
|
|
mov eax,esi
|
|
mov DWORD[52+rsp],ebp
|
|
mov ecx,r11d
|
|
bswap r14d
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
and eax,r12d
|
|
lea edi,[1518500249+rdi*1+rbp]
|
|
add edi,ecx
|
|
xor eax,esi
|
|
rol r12d,30
|
|
add edi,eax
|
|
mov edx,DWORD[60+r9]
|
|
mov eax,r13d
|
|
mov DWORD[56+rsp],r14d
|
|
mov ecx,edi
|
|
bswap edx
|
|
xor eax,r12d
|
|
rol ecx,5
|
|
and eax,r11d
|
|
lea esi,[1518500249+rsi*1+r14]
|
|
add esi,ecx
|
|
xor eax,r13d
|
|
rol r11d,30
|
|
add esi,eax
|
|
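; NOTE (editor annotation): from round 16 on, each round also updates
; the message schedule in the 64-byte window at rsp,
; W[t] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1),
; interleaved with the round computation as the xor/rol-by-1 chain.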
xor ebp,DWORD[rsp]
mov eax,r12d
mov DWORD[60+rsp],edx
mov ecx,esi
xor ebp,DWORD[8+rsp]
xor eax,r11d
rol ecx,5
xor ebp,DWORD[32+rsp]
and eax,edi
lea r13d,[1518500249+r13*1+rdx]
rol edi,30
xor eax,r12d
add r13d,ecx
rol ebp,1
add r13d,eax
xor r14d,DWORD[4+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[rsp],ebp
|
|
mov ecx,r13d
|
|
xor r14d,DWORD[12+rsp]
|
|
xor eax,edi
|
|
rol ecx,5
|
|
xor r14d,DWORD[36+rsp]
|
|
and eax,esi
|
|
lea r12d,[1518500249+r12*1+rbp]
|
|
rol esi,30
|
|
xor eax,r11d
|
|
add r12d,ecx
|
|
rol r14d,1
|
|
add r12d,eax
|
|
xor edx,DWORD[8+rsp]
|
|
mov eax,edi
|
|
mov DWORD[4+rsp],r14d
|
|
mov ecx,r12d
|
|
xor edx,DWORD[16+rsp]
|
|
xor eax,esi
|
|
rol ecx,5
|
|
xor edx,DWORD[40+rsp]
|
|
and eax,r13d
|
|
lea r11d,[1518500249+r11*1+r14]
|
|
rol r13d,30
|
|
xor eax,edi
|
|
add r11d,ecx
|
|
rol edx,1
|
|
add r11d,eax
|
|
xor ebp,DWORD[12+rsp]
|
|
mov eax,esi
|
|
mov DWORD[8+rsp],edx
|
|
mov ecx,r11d
|
|
xor ebp,DWORD[20+rsp]
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
xor ebp,DWORD[44+rsp]
|
|
and eax,r12d
|
|
lea edi,[1518500249+rdi*1+rdx]
|
|
rol r12d,30
|
|
xor eax,esi
|
|
add edi,ecx
|
|
rol ebp,1
|
|
add edi,eax
|
|
xor r14d,DWORD[16+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[12+rsp],ebp
|
|
mov ecx,edi
|
|
xor r14d,DWORD[24+rsp]
|
|
xor eax,r12d
|
|
rol ecx,5
|
|
xor r14d,DWORD[48+rsp]
|
|
and eax,r11d
|
|
lea esi,[1518500249+rsi*1+rbp]
|
|
rol r11d,30
|
|
xor eax,r13d
|
|
add esi,ecx
|
|
rol r14d,1
|
|
add esi,eax
|
|
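; NOTE (editor annotation): rounds 20-39 switch to F = b ^ c ^ d
; with K = 0x6ed9eba1.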
xor edx,DWORD[20+rsp]
mov eax,edi
mov DWORD[16+rsp],r14d
mov ecx,esi
xor edx,DWORD[28+rsp]
xor eax,r12d
rol ecx,5
xor edx,DWORD[52+rsp]
lea r13d,[1859775393+r13*1+r14]
xor eax,r11d
add r13d,ecx
rol edi,30
add r13d,eax
rol edx,1
xor ebp,DWORD[24+rsp]
|
|
mov eax,esi
|
|
mov DWORD[20+rsp],edx
|
|
mov ecx,r13d
|
|
xor ebp,DWORD[32+rsp]
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
xor ebp,DWORD[56+rsp]
|
|
lea r12d,[1859775393+r12*1+rdx]
|
|
xor eax,edi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[28+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[24+rsp],ebp
|
|
mov ecx,r12d
|
|
xor r14d,DWORD[36+rsp]
|
|
xor eax,edi
|
|
rol ecx,5
|
|
xor r14d,DWORD[60+rsp]
|
|
lea r11d,[1859775393+r11*1+rbp]
|
|
xor eax,esi
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[32+rsp]
|
|
mov eax,r12d
|
|
mov DWORD[28+rsp],r14d
|
|
mov ecx,r11d
|
|
xor edx,DWORD[40+rsp]
|
|
xor eax,esi
|
|
rol ecx,5
|
|
xor edx,DWORD[rsp]
|
|
lea edi,[1859775393+rdi*1+r14]
|
|
xor eax,r13d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[36+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[32+rsp],edx
|
|
mov ecx,edi
|
|
xor ebp,DWORD[44+rsp]
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
xor ebp,DWORD[4+rsp]
|
|
lea esi,[1859775393+rsi*1+rdx]
|
|
xor eax,r12d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[40+rsp]
|
|
mov eax,edi
|
|
mov DWORD[36+rsp],ebp
|
|
mov ecx,esi
|
|
xor r14d,DWORD[48+rsp]
|
|
xor eax,r12d
|
|
rol ecx,5
|
|
xor r14d,DWORD[8+rsp]
|
|
lea r13d,[1859775393+r13*1+rbp]
|
|
xor eax,r11d
|
|
add r13d,ecx
|
|
rol edi,30
|
|
add r13d,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[44+rsp]
|
|
mov eax,esi
|
|
mov DWORD[40+rsp],r14d
|
|
mov ecx,r13d
|
|
xor edx,DWORD[52+rsp]
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
xor edx,DWORD[12+rsp]
|
|
lea r12d,[1859775393+r12*1+r14]
|
|
xor eax,edi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[48+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[44+rsp],edx
|
|
mov ecx,r12d
|
|
xor ebp,DWORD[56+rsp]
|
|
xor eax,edi
|
|
rol ecx,5
|
|
xor ebp,DWORD[16+rsp]
|
|
lea r11d,[1859775393+r11*1+rdx]
|
|
xor eax,esi
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[52+rsp]
|
|
mov eax,r12d
|
|
mov DWORD[48+rsp],ebp
|
|
mov ecx,r11d
|
|
xor r14d,DWORD[60+rsp]
|
|
xor eax,esi
|
|
rol ecx,5
|
|
xor r14d,DWORD[20+rsp]
|
|
lea edi,[1859775393+rdi*1+rbp]
|
|
xor eax,r13d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[56+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[52+rsp],r14d
|
|
mov ecx,edi
|
|
xor edx,DWORD[rsp]
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
xor edx,DWORD[24+rsp]
|
|
lea esi,[1859775393+rsi*1+r14]
|
|
xor eax,r12d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[60+rsp]
|
|
mov eax,edi
|
|
mov DWORD[56+rsp],edx
|
|
mov ecx,esi
|
|
xor ebp,DWORD[4+rsp]
|
|
xor eax,r12d
|
|
rol ecx,5
|
|
xor ebp,DWORD[28+rsp]
|
|
lea r13d,[1859775393+r13*1+rdx]
|
|
xor eax,r11d
|
|
add r13d,ecx
|
|
rol edi,30
|
|
add r13d,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[rsp]
|
|
mov eax,esi
|
|
mov DWORD[60+rsp],ebp
|
|
mov ecx,r13d
|
|
xor r14d,DWORD[8+rsp]
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
xor r14d,DWORD[32+rsp]
|
|
lea r12d,[1859775393+r12*1+rbp]
|
|
xor eax,edi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[4+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[rsp],r14d
|
|
mov ecx,r12d
|
|
xor edx,DWORD[12+rsp]
|
|
xor eax,edi
|
|
rol ecx,5
|
|
xor edx,DWORD[36+rsp]
|
|
lea r11d,[1859775393+r11*1+r14]
|
|
xor eax,esi
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[8+rsp]
|
|
mov eax,r12d
|
|
mov DWORD[4+rsp],edx
|
|
mov ecx,r11d
|
|
xor ebp,DWORD[16+rsp]
|
|
xor eax,esi
|
|
rol ecx,5
|
|
xor ebp,DWORD[40+rsp]
|
|
lea edi,[1859775393+rdi*1+rdx]
|
|
xor eax,r13d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[12+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[8+rsp],ebp
|
|
mov ecx,edi
|
|
xor r14d,DWORD[20+rsp]
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
xor r14d,DWORD[44+rsp]
|
|
lea esi,[1859775393+rsi*1+rbp]
|
|
xor eax,r12d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[16+rsp]
|
|
mov eax,edi
|
|
mov DWORD[12+rsp],r14d
|
|
mov ecx,esi
|
|
xor edx,DWORD[24+rsp]
|
|
xor eax,r12d
|
|
rol ecx,5
|
|
xor edx,DWORD[48+rsp]
|
|
lea r13d,[1859775393+r13*1+r14]
|
|
xor eax,r11d
|
|
add r13d,ecx
|
|
rol edi,30
|
|
add r13d,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[20+rsp]
|
|
mov eax,esi
|
|
mov DWORD[16+rsp],edx
|
|
mov ecx,r13d
|
|
xor ebp,DWORD[28+rsp]
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
xor ebp,DWORD[52+rsp]
|
|
lea r12d,[1859775393+r12*1+rdx]
|
|
xor eax,edi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[24+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[20+rsp],ebp
|
|
mov ecx,r12d
|
|
xor r14d,DWORD[32+rsp]
|
|
xor eax,edi
|
|
rol ecx,5
|
|
xor r14d,DWORD[56+rsp]
|
|
lea r11d,[1859775393+r11*1+rbp]
|
|
xor eax,esi
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[28+rsp]
|
|
mov eax,r12d
|
|
mov DWORD[24+rsp],r14d
|
|
mov ecx,r11d
|
|
xor edx,DWORD[36+rsp]
|
|
xor eax,esi
|
|
rol ecx,5
|
|
xor edx,DWORD[60+rsp]
|
|
lea edi,[1859775393+rdi*1+r14]
|
|
xor eax,r13d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[32+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[28+rsp],edx
|
|
mov ecx,edi
|
|
xor ebp,DWORD[40+rsp]
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
xor ebp,DWORD[rsp]
|
|
lea esi,[1859775393+rsi*1+rdx]
|
|
xor eax,r12d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,eax
|
|
rol ebp,1
|
|
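; NOTE (editor annotation): rounds 40-59 use the majority function,
; computed as (b and c) + ((b xor c) and d) split across eax and ebx,
; with K = 0x8f1bbcdc.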
xor r14d,DWORD[36+rsp]
mov eax,r12d
mov DWORD[32+rsp],ebp
mov ebx,r12d
xor r14d,DWORD[44+rsp]
and eax,r11d
mov ecx,esi
xor r14d,DWORD[4+rsp]
lea r13d,[((-1894007588))+r13*1+rbp]
xor ebx,r11d
rol ecx,5
add r13d,eax
rol r14d,1
and ebx,edi
add r13d,ecx
rol edi,30
add r13d,ebx
xor edx,DWORD[40+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[36+rsp],r14d
|
|
mov ebx,r11d
|
|
xor edx,DWORD[48+rsp]
|
|
and eax,edi
|
|
mov ecx,r13d
|
|
xor edx,DWORD[8+rsp]
|
|
lea r12d,[((-1894007588))+r12*1+r14]
|
|
xor ebx,edi
|
|
rol ecx,5
|
|
add r12d,eax
|
|
rol edx,1
|
|
and ebx,esi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,ebx
|
|
xor ebp,DWORD[44+rsp]
|
|
mov eax,edi
|
|
mov DWORD[40+rsp],edx
|
|
mov ebx,edi
|
|
xor ebp,DWORD[52+rsp]
|
|
and eax,esi
|
|
mov ecx,r12d
|
|
xor ebp,DWORD[12+rsp]
|
|
lea r11d,[((-1894007588))+r11*1+rdx]
|
|
xor ebx,esi
|
|
rol ecx,5
|
|
add r11d,eax
|
|
rol ebp,1
|
|
and ebx,r13d
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,ebx
|
|
xor r14d,DWORD[48+rsp]
|
|
mov eax,esi
|
|
mov DWORD[44+rsp],ebp
|
|
mov ebx,esi
|
|
xor r14d,DWORD[56+rsp]
|
|
and eax,r13d
|
|
mov ecx,r11d
|
|
xor r14d,DWORD[16+rsp]
|
|
lea edi,[((-1894007588))+rdi*1+rbp]
|
|
xor ebx,r13d
|
|
rol ecx,5
|
|
add edi,eax
|
|
rol r14d,1
|
|
and ebx,r12d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,ebx
|
|
xor edx,DWORD[52+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[48+rsp],r14d
|
|
mov ebx,r13d
|
|
xor edx,DWORD[60+rsp]
|
|
and eax,r12d
|
|
mov ecx,edi
|
|
xor edx,DWORD[20+rsp]
|
|
lea esi,[((-1894007588))+rsi*1+r14]
|
|
xor ebx,r12d
|
|
rol ecx,5
|
|
add esi,eax
|
|
rol edx,1
|
|
and ebx,r11d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,ebx
|
|
xor ebp,DWORD[56+rsp]
|
|
mov eax,r12d
|
|
mov DWORD[52+rsp],edx
|
|
mov ebx,r12d
|
|
xor ebp,DWORD[rsp]
|
|
and eax,r11d
|
|
mov ecx,esi
|
|
xor ebp,DWORD[24+rsp]
|
|
lea r13d,[((-1894007588))+r13*1+rdx]
|
|
xor ebx,r11d
|
|
rol ecx,5
|
|
add r13d,eax
|
|
rol ebp,1
|
|
and ebx,edi
|
|
add r13d,ecx
|
|
rol edi,30
|
|
add r13d,ebx
|
|
xor r14d,DWORD[60+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[56+rsp],ebp
|
|
mov ebx,r11d
|
|
xor r14d,DWORD[4+rsp]
|
|
and eax,edi
|
|
mov ecx,r13d
|
|
xor r14d,DWORD[28+rsp]
|
|
lea r12d,[((-1894007588))+r12*1+rbp]
|
|
xor ebx,edi
|
|
rol ecx,5
|
|
add r12d,eax
|
|
rol r14d,1
|
|
and ebx,esi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,ebx
|
|
xor edx,DWORD[rsp]
|
|
mov eax,edi
|
|
mov DWORD[60+rsp],r14d
|
|
mov ebx,edi
|
|
xor edx,DWORD[8+rsp]
|
|
and eax,esi
|
|
mov ecx,r12d
|
|
xor edx,DWORD[32+rsp]
|
|
lea r11d,[((-1894007588))+r11*1+r14]
|
|
xor ebx,esi
|
|
rol ecx,5
|
|
add r11d,eax
|
|
rol edx,1
|
|
and ebx,r13d
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,ebx
|
|
xor ebp,DWORD[4+rsp]
|
|
mov eax,esi
|
|
mov DWORD[rsp],edx
|
|
mov ebx,esi
|
|
xor ebp,DWORD[12+rsp]
|
|
and eax,r13d
|
|
mov ecx,r11d
|
|
xor ebp,DWORD[36+rsp]
|
|
lea edi,[((-1894007588))+rdi*1+rdx]
|
|
xor ebx,r13d
|
|
rol ecx,5
|
|
add edi,eax
|
|
rol ebp,1
|
|
and ebx,r12d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,ebx
|
|
xor r14d,DWORD[8+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[4+rsp],ebp
|
|
mov ebx,r13d
|
|
xor r14d,DWORD[16+rsp]
|
|
and eax,r12d
|
|
mov ecx,edi
|
|
xor r14d,DWORD[40+rsp]
|
|
lea esi,[((-1894007588))+rsi*1+rbp]
|
|
xor ebx,r12d
|
|
rol ecx,5
|
|
add esi,eax
|
|
rol r14d,1
|
|
and ebx,r11d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,ebx
|
|
xor edx,DWORD[12+rsp]
|
|
mov eax,r12d
|
|
mov DWORD[8+rsp],r14d
|
|
mov ebx,r12d
|
|
xor edx,DWORD[20+rsp]
|
|
and eax,r11d
|
|
mov ecx,esi
|
|
xor edx,DWORD[44+rsp]
|
|
lea r13d,[((-1894007588))+r13*1+r14]
|
|
xor ebx,r11d
|
|
rol ecx,5
|
|
add r13d,eax
|
|
rol edx,1
|
|
and ebx,edi
|
|
add r13d,ecx
|
|
rol edi,30
|
|
add r13d,ebx
|
|
xor ebp,DWORD[16+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[12+rsp],edx
|
|
mov ebx,r11d
|
|
xor ebp,DWORD[24+rsp]
|
|
and eax,edi
|
|
mov ecx,r13d
|
|
xor ebp,DWORD[48+rsp]
|
|
lea r12d,[((-1894007588))+r12*1+rdx]
|
|
xor ebx,edi
|
|
rol ecx,5
|
|
add r12d,eax
|
|
rol ebp,1
|
|
and ebx,esi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,ebx
|
|
xor r14d,DWORD[20+rsp]
|
|
mov eax,edi
|
|
mov DWORD[16+rsp],ebp
|
|
mov ebx,edi
|
|
xor r14d,DWORD[28+rsp]
|
|
and eax,esi
|
|
mov ecx,r12d
|
|
xor r14d,DWORD[52+rsp]
|
|
lea r11d,[((-1894007588))+r11*1+rbp]
|
|
xor ebx,esi
|
|
rol ecx,5
|
|
add r11d,eax
|
|
rol r14d,1
|
|
and ebx,r13d
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,ebx
|
|
xor edx,DWORD[24+rsp]
|
|
mov eax,esi
|
|
mov DWORD[20+rsp],r14d
|
|
mov ebx,esi
|
|
xor edx,DWORD[32+rsp]
|
|
and eax,r13d
|
|
mov ecx,r11d
|
|
xor edx,DWORD[56+rsp]
|
|
lea edi,[((-1894007588))+rdi*1+r14]
|
|
xor ebx,r13d
|
|
rol ecx,5
|
|
add edi,eax
|
|
rol edx,1
|
|
and ebx,r12d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,ebx
|
|
xor ebp,DWORD[28+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[24+rsp],edx
|
|
mov ebx,r13d
|
|
xor ebp,DWORD[36+rsp]
|
|
and eax,r12d
|
|
mov ecx,edi
|
|
xor ebp,DWORD[60+rsp]
|
|
lea esi,[((-1894007588))+rsi*1+rdx]
|
|
xor ebx,r12d
|
|
rol ecx,5
|
|
add esi,eax
|
|
rol ebp,1
|
|
and ebx,r11d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,ebx
|
|
xor r14d,DWORD[32+rsp]
|
|
mov eax,r12d
|
|
mov DWORD[28+rsp],ebp
|
|
mov ebx,r12d
|
|
xor r14d,DWORD[40+rsp]
|
|
and eax,r11d
|
|
mov ecx,esi
|
|
xor r14d,DWORD[rsp]
|
|
lea r13d,[((-1894007588))+r13*1+rbp]
|
|
xor ebx,r11d
|
|
rol ecx,5
|
|
add r13d,eax
|
|
rol r14d,1
|
|
and ebx,edi
|
|
add r13d,ecx
|
|
rol edi,30
|
|
add r13d,ebx
|
|
xor edx,DWORD[36+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[32+rsp],r14d
|
|
mov ebx,r11d
|
|
xor edx,DWORD[44+rsp]
|
|
and eax,edi
|
|
mov ecx,r13d
|
|
xor edx,DWORD[4+rsp]
|
|
lea r12d,[((-1894007588))+r12*1+r14]
|
|
xor ebx,edi
|
|
rol ecx,5
|
|
add r12d,eax
|
|
rol edx,1
|
|
and ebx,esi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,ebx
|
|
xor ebp,DWORD[40+rsp]
|
|
mov eax,edi
|
|
mov DWORD[36+rsp],edx
|
|
mov ebx,edi
|
|
xor ebp,DWORD[48+rsp]
|
|
and eax,esi
|
|
mov ecx,r12d
|
|
xor ebp,DWORD[8+rsp]
|
|
lea r11d,[((-1894007588))+r11*1+rdx]
|
|
xor ebx,esi
|
|
rol ecx,5
|
|
add r11d,eax
|
|
rol ebp,1
|
|
and ebx,r13d
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,ebx
|
|
xor r14d,DWORD[44+rsp]
|
|
mov eax,esi
|
|
mov DWORD[40+rsp],ebp
|
|
mov ebx,esi
|
|
xor r14d,DWORD[52+rsp]
|
|
and eax,r13d
|
|
mov ecx,r11d
|
|
xor r14d,DWORD[12+rsp]
|
|
lea edi,[((-1894007588))+rdi*1+rbp]
|
|
xor ebx,r13d
|
|
rol ecx,5
|
|
add edi,eax
|
|
rol r14d,1
|
|
and ebx,r12d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,ebx
|
|
xor edx,DWORD[48+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[44+rsp],r14d
|
|
mov ebx,r13d
|
|
xor edx,DWORD[56+rsp]
|
|
and eax,r12d
|
|
mov ecx,edi
|
|
xor edx,DWORD[16+rsp]
|
|
lea esi,[((-1894007588))+rsi*1+r14]
|
|
xor ebx,r12d
|
|
rol ecx,5
|
|
add esi,eax
|
|
rol edx,1
|
|
and ebx,r11d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,ebx
|
|
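; NOTE (editor annotation): rounds 60-79 return to F = b ^ c ^ d,
; with K = 0xca62c1d6.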
xor ebp,DWORD[52+rsp]
mov eax,edi
mov DWORD[48+rsp],edx
mov ecx,esi
xor ebp,DWORD[60+rsp]
xor eax,r12d
rol ecx,5
xor ebp,DWORD[20+rsp]
lea r13d,[((-899497514))+r13*1+rdx]
xor eax,r11d
add r13d,ecx
rol edi,30
add r13d,eax
rol ebp,1
xor r14d,DWORD[56+rsp]
|
|
mov eax,esi
|
|
mov DWORD[52+rsp],ebp
|
|
mov ecx,r13d
|
|
xor r14d,DWORD[rsp]
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
xor r14d,DWORD[24+rsp]
|
|
lea r12d,[((-899497514))+r12*1+rbp]
|
|
xor eax,edi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[60+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[56+rsp],r14d
|
|
mov ecx,r12d
|
|
xor edx,DWORD[4+rsp]
|
|
xor eax,edi
|
|
rol ecx,5
|
|
xor edx,DWORD[28+rsp]
|
|
lea r11d,[((-899497514))+r11*1+r14]
|
|
xor eax,esi
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[rsp]
|
|
mov eax,r12d
|
|
mov DWORD[60+rsp],edx
|
|
mov ecx,r11d
|
|
xor ebp,DWORD[8+rsp]
|
|
xor eax,esi
|
|
rol ecx,5
|
|
xor ebp,DWORD[32+rsp]
|
|
lea edi,[((-899497514))+rdi*1+rdx]
|
|
xor eax,r13d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[4+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[rsp],ebp
|
|
mov ecx,edi
|
|
xor r14d,DWORD[12+rsp]
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
xor r14d,DWORD[36+rsp]
|
|
lea esi,[((-899497514))+rsi*1+rbp]
|
|
xor eax,r12d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[8+rsp]
|
|
mov eax,edi
|
|
mov DWORD[4+rsp],r14d
|
|
mov ecx,esi
|
|
xor edx,DWORD[16+rsp]
|
|
xor eax,r12d
|
|
rol ecx,5
|
|
xor edx,DWORD[40+rsp]
|
|
lea r13d,[((-899497514))+r13*1+r14]
|
|
xor eax,r11d
|
|
add r13d,ecx
|
|
rol edi,30
|
|
add r13d,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[12+rsp]
|
|
mov eax,esi
|
|
mov DWORD[8+rsp],edx
|
|
mov ecx,r13d
|
|
xor ebp,DWORD[20+rsp]
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
xor ebp,DWORD[44+rsp]
|
|
lea r12d,[((-899497514))+r12*1+rdx]
|
|
xor eax,edi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[16+rsp]
|
|
mov eax,r13d
|
|
mov DWORD[12+rsp],ebp
|
|
mov ecx,r12d
|
|
xor r14d,DWORD[24+rsp]
|
|
xor eax,edi
|
|
rol ecx,5
|
|
xor r14d,DWORD[48+rsp]
|
|
lea r11d,[((-899497514))+r11*1+rbp]
|
|
xor eax,esi
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[20+rsp]
|
|
mov eax,r12d
|
|
mov DWORD[16+rsp],r14d
|
|
mov ecx,r11d
|
|
xor edx,DWORD[28+rsp]
|
|
xor eax,esi
|
|
rol ecx,5
|
|
xor edx,DWORD[52+rsp]
|
|
lea edi,[((-899497514))+rdi*1+r14]
|
|
xor eax,r13d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[24+rsp]
|
|
mov eax,r11d
|
|
mov DWORD[20+rsp],edx
|
|
mov ecx,edi
|
|
xor ebp,DWORD[32+rsp]
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
xor ebp,DWORD[56+rsp]
|
|
lea esi,[((-899497514))+rsi*1+rdx]
|
|
xor eax,r12d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[28+rsp]
|
|
mov eax,edi
|
|
mov DWORD[24+rsp],ebp
|
|
mov ecx,esi
|
|
xor r14d,DWORD[36+rsp]
|
|
xor eax,r12d
|
|
rol ecx,5
|
|
xor r14d,DWORD[60+rsp]
|
|
lea r13d,[((-899497514))+r13*1+rbp]
|
|
xor eax,r11d
|
|
add r13d,ecx
|
|
rol edi,30
|
|
add r13d,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[32+rsp]
|
|
mov eax,esi
|
|
mov DWORD[28+rsp],r14d
|
|
mov ecx,r13d
|
|
xor edx,DWORD[40+rsp]
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
xor edx,DWORD[rsp]
|
|
lea r12d,[((-899497514))+r12*1+r14]
|
|
xor eax,edi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[36+rsp]
|
|
mov eax,r13d
|
|
|
|
mov ecx,r12d
|
|
xor ebp,DWORD[44+rsp]
|
|
xor eax,edi
|
|
rol ecx,5
|
|
xor ebp,DWORD[4+rsp]
|
|
lea r11d,[((-899497514))+r11*1+rdx]
|
|
xor eax,esi
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[40+rsp]
|
|
mov eax,r12d
|
|
|
|
mov ecx,r11d
|
|
xor r14d,DWORD[48+rsp]
|
|
xor eax,esi
|
|
rol ecx,5
|
|
xor r14d,DWORD[8+rsp]
|
|
lea edi,[((-899497514))+rdi*1+rbp]
|
|
xor eax,r13d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[44+rsp]
|
|
mov eax,r11d
|
|
|
|
mov ecx,edi
|
|
xor edx,DWORD[52+rsp]
|
|
xor eax,r13d
|
|
rol ecx,5
|
|
xor edx,DWORD[12+rsp]
|
|
lea esi,[((-899497514))+rsi*1+r14]
|
|
xor eax,r12d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[48+rsp]
|
|
mov eax,edi
|
|
|
|
mov ecx,esi
|
|
xor ebp,DWORD[56+rsp]
|
|
xor eax,r12d
|
|
rol ecx,5
|
|
xor ebp,DWORD[16+rsp]
|
|
lea r13d,[((-899497514))+r13*1+rdx]
|
|
xor eax,r11d
|
|
add r13d,ecx
|
|
rol edi,30
|
|
add r13d,eax
|
|
rol ebp,1
|
|
xor r14d,DWORD[52+rsp]
|
|
mov eax,esi
|
|
|
|
mov ecx,r13d
|
|
xor r14d,DWORD[60+rsp]
|
|
xor eax,r11d
|
|
rol ecx,5
|
|
xor r14d,DWORD[20+rsp]
|
|
lea r12d,[((-899497514))+r12*1+rbp]
|
|
xor eax,edi
|
|
add r12d,ecx
|
|
rol esi,30
|
|
add r12d,eax
|
|
rol r14d,1
|
|
xor edx,DWORD[56+rsp]
|
|
mov eax,r13d
|
|
|
|
mov ecx,r12d
|
|
xor edx,DWORD[rsp]
|
|
xor eax,edi
|
|
rol ecx,5
|
|
xor edx,DWORD[24+rsp]
|
|
lea r11d,[((-899497514))+r11*1+r14]
|
|
xor eax,esi
|
|
add r11d,ecx
|
|
rol r13d,30
|
|
add r11d,eax
|
|
rol edx,1
|
|
xor ebp,DWORD[60+rsp]
|
|
mov eax,r12d
|
|
|
|
mov ecx,r11d
|
|
xor ebp,DWORD[4+rsp]
|
|
xor eax,esi
|
|
rol ecx,5
|
|
xor ebp,DWORD[28+rsp]
|
|
lea edi,[((-899497514))+rdi*1+rdx]
|
|
xor eax,r13d
|
|
add edi,ecx
|
|
rol r12d,30
|
|
add edi,eax
|
|
rol ebp,1
|
|
mov eax,r11d
|
|
mov ecx,edi
|
|
xor eax,r13d
|
|
lea esi,[((-899497514))+rsi*1+rbp]
|
|
rol ecx,5
|
|
xor eax,r12d
|
|
add esi,ecx
|
|
rol r11d,30
|
|
add esi,eax
|
|
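; NOTE (editor annotation): all 80 rounds done; fold the working
; registers back into the five chaining values and advance r9 to the
; next 64-byte block.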
add esi,DWORD[r8]
add edi,DWORD[4+r8]
add r11d,DWORD[8+r8]
add r12d,DWORD[12+r8]
add r13d,DWORD[16+r8]
mov DWORD[r8],esi
mov DWORD[4+r8],edi
mov DWORD[8+r8],r11d
mov DWORD[12+r8],r12d
mov DWORD[16+r8],r13d

sub r10,1
lea r9,[64+r9]
jnz NEAR $L$loop

mov rsi,QWORD[64+rsp]
mov r14,QWORD[((-40))+rsi]
mov r13,QWORD[((-32))+rsi]
mov r12,QWORD[((-24))+rsi]
mov rbp,QWORD[((-16))+rsi]
mov rbx,QWORD[((-8))+rsi]
lea rsp,[rsi]

$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret

$L$SEH_end_sha1_block_data_order:

ALIGN 32
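; NOTE (editor annotation): SHA-NI code path. The DB sequences are
; hand-assembled opcodes kept as raw bytes for older assemblers; by
; encoding they correspond to: 102,15,56,0,xx = pshufb;
; 15,56,201,xx = sha1msg1; 15,56,202,xx = sha1msg2;
; 15,56,200,xx = sha1nexte; 15,58,204,xx,imm = sha1rnds4.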
sha1_block_data_order_shaext:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_sha1_block_data_order_shaext:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8

_shaext_shortcut:
lea rsp,[((-72))+rsp]
movaps XMMWORD[(-8-64)+rax],xmm6
movaps XMMWORD[(-8-48)+rax],xmm7
movaps XMMWORD[(-8-32)+rax],xmm8
movaps XMMWORD[(-8-16)+rax],xmm9
$L$prologue_shaext:
movdqu xmm0,XMMWORD[rdi]
movd xmm1,DWORD[16+rdi]
movdqa xmm3,XMMWORD[((K_XX_XX+160))]

movdqu xmm4,XMMWORD[rsi]
pshufd xmm0,xmm0,27
movdqu xmm5,XMMWORD[16+rsi]
pshufd xmm1,xmm1,27
movdqu xmm6,XMMWORD[32+rsi]
DB 102,15,56,0,227
movdqu xmm7,XMMWORD[48+rsi]
DB 102,15,56,0,235
DB 102,15,56,0,243
movdqa xmm9,xmm1
DB 102,15,56,0,251
jmp NEAR $L$oop_shaext

ALIGN 16
$L$oop_shaext:
dec rdx
|
lea r8,[64+rsi]
|
|
paddd xmm1,xmm4
|
|
cmovne rsi,r8
|
|
movdqa xmm8,xmm0
|
|
DB 15,56,201,229
|
|
movdqa xmm2,xmm0
|
|
DB 15,58,204,193,0
|
|
DB 15,56,200,213
|
|
pxor xmm4,xmm6
|
|
DB 15,56,201,238
|
|
DB 15,56,202,231
|
|
|
|
movdqa xmm1,xmm0
|
|
DB 15,58,204,194,0
|
|
DB 15,56,200,206
|
|
pxor xmm5,xmm7
|
|
DB 15,56,202,236
|
|
DB 15,56,201,247
|
|
movdqa xmm2,xmm0
|
|
DB 15,58,204,193,0
|
|
DB 15,56,200,215
|
|
pxor xmm6,xmm4
|
|
DB 15,56,201,252
|
|
DB 15,56,202,245
|
|
|
|
movdqa xmm1,xmm0
|
|
DB 15,58,204,194,0
|
|
DB 15,56,200,204
|
|
pxor xmm7,xmm5
|
|
DB 15,56,202,254
|
|
DB 15,56,201,229
|
|
movdqa xmm2,xmm0
|
|
DB 15,58,204,193,0
|
|
DB 15,56,200,213
|
|
pxor xmm4,xmm6
|
|
DB 15,56,201,238
|
|
DB 15,56,202,231
|
|
|
|
movdqa xmm1,xmm0
|
|
DB 15,58,204,194,1
|
|
DB 15,56,200,206
|
|
pxor xmm5,xmm7
|
|
DB 15,56,202,236
|
|
DB 15,56,201,247
|
|
movdqa xmm2,xmm0
|
|
DB 15,58,204,193,1
|
|
DB 15,56,200,215
|
|
pxor xmm6,xmm4
|
|
DB 15,56,201,252
|
|
DB 15,56,202,245
|
|
|
|
movdqa xmm1,xmm0
|
|
DB 15,58,204,194,1
|
|
DB 15,56,200,204
|
|
pxor xmm7,xmm5
|
|
DB 15,56,202,254
|
|
DB 15,56,201,229
|
|
movdqa xmm2,xmm0
|
|
DB 15,58,204,193,1
|
|
DB 15,56,200,213
|
|
pxor xmm4,xmm6
|
|
DB 15,56,201,238
|
|
DB 15,56,202,231
|
|
|
|
movdqa xmm1,xmm0
|
|
DB 15,58,204,194,1
|
|
DB 15,56,200,206
|
|
pxor xmm5,xmm7
|
|
DB 15,56,202,236
|
|
DB 15,56,201,247
|
|
movdqa xmm2,xmm0
|
|
DB 15,58,204,193,2
|
|
DB 15,56,200,215
|
|
pxor xmm6,xmm4
|
|
DB 15,56,201,252
|
|
DB 15,56,202,245
|
|
|
|
movdqa xmm1,xmm0
|
|
DB 15,58,204,194,2
|
|
DB 15,56,200,204
|
|
pxor xmm7,xmm5
|
|
DB 15,56,202,254
|
|
DB 15,56,201,229
|
|
movdqa xmm2,xmm0
|
|
DB 15,58,204,193,2
|
|
DB 15,56,200,213
|
|
pxor xmm4,xmm6
|
|
DB 15,56,201,238
|
|
DB 15,56,202,231
|
|
|
|
movdqa xmm1,xmm0
|
|
DB 15,58,204,194,2
|
|
DB 15,56,200,206
|
|
pxor xmm5,xmm7
|
|
DB 15,56,202,236
|
|
DB 15,56,201,247
|
|
movdqa xmm2,xmm0
|
|
DB 15,58,204,193,2
|
|
DB 15,56,200,215
|
|
pxor xmm6,xmm4
|
|
DB 15,56,201,252
|
|
DB 15,56,202,245
|
|
|
|
movdqa xmm1,xmm0
|
|
DB 15,58,204,194,3
|
|
DB 15,56,200,204
|
|
pxor xmm7,xmm5
|
|
DB 15,56,202,254
|
|
movdqu xmm4,XMMWORD[rsi]
|
|
movdqa xmm2,xmm0
|
|
DB 15,58,204,193,3
|
|
DB 15,56,200,213
|
|
movdqu xmm5,XMMWORD[16+rsi]
|
|
DB 102,15,56,0,227
|
|
|
|
movdqa xmm1,xmm0
|
|
DB 15,58,204,194,3
|
|
DB 15,56,200,206
|
|
movdqu xmm6,XMMWORD[32+rsi]
|
|
DB 102,15,56,0,235
|
|
|
|
movdqa xmm2,xmm0
|
|
DB 15,58,204,193,3
|
|
DB 15,56,200,215
|
|
movdqu xmm7,XMMWORD[48+rsi]
|
|
DB 102,15,56,0,243
|
|
|
|
movdqa xmm1,xmm0
|
|
DB 15,58,204,194,3
|
|
DB 65,15,56,200,201
|
|
DB 102,15,56,0,251
|
|
|
|
paddd xmm0,xmm8
|
|
movdqa xmm9,xmm1
|
|
|
|
jnz NEAR $L$oop_shaext
pshufd xmm0,xmm0,27
pshufd xmm1,xmm1,27
movdqu XMMWORD[rdi],xmm0
movd DWORD[16+rdi],xmm1
movaps xmm6,XMMWORD[((-8-64))+rax]
movaps xmm7,XMMWORD[((-8-48))+rax]
movaps xmm8,XMMWORD[((-8-32))+rax]
movaps xmm9,XMMWORD[((-8-16))+rax]
mov rsp,rax
$L$epilogue_shaext:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret

$L$SEH_end_sha1_block_data_order_shaext:

ALIGN 16
sha1_block_data_order_ssse3:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_sha1_block_data_order_ssse3:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8

_ssse3_shortcut:
mov r11,rsp
push rbx
push rbp
push r12
push r13
push r14
lea rsp,[((-160))+rsp]
movaps XMMWORD[(-40-96)+r11],xmm6
movaps XMMWORD[(-40-80)+r11],xmm7
movaps XMMWORD[(-40-64)+r11],xmm8
movaps XMMWORD[(-40-48)+r11],xmm9
movaps XMMWORD[(-40-32)+r11],xmm10
movaps XMMWORD[(-40-16)+r11],xmm11
$L$prologue_ssse3:
and rsp,-64
mov r8,rdi
mov r9,rsi
mov r10,rdx

shl r10,6
add r10,r9
lea r14,[((K_XX_XX+64))]
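; NOTE (editor annotation): load the chaining values, byte-swap the
; first block with pshufb (DB 102,15,56,0,xx) and pre-add the round
; constant from K_XX_XX, so each round reads W[t]+K straight off the
; stack; psubd keeps the raw W values for the schedule update.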
mov eax,DWORD[r8]
mov ebx,DWORD[4+r8]
mov ecx,DWORD[8+r8]
mov edx,DWORD[12+r8]
mov esi,ebx
mov ebp,DWORD[16+r8]
mov edi,ecx
xor edi,edx
and esi,edi

movdqa xmm6,XMMWORD[64+r14]
movdqa xmm9,XMMWORD[((-64))+r14]
movdqu xmm0,XMMWORD[r9]
movdqu xmm1,XMMWORD[16+r9]
movdqu xmm2,XMMWORD[32+r9]
movdqu xmm3,XMMWORD[48+r9]
DB 102,15,56,0,198
DB 102,15,56,0,206
DB 102,15,56,0,214
add r9,64
paddd xmm0,xmm9
DB 102,15,56,0,222
paddd xmm1,xmm9
paddd xmm2,xmm9
movdqa XMMWORD[rsp],xmm0
psubd xmm0,xmm9
movdqa XMMWORD[16+rsp],xmm1
psubd xmm1,xmm9
movdqa XMMWORD[32+rsp],xmm2
psubd xmm2,xmm9
jmp NEAR $L$oop_ssse3
ALIGN 16
$L$oop_ssse3:
ror ebx,2
|
|
pshufd xmm4,xmm0,238
|
|
xor esi,edx
|
|
movdqa xmm8,xmm3
|
|
paddd xmm9,xmm3
|
|
mov edi,eax
|
|
add ebp,DWORD[rsp]
|
|
punpcklqdq xmm4,xmm1
|
|
xor ebx,ecx
|
|
rol eax,5
|
|
add ebp,esi
|
|
psrldq xmm8,4
|
|
and edi,ebx
|
|
xor ebx,ecx
|
|
pxor xmm4,xmm0
|
|
add ebp,eax
|
|
ror eax,7
|
|
pxor xmm8,xmm2
|
|
xor edi,ecx
|
|
mov esi,ebp
|
|
add edx,DWORD[4+rsp]
|
|
pxor xmm4,xmm8
|
|
xor eax,ebx
|
|
rol ebp,5
|
|
movdqa XMMWORD[48+rsp],xmm9
|
|
add edx,edi
|
|
and esi,eax
|
|
movdqa xmm10,xmm4
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
ror ebp,7
|
|
movdqa xmm8,xmm4
|
|
xor esi,ebx
|
|
pslldq xmm10,12
|
|
paddd xmm4,xmm4
|
|
mov edi,edx
|
|
add ecx,DWORD[8+rsp]
|
|
psrld xmm8,31
|
|
xor ebp,eax
|
|
rol edx,5
|
|
add ecx,esi
|
|
movdqa xmm9,xmm10
|
|
and edi,ebp
|
|
xor ebp,eax
|
|
psrld xmm10,30
|
|
add ecx,edx
|
|
ror edx,7
|
|
por xmm4,xmm8
|
|
xor edi,eax
|
|
mov esi,ecx
|
|
add ebx,DWORD[12+rsp]
|
|
pslld xmm9,2
|
|
pxor xmm4,xmm10
|
|
xor edx,ebp
|
|
movdqa xmm10,XMMWORD[((-64))+r14]
|
|
rol ecx,5
|
|
add ebx,edi
|
|
and esi,edx
|
|
pxor xmm4,xmm9
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
ror ecx,7
|
|
pshufd xmm5,xmm1,238
|
|
xor esi,ebp
|
|
movdqa xmm9,xmm4
|
|
paddd xmm10,xmm4
|
|
mov edi,ebx
|
|
add eax,DWORD[16+rsp]
|
|
punpcklqdq xmm5,xmm2
|
|
xor ecx,edx
|
|
rol ebx,5
|
|
add eax,esi
|
|
psrldq xmm9,4
|
|
and edi,ecx
|
|
xor ecx,edx
|
|
pxor xmm5,xmm1
|
|
add eax,ebx
|
|
ror ebx,7
|
|
pxor xmm9,xmm3
|
|
xor edi,edx
|
|
mov esi,eax
|
|
add ebp,DWORD[20+rsp]
|
|
pxor xmm5,xmm9
|
|
xor ebx,ecx
|
|
rol eax,5
|
|
movdqa XMMWORD[rsp],xmm10
|
|
add ebp,edi
|
|
and esi,ebx
|
|
movdqa xmm8,xmm5
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
ror eax,7
|
|
movdqa xmm9,xmm5
|
|
xor esi,ecx
|
|
pslldq xmm8,12
|
|
paddd xmm5,xmm5
|
|
mov edi,ebp
|
|
add edx,DWORD[24+rsp]
|
|
psrld xmm9,31
|
|
xor eax,ebx
|
|
rol ebp,5
|
|
add edx,esi
|
|
movdqa xmm10,xmm8
|
|
and edi,eax
|
|
xor eax,ebx
|
|
psrld xmm8,30
|
|
add edx,ebp
|
|
ror ebp,7
|
|
por xmm5,xmm9
|
|
xor edi,ebx
|
|
mov esi,edx
|
|
add ecx,DWORD[28+rsp]
|
|
pslld xmm10,2
|
|
pxor xmm5,xmm8
|
|
xor ebp,eax
|
|
movdqa xmm8,XMMWORD[((-32))+r14]
|
|
rol edx,5
|
|
add ecx,edi
|
|
and esi,ebp
|
|
pxor xmm5,xmm10
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
ror edx,7
|
|
pshufd xmm6,xmm2,238
|
|
xor esi,eax
|
|
movdqa xmm10,xmm5
|
|
paddd xmm8,xmm5
|
|
mov edi,ecx
|
|
add ebx,DWORD[32+rsp]
|
|
punpcklqdq xmm6,xmm3
|
|
xor edx,ebp
|
|
rol ecx,5
|
|
add ebx,esi
|
|
psrldq xmm10,4
|
|
and edi,edx
|
|
xor edx,ebp
|
|
pxor xmm6,xmm2
|
|
add ebx,ecx
|
|
ror ecx,7
|
|
pxor xmm10,xmm4
|
|
xor edi,ebp
|
|
mov esi,ebx
|
|
add eax,DWORD[36+rsp]
|
|
pxor xmm6,xmm10
|
|
xor ecx,edx
|
|
rol ebx,5
|
|
movdqa XMMWORD[16+rsp],xmm8
|
|
add eax,edi
|
|
and esi,ecx
|
|
movdqa xmm9,xmm6
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
ror ebx,7
|
|
movdqa xmm10,xmm6
|
|
xor esi,edx
|
|
pslldq xmm9,12
|
|
paddd xmm6,xmm6
|
|
mov edi,eax
|
|
add ebp,DWORD[40+rsp]
|
|
psrld xmm10,31
|
|
xor ebx,ecx
|
|
rol eax,5
|
|
add ebp,esi
|
|
movdqa xmm8,xmm9
|
|
and edi,ebx
|
|
xor ebx,ecx
|
|
psrld xmm9,30
|
|
add ebp,eax
|
|
ror eax,7
|
|
por xmm6,xmm10
|
|
xor edi,ecx
|
|
mov esi,ebp
|
|
add edx,DWORD[44+rsp]
|
|
pslld xmm8,2
|
|
pxor xmm6,xmm9
|
|
xor eax,ebx
|
|
movdqa xmm9,XMMWORD[((-32))+r14]
|
|
rol ebp,5
|
|
add edx,edi
|
|
and esi,eax
|
|
pxor xmm6,xmm8
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
ror ebp,7
|
|
pshufd xmm7,xmm3,238
|
|
xor esi,ebx
|
|
movdqa xmm8,xmm6
|
|
paddd xmm9,xmm6
|
|
mov edi,edx
|
|
add ecx,DWORD[48+rsp]
|
|
punpcklqdq xmm7,xmm4
|
|
xor ebp,eax
|
|
rol edx,5
|
|
add ecx,esi
|
|
psrldq xmm8,4
|
|
and edi,ebp
|
|
xor ebp,eax
|
|
pxor xmm7,xmm3
|
|
add ecx,edx
|
|
ror edx,7
|
|
pxor xmm8,xmm5
|
|
xor edi,eax
|
|
mov esi,ecx
|
|
add ebx,DWORD[52+rsp]
|
|
pxor xmm7,xmm8
|
|
xor edx,ebp
|
|
rol ecx,5
|
|
movdqa XMMWORD[32+rsp],xmm9
|
|
add ebx,edi
|
|
and esi,edx
|
|
movdqa xmm10,xmm7
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
ror ecx,7
|
|
movdqa xmm8,xmm7
|
|
xor esi,ebp
|
|
pslldq xmm10,12
|
|
paddd xmm7,xmm7
|
|
mov edi,ebx
|
|
add eax,DWORD[56+rsp]
|
|
psrld xmm8,31
|
|
xor ecx,edx
|
|
rol ebx,5
|
|
add eax,esi
|
|
movdqa xmm9,xmm10
|
|
and edi,ecx
|
|
xor ecx,edx
|
|
psrld xmm10,30
|
|
add eax,ebx
|
|
ror ebx,7
|
|
por xmm7,xmm8
|
|
xor edi,edx
|
|
mov esi,eax
|
|
add ebp,DWORD[60+rsp]
|
|
pslld xmm9,2
|
|
pxor xmm7,xmm10
|
|
xor ebx,ecx
|
|
movdqa xmm10,XMMWORD[((-32))+r14]
|
|
rol eax,5
|
|
add ebp,edi
|
|
and esi,ebx
|
|
pxor xmm7,xmm9
|
|
pshufd xmm9,xmm6,238
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
ror eax,7
|
|
pxor xmm0,xmm4
|
|
xor esi,ecx
|
|
mov edi,ebp
|
|
add edx,DWORD[rsp]
|
|
punpcklqdq xmm9,xmm7
|
|
xor eax,ebx
|
|
rol ebp,5
|
|
pxor xmm0,xmm1
|
|
add edx,esi
|
|
and edi,eax
|
|
movdqa xmm8,xmm10
|
|
xor eax,ebx
|
|
paddd xmm10,xmm7
|
|
add edx,ebp
|
|
pxor xmm0,xmm9
|
|
ror ebp,7
|
|
xor edi,ebx
|
|
mov esi,edx
|
|
add ecx,DWORD[4+rsp]
|
|
movdqa xmm9,xmm0
|
|
xor ebp,eax
|
|
rol edx,5
|
|
movdqa XMMWORD[48+rsp],xmm10
|
|
add ecx,edi
|
|
and esi,ebp
|
|
xor ebp,eax
|
|
pslld xmm0,2
|
|
add ecx,edx
|
|
ror edx,7
|
|
psrld xmm9,30
|
|
xor esi,eax
|
|
mov edi,ecx
|
|
add ebx,DWORD[8+rsp]
|
|
por xmm0,xmm9
|
|
xor edx,ebp
|
|
rol ecx,5
|
|
pshufd xmm10,xmm7,238
|
|
add ebx,esi
|
|
and edi,edx
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
add eax,DWORD[12+rsp]
|
|
xor edi,ebp
|
|
mov esi,ebx
|
|
rol ebx,5
|
|
add eax,edi
|
|
xor esi,edx
|
|
ror ecx,7
|
|
add eax,ebx
|
|
pxor xmm1,xmm5
|
|
add ebp,DWORD[16+rsp]
|
|
xor esi,ecx
|
|
punpcklqdq xmm10,xmm0
|
|
mov edi,eax
|
|
rol eax,5
|
|
pxor xmm1,xmm2
|
|
add ebp,esi
|
|
xor edi,ecx
|
|
movdqa xmm9,xmm8
|
|
ror ebx,7
|
|
paddd xmm8,xmm0
|
|
add ebp,eax
|
|
pxor xmm1,xmm10
|
|
add edx,DWORD[20+rsp]
|
|
xor edi,ebx
|
|
mov esi,ebp
|
|
rol ebp,5
|
|
movdqa xmm10,xmm1
|
|
add edx,edi
|
|
xor esi,ebx
|
|
movdqa XMMWORD[rsp],xmm8
|
|
ror eax,7
|
|
add edx,ebp
|
|
add ecx,DWORD[24+rsp]
|
|
pslld xmm1,2
|
|
xor esi,eax
|
|
mov edi,edx
|
|
psrld xmm10,30
|
|
rol edx,5
|
|
add ecx,esi
|
|
xor edi,eax
|
|
ror ebp,7
|
|
por xmm1,xmm10
|
|
add ecx,edx
|
|
add ebx,DWORD[28+rsp]
|
|
pshufd xmm8,xmm0,238
|
|
xor edi,ebp
|
|
mov esi,ecx
|
|
rol ecx,5
|
|
add ebx,edi
|
|
xor esi,ebp
|
|
ror edx,7
|
|
add ebx,ecx
|
|
pxor xmm2,xmm6
|
|
add eax,DWORD[32+rsp]
|
|
xor esi,edx
|
|
punpcklqdq xmm8,xmm1
|
|
mov edi,ebx
|
|
rol ebx,5
|
|
pxor xmm2,xmm3
|
|
add eax,esi
|
|
xor edi,edx
|
|
movdqa xmm10,XMMWORD[r14]
|
|
ror ecx,7
|
|
paddd xmm9,xmm1
|
|
add eax,ebx
|
|
pxor xmm2,xmm8
|
|
add ebp,DWORD[36+rsp]
|
|
xor edi,ecx
|
|
mov esi,eax
|
|
rol eax,5
|
|
movdqa xmm8,xmm2
|
|
add ebp,edi
|
|
xor esi,ecx
|
|
movdqa XMMWORD[16+rsp],xmm9
|
|
ror ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[40+rsp]
|
|
pslld xmm2,2
|
|
xor esi,ebx
|
|
mov edi,ebp
|
|
psrld xmm8,30
|
|
rol ebp,5
|
|
add edx,esi
|
|
xor edi,ebx
|
|
ror eax,7
|
|
por xmm2,xmm8
|
|
add edx,ebp
|
|
add ecx,DWORD[44+rsp]
|
|
pshufd xmm9,xmm1,238
|
|
xor edi,eax
|
|
mov esi,edx
|
|
rol edx,5
|
|
add ecx,edi
|
|
xor esi,eax
|
|
ror ebp,7
|
|
add ecx,edx
|
|
pxor xmm3,xmm7
|
|
add ebx,DWORD[48+rsp]
|
|
xor esi,ebp
|
|
punpcklqdq xmm9,xmm2
|
|
mov edi,ecx
|
|
rol ecx,5
|
|
pxor xmm3,xmm4
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
movdqa xmm8,xmm10
|
|
ror edx,7
|
|
paddd xmm10,xmm2
|
|
add ebx,ecx
|
|
pxor xmm3,xmm9
|
|
add eax,DWORD[52+rsp]
|
|
xor edi,edx
|
|
mov esi,ebx
|
|
rol ebx,5
|
|
movdqa xmm9,xmm3
|
|
add eax,edi
|
|
xor esi,edx
|
|
movdqa XMMWORD[32+rsp],xmm10
|
|
ror ecx,7
|
|
add eax,ebx
|
|
add ebp,DWORD[56+rsp]
|
|
pslld xmm3,2
|
|
xor esi,ecx
|
|
mov edi,eax
|
|
psrld xmm9,30
|
|
rol eax,5
|
|
add ebp,esi
|
|
xor edi,ecx
|
|
ror ebx,7
|
|
por xmm3,xmm9
|
|
add ebp,eax
|
|
add edx,DWORD[60+rsp]
|
|
pshufd xmm10,xmm2,238
|
|
xor edi,ebx
|
|
mov esi,ebp
|
|
rol ebp,5
|
|
add edx,edi
|
|
xor esi,ebx
|
|
ror eax,7
|
|
add edx,ebp
|
|
pxor xmm4,xmm0
|
|
add ecx,DWORD[rsp]
|
|
xor esi,eax
|
|
punpcklqdq xmm10,xmm3
|
|
mov edi,edx
|
|
rol edx,5
|
|
pxor xmm4,xmm5
|
|
add ecx,esi
|
|
xor edi,eax
|
|
movdqa xmm9,xmm8
|
|
ror ebp,7
|
|
paddd xmm8,xmm3
|
|
add ecx,edx
|
|
pxor xmm4,xmm10
|
|
add ebx,DWORD[4+rsp]
|
|
xor edi,ebp
|
|
mov esi,ecx
|
|
rol ecx,5
|
|
movdqa xmm10,xmm4
|
|
add ebx,edi
|
|
xor esi,ebp
|
|
movdqa XMMWORD[48+rsp],xmm8
|
|
ror edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[8+rsp]
|
|
pslld xmm4,2
|
|
xor esi,edx
|
|
mov edi,ebx
|
|
psrld xmm10,30
|
|
rol ebx,5
|
|
add eax,esi
|
|
xor edi,edx
|
|
ror ecx,7
|
|
por xmm4,xmm10
|
|
add eax,ebx
|
|
add ebp,DWORD[12+rsp]
|
|
pshufd xmm8,xmm3,238
|
|
xor edi,ecx
|
|
mov esi,eax
|
|
rol eax,5
|
|
add ebp,edi
|
|
xor esi,ecx
|
|
ror ebx,7
|
|
add ebp,eax
|
|
pxor xmm5,xmm1
|
|
add edx,DWORD[16+rsp]
|
|
xor esi,ebx
|
|
punpcklqdq xmm8,xmm4
|
|
mov edi,ebp
|
|
rol ebp,5
|
|
pxor xmm5,xmm6
|
|
add edx,esi
|
|
xor edi,ebx
|
|
movdqa xmm10,xmm9
|
|
ror eax,7
|
|
paddd xmm9,xmm4
|
|
add edx,ebp
|
|
pxor xmm5,xmm8
|
|
add ecx,DWORD[20+rsp]
|
|
xor edi,eax
|
|
mov esi,edx
|
|
rol edx,5
|
|
movdqa xmm8,xmm5
|
|
add ecx,edi
|
|
xor esi,eax
|
|
movdqa XMMWORD[rsp],xmm9
|
|
ror ebp,7
|
|
add ecx,edx
|
|
add ebx,DWORD[24+rsp]
|
|
pslld xmm5,2
|
|
xor esi,ebp
|
|
mov edi,ecx
|
|
psrld xmm8,30
|
|
rol ecx,5
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
ror edx,7
|
|
por xmm5,xmm8
|
|
add ebx,ecx
|
|
add eax,DWORD[28+rsp]
|
|
pshufd xmm9,xmm4,238
|
|
ror ecx,7
|
|
mov esi,ebx
|
|
xor edi,edx
|
|
rol ebx,5
|
|
add eax,edi
|
|
xor esi,ecx
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
pxor xmm6,xmm2
|
|
add ebp,DWORD[32+rsp]
|
|
and esi,ecx
|
|
xor ecx,edx
|
|
ror ebx,7
|
|
punpcklqdq xmm9,xmm5
|
|
mov edi,eax
|
|
xor esi,ecx
|
|
pxor xmm6,xmm7
|
|
rol eax,5
|
|
add ebp,esi
|
|
movdqa xmm8,xmm10
|
|
xor edi,ebx
|
|
paddd xmm10,xmm5
|
|
xor ebx,ecx
|
|
pxor xmm6,xmm9
|
|
add ebp,eax
|
|
add edx,DWORD[36+rsp]
|
|
and edi,ebx
|
|
xor ebx,ecx
|
|
ror eax,7
|
|
movdqa xmm9,xmm6
|
|
mov esi,ebp
|
|
xor edi,ebx
|
|
movdqa XMMWORD[16+rsp],xmm10
|
|
rol ebp,5
|
|
add edx,edi
|
|
xor esi,eax
|
|
pslld xmm6,2
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
psrld xmm9,30
|
|
add ecx,DWORD[40+rsp]
|
|
and esi,eax
|
|
xor eax,ebx
|
|
por xmm6,xmm9
|
|
ror ebp,7
|
|
mov edi,edx
|
|
xor esi,eax
|
|
rol edx,5
|
|
pshufd xmm10,xmm5,238
|
|
add ecx,esi
|
|
xor edi,ebp
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
add ebx,DWORD[44+rsp]
|
|
and edi,ebp
|
|
xor ebp,eax
|
|
ror edx,7
|
|
mov esi,ecx
|
|
xor edi,ebp
|
|
rol ecx,5
|
|
add ebx,edi
|
|
xor esi,edx
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
pxor xmm7,xmm3
|
|
add eax,DWORD[48+rsp]
|
|
and esi,edx
|
|
xor edx,ebp
|
|
ror ecx,7
|
|
punpcklqdq xmm10,xmm6
|
|
mov edi,ebx
|
|
xor esi,edx
|
|
pxor xmm7,xmm0
|
|
rol ebx,5
|
|
add eax,esi
|
|
movdqa xmm9,XMMWORD[32+r14]
|
|
xor edi,ecx
|
|
paddd xmm8,xmm6
|
|
xor ecx,edx
|
|
pxor xmm7,xmm10
|
|
add eax,ebx
|
|
add ebp,DWORD[52+rsp]
|
|
and edi,ecx
|
|
xor ecx,edx
|
|
ror ebx,7
|
|
movdqa xmm10,xmm7
|
|
mov esi,eax
|
|
xor edi,ecx
|
|
movdqa XMMWORD[32+rsp],xmm8
|
|
rol eax,5
|
|
add ebp,edi
|
|
xor esi,ebx
|
|
pslld xmm7,2
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
psrld xmm10,30
|
|
add edx,DWORD[56+rsp]
|
|
and esi,ebx
|
|
xor ebx,ecx
|
|
por xmm7,xmm10
|
|
ror eax,7
|
|
mov edi,ebp
|
|
xor esi,ebx
|
|
rol ebp,5
|
|
pshufd xmm8,xmm6,238
|
|
add edx,esi
|
|
xor edi,eax
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
add ecx,DWORD[60+rsp]
|
|
and edi,eax
|
|
xor eax,ebx
|
|
ror ebp,7
|
|
mov esi,edx
|
|
xor edi,eax
|
|
rol edx,5
|
|
add ecx,edi
|
|
xor esi,ebp
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
pxor xmm0,xmm4
|
|
add ebx,DWORD[rsp]
|
|
and esi,ebp
|
|
xor ebp,eax
|
|
ror edx,7
|
|
punpcklqdq xmm8,xmm7
|
|
mov edi,ecx
|
|
xor esi,ebp
|
|
pxor xmm0,xmm1
|
|
rol ecx,5
|
|
add ebx,esi
|
|
movdqa xmm10,xmm9
|
|
xor edi,edx
|
|
paddd xmm9,xmm7
|
|
xor edx,ebp
|
|
pxor xmm0,xmm8
|
|
add ebx,ecx
|
|
add eax,DWORD[4+rsp]
|
|
and edi,edx
|
|
xor edx,ebp
|
|
ror ecx,7
|
|
movdqa xmm8,xmm0
|
|
mov esi,ebx
|
|
xor edi,edx
|
|
movdqa XMMWORD[48+rsp],xmm9
|
|
rol ebx,5
|
|
add eax,edi
|
|
xor esi,ecx
|
|
pslld xmm0,2
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
psrld xmm8,30
|
|
add ebp,DWORD[8+rsp]
|
|
and esi,ecx
|
|
xor ecx,edx
|
|
por xmm0,xmm8
|
|
ror ebx,7
|
|
mov edi,eax
|
|
xor esi,ecx
|
|
rol eax,5
|
|
pshufd xmm9,xmm7,238
|
|
add ebp,esi
|
|
xor edi,ebx
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
add edx,DWORD[12+rsp]
|
|
and edi,ebx
|
|
xor ebx,ecx
|
|
ror eax,7
|
|
mov esi,ebp
|
|
xor edi,ebx
|
|
rol ebp,5
|
|
add edx,edi
|
|
xor esi,eax
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
pxor xmm1,xmm5
|
|
add ecx,DWORD[16+rsp]
|
|
and esi,eax
|
|
xor eax,ebx
|
|
ror ebp,7
|
|
punpcklqdq xmm9,xmm0
|
|
mov edi,edx
|
|
xor esi,eax
|
|
pxor xmm1,xmm2
|
|
rol edx,5
|
|
add ecx,esi
|
|
movdqa xmm8,xmm10
|
|
xor edi,ebp
|
|
paddd xmm10,xmm0
|
|
xor ebp,eax
|
|
pxor xmm1,xmm9
|
|
add ecx,edx
|
|
add ebx,DWORD[20+rsp]
|
|
and edi,ebp
|
|
xor ebp,eax
|
|
ror edx,7
|
|
movdqa xmm9,xmm1
|
|
mov esi,ecx
|
|
xor edi,ebp
|
|
movdqa XMMWORD[rsp],xmm10
|
|
rol ecx,5
|
|
add ebx,edi
|
|
xor esi,edx
|
|
pslld xmm1,2
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
psrld xmm9,30
|
|
add eax,DWORD[24+rsp]
|
|
and esi,edx
|
|
xor edx,ebp
|
|
por xmm1,xmm9
|
|
ror ecx,7
|
|
mov edi,ebx
|
|
xor esi,edx
|
|
rol ebx,5
|
|
pshufd xmm10,xmm0,238
|
|
add eax,esi
|
|
xor edi,ecx
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
add ebp,DWORD[28+rsp]
|
|
and edi,ecx
|
|
xor ecx,edx
|
|
ror ebx,7
|
|
mov esi,eax
|
|
xor edi,ecx
|
|
rol eax,5
|
|
add ebp,edi
|
|
xor esi,ebx
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
pxor xmm2,xmm6
|
|
add edx,DWORD[32+rsp]
|
|
and esi,ebx
|
|
xor ebx,ecx
|
|
ror eax,7
|
|
punpcklqdq xmm10,xmm1
|
|
mov edi,ebp
|
|
xor esi,ebx
|
|
pxor xmm2,xmm3
|
|
rol ebp,5
|
|
add edx,esi
|
|
movdqa xmm9,xmm8
|
|
xor edi,eax
|
|
paddd xmm8,xmm1
|
|
xor eax,ebx
|
|
pxor xmm2,xmm10
|
|
add edx,ebp
|
|
add ecx,DWORD[36+rsp]
|
|
and edi,eax
|
|
xor eax,ebx
|
|
ror ebp,7
|
|
movdqa xmm10,xmm2
|
|
mov esi,edx
|
|
xor edi,eax
|
|
movdqa XMMWORD[16+rsp],xmm8
|
|
rol edx,5
|
|
add ecx,edi
|
|
xor esi,ebp
|
|
pslld xmm2,2
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
psrld xmm10,30
|
|
add ebx,DWORD[40+rsp]
|
|
and esi,ebp
|
|
xor ebp,eax
|
|
por xmm2,xmm10
|
|
ror edx,7
|
|
mov edi,ecx
|
|
xor esi,ebp
|
|
rol ecx,5
|
|
pshufd xmm8,xmm1,238
|
|
add ebx,esi
|
|
xor edi,edx
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
add eax,DWORD[44+rsp]
|
|
and edi,edx
|
|
xor edx,ebp
|
|
ror ecx,7
|
|
mov esi,ebx
|
|
xor edi,edx
|
|
rol ebx,5
|
|
add eax,edi
|
|
xor esi,edx
|
|
add eax,ebx
|
|
pxor xmm3,xmm7
|
|
add ebp,DWORD[48+rsp]
|
|
xor esi,ecx
|
|
punpcklqdq xmm8,xmm2
|
|
mov edi,eax
|
|
rol eax,5
|
|
pxor xmm3,xmm4
|
|
add ebp,esi
|
|
xor edi,ecx
|
|
movdqa xmm10,xmm9
|
|
ror ebx,7
|
|
paddd xmm9,xmm2
|
|
add ebp,eax
|
|
pxor xmm3,xmm8
|
|
add edx,DWORD[52+rsp]
|
|
xor edi,ebx
|
|
mov esi,ebp
|
|
rol ebp,5
|
|
movdqa xmm8,xmm3
|
|
add edx,edi
|
|
xor esi,ebx
|
|
movdqa XMMWORD[32+rsp],xmm9
|
|
ror eax,7
|
|
add edx,ebp
|
|
add ecx,DWORD[56+rsp]
|
|
pslld xmm3,2
|
|
xor esi,eax
|
|
mov edi,edx
|
|
psrld xmm8,30
|
|
rol edx,5
|
|
add ecx,esi
|
|
xor edi,eax
|
|
ror ebp,7
|
|
por xmm3,xmm8
|
|
add ecx,edx
|
|
add ebx,DWORD[60+rsp]
|
|
xor edi,ebp
|
|
mov esi,ecx
|
|
rol ecx,5
|
|
add ebx,edi
|
|
xor esi,ebp
|
|
ror edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[rsp]
|
|
xor esi,edx
|
|
mov edi,ebx
|
|
rol ebx,5
|
|
paddd xmm10,xmm3
|
|
add eax,esi
|
|
xor edi,edx
|
|
movdqa XMMWORD[48+rsp],xmm10
|
|
ror ecx,7
|
|
add eax,ebx
|
|
add ebp,DWORD[4+rsp]
|
|
xor edi,ecx
|
|
mov esi,eax
|
|
rol eax,5
|
|
add ebp,edi
|
|
xor esi,ecx
|
|
ror ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[8+rsp]
|
|
xor esi,ebx
|
|
mov edi,ebp
|
|
rol ebp,5
|
|
add edx,esi
|
|
xor edi,ebx
|
|
ror eax,7
|
|
add edx,ebp
|
|
add ecx,DWORD[12+rsp]
|
|
xor edi,eax
|
|
mov esi,edx
|
|
rol edx,5
|
|
add ecx,edi
|
|
xor esi,eax
|
|
ror ebp,7
|
|
add ecx,edx
|
|
cmp r9,r10
|
|
je NEAR $L$done_ssse3
|
|
movdqa xmm6,XMMWORD[64+r14]
|
|
movdqa xmm9,XMMWORD[((-64))+r14]
|
|
movdqu xmm0,XMMWORD[r9]
|
|
movdqu xmm1,XMMWORD[16+r9]
|
|
movdqu xmm2,XMMWORD[32+r9]
|
|
movdqu xmm3,XMMWORD[48+r9]
|
|
DB 102,15,56,0,198
|
|
add r9,64
|
|
add ebx,DWORD[16+rsp]
|
|
xor esi,ebp
|
|
mov edi,ecx
|
|
DB 102,15,56,0,206
|
|
rol ecx,5
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
ror edx,7
|
|
paddd xmm0,xmm9
|
|
add ebx,ecx
|
|
add eax,DWORD[20+rsp]
|
|
xor edi,edx
|
|
mov esi,ebx
|
|
movdqa XMMWORD[rsp],xmm0
|
|
rol ebx,5
|
|
add eax,edi
|
|
xor esi,edx
|
|
ror ecx,7
|
|
psubd xmm0,xmm9
|
|
add eax,ebx
|
|
add ebp,DWORD[24+rsp]
|
|
xor esi,ecx
|
|
mov edi,eax
|
|
rol eax,5
|
|
add ebp,esi
|
|
xor edi,ecx
|
|
ror ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[28+rsp]
|
|
xor edi,ebx
|
|
mov esi,ebp
|
|
rol ebp,5
|
|
add edx,edi
|
|
xor esi,ebx
|
|
ror eax,7
|
|
add edx,ebp
|
|
add ecx,DWORD[32+rsp]
|
|
xor esi,eax
|
|
mov edi,edx
|
|
DB 102,15,56,0,214
|
|
rol edx,5
|
|
add ecx,esi
|
|
xor edi,eax
|
|
ror ebp,7
|
|
paddd xmm1,xmm9
|
|
add ecx,edx
|
|
add ebx,DWORD[36+rsp]
|
|
xor edi,ebp
|
|
mov esi,ecx
|
|
movdqa XMMWORD[16+rsp],xmm1
|
|
rol ecx,5
|
|
add ebx,edi
|
|
xor esi,ebp
|
|
ror edx,7
|
|
psubd xmm1,xmm9
|
|
add ebx,ecx
|
|
add eax,DWORD[40+rsp]
|
|
xor esi,edx
|
|
mov edi,ebx
|
|
rol ebx,5
|
|
add eax,esi
|
|
xor edi,edx
|
|
ror ecx,7
|
|
add eax,ebx
|
|
add ebp,DWORD[44+rsp]
|
|
xor edi,ecx
|
|
mov esi,eax
|
|
rol eax,5
|
|
add ebp,edi
|
|
xor esi,ecx
|
|
ror ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[48+rsp]
|
|
xor esi,ebx
|
|
mov edi,ebp
|
|
DB 102,15,56,0,222
|
|
rol ebp,5
|
|
add edx,esi
|
|
xor edi,ebx
|
|
ror eax,7
|
|
paddd xmm2,xmm9
|
|
add edx,ebp
|
|
add ecx,DWORD[52+rsp]
|
|
xor edi,eax
|
|
mov esi,edx
|
|
movdqa XMMWORD[32+rsp],xmm2
|
|
rol edx,5
|
|
add ecx,edi
|
|
xor esi,eax
|
|
ror ebp,7
|
|
psubd xmm2,xmm9
|
|
add ecx,edx
|
|
add ebx,DWORD[56+rsp]
|
|
xor esi,ebp
|
|
mov edi,ecx
|
|
rol ecx,5
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
ror edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[60+rsp]
|
|
xor edi,edx
|
|
mov esi,ebx
|
|
rol ebx,5
|
|
add eax,edi
|
|
ror ecx,7
|
|
add eax,ebx
|
|
add eax,DWORD[r8]
|
|
add esi,DWORD[4+r8]
|
|
add ecx,DWORD[8+r8]
|
|
add edx,DWORD[12+r8]
|
|
mov DWORD[r8],eax
|
|
add ebp,DWORD[16+r8]
|
|
mov DWORD[4+r8],esi
|
|
mov ebx,esi
|
|
mov DWORD[8+r8],ecx
|
|
mov edi,ecx
|
|
mov DWORD[12+r8],edx
|
|
xor edi,edx
|
|
mov DWORD[16+r8],ebp
|
|
and esi,edi
|
|
jmp NEAR $L$oop_ssse3
|
|
|
|
ALIGN 16
|
|
$L$done_ssse3:
|
|
add ebx,DWORD[16+rsp]
|
|
xor esi,ebp
|
|
mov edi,ecx
|
|
rol ecx,5
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
ror edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[20+rsp]
|
|
xor edi,edx
|
|
mov esi,ebx
|
|
rol ebx,5
|
|
add eax,edi
|
|
xor esi,edx
|
|
ror ecx,7
|
|
add eax,ebx
|
|
add ebp,DWORD[24+rsp]
|
|
xor esi,ecx
|
|
mov edi,eax
|
|
rol eax,5
|
|
add ebp,esi
|
|
xor edi,ecx
|
|
ror ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[28+rsp]
|
|
xor edi,ebx
|
|
mov esi,ebp
|
|
rol ebp,5
|
|
add edx,edi
|
|
xor esi,ebx
|
|
ror eax,7
|
|
add edx,ebp
|
|
add ecx,DWORD[32+rsp]
|
|
xor esi,eax
|
|
mov edi,edx
|
|
rol edx,5
|
|
add ecx,esi
|
|
xor edi,eax
|
|
ror ebp,7
|
|
add ecx,edx
|
|
add ebx,DWORD[36+rsp]
|
|
xor edi,ebp
|
|
mov esi,ecx
|
|
rol ecx,5
|
|
add ebx,edi
|
|
xor esi,ebp
|
|
ror edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[40+rsp]
|
|
xor esi,edx
|
|
mov edi,ebx
|
|
rol ebx,5
|
|
add eax,esi
|
|
xor edi,edx
|
|
ror ecx,7
|
|
add eax,ebx
|
|
add ebp,DWORD[44+rsp]
|
|
xor edi,ecx
|
|
mov esi,eax
|
|
rol eax,5
|
|
add ebp,edi
|
|
xor esi,ecx
|
|
ror ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[48+rsp]
|
|
xor esi,ebx
|
|
mov edi,ebp
|
|
rol ebp,5
|
|
add edx,esi
|
|
xor edi,ebx
|
|
ror eax,7
|
|
add edx,ebp
|
|
add ecx,DWORD[52+rsp]
|
|
xor edi,eax
|
|
mov esi,edx
|
|
rol edx,5
|
|
add ecx,edi
|
|
xor esi,eax
|
|
ror ebp,7
|
|
add ecx,edx
|
|
add ebx,DWORD[56+rsp]
|
|
xor esi,ebp
|
|
mov edi,ecx
|
|
rol ecx,5
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
ror edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[60+rsp]
|
|
xor edi,edx
|
|
mov esi,ebx
|
|
rol ebx,5
|
|
add eax,edi
|
|
ror ecx,7
|
|
add eax,ebx
|
|
add eax,DWORD[r8]
|
|
add esi,DWORD[4+r8]
|
|
add ecx,DWORD[8+r8]
|
|
mov DWORD[r8],eax
|
|
add edx,DWORD[12+r8]
|
|
mov DWORD[4+r8],esi
|
|
add ebp,DWORD[16+r8]
|
|
mov DWORD[8+r8],ecx
|
|
mov DWORD[12+r8],edx
|
|
mov DWORD[16+r8],ebp
|
|
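; NOTE (editor annotation): restore the XMM registers and callee-saved
; GPRs spilled in the prologue, then return through the Win64 epilogue.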
movaps xmm6,XMMWORD[((-40-96))+r11]
movaps xmm7,XMMWORD[((-40-80))+r11]
movaps xmm8,XMMWORD[((-40-64))+r11]
movaps xmm9,XMMWORD[((-40-48))+r11]
movaps xmm10,XMMWORD[((-40-32))+r11]
movaps xmm11,XMMWORD[((-40-16))+r11]
mov r14,QWORD[((-40))+r11]
mov r13,QWORD[((-32))+r11]
mov r12,QWORD[((-24))+r11]
mov rbp,QWORD[((-16))+r11]
mov rbx,QWORD[((-8))+r11]
lea rsp,[r11]
$L$epilogue_ssse3:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret

$L$SEH_end_sha1_block_data_order_ssse3:

ALIGN 16
sha1_block_data_order_avx:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_sha1_block_data_order_avx:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8

_avx_shortcut:
mov r11,rsp
push rbx
push rbp
push r12
push r13
push r14
lea rsp,[((-160))+rsp]
vzeroupper
vmovaps XMMWORD[(-40-96)+r11],xmm6
vmovaps XMMWORD[(-40-80)+r11],xmm7
vmovaps XMMWORD[(-40-64)+r11],xmm8
vmovaps XMMWORD[(-40-48)+r11],xmm9
vmovaps XMMWORD[(-40-32)+r11],xmm10
vmovaps XMMWORD[(-40-16)+r11],xmm11
$L$prologue_avx:
and rsp,-64
mov r8,rdi
mov r9,rsi
mov r10,rdx

shl r10,6
add r10,r9
lea r14,[((K_XX_XX+64))]

mov eax,DWORD[r8]
mov ebx,DWORD[4+r8]
mov ecx,DWORD[8+r8]
mov edx,DWORD[12+r8]
mov esi,ebx
mov ebp,DWORD[16+r8]
mov edi,ecx
xor edi,edx
and esi,edi

vmovdqa xmm6,XMMWORD[64+r14]
vmovdqa xmm11,XMMWORD[((-64))+r14]
vmovdqu xmm0,XMMWORD[r9]
vmovdqu xmm1,XMMWORD[16+r9]
vmovdqu xmm2,XMMWORD[32+r9]
vmovdqu xmm3,XMMWORD[48+r9]
vpshufb xmm0,xmm0,xmm6
add r9,64
vpshufb xmm1,xmm1,xmm6
vpshufb xmm2,xmm2,xmm6
vpshufb xmm3,xmm3,xmm6
vpaddd xmm4,xmm0,xmm11
vpaddd xmm5,xmm1,xmm11
vpaddd xmm6,xmm2,xmm11
vmovdqa XMMWORD[rsp],xmm4
vmovdqa XMMWORD[16+rsp],xmm5
vmovdqa XMMWORD[32+rsp],xmm6
jmp NEAR $L$oop_avx
ALIGN 16
$L$oop_avx:
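; NOTE (editor annotation): the AVX rounds mirror the SSSE3 structure
; above, using VEX-encoded vector ops (vpalignr/vpxor/vpaddd) and
; shld/shrd in place of rol/ror.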
shrd ebx,ebx,2
|
|
xor esi,edx
|
|
vpalignr xmm4,xmm1,xmm0,8
|
|
mov edi,eax
|
|
add ebp,DWORD[rsp]
|
|
vpaddd xmm9,xmm11,xmm3
|
|
xor ebx,ecx
|
|
shld eax,eax,5
|
|
vpsrldq xmm8,xmm3,4
|
|
add ebp,esi
|
|
and edi,ebx
|
|
vpxor xmm4,xmm4,xmm0
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
vpxor xmm8,xmm8,xmm2
|
|
shrd eax,eax,7
|
|
xor edi,ecx
|
|
mov esi,ebp
|
|
add edx,DWORD[4+rsp]
|
|
vpxor xmm4,xmm4,xmm8
|
|
xor eax,ebx
|
|
shld ebp,ebp,5
|
|
vmovdqa XMMWORD[48+rsp],xmm9
|
|
add edx,edi
|
|
and esi,eax
|
|
vpsrld xmm8,xmm4,31
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
shrd ebp,ebp,7
|
|
xor esi,ebx
|
|
vpslldq xmm10,xmm4,12
|
|
vpaddd xmm4,xmm4,xmm4
|
|
mov edi,edx
|
|
add ecx,DWORD[8+rsp]
|
|
xor ebp,eax
|
|
shld edx,edx,5
|
|
vpsrld xmm9,xmm10,30
|
|
vpor xmm4,xmm4,xmm8
|
|
add ecx,esi
|
|
and edi,ebp
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
vpslld xmm10,xmm10,2
|
|
vpxor xmm4,xmm4,xmm9
|
|
shrd edx,edx,7
|
|
xor edi,eax
|
|
mov esi,ecx
|
|
add ebx,DWORD[12+rsp]
|
|
vpxor xmm4,xmm4,xmm10
|
|
xor edx,ebp
|
|
shld ecx,ecx,5
|
|
add ebx,edi
|
|
and esi,edx
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
shrd ecx,ecx,7
|
|
xor esi,ebp
|
|
vpalignr xmm5,xmm2,xmm1,8
|
|
mov edi,ebx
|
|
add eax,DWORD[16+rsp]
|
|
vpaddd xmm9,xmm11,xmm4
|
|
xor ecx,edx
|
|
shld ebx,ebx,5
|
|
vpsrldq xmm8,xmm4,4
|
|
add eax,esi
|
|
and edi,ecx
|
|
vpxor xmm5,xmm5,xmm1
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
vpxor xmm8,xmm8,xmm3
|
|
shrd ebx,ebx,7
|
|
xor edi,edx
|
|
mov esi,eax
|
|
add ebp,DWORD[20+rsp]
|
|
vpxor xmm5,xmm5,xmm8
|
|
xor ebx,ecx
|
|
shld eax,eax,5
|
|
vmovdqa XMMWORD[rsp],xmm9
|
|
add ebp,edi
|
|
and esi,ebx
|
|
vpsrld xmm8,xmm5,31
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
shrd eax,eax,7
|
|
xor esi,ecx
|
|
vpslldq xmm10,xmm5,12
|
|
vpaddd xmm5,xmm5,xmm5
|
|
mov edi,ebp
|
|
add edx,DWORD[24+rsp]
|
|
xor eax,ebx
|
|
shld ebp,ebp,5
|
|
vpsrld xmm9,xmm10,30
|
|
vpor xmm5,xmm5,xmm8
|
|
add edx,esi
|
|
and edi,eax
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
vpslld xmm10,xmm10,2
|
|
vpxor xmm5,xmm5,xmm9
|
|
shrd ebp,ebp,7
|
|
xor edi,ebx
|
|
mov esi,edx
|
|
add ecx,DWORD[28+rsp]
|
|
vpxor xmm5,xmm5,xmm10
|
|
xor ebp,eax
|
|
shld edx,edx,5
|
|
vmovdqa xmm11,XMMWORD[((-32))+r14]
|
|
add ecx,edi
|
|
and esi,ebp
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
shrd edx,edx,7
|
|
xor esi,eax
|
|
vpalignr xmm6,xmm3,xmm2,8
|
|
mov edi,ecx
|
|
add ebx,DWORD[32+rsp]
|
|
vpaddd xmm9,xmm11,xmm5
|
|
xor edx,ebp
|
|
shld ecx,ecx,5
|
|
vpsrldq xmm8,xmm5,4
|
|
add ebx,esi
|
|
and edi,edx
|
|
vpxor xmm6,xmm6,xmm2
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
vpxor xmm8,xmm8,xmm4
|
|
shrd ecx,ecx,7
|
|
xor edi,ebp
|
|
mov esi,ebx
|
|
add eax,DWORD[36+rsp]
|
|
vpxor xmm6,xmm6,xmm8
|
|
xor ecx,edx
|
|
shld ebx,ebx,5
|
|
vmovdqa XMMWORD[16+rsp],xmm9
|
|
add eax,edi
|
|
and esi,ecx
|
|
vpsrld xmm8,xmm6,31
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
shrd ebx,ebx,7
|
|
xor esi,edx
|
|
vpslldq xmm10,xmm6,12
|
|
vpaddd xmm6,xmm6,xmm6
|
|
mov edi,eax
|
|
add ebp,DWORD[40+rsp]
|
|
xor ebx,ecx
|
|
shld eax,eax,5
|
|
vpsrld xmm9,xmm10,30
|
|
vpor xmm6,xmm6,xmm8
|
|
add ebp,esi
|
|
and edi,ebx
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
vpslld xmm10,xmm10,2
|
|
vpxor xmm6,xmm6,xmm9
|
|
shrd eax,eax,7
|
|
xor edi,ecx
|
|
mov esi,ebp
|
|
add edx,DWORD[44+rsp]
|
|
vpxor xmm6,xmm6,xmm10
|
|
xor eax,ebx
|
|
shld ebp,ebp,5
|
|
add edx,edi
|
|
and esi,eax
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
shrd ebp,ebp,7
|
|
xor esi,ebx
|
|
vpalignr xmm7,xmm4,xmm3,8
|
|
mov edi,edx
|
|
add ecx,DWORD[48+rsp]
|
|
vpaddd xmm9,xmm11,xmm6
|
|
xor ebp,eax
|
|
shld edx,edx,5
|
|
vpsrldq xmm8,xmm6,4
|
|
add ecx,esi
|
|
and edi,ebp
|
|
vpxor xmm7,xmm7,xmm3
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
vpxor xmm8,xmm8,xmm5
|
|
shrd edx,edx,7
|
|
xor edi,eax
|
|
mov esi,ecx
|
|
add ebx,DWORD[52+rsp]
|
|
vpxor xmm7,xmm7,xmm8
|
|
xor edx,ebp
|
|
shld ecx,ecx,5
|
|
vmovdqa XMMWORD[32+rsp],xmm9
|
|
add ebx,edi
|
|
and esi,edx
|
|
vpsrld xmm8,xmm7,31
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
shrd ecx,ecx,7
|
|
xor esi,ebp
|
|
vpslldq xmm10,xmm7,12
|
|
vpaddd xmm7,xmm7,xmm7
|
|
mov edi,ebx
|
|
add eax,DWORD[56+rsp]
|
|
xor ecx,edx
|
|
shld ebx,ebx,5
|
|
vpsrld xmm9,xmm10,30
|
|
vpor xmm7,xmm7,xmm8
|
|
add eax,esi
|
|
and edi,ecx
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
vpslld xmm10,xmm10,2
|
|
vpxor xmm7,xmm7,xmm9
|
|
shrd ebx,ebx,7
|
|
xor edi,edx
|
|
mov esi,eax
|
|
add ebp,DWORD[60+rsp]
|
|
vpxor xmm7,xmm7,xmm10
|
|
xor ebx,ecx
|
|
shld eax,eax,5
|
|
add ebp,edi
|
|
and esi,ebx
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
vpalignr xmm8,xmm7,xmm6,8
|
|
vpxor xmm0,xmm0,xmm4
|
|
shrd eax,eax,7
|
|
xor esi,ecx
|
|
mov edi,ebp
|
|
add edx,DWORD[rsp]
|
|
vpxor xmm0,xmm0,xmm1
|
|
xor eax,ebx
|
|
shld ebp,ebp,5
|
|
vpaddd xmm9,xmm11,xmm7
|
|
add edx,esi
|
|
and edi,eax
|
|
vpxor xmm0,xmm0,xmm8
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
shrd ebp,ebp,7
|
|
xor edi,ebx
|
|
vpsrld xmm8,xmm0,30
|
|
vmovdqa XMMWORD[48+rsp],xmm9
|
|
mov esi,edx
|
|
add ecx,DWORD[4+rsp]
|
|
xor ebp,eax
|
|
shld edx,edx,5
|
|
vpslld xmm0,xmm0,2
|
|
add ecx,edi
|
|
and esi,ebp
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
shrd edx,edx,7
|
|
xor esi,eax
|
|
mov edi,ecx
|
|
add ebx,DWORD[8+rsp]
|
|
vpor xmm0,xmm0,xmm8
|
|
xor edx,ebp
|
|
shld ecx,ecx,5
|
|
add ebx,esi
|
|
and edi,edx
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
add eax,DWORD[12+rsp]
|
|
xor edi,ebp
|
|
mov esi,ebx
|
|
shld ebx,ebx,5
|
|
add eax,edi
|
|
xor esi,edx
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
vpalignr xmm8,xmm0,xmm7,8
|
|
vpxor xmm1,xmm1,xmm5
|
|
add ebp,DWORD[16+rsp]
|
|
xor esi,ecx
|
|
mov edi,eax
|
|
shld eax,eax,5
|
|
vpxor xmm1,xmm1,xmm2
|
|
add ebp,esi
|
|
xor edi,ecx
|
|
vpaddd xmm9,xmm11,xmm0
|
|
shrd ebx,ebx,7
|
|
add ebp,eax
|
|
vpxor xmm1,xmm1,xmm8
|
|
add edx,DWORD[20+rsp]
|
|
xor edi,ebx
|
|
mov esi,ebp
|
|
shld ebp,ebp,5
|
|
vpsrld xmm8,xmm1,30
|
|
vmovdqa XMMWORD[rsp],xmm9
|
|
add edx,edi
|
|
xor esi,ebx
|
|
shrd eax,eax,7
|
|
add edx,ebp
|
|
vpslld xmm1,xmm1,2
|
|
add ecx,DWORD[24+rsp]
|
|
xor esi,eax
|
|
mov edi,edx
|
|
shld edx,edx,5
|
|
add ecx,esi
|
|
xor edi,eax
|
|
shrd ebp,ebp,7
|
|
add ecx,edx
|
|
vpor xmm1,xmm1,xmm8
|
|
add ebx,DWORD[28+rsp]
|
|
xor edi,ebp
|
|
mov esi,ecx
|
|
shld ecx,ecx,5
|
|
add ebx,edi
|
|
xor esi,ebp
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
vpalignr xmm8,xmm1,xmm0,8
|
|
vpxor xmm2,xmm2,xmm6
|
|
add eax,DWORD[32+rsp]
|
|
xor esi,edx
|
|
mov edi,ebx
|
|
shld ebx,ebx,5
|
|
vpxor xmm2,xmm2,xmm3
|
|
add eax,esi
|
|
xor edi,edx
|
|
vpaddd xmm9,xmm11,xmm1
|
|
vmovdqa xmm11,XMMWORD[r14]
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
vpxor xmm2,xmm2,xmm8
|
|
add ebp,DWORD[36+rsp]
|
|
xor edi,ecx
|
|
mov esi,eax
|
|
shld eax,eax,5
|
|
vpsrld xmm8,xmm2,30
|
|
vmovdqa XMMWORD[16+rsp],xmm9
|
|
add ebp,edi
|
|
xor esi,ecx
|
|
shrd ebx,ebx,7
|
|
add ebp,eax
|
|
vpslld xmm2,xmm2,2
|
|
add edx,DWORD[40+rsp]
|
|
xor esi,ebx
|
|
mov edi,ebp
|
|
shld ebp,ebp,5
|
|
add edx,esi
|
|
xor edi,ebx
|
|
shrd eax,eax,7
|
|
add edx,ebp
|
|
vpor xmm2,xmm2,xmm8
|
|
add ecx,DWORD[44+rsp]
|
|
xor edi,eax
|
|
mov esi,edx
|
|
shld edx,edx,5
|
|
add ecx,edi
|
|
xor esi,eax
|
|
shrd ebp,ebp,7
|
|
add ecx,edx
|
|
vpalignr xmm8,xmm2,xmm1,8
|
|
vpxor xmm3,xmm3,xmm7
|
|
add ebx,DWORD[48+rsp]
|
|
xor esi,ebp
|
|
mov edi,ecx
|
|
shld ecx,ecx,5
|
|
vpxor xmm3,xmm3,xmm4
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
vpaddd xmm9,xmm11,xmm2
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
vpxor xmm3,xmm3,xmm8
|
|
add eax,DWORD[52+rsp]
|
|
xor edi,edx
|
|
mov esi,ebx
|
|
shld ebx,ebx,5
|
|
vpsrld xmm8,xmm3,30
|
|
vmovdqa XMMWORD[32+rsp],xmm9
|
|
add eax,edi
|
|
xor esi,edx
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
vpslld xmm3,xmm3,2
|
|
add ebp,DWORD[56+rsp]
|
|
xor esi,ecx
|
|
mov edi,eax
|
|
shld eax,eax,5
|
|
add ebp,esi
|
|
xor edi,ecx
|
|
shrd ebx,ebx,7
|
|
add ebp,eax
|
|
vpor xmm3,xmm3,xmm8
|
|
add edx,DWORD[60+rsp]
|
|
xor edi,ebx
|
|
mov esi,ebp
|
|
shld ebp,ebp,5
|
|
add edx,edi
|
|
xor esi,ebx
|
|
shrd eax,eax,7
|
|
add edx,ebp
|
|
vpalignr xmm8,xmm3,xmm2,8
|
|
vpxor xmm4,xmm4,xmm0
|
|
add ecx,DWORD[rsp]
|
|
xor esi,eax
|
|
mov edi,edx
|
|
shld edx,edx,5
|
|
vpxor xmm4,xmm4,xmm5
|
|
add ecx,esi
|
|
xor edi,eax
|
|
vpaddd xmm9,xmm11,xmm3
|
|
shrd ebp,ebp,7
|
|
add ecx,edx
|
|
vpxor xmm4,xmm4,xmm8
|
|
add ebx,DWORD[4+rsp]
|
|
xor edi,ebp
|
|
mov esi,ecx
|
|
shld ecx,ecx,5
|
|
vpsrld xmm8,xmm4,30
|
|
vmovdqa XMMWORD[48+rsp],xmm9
|
|
add ebx,edi
|
|
xor esi,ebp
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
vpslld xmm4,xmm4,2
|
|
add eax,DWORD[8+rsp]
|
|
xor esi,edx
|
|
mov edi,ebx
|
|
shld ebx,ebx,5
|
|
add eax,esi
|
|
xor edi,edx
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
vpor xmm4,xmm4,xmm8
|
|
add ebp,DWORD[12+rsp]
|
|
xor edi,ecx
|
|
mov esi,eax
|
|
shld eax,eax,5
|
|
add ebp,edi
|
|
xor esi,ecx
|
|
shrd ebx,ebx,7
|
|
add ebp,eax
|
|
vpalignr xmm8,xmm4,xmm3,8
|
|
vpxor xmm5,xmm5,xmm1
|
|
add edx,DWORD[16+rsp]
|
|
xor esi,ebx
|
|
mov edi,ebp
|
|
shld ebp,ebp,5
|
|
vpxor xmm5,xmm5,xmm6
|
|
add edx,esi
|
|
xor edi,ebx
|
|
vpaddd xmm9,xmm11,xmm4
|
|
shrd eax,eax,7
|
|
add edx,ebp
|
|
vpxor xmm5,xmm5,xmm8
|
|
add ecx,DWORD[20+rsp]
|
|
xor edi,eax
|
|
mov esi,edx
|
|
shld edx,edx,5
|
|
vpsrld xmm8,xmm5,30
|
|
vmovdqa XMMWORD[rsp],xmm9
|
|
add ecx,edi
|
|
xor esi,eax
|
|
shrd ebp,ebp,7
|
|
add ecx,edx
|
|
vpslld xmm5,xmm5,2
|
|
add ebx,DWORD[24+rsp]
|
|
xor esi,ebp
|
|
mov edi,ecx
|
|
shld ecx,ecx,5
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
vpor xmm5,xmm5,xmm8
|
|
add eax,DWORD[28+rsp]
|
|
shrd ecx,ecx,7
|
|
mov esi,ebx
|
|
xor edi,edx
|
|
shld ebx,ebx,5
|
|
add eax,edi
|
|
xor esi,ecx
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
vpalignr xmm8,xmm5,xmm4,8
|
|
vpxor xmm6,xmm6,xmm2
|
|
add ebp,DWORD[32+rsp]
|
|
and esi,ecx
|
|
xor ecx,edx
|
|
shrd ebx,ebx,7
|
|
vpxor xmm6,xmm6,xmm7
|
|
mov edi,eax
|
|
xor esi,ecx
|
|
vpaddd xmm9,xmm11,xmm5
|
|
shld eax,eax,5
|
|
add ebp,esi
|
|
vpxor xmm6,xmm6,xmm8
|
|
xor edi,ebx
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
add edx,DWORD[36+rsp]
|
|
vpsrld xmm8,xmm6,30
|
|
vmovdqa XMMWORD[16+rsp],xmm9
|
|
and edi,ebx
|
|
xor ebx,ecx
|
|
shrd eax,eax,7
|
|
mov esi,ebp
|
|
vpslld xmm6,xmm6,2
|
|
xor edi,ebx
|
|
shld ebp,ebp,5
|
|
add edx,edi
|
|
xor esi,eax
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
add ecx,DWORD[40+rsp]
|
|
and esi,eax
|
|
vpor xmm6,xmm6,xmm8
|
|
xor eax,ebx
|
|
shrd ebp,ebp,7
|
|
mov edi,edx
|
|
xor esi,eax
|
|
shld edx,edx,5
|
|
add ecx,esi
|
|
xor edi,ebp
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
add ebx,DWORD[44+rsp]
|
|
and edi,ebp
|
|
xor ebp,eax
|
|
shrd edx,edx,7
|
|
mov esi,ecx
|
|
xor edi,ebp
|
|
shld ecx,ecx,5
|
|
add ebx,edi
|
|
xor esi,edx
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
vpalignr xmm8,xmm6,xmm5,8
|
|
vpxor xmm7,xmm7,xmm3
|
|
add eax,DWORD[48+rsp]
|
|
and esi,edx
|
|
xor edx,ebp
|
|
shrd ecx,ecx,7
|
|
vpxor xmm7,xmm7,xmm0
|
|
mov edi,ebx
|
|
xor esi,edx
|
|
vpaddd xmm9,xmm11,xmm6
|
|
vmovdqa xmm11,XMMWORD[32+r14]
|
|
shld ebx,ebx,5
|
|
add eax,esi
|
|
vpxor xmm7,xmm7,xmm8
|
|
xor edi,ecx
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
add ebp,DWORD[52+rsp]
|
|
vpsrld xmm8,xmm7,30
|
|
vmovdqa XMMWORD[32+rsp],xmm9
|
|
and edi,ecx
|
|
xor ecx,edx
|
|
shrd ebx,ebx,7
|
|
mov esi,eax
|
|
vpslld xmm7,xmm7,2
|
|
xor edi,ecx
|
|
shld eax,eax,5
|
|
add ebp,edi
|
|
xor esi,ebx
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
add edx,DWORD[56+rsp]
|
|
and esi,ebx
|
|
vpor xmm7,xmm7,xmm8
|
|
xor ebx,ecx
|
|
shrd eax,eax,7
|
|
mov edi,ebp
|
|
xor esi,ebx
|
|
shld ebp,ebp,5
|
|
add edx,esi
|
|
xor edi,eax
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
add ecx,DWORD[60+rsp]
|
|
and edi,eax
|
|
xor eax,ebx
|
|
shrd ebp,ebp,7
|
|
mov esi,edx
|
|
xor edi,eax
|
|
shld edx,edx,5
|
|
add ecx,edi
|
|
xor esi,ebp
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
vpalignr xmm8,xmm7,xmm6,8
|
|
vpxor xmm0,xmm0,xmm4
|
|
add ebx,DWORD[rsp]
|
|
and esi,ebp
|
|
xor ebp,eax
|
|
shrd edx,edx,7
|
|
vpxor xmm0,xmm0,xmm1
|
|
mov edi,ecx
|
|
xor esi,ebp
|
|
vpaddd xmm9,xmm11,xmm7
|
|
shld ecx,ecx,5
|
|
add ebx,esi
|
|
vpxor xmm0,xmm0,xmm8
|
|
xor edi,edx
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
add eax,DWORD[4+rsp]
|
|
vpsrld xmm8,xmm0,30
|
|
vmovdqa XMMWORD[48+rsp],xmm9
|
|
and edi,edx
|
|
xor edx,ebp
|
|
shrd ecx,ecx,7
|
|
mov esi,ebx
|
|
vpslld xmm0,xmm0,2
|
|
xor edi,edx
|
|
shld ebx,ebx,5
|
|
add eax,edi
|
|
xor esi,ecx
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
add ebp,DWORD[8+rsp]
|
|
and esi,ecx
|
|
vpor xmm0,xmm0,xmm8
|
|
xor ecx,edx
|
|
shrd ebx,ebx,7
|
|
mov edi,eax
|
|
xor esi,ecx
|
|
shld eax,eax,5
|
|
add ebp,esi
|
|
xor edi,ebx
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
add edx,DWORD[12+rsp]
|
|
and edi,ebx
|
|
xor ebx,ecx
|
|
shrd eax,eax,7
|
|
mov esi,ebp
|
|
xor edi,ebx
|
|
shld ebp,ebp,5
|
|
add edx,edi
|
|
xor esi,eax
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
vpalignr xmm8,xmm0,xmm7,8
|
|
vpxor xmm1,xmm1,xmm5
|
|
add ecx,DWORD[16+rsp]
|
|
and esi,eax
|
|
xor eax,ebx
|
|
shrd ebp,ebp,7
|
|
vpxor xmm1,xmm1,xmm2
|
|
mov edi,edx
|
|
xor esi,eax
|
|
vpaddd xmm9,xmm11,xmm0
|
|
shld edx,edx,5
|
|
add ecx,esi
|
|
vpxor xmm1,xmm1,xmm8
|
|
xor edi,ebp
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
add ebx,DWORD[20+rsp]
|
|
vpsrld xmm8,xmm1,30
|
|
vmovdqa XMMWORD[rsp],xmm9
|
|
and edi,ebp
|
|
xor ebp,eax
|
|
shrd edx,edx,7
|
|
mov esi,ecx
|
|
vpslld xmm1,xmm1,2
|
|
xor edi,ebp
|
|
shld ecx,ecx,5
|
|
add ebx,edi
|
|
xor esi,edx
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
add eax,DWORD[24+rsp]
|
|
and esi,edx
|
|
vpor xmm1,xmm1,xmm8
|
|
xor edx,ebp
|
|
shrd ecx,ecx,7
|
|
mov edi,ebx
|
|
xor esi,edx
|
|
shld ebx,ebx,5
|
|
add eax,esi
|
|
xor edi,ecx
|
|
xor ecx,edx
|
|
add eax,ebx
|
|
add ebp,DWORD[28+rsp]
|
|
and edi,ecx
|
|
xor ecx,edx
|
|
shrd ebx,ebx,7
|
|
mov esi,eax
|
|
xor edi,ecx
|
|
shld eax,eax,5
|
|
add ebp,edi
|
|
xor esi,ebx
|
|
xor ebx,ecx
|
|
add ebp,eax
|
|
vpalignr xmm8,xmm1,xmm0,8
|
|
vpxor xmm2,xmm2,xmm6
|
|
add edx,DWORD[32+rsp]
|
|
and esi,ebx
|
|
xor ebx,ecx
|
|
shrd eax,eax,7
|
|
vpxor xmm2,xmm2,xmm3
|
|
mov edi,ebp
|
|
xor esi,ebx
|
|
vpaddd xmm9,xmm11,xmm1
|
|
shld ebp,ebp,5
|
|
add edx,esi
|
|
vpxor xmm2,xmm2,xmm8
|
|
xor edi,eax
|
|
xor eax,ebx
|
|
add edx,ebp
|
|
add ecx,DWORD[36+rsp]
|
|
vpsrld xmm8,xmm2,30
|
|
vmovdqa XMMWORD[16+rsp],xmm9
|
|
and edi,eax
|
|
xor eax,ebx
|
|
shrd ebp,ebp,7
|
|
mov esi,edx
|
|
vpslld xmm2,xmm2,2
|
|
xor edi,eax
|
|
shld edx,edx,5
|
|
add ecx,edi
|
|
xor esi,ebp
|
|
xor ebp,eax
|
|
add ecx,edx
|
|
add ebx,DWORD[40+rsp]
|
|
and esi,ebp
|
|
vpor xmm2,xmm2,xmm8
|
|
xor ebp,eax
|
|
shrd edx,edx,7
|
|
mov edi,ecx
|
|
xor esi,ebp
|
|
shld ecx,ecx,5
|
|
add ebx,esi
|
|
xor edi,edx
|
|
xor edx,ebp
|
|
add ebx,ecx
|
|
add eax,DWORD[44+rsp]
|
|
and edi,edx
|
|
xor edx,ebp
|
|
shrd ecx,ecx,7
|
|
mov esi,ebx
|
|
xor edi,edx
|
|
shld ebx,ebx,5
|
|
add eax,edi
|
|
xor esi,edx
|
|
add eax,ebx
|
|
vpalignr xmm8,xmm2,xmm1,8
|
|
vpxor xmm3,xmm3,xmm7
|
|
add ebp,DWORD[48+rsp]
|
|
xor esi,ecx
|
|
mov edi,eax
|
|
shld eax,eax,5
|
|
vpxor xmm3,xmm3,xmm4
|
|
add ebp,esi
|
|
xor edi,ecx
|
|
vpaddd xmm9,xmm11,xmm2
|
|
shrd ebx,ebx,7
|
|
add ebp,eax
|
|
vpxor xmm3,xmm3,xmm8
|
|
add edx,DWORD[52+rsp]
|
|
xor edi,ebx
|
|
mov esi,ebp
|
|
shld ebp,ebp,5
|
|
vpsrld xmm8,xmm3,30
|
|
vmovdqa XMMWORD[32+rsp],xmm9
|
|
add edx,edi
|
|
xor esi,ebx
|
|
shrd eax,eax,7
|
|
add edx,ebp
|
|
vpslld xmm3,xmm3,2
|
|
add ecx,DWORD[56+rsp]
|
|
xor esi,eax
|
|
mov edi,edx
|
|
shld edx,edx,5
|
|
add ecx,esi
|
|
xor edi,eax
|
|
shrd ebp,ebp,7
|
|
add ecx,edx
|
|
vpor xmm3,xmm3,xmm8
|
|
add ebx,DWORD[60+rsp]
|
|
xor edi,ebp
|
|
mov esi,ecx
|
|
shld ecx,ecx,5
|
|
add ebx,edi
|
|
xor esi,ebp
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[rsp]
|
|
vpaddd xmm9,xmm11,xmm3
|
|
xor esi,edx
|
|
mov edi,ebx
|
|
shld ebx,ebx,5
|
|
add eax,esi
|
|
vmovdqa XMMWORD[48+rsp],xmm9
|
|
xor edi,edx
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
add ebp,DWORD[4+rsp]
|
|
xor edi,ecx
|
|
mov esi,eax
|
|
shld eax,eax,5
|
|
add ebp,edi
|
|
xor esi,ecx
|
|
shrd ebx,ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[8+rsp]
|
|
xor esi,ebx
|
|
mov edi,ebp
|
|
shld ebp,ebp,5
|
|
add edx,esi
|
|
xor edi,ebx
|
|
shrd eax,eax,7
|
|
add edx,ebp
|
|
add ecx,DWORD[12+rsp]
|
|
xor edi,eax
|
|
mov esi,edx
|
|
shld edx,edx,5
|
|
add ecx,edi
|
|
xor esi,eax
|
|
shrd ebp,ebp,7
|
|
add ecx,edx
|
|
cmp r9,r10
|
|
je NEAR $L$done_avx
|
|
vmovdqa xmm6,XMMWORD[64+r14]
|
|
vmovdqa xmm11,XMMWORD[((-64))+r14]
|
|
vmovdqu xmm0,XMMWORD[r9]
|
|
vmovdqu xmm1,XMMWORD[16+r9]
|
|
vmovdqu xmm2,XMMWORD[32+r9]
|
|
vmovdqu xmm3,XMMWORD[48+r9]
|
|
vpshufb xmm0,xmm0,xmm6
|
|
add r9,64
|
|
add ebx,DWORD[16+rsp]
|
|
xor esi,ebp
|
|
vpshufb xmm1,xmm1,xmm6
|
|
mov edi,ecx
|
|
shld ecx,ecx,5
|
|
vpaddd xmm4,xmm0,xmm11
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
vmovdqa XMMWORD[rsp],xmm4
|
|
add eax,DWORD[20+rsp]
|
|
xor edi,edx
|
|
mov esi,ebx
|
|
shld ebx,ebx,5
|
|
add eax,edi
|
|
xor esi,edx
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
add ebp,DWORD[24+rsp]
|
|
xor esi,ecx
|
|
mov edi,eax
|
|
shld eax,eax,5
|
|
add ebp,esi
|
|
xor edi,ecx
|
|
shrd ebx,ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[28+rsp]
|
|
xor edi,ebx
|
|
mov esi,ebp
|
|
shld ebp,ebp,5
|
|
add edx,edi
|
|
xor esi,ebx
|
|
shrd eax,eax,7
|
|
add edx,ebp
|
|
add ecx,DWORD[32+rsp]
|
|
xor esi,eax
|
|
vpshufb xmm2,xmm2,xmm6
|
|
mov edi,edx
|
|
shld edx,edx,5
|
|
vpaddd xmm5,xmm1,xmm11
|
|
add ecx,esi
|
|
xor edi,eax
|
|
shrd ebp,ebp,7
|
|
add ecx,edx
|
|
vmovdqa XMMWORD[16+rsp],xmm5
|
|
add ebx,DWORD[36+rsp]
|
|
xor edi,ebp
|
|
mov esi,ecx
|
|
shld ecx,ecx,5
|
|
add ebx,edi
|
|
xor esi,ebp
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[40+rsp]
|
|
xor esi,edx
|
|
mov edi,ebx
|
|
shld ebx,ebx,5
|
|
add eax,esi
|
|
xor edi,edx
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
add ebp,DWORD[44+rsp]
|
|
xor edi,ecx
|
|
mov esi,eax
|
|
shld eax,eax,5
|
|
add ebp,edi
|
|
xor esi,ecx
|
|
shrd ebx,ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[48+rsp]
|
|
xor esi,ebx
|
|
vpshufb xmm3,xmm3,xmm6
|
|
mov edi,ebp
|
|
shld ebp,ebp,5
|
|
vpaddd xmm6,xmm2,xmm11
|
|
add edx,esi
|
|
xor edi,ebx
|
|
shrd eax,eax,7
|
|
add edx,ebp
|
|
vmovdqa XMMWORD[32+rsp],xmm6
|
|
add ecx,DWORD[52+rsp]
|
|
xor edi,eax
|
|
mov esi,edx
|
|
shld edx,edx,5
|
|
add ecx,edi
|
|
xor esi,eax
|
|
shrd ebp,ebp,7
|
|
add ecx,edx
|
|
add ebx,DWORD[56+rsp]
|
|
xor esi,ebp
|
|
mov edi,ecx
|
|
shld ecx,ecx,5
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[60+rsp]
|
|
xor edi,edx
|
|
mov esi,ebx
|
|
shld ebx,ebx,5
|
|
add eax,edi
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
add eax,DWORD[r8]
|
|
add esi,DWORD[4+r8]
|
|
add ecx,DWORD[8+r8]
|
|
add edx,DWORD[12+r8]
|
|
mov DWORD[r8],eax
|
|
add ebp,DWORD[16+r8]
|
|
mov DWORD[4+r8],esi
|
|
mov ebx,esi
|
|
mov DWORD[8+r8],ecx
|
|
mov edi,ecx
|
|
mov DWORD[12+r8],edx
|
|
xor edi,edx
|
|
mov DWORD[16+r8],ebp
|
|
and esi,edi
|
|
jmp NEAR $L$oop_avx
|
|
|
|
ALIGN 16
|
|
$L$done_avx:
|
|
add ebx,DWORD[16+rsp]
|
|
xor esi,ebp
|
|
mov edi,ecx
|
|
shld ecx,ecx,5
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[20+rsp]
|
|
xor edi,edx
|
|
mov esi,ebx
|
|
shld ebx,ebx,5
|
|
add eax,edi
|
|
xor esi,edx
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
add ebp,DWORD[24+rsp]
|
|
xor esi,ecx
|
|
mov edi,eax
|
|
shld eax,eax,5
|
|
add ebp,esi
|
|
xor edi,ecx
|
|
shrd ebx,ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[28+rsp]
|
|
xor edi,ebx
|
|
mov esi,ebp
|
|
shld ebp,ebp,5
|
|
add edx,edi
|
|
xor esi,ebx
|
|
shrd eax,eax,7
|
|
add edx,ebp
|
|
add ecx,DWORD[32+rsp]
|
|
xor esi,eax
|
|
mov edi,edx
|
|
shld edx,edx,5
|
|
add ecx,esi
|
|
xor edi,eax
|
|
shrd ebp,ebp,7
|
|
add ecx,edx
|
|
add ebx,DWORD[36+rsp]
|
|
xor edi,ebp
|
|
mov esi,ecx
|
|
shld ecx,ecx,5
|
|
add ebx,edi
|
|
xor esi,ebp
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[40+rsp]
|
|
xor esi,edx
|
|
mov edi,ebx
|
|
shld ebx,ebx,5
|
|
add eax,esi
|
|
xor edi,edx
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
add ebp,DWORD[44+rsp]
|
|
xor edi,ecx
|
|
mov esi,eax
|
|
shld eax,eax,5
|
|
add ebp,edi
|
|
xor esi,ecx
|
|
shrd ebx,ebx,7
|
|
add ebp,eax
|
|
add edx,DWORD[48+rsp]
|
|
xor esi,ebx
|
|
mov edi,ebp
|
|
shld ebp,ebp,5
|
|
add edx,esi
|
|
xor edi,ebx
|
|
shrd eax,eax,7
|
|
add edx,ebp
|
|
add ecx,DWORD[52+rsp]
|
|
xor edi,eax
|
|
mov esi,edx
|
|
shld edx,edx,5
|
|
add ecx,edi
|
|
xor esi,eax
|
|
shrd ebp,ebp,7
|
|
add ecx,edx
|
|
add ebx,DWORD[56+rsp]
|
|
xor esi,ebp
|
|
mov edi,ecx
|
|
shld ecx,ecx,5
|
|
add ebx,esi
|
|
xor edi,ebp
|
|
shrd edx,edx,7
|
|
add ebx,ecx
|
|
add eax,DWORD[60+rsp]
|
|
xor edi,edx
|
|
mov esi,ebx
|
|
shld ebx,ebx,5
|
|
add eax,edi
|
|
shrd ecx,ecx,7
|
|
add eax,ebx
|
|
vzeroupper
|
|
|
|
add eax,DWORD[r8]
|
|
add esi,DWORD[4+r8]
|
|
add ecx,DWORD[8+r8]
|
|
mov DWORD[r8],eax
|
|
add edx,DWORD[12+r8]
|
|
mov DWORD[4+r8],esi
|
|
add ebp,DWORD[16+r8]
|
|
mov DWORD[8+r8],ecx
|
|
mov DWORD[12+r8],edx
|
|
mov DWORD[16+r8],ebp
|
|
movaps xmm6,XMMWORD[((-40-96))+r11]
|
|
movaps xmm7,XMMWORD[((-40-80))+r11]
|
|
movaps xmm8,XMMWORD[((-40-64))+r11]
|
|
movaps xmm9,XMMWORD[((-40-48))+r11]
|
|
movaps xmm10,XMMWORD[((-40-32))+r11]
|
|
movaps xmm11,XMMWORD[((-40-16))+r11]
|
|
mov r14,QWORD[((-40))+r11]
|
|
|
|
mov r13,QWORD[((-32))+r11]
|
|
|
|
mov r12,QWORD[((-24))+r11]
|
|
|
|
mov rbp,QWORD[((-16))+r11]
|
|
|
|
mov rbx,QWORD[((-8))+r11]
|
|
|
|
lea rsp,[r11]
|
|
|
|
$L$epilogue_avx:
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
DB 0F3h,0C3h ;repret
|
|
|
|
$L$SEH_end_sha1_block_data_order_avx:
|
|
|
|
ALIGN 16
|
|
sha1_block_data_order_avx2:
|
|
mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_sha1_block_data_order_avx2:
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
|
|
|
|
_avx2_shortcut:
|
|
|
|
mov r11,rsp
|
|
|
|
push rbx
|
|
|
|
push rbp
|
|
|
|
push r12
|
|
|
|
push r13
|
|
|
|
push r14
|
|
|
|
vzeroupper
|
|
lea rsp,[((-96))+rsp]
|
|
vmovaps XMMWORD[(-40-96)+r11],xmm6
|
|
vmovaps XMMWORD[(-40-80)+r11],xmm7
|
|
vmovaps XMMWORD[(-40-64)+r11],xmm8
|
|
vmovaps XMMWORD[(-40-48)+r11],xmm9
|
|
vmovaps XMMWORD[(-40-32)+r11],xmm10
|
|
vmovaps XMMWORD[(-40-16)+r11],xmm11
|
|
$L$prologue_avx2:
|
|
mov r8,rdi
|
|
mov r9,rsi
|
|
mov r10,rdx
|
|
|
|
lea rsp,[((-640))+rsp]
|
|
shl r10,6
|
|
lea r13,[64+r9]
|
|
and rsp,-128
|
|
add r10,r9
|
|
lea r14,[((K_XX_XX+64))]
|
|
|
|
mov eax,DWORD[r8]
|
|
cmp r13,r10
|
|
cmovae r13,r9
|
|
mov ebp,DWORD[4+r8]
|
|
mov ecx,DWORD[8+r8]
|
|
mov edx,DWORD[12+r8]
|
|
mov esi,DWORD[16+r8]
|
|
vmovdqu ymm6,YMMWORD[64+r14]
|
|
|
|
vmovdqu xmm0,XMMWORD[r9]
|
|
vmovdqu xmm1,XMMWORD[16+r9]
|
|
vmovdqu xmm2,XMMWORD[32+r9]
|
|
vmovdqu xmm3,XMMWORD[48+r9]
|
|
lea r9,[64+r9]
|
|
vinserti128 ymm0,ymm0,XMMWORD[r13],1
|
|
vinserti128 ymm1,ymm1,XMMWORD[16+r13],1
|
|
vpshufb ymm0,ymm0,ymm6
|
|
vinserti128 ymm2,ymm2,XMMWORD[32+r13],1
|
|
vpshufb ymm1,ymm1,ymm6
|
|
vinserti128 ymm3,ymm3,XMMWORD[48+r13],1
|
|
vpshufb ymm2,ymm2,ymm6
|
|
vmovdqu ymm11,YMMWORD[((-64))+r14]
|
|
vpshufb ymm3,ymm3,ymm6
|
|
|
|
vpaddd ymm4,ymm0,ymm11
|
|
vpaddd ymm5,ymm1,ymm11
|
|
vmovdqu YMMWORD[rsp],ymm4
|
|
vpaddd ymm6,ymm2,ymm11
|
|
vmovdqu YMMWORD[32+rsp],ymm5
|
|
vpaddd ymm7,ymm3,ymm11
|
|
vmovdqu YMMWORD[64+rsp],ymm6
|
|
vmovdqu YMMWORD[96+rsp],ymm7
|
|
vpalignr ymm4,ymm1,ymm0,8
|
|
vpsrldq ymm8,ymm3,4
|
|
vpxor ymm4,ymm4,ymm0
|
|
vpxor ymm8,ymm8,ymm2
|
|
vpxor ymm4,ymm4,ymm8
|
|
vpsrld ymm8,ymm4,31
|
|
vpslldq ymm10,ymm4,12
|
|
vpaddd ymm4,ymm4,ymm4
|
|
vpsrld ymm9,ymm10,30
|
|
vpor ymm4,ymm4,ymm8
|
|
vpslld ymm10,ymm10,2
|
|
vpxor ymm4,ymm4,ymm9
|
|
vpxor ymm4,ymm4,ymm10
|
|
vpaddd ymm9,ymm4,ymm11
|
|
vmovdqu YMMWORD[128+rsp],ymm9
|
|
vpalignr ymm5,ymm2,ymm1,8
|
|
vpsrldq ymm8,ymm4,4
|
|
vpxor ymm5,ymm5,ymm1
|
|
vpxor ymm8,ymm8,ymm3
|
|
vpxor ymm5,ymm5,ymm8
|
|
vpsrld ymm8,ymm5,31
|
|
vmovdqu ymm11,YMMWORD[((-32))+r14]
|
|
vpslldq ymm10,ymm5,12
|
|
vpaddd ymm5,ymm5,ymm5
|
|
vpsrld ymm9,ymm10,30
|
|
vpor ymm5,ymm5,ymm8
|
|
vpslld ymm10,ymm10,2
|
|
vpxor ymm5,ymm5,ymm9
|
|
vpxor ymm5,ymm5,ymm10
|
|
vpaddd ymm9,ymm5,ymm11
|
|
vmovdqu YMMWORD[160+rsp],ymm9
|
|
vpalignr ymm6,ymm3,ymm2,8
|
|
vpsrldq ymm8,ymm5,4
|
|
vpxor ymm6,ymm6,ymm2
|
|
vpxor ymm8,ymm8,ymm4
|
|
vpxor ymm6,ymm6,ymm8
|
|
vpsrld ymm8,ymm6,31
|
|
vpslldq ymm10,ymm6,12
|
|
vpaddd ymm6,ymm6,ymm6
|
|
vpsrld ymm9,ymm10,30
|
|
vpor ymm6,ymm6,ymm8
|
|
vpslld ymm10,ymm10,2
|
|
vpxor ymm6,ymm6,ymm9
|
|
vpxor ymm6,ymm6,ymm10
|
|
vpaddd ymm9,ymm6,ymm11
|
|
vmovdqu YMMWORD[192+rsp],ymm9
|
|
vpalignr ymm7,ymm4,ymm3,8
|
|
vpsrldq ymm8,ymm6,4
|
|
vpxor ymm7,ymm7,ymm3
|
|
vpxor ymm8,ymm8,ymm5
|
|
vpxor ymm7,ymm7,ymm8
|
|
vpsrld ymm8,ymm7,31
|
|
vpslldq ymm10,ymm7,12
|
|
vpaddd ymm7,ymm7,ymm7
|
|
vpsrld ymm9,ymm10,30
|
|
vpor ymm7,ymm7,ymm8
|
|
vpslld ymm10,ymm10,2
|
|
vpxor ymm7,ymm7,ymm9
|
|
vpxor ymm7,ymm7,ymm10
|
|
vpaddd ymm9,ymm7,ymm11
|
|
vmovdqu YMMWORD[224+rsp],ymm9
|
|
lea r13,[128+rsp]
|
|
jmp NEAR $L$oop_avx2
|
|
ALIGN 32
|
|
$L$oop_avx2:
|
|
rorx ebx,ebp,2
|
|
andn edi,ebp,edx
|
|
and ebp,ecx
|
|
xor ebp,edi
|
|
jmp NEAR $L$align32_1
|
|
ALIGN 32
|
|
$L$align32_1:
|
|
vpalignr ymm8,ymm7,ymm6,8
|
|
vpxor ymm0,ymm0,ymm4
|
|
add esi,DWORD[((-128))+r13]
|
|
andn edi,eax,ecx
|
|
vpxor ymm0,ymm0,ymm1
|
|
add esi,ebp
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
vpxor ymm0,ymm0,ymm8
|
|
and eax,ebx
|
|
add esi,r12d
|
|
xor eax,edi
|
|
vpsrld ymm8,ymm0,30
|
|
vpslld ymm0,ymm0,2
|
|
add edx,DWORD[((-124))+r13]
|
|
andn edi,esi,ebx
|
|
add edx,eax
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
and esi,ebp
|
|
vpor ymm0,ymm0,ymm8
|
|
add edx,r12d
|
|
xor esi,edi
|
|
add ecx,DWORD[((-120))+r13]
|
|
andn edi,edx,ebp
|
|
vpaddd ymm9,ymm0,ymm11
|
|
add ecx,esi
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
and edx,eax
|
|
vmovdqu YMMWORD[256+rsp],ymm9
|
|
add ecx,r12d
|
|
xor edx,edi
|
|
add ebx,DWORD[((-116))+r13]
|
|
andn edi,ecx,eax
|
|
add ebx,edx
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
and ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,edi
|
|
add ebp,DWORD[((-96))+r13]
|
|
andn edi,ebx,esi
|
|
add ebp,ecx
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
and ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,edi
|
|
vpalignr ymm8,ymm0,ymm7,8
|
|
vpxor ymm1,ymm1,ymm5
|
|
add eax,DWORD[((-92))+r13]
|
|
andn edi,ebp,edx
|
|
vpxor ymm1,ymm1,ymm2
|
|
add eax,ebx
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
vpxor ymm1,ymm1,ymm8
|
|
and ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edi
|
|
vpsrld ymm8,ymm1,30
|
|
vpslld ymm1,ymm1,2
|
|
add esi,DWORD[((-88))+r13]
|
|
andn edi,eax,ecx
|
|
add esi,ebp
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
and eax,ebx
|
|
vpor ymm1,ymm1,ymm8
|
|
add esi,r12d
|
|
xor eax,edi
|
|
add edx,DWORD[((-84))+r13]
|
|
andn edi,esi,ebx
|
|
vpaddd ymm9,ymm1,ymm11
|
|
add edx,eax
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
and esi,ebp
|
|
vmovdqu YMMWORD[288+rsp],ymm9
|
|
add edx,r12d
|
|
xor esi,edi
|
|
add ecx,DWORD[((-64))+r13]
|
|
andn edi,edx,ebp
|
|
add ecx,esi
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
and edx,eax
|
|
add ecx,r12d
|
|
xor edx,edi
|
|
add ebx,DWORD[((-60))+r13]
|
|
andn edi,ecx,eax
|
|
add ebx,edx
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
and ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,edi
|
|
vpalignr ymm8,ymm1,ymm0,8
|
|
vpxor ymm2,ymm2,ymm6
|
|
add ebp,DWORD[((-56))+r13]
|
|
andn edi,ebx,esi
|
|
vpxor ymm2,ymm2,ymm3
|
|
vmovdqu ymm11,YMMWORD[r14]
|
|
add ebp,ecx
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
vpxor ymm2,ymm2,ymm8
|
|
and ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,edi
|
|
vpsrld ymm8,ymm2,30
|
|
vpslld ymm2,ymm2,2
|
|
add eax,DWORD[((-52))+r13]
|
|
andn edi,ebp,edx
|
|
add eax,ebx
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
and ebp,ecx
|
|
vpor ymm2,ymm2,ymm8
|
|
add eax,r12d
|
|
xor ebp,edi
|
|
add esi,DWORD[((-32))+r13]
|
|
andn edi,eax,ecx
|
|
vpaddd ymm9,ymm2,ymm11
|
|
add esi,ebp
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
and eax,ebx
|
|
vmovdqu YMMWORD[320+rsp],ymm9
|
|
add esi,r12d
|
|
xor eax,edi
|
|
add edx,DWORD[((-28))+r13]
|
|
andn edi,esi,ebx
|
|
add edx,eax
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
and esi,ebp
|
|
add edx,r12d
|
|
xor esi,edi
|
|
add ecx,DWORD[((-24))+r13]
|
|
andn edi,edx,ebp
|
|
add ecx,esi
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
and edx,eax
|
|
add ecx,r12d
|
|
xor edx,edi
|
|
vpalignr ymm8,ymm2,ymm1,8
|
|
vpxor ymm3,ymm3,ymm7
|
|
add ebx,DWORD[((-20))+r13]
|
|
andn edi,ecx,eax
|
|
vpxor ymm3,ymm3,ymm4
|
|
add ebx,edx
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
vpxor ymm3,ymm3,ymm8
|
|
and ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,edi
|
|
vpsrld ymm8,ymm3,30
|
|
vpslld ymm3,ymm3,2
|
|
add ebp,DWORD[r13]
|
|
andn edi,ebx,esi
|
|
add ebp,ecx
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
and ebx,edx
|
|
vpor ymm3,ymm3,ymm8
|
|
add ebp,r12d
|
|
xor ebx,edi
|
|
add eax,DWORD[4+r13]
|
|
andn edi,ebp,edx
|
|
vpaddd ymm9,ymm3,ymm11
|
|
add eax,ebx
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
and ebp,ecx
|
|
vmovdqu YMMWORD[352+rsp],ymm9
|
|
add eax,r12d
|
|
xor ebp,edi
|
|
add esi,DWORD[8+r13]
|
|
andn edi,eax,ecx
|
|
add esi,ebp
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
and eax,ebx
|
|
add esi,r12d
|
|
xor eax,edi
|
|
add edx,DWORD[12+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
vpalignr ymm8,ymm3,ymm2,8
|
|
vpxor ymm4,ymm4,ymm0
|
|
add ecx,DWORD[32+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
vpxor ymm4,ymm4,ymm5
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
vpxor ymm4,ymm4,ymm8
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[36+r13]
|
|
vpsrld ymm8,ymm4,30
|
|
vpslld ymm4,ymm4,2
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
vpor ymm4,ymm4,ymm8
|
|
add ebp,DWORD[40+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
vpaddd ymm9,ymm4,ymm11
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
add eax,DWORD[44+r13]
|
|
vmovdqu YMMWORD[384+rsp],ymm9
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
add esi,DWORD[64+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
vpalignr ymm8,ymm4,ymm3,8
|
|
vpxor ymm5,ymm5,ymm1
|
|
add edx,DWORD[68+r13]
|
|
lea edx,[rax*1+rdx]
|
|
vpxor ymm5,ymm5,ymm6
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
vpxor ymm5,ymm5,ymm8
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
add ecx,DWORD[72+r13]
|
|
vpsrld ymm8,ymm5,30
|
|
vpslld ymm5,ymm5,2
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
vpor ymm5,ymm5,ymm8
|
|
add ebx,DWORD[76+r13]
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
vpaddd ymm9,ymm5,ymm11
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
add ebp,DWORD[96+r13]
|
|
vmovdqu YMMWORD[416+rsp],ymm9
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
add eax,DWORD[100+r13]
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
vpalignr ymm8,ymm5,ymm4,8
|
|
vpxor ymm6,ymm6,ymm2
|
|
add esi,DWORD[104+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
vpxor ymm6,ymm6,ymm7
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
vpxor ymm6,ymm6,ymm8
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
add edx,DWORD[108+r13]
|
|
lea r13,[256+r13]
|
|
vpsrld ymm8,ymm6,30
|
|
vpslld ymm6,ymm6,2
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
vpor ymm6,ymm6,ymm8
|
|
add ecx,DWORD[((-128))+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
vpaddd ymm9,ymm6,ymm11
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[((-124))+r13]
|
|
vmovdqu YMMWORD[448+rsp],ymm9
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
add ebp,DWORD[((-120))+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
vpalignr ymm8,ymm6,ymm5,8
|
|
vpxor ymm7,ymm7,ymm3
|
|
add eax,DWORD[((-116))+r13]
|
|
lea eax,[rbx*1+rax]
|
|
vpxor ymm7,ymm7,ymm0
|
|
vmovdqu ymm11,YMMWORD[32+r14]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
vpxor ymm7,ymm7,ymm8
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
add esi,DWORD[((-96))+r13]
|
|
vpsrld ymm8,ymm7,30
|
|
vpslld ymm7,ymm7,2
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
vpor ymm7,ymm7,ymm8
|
|
add edx,DWORD[((-92))+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
vpaddd ymm9,ymm7,ymm11
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
add ecx,DWORD[((-88))+r13]
|
|
vmovdqu YMMWORD[480+rsp],ymm9
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[((-84))+r13]
|
|
mov edi,esi
|
|
xor edi,eax
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
and ecx,edi
|
|
jmp NEAR $L$align32_2
|
|
ALIGN 32
|
|
$L$align32_2:
|
|
vpalignr ymm8,ymm7,ymm6,8
|
|
vpxor ymm0,ymm0,ymm4
|
|
add ebp,DWORD[((-64))+r13]
|
|
xor ecx,esi
|
|
vpxor ymm0,ymm0,ymm1
|
|
mov edi,edx
|
|
xor edi,esi
|
|
lea ebp,[rbp*1+rcx]
|
|
vpxor ymm0,ymm0,ymm8
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
vpsrld ymm8,ymm0,30
|
|
vpslld ymm0,ymm0,2
|
|
add ebp,r12d
|
|
and ebx,edi
|
|
add eax,DWORD[((-60))+r13]
|
|
xor ebx,edx
|
|
mov edi,ecx
|
|
xor edi,edx
|
|
vpor ymm0,ymm0,ymm8
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
vpaddd ymm9,ymm0,ymm11
|
|
add eax,r12d
|
|
and ebp,edi
|
|
add esi,DWORD[((-56))+r13]
|
|
xor ebp,ecx
|
|
vmovdqu YMMWORD[512+rsp],ymm9
|
|
mov edi,ebx
|
|
xor edi,ecx
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
and eax,edi
|
|
add edx,DWORD[((-52))+r13]
|
|
xor eax,ebx
|
|
mov edi,ebp
|
|
xor edi,ebx
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
and esi,edi
|
|
add ecx,DWORD[((-32))+r13]
|
|
xor esi,ebp
|
|
mov edi,eax
|
|
xor edi,ebp
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
and edx,edi
|
|
vpalignr ymm8,ymm0,ymm7,8
|
|
vpxor ymm1,ymm1,ymm5
|
|
add ebx,DWORD[((-28))+r13]
|
|
xor edx,eax
|
|
vpxor ymm1,ymm1,ymm2
|
|
mov edi,esi
|
|
xor edi,eax
|
|
lea ebx,[rdx*1+rbx]
|
|
vpxor ymm1,ymm1,ymm8
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
vpsrld ymm8,ymm1,30
|
|
vpslld ymm1,ymm1,2
|
|
add ebx,r12d
|
|
and ecx,edi
|
|
add ebp,DWORD[((-24))+r13]
|
|
xor ecx,esi
|
|
mov edi,edx
|
|
xor edi,esi
|
|
vpor ymm1,ymm1,ymm8
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
vpaddd ymm9,ymm1,ymm11
|
|
add ebp,r12d
|
|
and ebx,edi
|
|
add eax,DWORD[((-20))+r13]
|
|
xor ebx,edx
|
|
vmovdqu YMMWORD[544+rsp],ymm9
|
|
mov edi,ecx
|
|
xor edi,edx
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
and ebp,edi
|
|
add esi,DWORD[r13]
|
|
xor ebp,ecx
|
|
mov edi,ebx
|
|
xor edi,ecx
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
and eax,edi
|
|
add edx,DWORD[4+r13]
|
|
xor eax,ebx
|
|
mov edi,ebp
|
|
xor edi,ebx
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
and esi,edi
|
|
vpalignr ymm8,ymm1,ymm0,8
|
|
vpxor ymm2,ymm2,ymm6
|
|
add ecx,DWORD[8+r13]
|
|
xor esi,ebp
|
|
vpxor ymm2,ymm2,ymm3
|
|
mov edi,eax
|
|
xor edi,ebp
|
|
lea ecx,[rsi*1+rcx]
|
|
vpxor ymm2,ymm2,ymm8
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
vpsrld ymm8,ymm2,30
|
|
vpslld ymm2,ymm2,2
|
|
add ecx,r12d
|
|
and edx,edi
|
|
add ebx,DWORD[12+r13]
|
|
xor edx,eax
|
|
mov edi,esi
|
|
xor edi,eax
|
|
vpor ymm2,ymm2,ymm8
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
vpaddd ymm9,ymm2,ymm11
|
|
add ebx,r12d
|
|
and ecx,edi
|
|
add ebp,DWORD[32+r13]
|
|
xor ecx,esi
|
|
vmovdqu YMMWORD[576+rsp],ymm9
|
|
mov edi,edx
|
|
xor edi,esi
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
and ebx,edi
|
|
add eax,DWORD[36+r13]
|
|
xor ebx,edx
|
|
mov edi,ecx
|
|
xor edi,edx
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
and ebp,edi
|
|
add esi,DWORD[40+r13]
|
|
xor ebp,ecx
|
|
mov edi,ebx
|
|
xor edi,ecx
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
and eax,edi
|
|
vpalignr ymm8,ymm2,ymm1,8
|
|
vpxor ymm3,ymm3,ymm7
|
|
add edx,DWORD[44+r13]
|
|
xor eax,ebx
|
|
vpxor ymm3,ymm3,ymm4
|
|
mov edi,ebp
|
|
xor edi,ebx
|
|
lea edx,[rax*1+rdx]
|
|
vpxor ymm3,ymm3,ymm8
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
vpsrld ymm8,ymm3,30
|
|
vpslld ymm3,ymm3,2
|
|
add edx,r12d
|
|
and esi,edi
|
|
add ecx,DWORD[64+r13]
|
|
xor esi,ebp
|
|
mov edi,eax
|
|
xor edi,ebp
|
|
vpor ymm3,ymm3,ymm8
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
vpaddd ymm9,ymm3,ymm11
|
|
add ecx,r12d
|
|
and edx,edi
|
|
add ebx,DWORD[68+r13]
|
|
xor edx,eax
|
|
vmovdqu YMMWORD[608+rsp],ymm9
|
|
mov edi,esi
|
|
xor edi,eax
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
and ecx,edi
|
|
add ebp,DWORD[72+r13]
|
|
xor ecx,esi
|
|
mov edi,edx
|
|
xor edi,esi
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
and ebx,edi
|
|
add eax,DWORD[76+r13]
|
|
xor ebx,edx
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
add esi,DWORD[96+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
add edx,DWORD[100+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
add ecx,DWORD[104+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[108+r13]
|
|
lea r13,[256+r13]
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
add ebp,DWORD[((-128))+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
add eax,DWORD[((-124))+r13]
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
add esi,DWORD[((-120))+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
add edx,DWORD[((-116))+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
add ecx,DWORD[((-96))+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[((-92))+r13]
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
add ebp,DWORD[((-88))+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
add eax,DWORD[((-84))+r13]
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
add esi,DWORD[((-64))+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
add edx,DWORD[((-60))+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
add ecx,DWORD[((-56))+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[((-52))+r13]
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
add ebp,DWORD[((-32))+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
add eax,DWORD[((-28))+r13]
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
add esi,DWORD[((-24))+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
add edx,DWORD[((-20))+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
add edx,r12d
|
|
lea r13,[128+r9]
|
|
lea rdi,[128+r9]
|
|
cmp r13,r10
|
|
cmovae r13,r9
|
|
|
|
|
|
add edx,DWORD[r8]
|
|
add esi,DWORD[4+r8]
|
|
add ebp,DWORD[8+r8]
|
|
mov DWORD[r8],edx
|
|
add ebx,DWORD[12+r8]
|
|
mov DWORD[4+r8],esi
|
|
mov eax,edx
|
|
add ecx,DWORD[16+r8]
|
|
mov r12d,ebp
|
|
mov DWORD[8+r8],ebp
|
|
mov edx,ebx
|
|
|
|
mov DWORD[12+r8],ebx
|
|
mov ebp,esi
|
|
mov DWORD[16+r8],ecx
|
|
|
|
mov esi,ecx
|
|
mov ecx,r12d
|
|
|
|
|
|
cmp r9,r10
|
|
je NEAR $L$done_avx2
|
|
vmovdqu ymm6,YMMWORD[64+r14]
|
|
cmp rdi,r10
|
|
ja NEAR $L$ast_avx2
|
|
|
|
vmovdqu xmm0,XMMWORD[((-64))+rdi]
|
|
vmovdqu xmm1,XMMWORD[((-48))+rdi]
|
|
vmovdqu xmm2,XMMWORD[((-32))+rdi]
|
|
vmovdqu xmm3,XMMWORD[((-16))+rdi]
|
|
vinserti128 ymm0,ymm0,XMMWORD[r13],1
|
|
vinserti128 ymm1,ymm1,XMMWORD[16+r13],1
|
|
vinserti128 ymm2,ymm2,XMMWORD[32+r13],1
|
|
vinserti128 ymm3,ymm3,XMMWORD[48+r13],1
|
|
jmp NEAR $L$ast_avx2
|
|
|
|
ALIGN 32
|
|
$L$ast_avx2:
|
|
lea r13,[((128+16))+rsp]
|
|
rorx ebx,ebp,2
|
|
andn edi,ebp,edx
|
|
and ebp,ecx
|
|
xor ebp,edi
|
|
sub r9,-128
|
|
add esi,DWORD[((-128))+r13]
|
|
andn edi,eax,ecx
|
|
add esi,ebp
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
and eax,ebx
|
|
add esi,r12d
|
|
xor eax,edi
|
|
add edx,DWORD[((-124))+r13]
|
|
andn edi,esi,ebx
|
|
add edx,eax
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
and esi,ebp
|
|
add edx,r12d
|
|
xor esi,edi
|
|
add ecx,DWORD[((-120))+r13]
|
|
andn edi,edx,ebp
|
|
add ecx,esi
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
and edx,eax
|
|
add ecx,r12d
|
|
xor edx,edi
|
|
add ebx,DWORD[((-116))+r13]
|
|
andn edi,ecx,eax
|
|
add ebx,edx
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
and ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,edi
|
|
add ebp,DWORD[((-96))+r13]
|
|
andn edi,ebx,esi
|
|
add ebp,ecx
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
and ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,edi
|
|
add eax,DWORD[((-92))+r13]
|
|
andn edi,ebp,edx
|
|
add eax,ebx
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
and ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edi
|
|
add esi,DWORD[((-88))+r13]
|
|
andn edi,eax,ecx
|
|
add esi,ebp
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
and eax,ebx
|
|
add esi,r12d
|
|
xor eax,edi
|
|
add edx,DWORD[((-84))+r13]
|
|
andn edi,esi,ebx
|
|
add edx,eax
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
and esi,ebp
|
|
add edx,r12d
|
|
xor esi,edi
|
|
add ecx,DWORD[((-64))+r13]
|
|
andn edi,edx,ebp
|
|
add ecx,esi
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
and edx,eax
|
|
add ecx,r12d
|
|
xor edx,edi
|
|
add ebx,DWORD[((-60))+r13]
|
|
andn edi,ecx,eax
|
|
add ebx,edx
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
and ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,edi
|
|
add ebp,DWORD[((-56))+r13]
|
|
andn edi,ebx,esi
|
|
add ebp,ecx
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
and ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,edi
|
|
add eax,DWORD[((-52))+r13]
|
|
andn edi,ebp,edx
|
|
add eax,ebx
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
and ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edi
|
|
add esi,DWORD[((-32))+r13]
|
|
andn edi,eax,ecx
|
|
add esi,ebp
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
and eax,ebx
|
|
add esi,r12d
|
|
xor eax,edi
|
|
add edx,DWORD[((-28))+r13]
|
|
andn edi,esi,ebx
|
|
add edx,eax
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
and esi,ebp
|
|
add edx,r12d
|
|
xor esi,edi
|
|
add ecx,DWORD[((-24))+r13]
|
|
andn edi,edx,ebp
|
|
add ecx,esi
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
and edx,eax
|
|
add ecx,r12d
|
|
xor edx,edi
|
|
add ebx,DWORD[((-20))+r13]
|
|
andn edi,ecx,eax
|
|
add ebx,edx
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
and ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,edi
|
|
add ebp,DWORD[r13]
|
|
andn edi,ebx,esi
|
|
add ebp,ecx
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
and ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,edi
|
|
add eax,DWORD[4+r13]
|
|
andn edi,ebp,edx
|
|
add eax,ebx
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
and ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edi
|
|
add esi,DWORD[8+r13]
|
|
andn edi,eax,ecx
|
|
add esi,ebp
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
and eax,ebx
|
|
add esi,r12d
|
|
xor eax,edi
|
|
add edx,DWORD[12+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
add ecx,DWORD[32+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[36+r13]
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
add ebp,DWORD[40+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
add eax,DWORD[44+r13]
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
add esi,DWORD[64+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
vmovdqu ymm11,YMMWORD[((-64))+r14]
|
|
vpshufb ymm0,ymm0,ymm6
|
|
add edx,DWORD[68+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
add ecx,DWORD[72+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[76+r13]
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
add ebp,DWORD[96+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
add eax,DWORD[100+r13]
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
vpshufb ymm1,ymm1,ymm6
|
|
vpaddd ymm8,ymm0,ymm11
|
|
add esi,DWORD[104+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
add edx,DWORD[108+r13]
|
|
lea r13,[256+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
add ecx,DWORD[((-128))+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[((-124))+r13]
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
add ebp,DWORD[((-120))+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
vmovdqu YMMWORD[rsp],ymm8
|
|
vpshufb ymm2,ymm2,ymm6
|
|
vpaddd ymm9,ymm1,ymm11
|
|
add eax,DWORD[((-116))+r13]
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
add esi,DWORD[((-96))+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
add edx,DWORD[((-92))+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
add ecx,DWORD[((-88))+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[((-84))+r13]
|
|
mov edi,esi
|
|
xor edi,eax
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
and ecx,edi
|
|
vmovdqu YMMWORD[32+rsp],ymm9
|
|
vpshufb ymm3,ymm3,ymm6
|
|
vpaddd ymm6,ymm2,ymm11
|
|
add ebp,DWORD[((-64))+r13]
|
|
xor ecx,esi
|
|
mov edi,edx
|
|
xor edi,esi
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
and ebx,edi
|
|
add eax,DWORD[((-60))+r13]
|
|
xor ebx,edx
|
|
mov edi,ecx
|
|
xor edi,edx
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
and ebp,edi
|
|
add esi,DWORD[((-56))+r13]
|
|
xor ebp,ecx
|
|
mov edi,ebx
|
|
xor edi,ecx
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
and eax,edi
|
|
add edx,DWORD[((-52))+r13]
|
|
xor eax,ebx
|
|
mov edi,ebp
|
|
xor edi,ebx
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
and esi,edi
|
|
add ecx,DWORD[((-32))+r13]
|
|
xor esi,ebp
|
|
mov edi,eax
|
|
xor edi,ebp
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
and edx,edi
|
|
jmp NEAR $L$align32_3
|
|
ALIGN 32
|
|
$L$align32_3:
|
|
vmovdqu YMMWORD[64+rsp],ymm6
|
|
vpaddd ymm7,ymm3,ymm11
|
|
add ebx,DWORD[((-28))+r13]
|
|
xor edx,eax
|
|
mov edi,esi
|
|
xor edi,eax
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
and ecx,edi
|
|
add ebp,DWORD[((-24))+r13]
|
|
xor ecx,esi
|
|
mov edi,edx
|
|
xor edi,esi
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
and ebx,edi
|
|
add eax,DWORD[((-20))+r13]
|
|
xor ebx,edx
|
|
mov edi,ecx
|
|
xor edi,edx
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
and ebp,edi
|
|
add esi,DWORD[r13]
|
|
xor ebp,ecx
|
|
mov edi,ebx
|
|
xor edi,ecx
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
and eax,edi
|
|
add edx,DWORD[4+r13]
|
|
xor eax,ebx
|
|
mov edi,ebp
|
|
xor edi,ebx
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
and esi,edi
|
|
vmovdqu YMMWORD[96+rsp],ymm7
|
|
add ecx,DWORD[8+r13]
|
|
xor esi,ebp
|
|
mov edi,eax
|
|
xor edi,ebp
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
and edx,edi
|
|
add ebx,DWORD[12+r13]
|
|
xor edx,eax
|
|
mov edi,esi
|
|
xor edi,eax
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
and ecx,edi
|
|
add ebp,DWORD[32+r13]
|
|
xor ecx,esi
|
|
mov edi,edx
|
|
xor edi,esi
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
and ebx,edi
|
|
add eax,DWORD[36+r13]
|
|
xor ebx,edx
|
|
mov edi,ecx
|
|
xor edi,edx
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
and ebp,edi
|
|
add esi,DWORD[40+r13]
|
|
xor ebp,ecx
|
|
mov edi,ebx
|
|
xor edi,ecx
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
and eax,edi
|
|
vpalignr ymm4,ymm1,ymm0,8
|
|
add edx,DWORD[44+r13]
|
|
xor eax,ebx
|
|
mov edi,ebp
|
|
xor edi,ebx
|
|
vpsrldq ymm8,ymm3,4
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
vpxor ymm4,ymm4,ymm0
|
|
vpxor ymm8,ymm8,ymm2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
vpxor ymm4,ymm4,ymm8
|
|
and esi,edi
|
|
add ecx,DWORD[64+r13]
|
|
xor esi,ebp
|
|
mov edi,eax
|
|
vpsrld ymm8,ymm4,31
|
|
xor edi,ebp
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
vpslldq ymm10,ymm4,12
|
|
vpaddd ymm4,ymm4,ymm4
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
vpsrld ymm9,ymm10,30
|
|
vpor ymm4,ymm4,ymm8
|
|
add ecx,r12d
|
|
and edx,edi
|
|
vpslld ymm10,ymm10,2
|
|
vpxor ymm4,ymm4,ymm9
|
|
add ebx,DWORD[68+r13]
|
|
xor edx,eax
|
|
vpxor ymm4,ymm4,ymm10
|
|
mov edi,esi
|
|
xor edi,eax
|
|
lea ebx,[rdx*1+rbx]
|
|
vpaddd ymm9,ymm4,ymm11
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
vmovdqu YMMWORD[128+rsp],ymm9
|
|
add ebx,r12d
|
|
and ecx,edi
|
|
add ebp,DWORD[72+r13]
|
|
xor ecx,esi
|
|
mov edi,edx
|
|
xor edi,esi
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
and ebx,edi
|
|
add eax,DWORD[76+r13]
|
|
xor ebx,edx
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
vpalignr ymm5,ymm2,ymm1,8
|
|
add esi,DWORD[96+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
vpsrldq ymm8,ymm4,4
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
vpxor ymm5,ymm5,ymm1
|
|
vpxor ymm8,ymm8,ymm3
|
|
add edx,DWORD[100+r13]
|
|
lea edx,[rax*1+rdx]
|
|
vpxor ymm5,ymm5,ymm8
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
vpsrld ymm8,ymm5,31
|
|
vmovdqu ymm11,YMMWORD[((-32))+r14]
|
|
xor esi,ebx
|
|
add ecx,DWORD[104+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
vpslldq ymm10,ymm5,12
|
|
vpaddd ymm5,ymm5,ymm5
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
vpsrld ymm9,ymm10,30
|
|
vpor ymm5,ymm5,ymm8
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
vpslld ymm10,ymm10,2
|
|
vpxor ymm5,ymm5,ymm9
|
|
xor edx,ebp
|
|
add ebx,DWORD[108+r13]
|
|
lea r13,[256+r13]
|
|
vpxor ymm5,ymm5,ymm10
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
vpaddd ymm9,ymm5,ymm11
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
vmovdqu YMMWORD[160+rsp],ymm9
|
|
add ebp,DWORD[((-128))+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
vpalignr ymm6,ymm3,ymm2,8
|
|
add eax,DWORD[((-124))+r13]
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
vpsrldq ymm8,ymm5,4
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
vpxor ymm6,ymm6,ymm2
|
|
vpxor ymm8,ymm8,ymm4
|
|
add esi,DWORD[((-120))+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
vpxor ymm6,ymm6,ymm8
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
vpsrld ymm8,ymm6,31
|
|
xor eax,ecx
|
|
add edx,DWORD[((-116))+r13]
|
|
lea edx,[rax*1+rdx]
|
|
vpslldq ymm10,ymm6,12
|
|
vpaddd ymm6,ymm6,ymm6
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
vpsrld ymm9,ymm10,30
|
|
vpor ymm6,ymm6,ymm8
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
vpslld ymm10,ymm10,2
|
|
vpxor ymm6,ymm6,ymm9
|
|
xor esi,ebx
|
|
add ecx,DWORD[((-96))+r13]
|
|
vpxor ymm6,ymm6,ymm10
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
vpaddd ymm9,ymm6,ymm11
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
vmovdqu YMMWORD[192+rsp],ymm9
|
|
add ebx,DWORD[((-92))+r13]
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
vpalignr ymm7,ymm4,ymm3,8
|
|
add ebp,DWORD[((-88))+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
vpsrldq ymm8,ymm6,4
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
vpxor ymm7,ymm7,ymm3
|
|
vpxor ymm8,ymm8,ymm5
|
|
add eax,DWORD[((-84))+r13]
|
|
lea eax,[rbx*1+rax]
|
|
vpxor ymm7,ymm7,ymm8
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
vpsrld ymm8,ymm7,31
|
|
xor ebp,edx
|
|
add esi,DWORD[((-64))+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
vpslldq ymm10,ymm7,12
|
|
vpaddd ymm7,ymm7,ymm7
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
vpsrld ymm9,ymm10,30
|
|
vpor ymm7,ymm7,ymm8
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
vpslld ymm10,ymm10,2
|
|
vpxor ymm7,ymm7,ymm9
|
|
xor eax,ecx
|
|
add edx,DWORD[((-60))+r13]
|
|
vpxor ymm7,ymm7,ymm10
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
rorx eax,esi,2
|
|
vpaddd ymm9,ymm7,ymm11
|
|
xor esi,ebp
|
|
add edx,r12d
|
|
xor esi,ebx
|
|
vmovdqu YMMWORD[224+rsp],ymm9
|
|
add ecx,DWORD[((-56))+r13]
|
|
lea ecx,[rsi*1+rcx]
|
|
rorx r12d,edx,27
|
|
rorx esi,edx,2
|
|
xor edx,eax
|
|
add ecx,r12d
|
|
xor edx,ebp
|
|
add ebx,DWORD[((-52))+r13]
|
|
lea ebx,[rdx*1+rbx]
|
|
rorx r12d,ecx,27
|
|
rorx edx,ecx,2
|
|
xor ecx,esi
|
|
add ebx,r12d
|
|
xor ecx,eax
|
|
add ebp,DWORD[((-32))+r13]
|
|
lea ebp,[rbp*1+rcx]
|
|
rorx r12d,ebx,27
|
|
rorx ecx,ebx,2
|
|
xor ebx,edx
|
|
add ebp,r12d
|
|
xor ebx,esi
|
|
add eax,DWORD[((-28))+r13]
|
|
lea eax,[rbx*1+rax]
|
|
rorx r12d,ebp,27
|
|
rorx ebx,ebp,2
|
|
xor ebp,ecx
|
|
add eax,r12d
|
|
xor ebp,edx
|
|
add esi,DWORD[((-24))+r13]
|
|
lea esi,[rbp*1+rsi]
|
|
rorx r12d,eax,27
|
|
rorx ebp,eax,2
|
|
xor eax,ebx
|
|
add esi,r12d
|
|
xor eax,ecx
|
|
add edx,DWORD[((-20))+r13]
|
|
lea edx,[rax*1+rdx]
|
|
rorx r12d,esi,27
|
|
add edx,r12d
|
|
lea r13,[128+rsp]
|
|
|
|
|
|
add edx,DWORD[r8]
|
|
add esi,DWORD[4+r8]
|
|
add ebp,DWORD[8+r8]
|
|
mov DWORD[r8],edx
|
|
add ebx,DWORD[12+r8]
|
|
mov DWORD[4+r8],esi
|
|
mov eax,edx
|
|
add ecx,DWORD[16+r8]
|
|
mov r12d,ebp
|
|
mov DWORD[8+r8],ebp
|
|
mov edx,ebx
|
|
|
|
mov DWORD[12+r8],ebx
|
|
mov ebp,esi
|
|
mov DWORD[16+r8],ecx
|
|
|
|
mov esi,ecx
|
|
mov ecx,r12d
|
|
|
|
|
|
cmp r9,r10
|
|
jbe NEAR $L$oop_avx2
|
|
|
|
$L$done_avx2:
|
|
vzeroupper
|
|
movaps xmm6,XMMWORD[((-40-96))+r11]
|
|
movaps xmm7,XMMWORD[((-40-80))+r11]
|
|
movaps xmm8,XMMWORD[((-40-64))+r11]
|
|
movaps xmm9,XMMWORD[((-40-48))+r11]
|
|
movaps xmm10,XMMWORD[((-40-32))+r11]
|
|
movaps xmm11,XMMWORD[((-40-16))+r11]
|
|
mov r14,QWORD[((-40))+r11]
|
|
|
|
mov r13,QWORD[((-32))+r11]
|
|
|
|
mov r12,QWORD[((-24))+r11]
|
|
|
|
mov rbp,QWORD[((-16))+r11]
|
|
|
|
mov rbx,QWORD[((-8))+r11]
|
|
|
|
lea rsp,[r11]
|
|
|
|
$L$epilogue_avx2:
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
DB 0F3h,0C3h ;repret
|
|
|
|
$L$SEH_end_sha1_block_data_order_avx2:
|
|
ALIGN 64
|
|
K_XX_XX:
|
|
DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999
|
|
DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999
|
|
DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
|
|
DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
|
|
DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
|
|
DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
|
|
DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
|
|
DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
|
|
DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
|
|
DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
|
|
DB 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
|
|
DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
|
|
DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44
|
|
DB 32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60
|
|
DB 97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114
|
|
DB 103,62,0
|
|
ALIGN 64
|
|
EXTERN __imp_RtlVirtualUnwind
|
|
|
|
ALIGN 16
|
|
se_handler:
|
|
push rsi
|
|
push rdi
|
|
push rbx
|
|
push rbp
|
|
push r12
|
|
push r13
|
|
push r14
|
|
push r15
|
|
pushfq
|
|
sub rsp,64
|
|
|
|
mov rax,QWORD[120+r8]
|
|
mov rbx,QWORD[248+r8]
|
|
|
|
lea r10,[$L$prologue]
|
|
cmp rbx,r10
|
|
jb NEAR $L$common_seh_tail
|
|
|
|
mov rax,QWORD[152+r8]
|
|
|
|
lea r10,[$L$epilogue]
|
|
cmp rbx,r10
|
|
jae NEAR $L$common_seh_tail
|
|
|
|
mov rax,QWORD[64+rax]
|
|
|
|
mov rbx,QWORD[((-8))+rax]
|
|
mov rbp,QWORD[((-16))+rax]
|
|
mov r12,QWORD[((-24))+rax]
|
|
mov r13,QWORD[((-32))+rax]
|
|
mov r14,QWORD[((-40))+rax]
|
|
mov QWORD[144+r8],rbx
|
|
mov QWORD[160+r8],rbp
|
|
mov QWORD[216+r8],r12
|
|
mov QWORD[224+r8],r13
|
|
mov QWORD[232+r8],r14
|
|
|
|
jmp NEAR $L$common_seh_tail
|
|
|
|
|
|
ALIGN 16
|
|
shaext_handler:
|
|
push rsi
|
|
push rdi
|
|
push rbx
|
|
push rbp
|
|
push r12
|
|
push r13
|
|
push r14
|
|
push r15
|
|
pushfq
|
|
sub rsp,64
|
|
|
|
mov rax,QWORD[120+r8]
|
|
mov rbx,QWORD[248+r8]
|
|
|
|
lea r10,[$L$prologue_shaext]
|
|
cmp rbx,r10
|
|
jb NEAR $L$common_seh_tail
|
|
|
|
lea r10,[$L$epilogue_shaext]
|
|
cmp rbx,r10
|
|
jae NEAR $L$common_seh_tail
|
|
|
|
lea rsi,[((-8-64))+rax]
|
|
lea rdi,[512+r8]
|
|
mov ecx,8
|
|
DD 0xa548f3fc
|
|
|
|
jmp NEAR $L$common_seh_tail
|
|
|
|
|
|
ALIGN 16
|
|
ssse3_handler:
|
|
push rsi
|
|
push rdi
|
|
push rbx
|
|
push rbp
|
|
push r12
|
|
push r13
|
|
push r14
|
|
push r15
|
|
pushfq
|
|
sub rsp,64
|
|
|
|
mov rax,QWORD[120+r8]
|
|
mov rbx,QWORD[248+r8]
|
|
|
|
mov rsi,QWORD[8+r9]
|
|
mov r11,QWORD[56+r9]
|
|
|
|
mov r10d,DWORD[r11]
|
|
lea r10,[r10*1+rsi]
|
|
cmp rbx,r10
|
|
jb NEAR $L$common_seh_tail
|
|
|
|
mov rax,QWORD[208+r8]
|
|
|
|
mov r10d,DWORD[4+r11]
|
|
lea r10,[r10*1+rsi]
|
|
cmp rbx,r10
|
|
jae NEAR $L$common_seh_tail
|
|
|
|
lea rsi,[((-40-96))+rax]
|
|
lea rdi,[512+r8]
|
|
mov ecx,12
|
|
DD 0xa548f3fc
|
|
|
|
mov rbx,QWORD[((-8))+rax]
|
|
mov rbp,QWORD[((-16))+rax]
|
|
mov r12,QWORD[((-24))+rax]
|
|
mov r13,QWORD[((-32))+rax]
|
|
mov r14,QWORD[((-40))+rax]
|
|
mov QWORD[144+r8],rbx
|
|
mov QWORD[160+r8],rbp
|
|
mov QWORD[216+r8],r12
|
|
mov QWORD[224+r8],r13
|
|
mov QWORD[232+r8],r14
|
|
|
|
$L$common_seh_tail:
|
|
mov rdi,QWORD[8+rax]
|
|
mov rsi,QWORD[16+rax]
|
|
mov QWORD[152+r8],rax
|
|
mov QWORD[168+r8],rsi
|
|
mov QWORD[176+r8],rdi
|
|
|
|
mov rdi,QWORD[40+r9]
|
|
mov rsi,r8
|
|
mov ecx,154
|
|
DD 0xa548f3fc
|
|
|
|
mov rsi,r9
|
|
xor rcx,rcx
|
|
mov rdx,QWORD[8+rsi]
|
|
mov r8,QWORD[rsi]
|
|
mov r9,QWORD[16+rsi]
|
|
mov r10,QWORD[40+rsi]
|
|
lea r11,[56+rsi]
|
|
lea r12,[24+rsi]
|
|
mov QWORD[32+rsp],r10
|
|
mov QWORD[40+rsp],r11
|
|
mov QWORD[48+rsp],r12
|
|
mov QWORD[56+rsp],rcx
|
|
call QWORD[__imp_RtlVirtualUnwind]
|
|
|
|
mov eax,1
|
|
add rsp,64
|
|
popfq
|
|
pop r15
|
|
pop r14
|
|
pop r13
|
|
pop r12
|
|
pop rbp
|
|
pop rbx
|
|
pop rdi
|
|
pop rsi
|
|
DB 0F3h,0C3h ;repret
|
|
|
|
|
|
section .pdata rdata align=4
|
|
ALIGN 4
|
|
DD $L$SEH_begin_sha1_block_data_order wrt ..imagebase
|
|
DD $L$SEH_end_sha1_block_data_order wrt ..imagebase
|
|
DD $L$SEH_info_sha1_block_data_order wrt ..imagebase
|
|
DD $L$SEH_begin_sha1_block_data_order_shaext wrt ..imagebase
|
|
DD $L$SEH_end_sha1_block_data_order_shaext wrt ..imagebase
|
|
DD $L$SEH_info_sha1_block_data_order_shaext wrt ..imagebase
|
|
DD $L$SEH_begin_sha1_block_data_order_ssse3 wrt ..imagebase
|
|
DD $L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase
|
|
DD $L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase
|
|
DD $L$SEH_begin_sha1_block_data_order_avx wrt ..imagebase
|
|
DD $L$SEH_end_sha1_block_data_order_avx wrt ..imagebase
|
|
DD $L$SEH_info_sha1_block_data_order_avx wrt ..imagebase
|
|
DD $L$SEH_begin_sha1_block_data_order_avx2 wrt ..imagebase
|
|
DD $L$SEH_end_sha1_block_data_order_avx2 wrt ..imagebase
|
|
DD $L$SEH_info_sha1_block_data_order_avx2 wrt ..imagebase
|
|
section .xdata rdata align=8
|
|
ALIGN 8
|
|
$L$SEH_info_sha1_block_data_order:
|
|
DB 9,0,0,0
|
|
DD se_handler wrt ..imagebase
|
|
$L$SEH_info_sha1_block_data_order_shaext:
|
|
DB 9,0,0,0
|
|
DD shaext_handler wrt ..imagebase
|
|
$L$SEH_info_sha1_block_data_order_ssse3:
|
|
DB 9,0,0,0
|
|
DD ssse3_handler wrt ..imagebase
|
|
DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase
|
|
$L$SEH_info_sha1_block_data_order_avx:
|
|
DB 9,0,0,0
|
|
DD ssse3_handler wrt ..imagebase
|
|
DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
|
|
$L$SEH_info_sha1_block_data_order_avx2:
|
|
DB 9,0,0,0
|
|
DD ssse3_handler wrt ..imagebase
|
|
DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase
|