mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Merge branch 'master' into zvonand-base58
This commit is contained in:
commit
c18d09a617
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -271,3 +271,6 @@
|
||||
[submodule "contrib/base-x"]
|
||||
path = contrib/base-x
|
||||
url = https://github.com/ClickHouse/base-x.git
|
||||
[submodule "contrib/liburing"]
|
||||
path = contrib/liburing
|
||||
url = https://github.com/axboe/liburing.git
|
||||
|
3
contrib/CMakeLists.txt
vendored
3
contrib/CMakeLists.txt
vendored
@ -37,6 +37,8 @@ function(add_contrib cmake_folder)
|
||||
|
||||
file(GLOB contrib_files "${base_folder}/*")
|
||||
if (NOT contrib_files)
|
||||
# Checking out *all* submodules takes > 5 min. Therefore, the smoke build ("FastTest") in CI initializes only the set of
|
||||
# submodules minimally needed for a build and we cannot assume here that all submodules are populated.
|
||||
message(STATUS "submodule ${base_folder} is missing or empty. to fix try run:")
|
||||
message(STATUS " git submodule update --init")
|
||||
return()
|
||||
@ -118,6 +120,7 @@ add_contrib (simdjson-cmake simdjson)
|
||||
add_contrib (rapidjson-cmake rapidjson)
|
||||
add_contrib (fastops-cmake fastops)
|
||||
add_contrib (libuv-cmake libuv)
|
||||
add_contrib (liburing-cmake liburing)
|
||||
add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv
|
||||
add_contrib (cassandra-cmake cassandra) # requires: libuv
|
||||
|
||||
|
2
contrib/boringssl
vendored
2
contrib/boringssl
vendored
@ -1 +1 @@
|
||||
Subproject commit 9c0715ce459de443e7b08f270a518c1702f1a380
|
||||
Subproject commit c1e01a441d6db234f4f12e63a7657d1f9e6db9c1
|
@ -159,14 +159,12 @@ set(
|
||||
ios-aarch64/crypto/fipsmodule/sha512-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
|
||||
ios-aarch64/crypto/test/trampoline-armv8.S
|
||||
ios-aarch64/crypto/third_party/sike/asm/fp-armv8.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_ios_arm_SOURCES
|
||||
|
||||
ios-arm/crypto/chacha/chacha-armv4.S
|
||||
ios-arm/crypto/fipsmodule/aes-armv4.S
|
||||
ios-arm/crypto/fipsmodule/aesv8-armx32.S
|
||||
ios-arm/crypto/fipsmodule/armv4-mont.S
|
||||
ios-arm/crypto/fipsmodule/bsaes-armv7.S
|
||||
@ -192,14 +190,12 @@ set(
|
||||
linux-aarch64/crypto/fipsmodule/sha512-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
|
||||
linux-aarch64/crypto/test/trampoline-armv8.S
|
||||
linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_arm_SOURCES
|
||||
|
||||
linux-arm/crypto/chacha/chacha-armv4.S
|
||||
linux-arm/crypto/fipsmodule/aes-armv4.S
|
||||
linux-arm/crypto/fipsmodule/aesv8-armx32.S
|
||||
linux-arm/crypto/fipsmodule/armv4-mont.S
|
||||
linux-arm/crypto/fipsmodule/bsaes-armv7.S
|
||||
@ -219,13 +215,13 @@ set(
|
||||
|
||||
linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
|
||||
linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
|
||||
linux-ppc64le/crypto/test/trampoline-ppc.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_x86_SOURCES
|
||||
|
||||
linux-x86/crypto/chacha/chacha-x86.S
|
||||
linux-x86/crypto/fipsmodule/aes-586.S
|
||||
linux-x86/crypto/fipsmodule/aesni-x86.S
|
||||
linux-x86/crypto/fipsmodule/bn-586.S
|
||||
linux-x86/crypto/fipsmodule/co-586.S
|
||||
@ -246,7 +242,6 @@ set(
|
||||
linux-x86_64/crypto/chacha/chacha-x86_64.S
|
||||
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
|
||||
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/aes-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
|
||||
@ -263,7 +258,6 @@ set(
|
||||
linux-x86_64/crypto/fipsmodule/x86_64-mont.S
|
||||
linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
|
||||
linux-x86_64/crypto/test/trampoline-x86_64.S
|
||||
linux-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S"
|
||||
)
|
||||
|
||||
@ -271,7 +265,6 @@ set(
|
||||
CRYPTO_mac_x86_SOURCES
|
||||
|
||||
mac-x86/crypto/chacha/chacha-x86.S
|
||||
mac-x86/crypto/fipsmodule/aes-586.S
|
||||
mac-x86/crypto/fipsmodule/aesni-x86.S
|
||||
mac-x86/crypto/fipsmodule/bn-586.S
|
||||
mac-x86/crypto/fipsmodule/co-586.S
|
||||
@ -292,7 +285,6 @@ set(
|
||||
mac-x86_64/crypto/chacha/chacha-x86_64.S
|
||||
mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
|
||||
mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/aes-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
|
||||
@ -309,7 +301,6 @@ set(
|
||||
mac-x86_64/crypto/fipsmodule/x86_64-mont.S
|
||||
mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
|
||||
mac-x86_64/crypto/test/trampoline-x86_64.S
|
||||
mac-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
|
||||
)
|
||||
|
||||
set(
|
||||
@ -331,7 +322,6 @@ set(
|
||||
CRYPTO_win_x86_SOURCES
|
||||
|
||||
win-x86/crypto/chacha/chacha-x86.asm
|
||||
win-x86/crypto/fipsmodule/aes-586.asm
|
||||
win-x86/crypto/fipsmodule/aesni-x86.asm
|
||||
win-x86/crypto/fipsmodule/bn-586.asm
|
||||
win-x86/crypto/fipsmodule/co-586.asm
|
||||
@ -352,7 +342,6 @@ set(
|
||||
win-x86_64/crypto/chacha/chacha-x86_64.asm
|
||||
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
|
||||
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/aes-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
|
||||
@ -369,7 +358,6 @@ set(
|
||||
win-x86_64/crypto/fipsmodule/x86_64-mont.asm
|
||||
win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
|
||||
win-x86_64/crypto/test/trampoline-x86_64.asm
|
||||
win-x86_64/crypto/third_party/sike/asm/fp-x86_64.asm
|
||||
)
|
||||
|
||||
if(APPLE AND ARCH STREQUAL "aarch64")
|
||||
@ -401,6 +389,7 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c"
|
||||
@ -430,6 +419,7 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c"
|
||||
@ -454,20 +444,22 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/crypto.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh/check.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh/dh.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh/dh_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh/params.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c"
|
||||
@ -492,8 +484,8 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/mem.c"
|
||||
@ -519,6 +511,7 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c"
|
||||
@ -532,15 +525,18 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c"
|
||||
@ -606,19 +602,11 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pku.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_sxnet.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/fiat/curve25519.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/sike/asm/fp_generic.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/sike/curve_params.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/sike/fpx.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/sike/isogeny.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/sike/sike.c"
|
||||
)
|
||||
|
||||
add_library(
|
||||
@ -631,6 +619,8 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc"
|
||||
@ -653,7 +643,6 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc"
|
||||
@ -674,7 +663,9 @@ add_executable(
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/client.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/const.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/digest.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/fd.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/file.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc"
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@ -33,6 +33,7 @@ Lone:
|
||||
|
||||
.align 5
|
||||
_ChaCha20_ctr32:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
cbz x2,Labort
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x5,:pg_hi21_nc:_OPENSSL_armcap_P
|
||||
@ -46,6 +47,7 @@ _ChaCha20_ctr32:
|
||||
b.ne ChaCha20_neon
|
||||
|
||||
Lshort:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -258,6 +260,7 @@ Loop:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
Labort:
|
||||
ret
|
||||
|
||||
@ -314,12 +317,14 @@ Loop_tail:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
|
||||
.align 5
|
||||
ChaCha20_neon:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -700,6 +705,7 @@ Loop_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
Ltail_neon:
|
||||
@ -809,11 +815,13 @@ Ldone_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.align 5
|
||||
ChaCha20_512_neon:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1977,6 +1985,7 @@ Ldone_512_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
|
@ -32,6 +32,8 @@ Lrcon:
|
||||
.align 5
|
||||
_aes_hw_set_encrypt_key:
|
||||
Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
mov x3,#-1
|
||||
@ -200,6 +202,7 @@ Lenc_key_abort:
|
||||
|
||||
.align 5
|
||||
_aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
bl Lenc_key
|
||||
@ -233,6 +236,7 @@ Loop_imc:
|
||||
eor x0,x0,x0 // return value
|
||||
Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.globl _aes_hw_encrypt
|
||||
@ -240,6 +244,7 @@ Ldec_key_abort:
|
||||
|
||||
.align 5
|
||||
_aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@ -270,6 +275,7 @@ Loop_enc:
|
||||
|
||||
.align 5
|
||||
_aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@ -300,6 +306,8 @@ Loop_dec:
|
||||
|
||||
.align 5
|
||||
_aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
subs x2,x2,#16
|
||||
@ -591,6 +599,8 @@ Lcbc_abort:
|
||||
|
||||
.align 5
|
||||
_aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
|
@ -12,6 +12,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl _bn_mul_mont
|
||||
@ -19,6 +21,7 @@
|
||||
|
||||
.align 5
|
||||
_bn_mul_mont:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
tst x5,#7
|
||||
b.eq __bn_sqr8x_mont
|
||||
tst x5,#3
|
||||
@ -216,11 +219,14 @@ Lcond_copy:
|
||||
mov x0,#1
|
||||
ldp x23,x24,[x29,#48]
|
||||
ldr x29,[sp],#64
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.align 5
|
||||
__bn_sqr8x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
|
||||
// only from bn_mul_mont which has already signed the return address.
|
||||
cmp x1,x2
|
||||
b.ne __bn_mul4x_mont
|
||||
Lsqr8x_mont:
|
||||
@ -974,11 +980,16 @@ Lsqr8x_done:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.align 5
|
||||
__bn_mul4x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
|
||||
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
|
||||
// return address.
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
stp x19,x20,[sp,#16]
|
||||
@ -1412,6 +1423,8 @@ Lmul4x_done:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
|
@ -12,6 +12,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl _gcm_init_neon
|
||||
@ -19,6 +21,7 @@
|
||||
|
||||
.align 4
|
||||
_gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
@ -44,6 +47,7 @@ _gcm_init_neon:
|
||||
|
||||
.align 4
|
||||
_gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
@ -63,6 +67,7 @@ _gcm_gmult_neon:
|
||||
|
||||
.align 4
|
||||
_gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
.align 4
|
||||
_gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
@ -72,6 +73,7 @@ _gcm_init_v8:
|
||||
|
||||
.align 4
|
||||
_gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
@ -114,6 +116,7 @@ _gcm_gmult_v8:
|
||||
|
||||
.align 4
|
||||
_gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
|
@ -22,6 +22,8 @@
|
||||
|
||||
.align 6
|
||||
_sha1_block_data_order:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
|
||||
#else
|
||||
@ -1089,6 +1091,8 @@ Loop:
|
||||
|
||||
.align 6
|
||||
sha1_block_armv8:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
Lv8_entry:
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
@ -63,6 +63,7 @@
|
||||
|
||||
.align 6
|
||||
_sha256_block_data_order:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#ifndef __KERNEL__
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
|
||||
@ -73,6 +74,7 @@ _sha256_block_data_order:
|
||||
tst w16,#ARMV8_SHA256
|
||||
b.ne Lv8_entry
|
||||
#endif
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1033,6 +1035,7 @@ Loop_16_xx:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -1067,6 +1070,7 @@ LK256:
|
||||
.align 6
|
||||
sha256_block_armv8:
|
||||
Lv8_entry:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
|
@ -63,6 +63,7 @@
|
||||
|
||||
.align 6
|
||||
_sha512_block_data_order:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1023,6 +1024,7 @@ Loop_16_xx:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
|
@ -12,6 +12,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.section __TEXT,__const
|
||||
|
||||
|
||||
@ -214,6 +216,7 @@ Lenc_entry:
|
||||
|
||||
.align 4
|
||||
_vpaes_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -223,6 +226,7 @@ _vpaes_encrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -451,6 +455,7 @@ Ldec_entry:
|
||||
|
||||
.align 4
|
||||
_vpaes_decrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -460,6 +465,7 @@ _vpaes_decrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -629,6 +635,7 @@ _vpaes_key_preheat:
|
||||
|
||||
.align 4
|
||||
_vpaes_schedule_core:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -798,6 +805,7 @@ Lschedule_mangle_last_dec:
|
||||
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
|
||||
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
|
||||
ldp x29, x30, [sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -1011,6 +1019,7 @@ Lschedule_mangle_both:
|
||||
|
||||
.align 4
|
||||
_vpaes_set_encrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1026,6 +1035,7 @@ _vpaes_set_encrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -1034,6 +1044,7 @@ _vpaes_set_encrypt_key:
|
||||
|
||||
.align 4
|
||||
_vpaes_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1053,6 +1064,7 @@ _vpaes_set_decrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.globl _vpaes_cbc_encrypt
|
||||
@ -1060,6 +1072,7 @@ _vpaes_set_decrypt_key:
|
||||
|
||||
.align 4
|
||||
_vpaes_cbc_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
cbz x2, Lcbc_abort
|
||||
cmp w5, #0 // check direction
|
||||
b.eq vpaes_cbc_decrypt
|
||||
@ -1086,6 +1099,7 @@ Lcbc_enc_loop:
|
||||
st1 {v0.16b}, [x4] // write ivec
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
Lcbc_abort:
|
||||
ret
|
||||
|
||||
@ -1093,6 +1107,8 @@ Lcbc_abort:
|
||||
|
||||
.align 4
|
||||
vpaes_cbc_decrypt:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
|
||||
// only from vpaes_cbc_encrypt which has already signed the return address.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1134,6 +1150,7 @@ Lcbc_dec_done:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.globl _vpaes_ctr32_encrypt_blocks
|
||||
@ -1141,6 +1158,7 @@ Lcbc_dec_done:
|
||||
|
||||
.align 4
|
||||
_vpaes_ctr32_encrypt_blocks:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1208,6 +1226,7 @@ Lctr32_done:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
|
@ -12,6 +12,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ -26,6 +28,7 @@
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
Labi_test_trampoline_begin:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
// Stack layout (low to high addresses)
|
||||
// x29,x30 (16 bytes)
|
||||
// d8-d15 (64 bytes)
|
||||
@ -128,6 +131,7 @@ Lx29_ok:
|
||||
ldp x27, x28, [sp, #144]
|
||||
|
||||
ldp x29, x30, [sp], #176
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -135,6 +139,7 @@ Lx29_ok:
|
||||
.private_extern _abi_test_clobber_x0
|
||||
.align 4
|
||||
_abi_test_clobber_x0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x0, xzr
|
||||
ret
|
||||
|
||||
@ -143,6 +148,7 @@ _abi_test_clobber_x0:
|
||||
.private_extern _abi_test_clobber_x1
|
||||
.align 4
|
||||
_abi_test_clobber_x1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x1, xzr
|
||||
ret
|
||||
|
||||
@ -151,6 +157,7 @@ _abi_test_clobber_x1:
|
||||
.private_extern _abi_test_clobber_x2
|
||||
.align 4
|
||||
_abi_test_clobber_x2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x2, xzr
|
||||
ret
|
||||
|
||||
@ -159,6 +166,7 @@ _abi_test_clobber_x2:
|
||||
.private_extern _abi_test_clobber_x3
|
||||
.align 4
|
||||
_abi_test_clobber_x3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x3, xzr
|
||||
ret
|
||||
|
||||
@ -167,6 +175,7 @@ _abi_test_clobber_x3:
|
||||
.private_extern _abi_test_clobber_x4
|
||||
.align 4
|
||||
_abi_test_clobber_x4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x4, xzr
|
||||
ret
|
||||
|
||||
@ -175,6 +184,7 @@ _abi_test_clobber_x4:
|
||||
.private_extern _abi_test_clobber_x5
|
||||
.align 4
|
||||
_abi_test_clobber_x5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x5, xzr
|
||||
ret
|
||||
|
||||
@ -183,6 +193,7 @@ _abi_test_clobber_x5:
|
||||
.private_extern _abi_test_clobber_x6
|
||||
.align 4
|
||||
_abi_test_clobber_x6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x6, xzr
|
||||
ret
|
||||
|
||||
@ -191,6 +202,7 @@ _abi_test_clobber_x6:
|
||||
.private_extern _abi_test_clobber_x7
|
||||
.align 4
|
||||
_abi_test_clobber_x7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x7, xzr
|
||||
ret
|
||||
|
||||
@ -199,6 +211,7 @@ _abi_test_clobber_x7:
|
||||
.private_extern _abi_test_clobber_x8
|
||||
.align 4
|
||||
_abi_test_clobber_x8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x8, xzr
|
||||
ret
|
||||
|
||||
@ -207,6 +220,7 @@ _abi_test_clobber_x8:
|
||||
.private_extern _abi_test_clobber_x9
|
||||
.align 4
|
||||
_abi_test_clobber_x9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x9, xzr
|
||||
ret
|
||||
|
||||
@ -215,6 +229,7 @@ _abi_test_clobber_x9:
|
||||
.private_extern _abi_test_clobber_x10
|
||||
.align 4
|
||||
_abi_test_clobber_x10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x10, xzr
|
||||
ret
|
||||
|
||||
@ -223,6 +238,7 @@ _abi_test_clobber_x10:
|
||||
.private_extern _abi_test_clobber_x11
|
||||
.align 4
|
||||
_abi_test_clobber_x11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x11, xzr
|
||||
ret
|
||||
|
||||
@ -231,6 +247,7 @@ _abi_test_clobber_x11:
|
||||
.private_extern _abi_test_clobber_x12
|
||||
.align 4
|
||||
_abi_test_clobber_x12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x12, xzr
|
||||
ret
|
||||
|
||||
@ -239,6 +256,7 @@ _abi_test_clobber_x12:
|
||||
.private_extern _abi_test_clobber_x13
|
||||
.align 4
|
||||
_abi_test_clobber_x13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x13, xzr
|
||||
ret
|
||||
|
||||
@ -247,6 +265,7 @@ _abi_test_clobber_x13:
|
||||
.private_extern _abi_test_clobber_x14
|
||||
.align 4
|
||||
_abi_test_clobber_x14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x14, xzr
|
||||
ret
|
||||
|
||||
@ -255,6 +274,7 @@ _abi_test_clobber_x14:
|
||||
.private_extern _abi_test_clobber_x15
|
||||
.align 4
|
||||
_abi_test_clobber_x15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x15, xzr
|
||||
ret
|
||||
|
||||
@ -263,6 +283,7 @@ _abi_test_clobber_x15:
|
||||
.private_extern _abi_test_clobber_x16
|
||||
.align 4
|
||||
_abi_test_clobber_x16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x16, xzr
|
||||
ret
|
||||
|
||||
@ -271,6 +292,7 @@ _abi_test_clobber_x16:
|
||||
.private_extern _abi_test_clobber_x17
|
||||
.align 4
|
||||
_abi_test_clobber_x17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x17, xzr
|
||||
ret
|
||||
|
||||
@ -279,6 +301,7 @@ _abi_test_clobber_x17:
|
||||
.private_extern _abi_test_clobber_x19
|
||||
.align 4
|
||||
_abi_test_clobber_x19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x19, xzr
|
||||
ret
|
||||
|
||||
@ -287,6 +310,7 @@ _abi_test_clobber_x19:
|
||||
.private_extern _abi_test_clobber_x20
|
||||
.align 4
|
||||
_abi_test_clobber_x20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x20, xzr
|
||||
ret
|
||||
|
||||
@ -295,6 +319,7 @@ _abi_test_clobber_x20:
|
||||
.private_extern _abi_test_clobber_x21
|
||||
.align 4
|
||||
_abi_test_clobber_x21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x21, xzr
|
||||
ret
|
||||
|
||||
@ -303,6 +328,7 @@ _abi_test_clobber_x21:
|
||||
.private_extern _abi_test_clobber_x22
|
||||
.align 4
|
||||
_abi_test_clobber_x22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x22, xzr
|
||||
ret
|
||||
|
||||
@ -311,6 +337,7 @@ _abi_test_clobber_x22:
|
||||
.private_extern _abi_test_clobber_x23
|
||||
.align 4
|
||||
_abi_test_clobber_x23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x23, xzr
|
||||
ret
|
||||
|
||||
@ -319,6 +346,7 @@ _abi_test_clobber_x23:
|
||||
.private_extern _abi_test_clobber_x24
|
||||
.align 4
|
||||
_abi_test_clobber_x24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x24, xzr
|
||||
ret
|
||||
|
||||
@ -327,6 +355,7 @@ _abi_test_clobber_x24:
|
||||
.private_extern _abi_test_clobber_x25
|
||||
.align 4
|
||||
_abi_test_clobber_x25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x25, xzr
|
||||
ret
|
||||
|
||||
@ -335,6 +364,7 @@ _abi_test_clobber_x25:
|
||||
.private_extern _abi_test_clobber_x26
|
||||
.align 4
|
||||
_abi_test_clobber_x26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x26, xzr
|
||||
ret
|
||||
|
||||
@ -343,6 +373,7 @@ _abi_test_clobber_x26:
|
||||
.private_extern _abi_test_clobber_x27
|
||||
.align 4
|
||||
_abi_test_clobber_x27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x27, xzr
|
||||
ret
|
||||
|
||||
@ -351,6 +382,7 @@ _abi_test_clobber_x27:
|
||||
.private_extern _abi_test_clobber_x28
|
||||
.align 4
|
||||
_abi_test_clobber_x28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x28, xzr
|
||||
ret
|
||||
|
||||
@ -359,6 +391,7 @@ _abi_test_clobber_x28:
|
||||
.private_extern _abi_test_clobber_x29
|
||||
.align 4
|
||||
_abi_test_clobber_x29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x29, xzr
|
||||
ret
|
||||
|
||||
@ -367,6 +400,7 @@ _abi_test_clobber_x29:
|
||||
.private_extern _abi_test_clobber_d0
|
||||
.align 4
|
||||
_abi_test_clobber_d0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d0, xzr
|
||||
ret
|
||||
|
||||
@ -375,6 +409,7 @@ _abi_test_clobber_d0:
|
||||
.private_extern _abi_test_clobber_d1
|
||||
.align 4
|
||||
_abi_test_clobber_d1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d1, xzr
|
||||
ret
|
||||
|
||||
@ -383,6 +418,7 @@ _abi_test_clobber_d1:
|
||||
.private_extern _abi_test_clobber_d2
|
||||
.align 4
|
||||
_abi_test_clobber_d2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d2, xzr
|
||||
ret
|
||||
|
||||
@ -391,6 +427,7 @@ _abi_test_clobber_d2:
|
||||
.private_extern _abi_test_clobber_d3
|
||||
.align 4
|
||||
_abi_test_clobber_d3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d3, xzr
|
||||
ret
|
||||
|
||||
@ -399,6 +436,7 @@ _abi_test_clobber_d3:
|
||||
.private_extern _abi_test_clobber_d4
|
||||
.align 4
|
||||
_abi_test_clobber_d4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d4, xzr
|
||||
ret
|
||||
|
||||
@ -407,6 +445,7 @@ _abi_test_clobber_d4:
|
||||
.private_extern _abi_test_clobber_d5
|
||||
.align 4
|
||||
_abi_test_clobber_d5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d5, xzr
|
||||
ret
|
||||
|
||||
@ -415,6 +454,7 @@ _abi_test_clobber_d5:
|
||||
.private_extern _abi_test_clobber_d6
|
||||
.align 4
|
||||
_abi_test_clobber_d6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d6, xzr
|
||||
ret
|
||||
|
||||
@ -423,6 +463,7 @@ _abi_test_clobber_d6:
|
||||
.private_extern _abi_test_clobber_d7
|
||||
.align 4
|
||||
_abi_test_clobber_d7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d7, xzr
|
||||
ret
|
||||
|
||||
@ -431,6 +472,7 @@ _abi_test_clobber_d7:
|
||||
.private_extern _abi_test_clobber_d8
|
||||
.align 4
|
||||
_abi_test_clobber_d8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d8, xzr
|
||||
ret
|
||||
|
||||
@ -439,6 +481,7 @@ _abi_test_clobber_d8:
|
||||
.private_extern _abi_test_clobber_d9
|
||||
.align 4
|
||||
_abi_test_clobber_d9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d9, xzr
|
||||
ret
|
||||
|
||||
@ -447,6 +490,7 @@ _abi_test_clobber_d9:
|
||||
.private_extern _abi_test_clobber_d10
|
||||
.align 4
|
||||
_abi_test_clobber_d10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d10, xzr
|
||||
ret
|
||||
|
||||
@ -455,6 +499,7 @@ _abi_test_clobber_d10:
|
||||
.private_extern _abi_test_clobber_d11
|
||||
.align 4
|
||||
_abi_test_clobber_d11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d11, xzr
|
||||
ret
|
||||
|
||||
@ -463,6 +508,7 @@ _abi_test_clobber_d11:
|
||||
.private_extern _abi_test_clobber_d12
|
||||
.align 4
|
||||
_abi_test_clobber_d12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d12, xzr
|
||||
ret
|
||||
|
||||
@ -471,6 +517,7 @@ _abi_test_clobber_d12:
|
||||
.private_extern _abi_test_clobber_d13
|
||||
.align 4
|
||||
_abi_test_clobber_d13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d13, xzr
|
||||
ret
|
||||
|
||||
@ -479,6 +526,7 @@ _abi_test_clobber_d13:
|
||||
.private_extern _abi_test_clobber_d14
|
||||
.align 4
|
||||
_abi_test_clobber_d14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d14, xzr
|
||||
ret
|
||||
|
||||
@ -487,6 +535,7 @@ _abi_test_clobber_d14:
|
||||
.private_extern _abi_test_clobber_d15
|
||||
.align 4
|
||||
_abi_test_clobber_d15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d15, xzr
|
||||
ret
|
||||
|
||||
@ -495,6 +544,7 @@ _abi_test_clobber_d15:
|
||||
.private_extern _abi_test_clobber_d16
|
||||
.align 4
|
||||
_abi_test_clobber_d16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d16, xzr
|
||||
ret
|
||||
|
||||
@ -503,6 +553,7 @@ _abi_test_clobber_d16:
|
||||
.private_extern _abi_test_clobber_d17
|
||||
.align 4
|
||||
_abi_test_clobber_d17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d17, xzr
|
||||
ret
|
||||
|
||||
@ -511,6 +562,7 @@ _abi_test_clobber_d17:
|
||||
.private_extern _abi_test_clobber_d18
|
||||
.align 4
|
||||
_abi_test_clobber_d18:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d18, xzr
|
||||
ret
|
||||
|
||||
@ -519,6 +571,7 @@ _abi_test_clobber_d18:
|
||||
.private_extern _abi_test_clobber_d19
|
||||
.align 4
|
||||
_abi_test_clobber_d19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d19, xzr
|
||||
ret
|
||||
|
||||
@ -527,6 +580,7 @@ _abi_test_clobber_d19:
|
||||
.private_extern _abi_test_clobber_d20
|
||||
.align 4
|
||||
_abi_test_clobber_d20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d20, xzr
|
||||
ret
|
||||
|
||||
@ -535,6 +589,7 @@ _abi_test_clobber_d20:
|
||||
.private_extern _abi_test_clobber_d21
|
||||
.align 4
|
||||
_abi_test_clobber_d21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d21, xzr
|
||||
ret
|
||||
|
||||
@ -543,6 +598,7 @@ _abi_test_clobber_d21:
|
||||
.private_extern _abi_test_clobber_d22
|
||||
.align 4
|
||||
_abi_test_clobber_d22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d22, xzr
|
||||
ret
|
||||
|
||||
@ -551,6 +607,7 @@ _abi_test_clobber_d22:
|
||||
.private_extern _abi_test_clobber_d23
|
||||
.align 4
|
||||
_abi_test_clobber_d23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d23, xzr
|
||||
ret
|
||||
|
||||
@ -559,6 +616,7 @@ _abi_test_clobber_d23:
|
||||
.private_extern _abi_test_clobber_d24
|
||||
.align 4
|
||||
_abi_test_clobber_d24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d24, xzr
|
||||
ret
|
||||
|
||||
@ -567,6 +625,7 @@ _abi_test_clobber_d24:
|
||||
.private_extern _abi_test_clobber_d25
|
||||
.align 4
|
||||
_abi_test_clobber_d25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d25, xzr
|
||||
ret
|
||||
|
||||
@ -575,6 +634,7 @@ _abi_test_clobber_d25:
|
||||
.private_extern _abi_test_clobber_d26
|
||||
.align 4
|
||||
_abi_test_clobber_d26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d26, xzr
|
||||
ret
|
||||
|
||||
@ -583,6 +643,7 @@ _abi_test_clobber_d26:
|
||||
.private_extern _abi_test_clobber_d27
|
||||
.align 4
|
||||
_abi_test_clobber_d27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d27, xzr
|
||||
ret
|
||||
|
||||
@ -591,6 +652,7 @@ _abi_test_clobber_d27:
|
||||
.private_extern _abi_test_clobber_d28
|
||||
.align 4
|
||||
_abi_test_clobber_d28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d28, xzr
|
||||
ret
|
||||
|
||||
@ -599,6 +661,7 @@ _abi_test_clobber_d28:
|
||||
.private_extern _abi_test_clobber_d29
|
||||
.align 4
|
||||
_abi_test_clobber_d29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d29, xzr
|
||||
ret
|
||||
|
||||
@ -607,6 +670,7 @@ _abi_test_clobber_d29:
|
||||
.private_extern _abi_test_clobber_d30
|
||||
.align 4
|
||||
_abi_test_clobber_d30:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d30, xzr
|
||||
ret
|
||||
|
||||
@ -615,6 +679,7 @@ _abi_test_clobber_d30:
|
||||
.private_extern _abi_test_clobber_d31
|
||||
.align 4
|
||||
_abi_test_clobber_d31:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d31, xzr
|
||||
ret
|
||||
|
||||
@ -623,6 +688,7 @@ _abi_test_clobber_d31:
|
||||
.private_extern _abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v8_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -631,6 +697,7 @@ _abi_test_clobber_v8_upper:
|
||||
.private_extern _abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v9_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -639,6 +706,7 @@ _abi_test_clobber_v9_upper:
|
||||
.private_extern _abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v10_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -647,6 +715,7 @@ _abi_test_clobber_v10_upper:
|
||||
.private_extern _abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v11_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -655,6 +724,7 @@ _abi_test_clobber_v11_upper:
|
||||
.private_extern _abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v12_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -663,6 +733,7 @@ _abi_test_clobber_v12_upper:
|
||||
.private_extern _abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v13_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -671,6 +742,7 @@ _abi_test_clobber_v13_upper:
|
||||
.private_extern _abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v14_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -679,6 +751,7 @@ _abi_test_clobber_v14_upper:
|
||||
.private_extern _abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v15_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
|
||||
|
@ -1,996 +0,0 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.section __TEXT,__const
|
||||
|
||||
# p434 x 2
|
||||
Lp434x2:
|
||||
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
|
||||
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
|
||||
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
|
||||
|
||||
# p434 + 1
|
||||
Lp434p1:
|
||||
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
|
||||
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
|
||||
|
||||
.text
|
||||
.globl _sike_mpmul
|
||||
.private_extern _sike_mpmul
|
||||
.align 4
|
||||
_sike_mpmul:
|
||||
stp x29, x30, [sp,#-96]!
|
||||
add x29, sp, #0
|
||||
stp x19, x20, [sp,#16]
|
||||
stp x21, x22, [sp,#32]
|
||||
stp x23, x24, [sp,#48]
|
||||
stp x25, x26, [sp,#64]
|
||||
stp x27, x28, [sp,#80]
|
||||
|
||||
ldp x3, x4, [x0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x10, x11, [x1,#0]
|
||||
ldp x12, x13, [x1,#16]
|
||||
ldp x14, x15, [x1,#32]
|
||||
ldr x16, [x1,#48]
|
||||
|
||||
// x3-x7 <- AH + AL, x7 <- carry
|
||||
adds x3, x3, x7
|
||||
adcs x4, x4, x8
|
||||
adcs x5, x5, x9
|
||||
adcs x6, x6, xzr
|
||||
adc x7, xzr, xzr
|
||||
|
||||
// x10-x13 <- BH + BL, x8 <- carry
|
||||
adds x10, x10, x14
|
||||
adcs x11, x11, x15
|
||||
adcs x12, x12, x16
|
||||
adcs x13, x13, xzr
|
||||
adc x8, xzr, xzr
|
||||
|
||||
// x9 <- combined carry
|
||||
and x9, x7, x8
|
||||
// x7-x8 <- mask
|
||||
sub x7, xzr, x7
|
||||
sub x8, xzr, x8
|
||||
|
||||
// x15-x19 <- masked (BH + BL)
|
||||
and x14, x10, x7
|
||||
and x15, x11, x7
|
||||
and x16, x12, x7
|
||||
and x17, x13, x7
|
||||
|
||||
// x20-x23 <- masked (AH + AL)
|
||||
and x20, x3, x8
|
||||
and x21, x4, x8
|
||||
and x22, x5, x8
|
||||
and x23, x6, x8
|
||||
|
||||
// x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
|
||||
adds x14, x14, x20
|
||||
adcs x15, x15, x21
|
||||
adcs x16, x16, x22
|
||||
adcs x17, x17, x23
|
||||
adc x7, x9, xzr
|
||||
|
||||
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
|
||||
stp x3, x4, [x2,#0]
|
||||
// A0-A1 <- AH + AL, T0 <- mask
|
||||
adds x3, x3, x5
|
||||
adcs x4, x4, x6
|
||||
adc x25, xzr, xzr
|
||||
|
||||
// C6, T1 <- BH + BL, C7 <- mask
|
||||
adds x23, x10, x12
|
||||
adcs x26, x11, x13
|
||||
adc x24, xzr, xzr
|
||||
|
||||
// C0-C1 <- masked (BH + BL)
|
||||
sub x19, xzr, x25
|
||||
sub x20, xzr, x24
|
||||
and x8, x23, x19
|
||||
and x9, x26, x19
|
||||
|
||||
// C4-C5 <- masked (AH + AL), T0 <- combined carry
|
||||
and x21, x3, x20
|
||||
and x22, x4, x20
|
||||
mul x19, x3, x23
|
||||
mul x20, x3, x26
|
||||
and x25, x25, x24
|
||||
|
||||
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
|
||||
adds x8, x21, x8
|
||||
umulh x21, x3, x26
|
||||
adcs x9, x22, x9
|
||||
umulh x22, x3, x23
|
||||
adc x25, x25, xzr
|
||||
|
||||
// C2-C5 <- (AH+AL) x (BH+BL), low part
|
||||
mul x3, x4, x23
|
||||
umulh x23, x4, x23
|
||||
adds x20, x20, x22
|
||||
adc x21, x21, xzr
|
||||
|
||||
mul x24, x4, x26
|
||||
umulh x26, x4, x26
|
||||
adds x20, x20, x3
|
||||
adcs x21, x21, x23
|
||||
adc x22, xzr, xzr
|
||||
|
||||
adds x21, x21, x24
|
||||
adc x22, x22, x26
|
||||
|
||||
ldp x3, x4, [x2,#0]
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
|
||||
adds x21, x8, x21
|
||||
umulh x24, x3, x10
|
||||
umulh x26, x3, x11
|
||||
adcs x22, x9, x22
|
||||
mul x8, x3, x10
|
||||
mul x9, x3, x11
|
||||
adc x25, x25, xzr
|
||||
|
||||
// C0-C1, T1, C7 <- AL x BL
|
||||
mul x3, x4, x10
|
||||
umulh x10, x4, x10
|
||||
adds x9, x9, x24
|
||||
adc x26, x26, xzr
|
||||
|
||||
mul x23, x4, x11
|
||||
umulh x11, x4, x11
|
||||
adds x9, x9, x3
|
||||
adcs x26, x26, x10
|
||||
adc x24, xzr, xzr
|
||||
|
||||
adds x26, x26, x23
|
||||
adc x24, x24, x11
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
|
||||
mul x3, x5, x12
|
||||
umulh x10, x5, x12
|
||||
subs x19, x19, x8
|
||||
sbcs x20, x20, x9
|
||||
sbcs x21, x21, x26
|
||||
mul x4, x5, x13
|
||||
umulh x23, x5, x13
|
||||
sbcs x22, x22, x24
|
||||
sbc x25, x25, xzr
|
||||
|
||||
// A0, A1, C6, B0 <- AH x BH
|
||||
mul x5, x6, x12
|
||||
umulh x12, x6, x12
|
||||
adds x4, x4, x10
|
||||
adc x23, x23, xzr
|
||||
|
||||
mul x11, x6, x13
|
||||
umulh x13, x6, x13
|
||||
adds x4, x4, x5
|
||||
adcs x23, x23, x12
|
||||
adc x10, xzr, xzr
|
||||
|
||||
adds x23, x23, x11
|
||||
adc x10, x10, x13
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x19, x19, x3
|
||||
sbcs x20, x20, x4
|
||||
sbcs x21, x21, x23
|
||||
sbcs x22, x22, x10
|
||||
sbc x25, x25, xzr
|
||||
|
||||
adds x19, x19, x26
|
||||
adcs x20, x20, x24
|
||||
adcs x21, x21, x3
|
||||
adcs x22, x22, x4
|
||||
adcs x23, x25, x23
|
||||
adc x24, x10, xzr
|
||||
|
||||
|
||||
// x15-x19, x7 <- (AH+AL) x (BH+BL), final step
|
||||
adds x14, x14, x21
|
||||
adcs x15, x15, x22
|
||||
adcs x16, x16, x23
|
||||
adcs x17, x17, x24
|
||||
adc x7, x7, xzr
|
||||
|
||||
// Load AL
|
||||
ldp x3, x4, [x0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
// Load BL
|
||||
ldp x10, x11, [x1,#0]
|
||||
ldp x12, x13, [x1,#16]
|
||||
|
||||
// Temporarily store x8 in x2
|
||||
stp x8, x9, [x2,#0]
|
||||
// x21-x28 <- AL x BL
|
||||
// A0-A1 <- AH + AL, T0 <- mask
|
||||
adds x3, x3, x5
|
||||
adcs x4, x4, x6
|
||||
adc x8, xzr, xzr
|
||||
|
||||
// C6, T1 <- BH + BL, C7 <- mask
|
||||
adds x27, x10, x12
|
||||
adcs x9, x11, x13
|
||||
adc x28, xzr, xzr
|
||||
|
||||
// C0-C1 <- masked (BH + BL)
|
||||
sub x23, xzr, x8
|
||||
sub x24, xzr, x28
|
||||
and x21, x27, x23
|
||||
and x22, x9, x23
|
||||
|
||||
// C4-C5 <- masked (AH + AL), T0 <- combined carry
|
||||
and x25, x3, x24
|
||||
and x26, x4, x24
|
||||
mul x23, x3, x27
|
||||
mul x24, x3, x9
|
||||
and x8, x8, x28
|
||||
|
||||
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
|
||||
adds x21, x25, x21
|
||||
umulh x25, x3, x9
|
||||
adcs x22, x26, x22
|
||||
umulh x26, x3, x27
|
||||
adc x8, x8, xzr
|
||||
|
||||
// C2-C5 <- (AH+AL) x (BH+BL), low part
|
||||
mul x3, x4, x27
|
||||
umulh x27, x4, x27
|
||||
adds x24, x24, x26
|
||||
adc x25, x25, xzr
|
||||
|
||||
mul x28, x4, x9
|
||||
umulh x9, x4, x9
|
||||
adds x24, x24, x3
|
||||
adcs x25, x25, x27
|
||||
adc x26, xzr, xzr
|
||||
|
||||
adds x25, x25, x28
|
||||
adc x26, x26, x9
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
|
||||
adds x25, x21, x25
|
||||
umulh x28, x3, x10
|
||||
umulh x9, x3, x11
|
||||
adcs x26, x22, x26
|
||||
mul x21, x3, x10
|
||||
mul x22, x3, x11
|
||||
adc x8, x8, xzr
|
||||
|
||||
// C0-C1, T1, C7 <- AL x BL
|
||||
mul x3, x4, x10
|
||||
umulh x10, x4, x10
|
||||
adds x22, x22, x28
|
||||
adc x9, x9, xzr
|
||||
|
||||
mul x27, x4, x11
|
||||
umulh x11, x4, x11
|
||||
adds x22, x22, x3
|
||||
adcs x9, x9, x10
|
||||
adc x28, xzr, xzr
|
||||
|
||||
adds x9, x9, x27
|
||||
adc x28, x28, x11
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
|
||||
mul x3, x5, x12
|
||||
umulh x10, x5, x12
|
||||
subs x23, x23, x21
|
||||
sbcs x24, x24, x22
|
||||
sbcs x25, x25, x9
|
||||
mul x4, x5, x13
|
||||
umulh x27, x5, x13
|
||||
sbcs x26, x26, x28
|
||||
sbc x8, x8, xzr
|
||||
|
||||
// A0, A1, C6, B0 <- AH x BH
|
||||
mul x5, x6, x12
|
||||
umulh x12, x6, x12
|
||||
adds x4, x4, x10
|
||||
adc x27, x27, xzr
|
||||
|
||||
mul x11, x6, x13
|
||||
umulh x13, x6, x13
|
||||
adds x4, x4, x5
|
||||
adcs x27, x27, x12
|
||||
adc x10, xzr, xzr
|
||||
|
||||
adds x27, x27, x11
|
||||
adc x10, x10, x13
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x23, x23, x3
|
||||
sbcs x24, x24, x4
|
||||
sbcs x25, x25, x27
|
||||
sbcs x26, x26, x10
|
||||
sbc x8, x8, xzr
|
||||
|
||||
adds x23, x23, x9
|
||||
adcs x24, x24, x28
|
||||
adcs x25, x25, x3
|
||||
adcs x26, x26, x4
|
||||
adcs x27, x8, x27
|
||||
adc x28, x10, xzr
|
||||
|
||||
// Restore x8
|
||||
ldp x8, x9, [x2,#0]
|
||||
|
||||
// x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL
|
||||
subs x8, x8, x21
|
||||
sbcs x9, x9, x22
|
||||
sbcs x19, x19, x23
|
||||
sbcs x20, x20, x24
|
||||
sbcs x14, x14, x25
|
||||
sbcs x15, x15, x26
|
||||
sbcs x16, x16, x27
|
||||
sbcs x17, x17, x28
|
||||
sbc x7, x7, xzr
|
||||
|
||||
// Store ALxBL, low
|
||||
stp x21, x22, [x2]
|
||||
stp x23, x24, [x2,#16]
|
||||
|
||||
// Load AH
|
||||
ldp x3, x4, [x0,#32]
|
||||
ldr x5, [x0,#48]
|
||||
// Load BH
|
||||
ldp x10, x11, [x1,#32]
|
||||
ldr x12, [x1,#48]
|
||||
|
||||
adds x8, x8, x25
|
||||
adcs x9, x9, x26
|
||||
adcs x19, x19, x27
|
||||
adcs x20, x20, x28
|
||||
adc x1, xzr, xzr
|
||||
|
||||
add x0, x0, #32
|
||||
// Temporarily store x8,x9 in x2
|
||||
stp x8,x9, [x2,#32]
|
||||
// x21-x28 <- AH x BH
|
||||
|
||||
// A0 * B0
|
||||
mul x21, x3, x10 // C0
|
||||
umulh x24, x3, x10
|
||||
|
||||
// A0 * B1
|
||||
mul x22, x3, x11
|
||||
umulh x23, x3, x11
|
||||
|
||||
// A1 * B0
|
||||
mul x8, x4, x10
|
||||
umulh x9, x4, x10
|
||||
adds x22, x22, x24
|
||||
adc x23, x23, xzr
|
||||
|
||||
// A0 * B2
|
||||
mul x27, x3, x12
|
||||
umulh x28, x3, x12
|
||||
adds x22, x22, x8 // C1
|
||||
adcs x23, x23, x9
|
||||
adc x24, xzr, xzr
|
||||
|
||||
// A2 * B0
|
||||
mul x8, x5, x10
|
||||
umulh x25, x5, x10
|
||||
adds x23, x23, x27
|
||||
adcs x24, x24, x25
|
||||
adc x25, xzr, xzr
|
||||
|
||||
// A1 * B1
|
||||
mul x27, x4, x11
|
||||
umulh x9, x4, x11
|
||||
adds x23, x23, x8
|
||||
adcs x24, x24, x28
|
||||
adc x25, x25, xzr
|
||||
|
||||
// A1 * B2
|
||||
mul x8, x4, x12
|
||||
umulh x28, x4, x12
|
||||
adds x23, x23, x27 // C2
|
||||
adcs x24, x24, x9
|
||||
adc x25, x25, xzr
|
||||
|
||||
// A2 * B1
|
||||
mul x27, x5, x11
|
||||
umulh x9, x5, x11
|
||||
adds x24, x24, x8
|
||||
adcs x25, x25, x28
|
||||
adc x26, xzr, xzr
|
||||
|
||||
// A2 * B2
|
||||
mul x8, x5, x12
|
||||
umulh x28, x5, x12
|
||||
adds x24, x24, x27 // C3
|
||||
adcs x25, x25, x9
|
||||
adc x26, x26, xzr
|
||||
|
||||
adds x25, x25, x8 // C4
|
||||
adc x26, x26, x28 // C5
|
||||
|
||||
// Restore x8,x9
|
||||
ldp x8,x9, [x2,#32]
|
||||
|
||||
neg x1, x1
|
||||
|
||||
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x8, x8, x21
|
||||
sbcs x9, x9, x22
|
||||
sbcs x19, x19, x23
|
||||
sbcs x20, x20, x24
|
||||
sbcs x14, x14, x25
|
||||
sbcs x15, x15, x26
|
||||
sbcs x16, x16, xzr
|
||||
sbcs x17, x17, xzr
|
||||
sbc x7, x7, xzr
|
||||
|
||||
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
|
||||
stp x8, x9, [x2,#32]
|
||||
stp x19, x20, [x2,#48]
|
||||
|
||||
adds x1, x1, #1
|
||||
adcs x14, x14, x21
|
||||
adcs x15, x15, x22
|
||||
adcs x16, x16, x23
|
||||
adcs x17, x17, x24
|
||||
adcs x25, x7, x25
|
||||
adc x26, x26, xzr
|
||||
|
||||
stp x14, x15, [x2,#64]
|
||||
stp x16, x17, [x2,#80]
|
||||
stp x25, x26, [x2,#96]
|
||||
|
||||
ldp x19, x20, [x29,#16]
|
||||
ldp x21, x22, [x29,#32]
|
||||
ldp x23, x24, [x29,#48]
|
||||
ldp x25, x26, [x29,#64]
|
||||
ldp x27, x28, [x29,#80]
|
||||
ldp x29, x30, [sp],#96
|
||||
ret
|
||||
.globl _sike_fprdc
|
||||
.private_extern _sike_fprdc
|
||||
.align 4
|
||||
_sike_fprdc:
|
||||
stp x29, x30, [sp, #-96]!
|
||||
add x29, sp, xzr
|
||||
stp x19, x20, [sp,#16]
|
||||
stp x21, x22, [sp,#32]
|
||||
stp x23, x24, [sp,#48]
|
||||
stp x25, x26, [sp,#64]
|
||||
stp x27, x28, [sp,#80]
|
||||
|
||||
ldp x2, x3, [x0,#0] // a[0-1]
|
||||
|
||||
// Load the prime constant
|
||||
adrp x26, Lp434p1@PAGE
|
||||
add x26, x26, Lp434p1@PAGEOFF
|
||||
ldp x23, x24, [x26, #0x0]
|
||||
ldp x25, x26, [x26,#0x10]
|
||||
|
||||
// a[0-1] * p434+1
|
||||
mul x4, x2, x23 // C0
|
||||
umulh x7, x2, x23
|
||||
|
||||
mul x5, x2, x24
|
||||
umulh x6, x2, x24
|
||||
|
||||
mul x10, x3, x23
|
||||
umulh x11, x3, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x2, x25
|
||||
umulh x28, x2, x25
|
||||
adds x5, x5, x10 // C1
|
||||
adcs x6, x6, x11
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x10, x3, x24
|
||||
umulh x11, x3, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x2, x26
|
||||
umulh x28, x2, x26
|
||||
adds x6, x6, x10 // C2
|
||||
adcs x7, x7, x11
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x10, x3, x25
|
||||
umulh x11, x3, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x3, x26
|
||||
umulh x28, x3, x26
|
||||
adds x7, x7, x10 // C3
|
||||
adcs x8, x8, x11
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
|
||||
ldp x10, x11, [x0, #0x18]
|
||||
ldp x12, x13, [x0, #0x28]
|
||||
ldp x14, x15, [x0, #0x38]
|
||||
ldp x16, x17, [x0, #0x48]
|
||||
ldp x19, x20, [x0, #0x58]
|
||||
ldr x21, [x0, #0x68]
|
||||
|
||||
adds x10, x10, x4
|
||||
adcs x11, x11, x5
|
||||
adcs x12, x12, x6
|
||||
adcs x13, x13, x7
|
||||
adcs x14, x14, x8
|
||||
adcs x15, x15, x9
|
||||
adcs x22, x16, xzr
|
||||
adcs x17, x17, xzr
|
||||
adcs x19, x19, xzr
|
||||
adcs x20, x20, xzr
|
||||
adc x21, x21, xzr
|
||||
|
||||
ldr x2, [x0,#0x10] // a[2]
|
||||
// a[2-3] * p434+1
|
||||
mul x4, x2, x23 // C0
|
||||
umulh x7, x2, x23
|
||||
|
||||
mul x5, x2, x24
|
||||
umulh x6, x2, x24
|
||||
|
||||
mul x0, x10, x23
|
||||
umulh x3, x10, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x2, x25
|
||||
umulh x28, x2, x25
|
||||
adds x5, x5, x0 // C1
|
||||
adcs x6, x6, x3
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x0, x10, x24
|
||||
umulh x3, x10, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x2, x26
|
||||
umulh x28, x2, x26
|
||||
adds x6, x6, x0 // C2
|
||||
adcs x7, x7, x3
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x0, x10, x25
|
||||
umulh x3, x10, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x10, x26
|
||||
umulh x28, x10, x26
|
||||
adds x7, x7, x0 // C3
|
||||
adcs x8, x8, x3
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
|
||||
adds x12, x12, x4
|
||||
adcs x13, x13, x5
|
||||
adcs x14, x14, x6
|
||||
adcs x15, x15, x7
|
||||
adcs x16, x22, x8
|
||||
adcs x17, x17, x9
|
||||
adcs x22, x19, xzr
|
||||
adcs x20, x20, xzr
|
||||
adc x21, x21, xzr
|
||||
|
||||
mul x4, x11, x23 // C0
|
||||
umulh x7, x11, x23
|
||||
|
||||
mul x5, x11, x24
|
||||
umulh x6, x11, x24
|
||||
|
||||
mul x10, x12, x23
|
||||
umulh x3, x12, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x11, x25
|
||||
umulh x28, x11, x25
|
||||
adds x5, x5, x10 // C1
|
||||
adcs x6, x6, x3
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x10, x12, x24
|
||||
umulh x3, x12, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x11, x26
|
||||
umulh x28, x11, x26
|
||||
adds x6, x6, x10 // C2
|
||||
adcs x7, x7, x3
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x10, x12, x25
|
||||
umulh x3, x12, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x12, x26
|
||||
umulh x28, x12, x26
|
||||
adds x7, x7, x10 // C3
|
||||
adcs x8, x8, x3
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
adds x14, x14, x4
|
||||
adcs x15, x15, x5
|
||||
adcs x16, x16, x6
|
||||
adcs x17, x17, x7
|
||||
adcs x19, x22, x8
|
||||
adcs x20, x20, x9
|
||||
adc x22, x21, xzr
|
||||
|
||||
stp x14, x15, [x1, #0x0] // C0, C1
|
||||
|
||||
mul x4, x13, x23 // C0
|
||||
umulh x10, x13, x23
|
||||
|
||||
mul x5, x13, x24
|
||||
umulh x27, x13, x24
|
||||
adds x5, x5, x10 // C1
|
||||
adc x10, xzr, xzr
|
||||
|
||||
mul x6, x13, x25
|
||||
umulh x28, x13, x25
|
||||
adds x27, x10, x27
|
||||
adcs x6, x6, x27 // C2
|
||||
adc x10, xzr, xzr
|
||||
|
||||
mul x7, x13, x26
|
||||
umulh x8, x13, x26
|
||||
adds x28, x10, x28
|
||||
adcs x7, x7, x28 // C3
|
||||
adc x8, x8, xzr // C4
|
||||
|
||||
adds x16, x16, x4
|
||||
adcs x17, x17, x5
|
||||
adcs x19, x19, x6
|
||||
adcs x20, x20, x7
|
||||
adc x21, x22, x8
|
||||
|
||||
str x16, [x1, #0x10]
|
||||
stp x17, x19, [x1, #0x18]
|
||||
stp x20, x21, [x1, #0x28]
|
||||
|
||||
ldp x19, x20, [x29,#16]
|
||||
ldp x21, x22, [x29,#32]
|
||||
ldp x23, x24, [x29,#48]
|
||||
ldp x25, x26, [x29,#64]
|
||||
ldp x27, x28, [x29,#80]
|
||||
ldp x29, x30, [sp],#96
|
||||
ret
|
||||
.globl _sike_fpadd
|
||||
.private_extern _sike_fpadd
|
||||
.align 4
|
||||
_sike_fpadd:
|
||||
stp x29,x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Add a + b
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
// Subtract 2xp434
|
||||
adrp x17, Lp434x2@PAGE
|
||||
add x17, x17, Lp434x2@PAGEOFF
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x12
|
||||
sbcs x6, x6, x13
|
||||
sbcs x7, x7, x14
|
||||
sbcs x8, x8, x15
|
||||
sbcs x9, x9, x16
|
||||
sbc x0, xzr, xzr // x0 can be reused now
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adc x9, x9, x16
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl _sike_fpsub
|
||||
.private_extern _sike_fpsub
|
||||
.align 4
|
||||
_sike_fpsub:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Subtract a - b
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
sbcs x9, x9, x17
|
||||
sbc x0, xzr, xzr
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
adrp x17, Lp434x2@PAGE
|
||||
add x17, x17, Lp434x2@PAGEOFF
|
||||
|
||||
// First half
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adc x9, x9, x16
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl _sike_mpadd_asm
|
||||
.private_extern _sike_mpadd_asm
|
||||
.align 4
|
||||
_sike_mpadd_asm:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl _sike_mpsubx2_asm
|
||||
.private_extern _sike_mpsubx2_asm
|
||||
.align 4
|
||||
_sike_mpsubx2_asm:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x11, x12, [x1,#32]
|
||||
ldp x13, x14, [x1,#48]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
sbcs x9, x9, x13
|
||||
sbcs x10, x10, x14
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
|
||||
ldp x3, x4, [x0,#64]
|
||||
ldp x5, x6, [x0,#80]
|
||||
ldp x11, x12, [x1,#64]
|
||||
ldp x13, x14, [x1,#80]
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#96]
|
||||
ldp x11, x12, [x1,#96]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
sbc x0, xzr, xzr
|
||||
|
||||
stp x3, x4, [x2,#64]
|
||||
stp x5, x6, [x2,#80]
|
||||
stp x7, x8, [x2,#96]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl _sike_mpdblsubx2_asm
|
||||
.private_extern _sike_mpdblsubx2_asm
|
||||
.align 4
|
||||
_sike_mpdblsubx2_asm:
|
||||
stp x29, x30, [sp, #-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x2, #0]
|
||||
ldp x5, x6, [x2,#16]
|
||||
ldp x7, x8, [x2,#32]
|
||||
|
||||
ldp x11, x12, [x0, #0]
|
||||
ldp x13, x14, [x0,#16]
|
||||
ldp x15, x16, [x0,#32]
|
||||
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
|
||||
// x9 stores carry
|
||||
adc x9, xzr, xzr
|
||||
|
||||
ldp x11, x12, [x1, #0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, x9, xzr
|
||||
|
||||
stp x3, x4, [x2, #0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
|
||||
ldp x3, x4, [x2,#48]
|
||||
ldp x5, x6, [x2,#64]
|
||||
ldp x7, x8, [x2,#80]
|
||||
|
||||
ldp x11, x12, [x0,#48]
|
||||
ldp x13, x14, [x0,#64]
|
||||
ldp x15, x16, [x0,#80]
|
||||
|
||||
// x9 = 2 - x9
|
||||
neg x9, x9
|
||||
add x9, x9, #2
|
||||
|
||||
subs x3, x3, x9
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, xzr, xzr
|
||||
|
||||
ldp x11, x12, [x1,#48]
|
||||
ldp x13, x14, [x1,#64]
|
||||
ldp x15, x16, [x1,#80]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, x9, xzr
|
||||
|
||||
stp x3, x4, [x2,#48]
|
||||
stp x5, x6, [x2,#64]
|
||||
stp x7, x8, [x2,#80]
|
||||
|
||||
ldp x3, x4, [x2,#96]
|
||||
ldp x11, x12, [x0,#96]
|
||||
ldp x13, x14, [x1,#96]
|
||||
|
||||
// x9 = 2 - x9
|
||||
neg x9, x9
|
||||
add x9, x9, #2
|
||||
|
||||
subs x3, x3, x9
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
subs x3, x3, x13
|
||||
sbc x4, x4, x14
|
||||
stp x3, x4, [x2,#96]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
#endif // !OPENSSL_NO_ASM
|
File diff suppressed because it is too large
Load Diff
@ -30,348 +30,6 @@
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
|
||||
.align 5
|
||||
rem_4bit:
|
||||
.short 0x0000,0x1C20,0x3840,0x2460
|
||||
.short 0x7080,0x6CA0,0x48C0,0x54E0
|
||||
.short 0xE100,0xFD20,0xD940,0xC560
|
||||
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func rem_4bit_get
|
||||
#endif
|
||||
rem_4bit_get:
|
||||
#if defined(__thumb2__)
|
||||
adr r2,rem_4bit
|
||||
#else
|
||||
sub r2,pc,#8+32 @ &rem_4bit
|
||||
#endif
|
||||
b Lrem_4bit_got
|
||||
nop
|
||||
nop
|
||||
|
||||
|
||||
.globl _gcm_ghash_4bit
|
||||
.private_extern _gcm_ghash_4bit
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_4bit
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_ghash_4bit:
|
||||
#if defined(__thumb2__)
|
||||
adr r12,rem_4bit
|
||||
#else
|
||||
sub r12,pc,#8+48 @ &rem_4bit
|
||||
#endif
|
||||
add r3,r2,r3 @ r3 to point at the end
|
||||
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
|
||||
|
||||
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
|
||||
|
||||
ldrb r12,[r2,#15]
|
||||
ldrb r14,[r0,#15]
|
||||
Louter:
|
||||
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
add r11,r1,r14
|
||||
ldrb r12,[r2,#14]
|
||||
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
add r14,r14,r14
|
||||
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[sp,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
ldrb r14,[r0,#14]
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16
|
||||
|
||||
Linner:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[sp,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r12,[r2,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r8,[r0,r3]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r9,[sp,r14]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
eorpl r12,r12,r8
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl Linner
|
||||
|
||||
ldr r3,[sp,#32] @ re-load r3/end
|
||||
add r2,r2,#16
|
||||
mov r14,r4
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
cmp r2,r3
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#ifdef __thumb2__
|
||||
it ne
|
||||
#endif
|
||||
ldrneb r12,[r2,#15]
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
bne Louter
|
||||
|
||||
add sp,sp,#36
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
|
||||
.globl _gcm_gmult_4bit
|
||||
.private_extern _gcm_gmult_4bit
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_4bit
|
||||
#endif
|
||||
_gcm_gmult_4bit:
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
ldrb r12,[r0,#15]
|
||||
b rem_4bit_get
|
||||
Lrem_4bit_got:
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
ldrb r12,[r0,#14]
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
add r14,r14,r14
|
||||
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
and r14,r12,#0xf0
|
||||
eor r7,r7,r8,lsl#16
|
||||
and r12,r12,#0x0f
|
||||
|
||||
Loop:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[r2,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r12,[r0,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl Loop
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
|
||||
|
||||
|
1265
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
1265
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -30,7 +30,6 @@
|
||||
.private_extern _abi_test_trampoline
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
Labi_test_trampoline_begin:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
@ -34,6 +34,7 @@
|
||||
.type ChaCha20_ctr32,%function
|
||||
.align 5
|
||||
ChaCha20_ctr32:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
cbz x2,.Labort
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x5,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
@ -47,6 +48,7 @@ ChaCha20_ctr32:
|
||||
b.ne ChaCha20_neon
|
||||
|
||||
.Lshort:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -259,6 +261,7 @@ ChaCha20_ctr32:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
.Labort:
|
||||
ret
|
||||
|
||||
@ -315,12 +318,14 @@ ChaCha20_ctr32:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size ChaCha20_ctr32,.-ChaCha20_ctr32
|
||||
|
||||
.type ChaCha20_neon,%function
|
||||
.align 5
|
||||
ChaCha20_neon:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -701,6 +706,7 @@ ChaCha20_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.Ltail_neon:
|
||||
@ -810,11 +816,13 @@ ChaCha20_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size ChaCha20_neon,.-ChaCha20_neon
|
||||
.type ChaCha20_512_neon,%function
|
||||
.align 5
|
||||
ChaCha20_512_neon:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1978,7 +1986,9 @@ ChaCha20_512_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size ChaCha20_512_neon,.-ChaCha20_512_neon
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -33,6 +33,8 @@
|
||||
.align 5
|
||||
aes_hw_set_encrypt_key:
|
||||
.Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
mov x3,#-1
|
||||
@ -201,6 +203,7 @@ aes_hw_set_encrypt_key:
|
||||
.type aes_hw_set_decrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
bl .Lenc_key
|
||||
@ -234,6 +237,7 @@ aes_hw_set_decrypt_key:
|
||||
eor x0,x0,x0 // return value
|
||||
.Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
|
||||
.globl aes_hw_encrypt
|
||||
@ -241,6 +245,7 @@ aes_hw_set_decrypt_key:
|
||||
.type aes_hw_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@ -271,6 +276,7 @@ aes_hw_encrypt:
|
||||
.type aes_hw_decrypt,%function
|
||||
.align 5
|
||||
aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@ -301,6 +307,8 @@ aes_hw_decrypt:
|
||||
.type aes_hw_cbc_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
subs x2,x2,#16
|
||||
@ -592,6 +600,8 @@ aes_hw_cbc_encrypt:
|
||||
.type aes_hw_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
@ -772,3 +782,4 @@ aes_hw_ctr32_encrypt_blocks:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl bn_mul_mont
|
||||
@ -20,6 +22,7 @@
|
||||
.type bn_mul_mont,%function
|
||||
.align 5
|
||||
bn_mul_mont:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
tst x5,#7
|
||||
b.eq __bn_sqr8x_mont
|
||||
tst x5,#3
|
||||
@ -217,11 +220,14 @@ bn_mul_mont:
|
||||
mov x0,#1
|
||||
ldp x23,x24,[x29,#48]
|
||||
ldr x29,[sp],#64
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size bn_mul_mont,.-bn_mul_mont
|
||||
.type __bn_sqr8x_mont,%function
|
||||
.align 5
|
||||
__bn_sqr8x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
|
||||
// only from bn_mul_mont which has already signed the return address.
|
||||
cmp x1,x2
|
||||
b.ne __bn_mul4x_mont
|
||||
.Lsqr8x_mont:
|
||||
@ -975,11 +981,16 @@ __bn_sqr8x_mont:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
|
||||
.type __bn_mul4x_mont,%function
|
||||
.align 5
|
||||
__bn_mul4x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
|
||||
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
|
||||
// return address.
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
stp x19,x20,[sp,#16]
|
||||
@ -1413,6 +1424,8 @@ __bn_mul4x_mont:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size __bn_mul4x_mont,.-__bn_mul4x_mont
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
@ -1420,3 +1433,4 @@ __bn_mul4x_mont:
|
||||
.align 4
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl gcm_init_neon
|
||||
@ -20,6 +22,7 @@
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
@ -45,6 +48,7 @@ gcm_init_neon:
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
@ -64,6 +68,7 @@ gcm_gmult_neon:
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
@ -338,3 +343,4 @@ gcm_ghash_neon:
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -22,6 +22,7 @@
|
||||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
@ -73,6 +74,7 @@ gcm_init_v8:
|
||||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
@ -115,6 +117,7 @@ gcm_gmult_v8:
|
||||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
@ -246,3 +249,4 @@ gcm_ghash_v8:
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -23,6 +23,8 @@
|
||||
.type sha1_block_data_order,%function
|
||||
.align 6
|
||||
sha1_block_data_order:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
#else
|
||||
@ -1090,6 +1092,8 @@ sha1_block_data_order:
|
||||
.type sha1_block_armv8,%function
|
||||
.align 6
|
||||
sha1_block_armv8:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
.Lv8_entry:
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
@ -1232,3 +1236,4 @@ sha1_block_armv8:
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -64,6 +64,7 @@
|
||||
.type sha256_block_data_order,%function
|
||||
.align 6
|
||||
sha256_block_data_order:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#ifndef __KERNEL__
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
@ -74,6 +75,7 @@ sha256_block_data_order:
|
||||
tst w16,#ARMV8_SHA256
|
||||
b.ne .Lv8_entry
|
||||
#endif
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1034,6 +1036,7 @@ sha256_block_data_order:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size sha256_block_data_order,.-sha256_block_data_order
|
||||
|
||||
@ -1068,6 +1071,7 @@ sha256_block_data_order:
|
||||
.align 6
|
||||
sha256_block_armv8:
|
||||
.Lv8_entry:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1210,3 +1214,4 @@ sha256_block_armv8:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -64,6 +64,7 @@
|
||||
.type sha512_block_data_order,%function
|
||||
.align 6
|
||||
sha512_block_data_order:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1024,6 +1025,7 @@ sha512_block_data_order:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size sha512_block_data_order,.-sha512_block_data_order
|
||||
|
||||
@ -1082,3 +1084,4 @@ sha512_block_data_order:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.section .rodata
|
||||
|
||||
.type _vpaes_consts,%object
|
||||
@ -215,6 +217,7 @@ _vpaes_encrypt_core:
|
||||
.type vpaes_encrypt,%function
|
||||
.align 4
|
||||
vpaes_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -224,6 +227,7 @@ vpaes_encrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_encrypt,.-vpaes_encrypt
|
||||
|
||||
@ -452,6 +456,7 @@ _vpaes_decrypt_core:
|
||||
.type vpaes_decrypt,%function
|
||||
.align 4
|
||||
vpaes_decrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -461,6 +466,7 @@ vpaes_decrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_decrypt,.-vpaes_decrypt
|
||||
|
||||
@ -630,6 +636,7 @@ _vpaes_key_preheat:
|
||||
.type _vpaes_schedule_core,%function
|
||||
.align 4
|
||||
_vpaes_schedule_core:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -799,6 +806,7 @@ _vpaes_schedule_core:
|
||||
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
|
||||
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
|
||||
ldp x29, x30, [sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size _vpaes_schedule_core,.-_vpaes_schedule_core
|
||||
|
||||
@ -1012,6 +1020,7 @@ _vpaes_schedule_mangle:
|
||||
.type vpaes_set_encrypt_key,%function
|
||||
.align 4
|
||||
vpaes_set_encrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1027,6 +1036,7 @@ vpaes_set_encrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
|
||||
|
||||
@ -1035,6 +1045,7 @@ vpaes_set_encrypt_key:
|
||||
.type vpaes_set_decrypt_key,%function
|
||||
.align 4
|
||||
vpaes_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1054,6 +1065,7 @@ vpaes_set_decrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
|
||||
.globl vpaes_cbc_encrypt
|
||||
@ -1061,6 +1073,7 @@ vpaes_set_decrypt_key:
|
||||
.type vpaes_cbc_encrypt,%function
|
||||
.align 4
|
||||
vpaes_cbc_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
cbz x2, .Lcbc_abort
|
||||
cmp w5, #0 // check direction
|
||||
b.eq vpaes_cbc_decrypt
|
||||
@ -1087,6 +1100,7 @@ vpaes_cbc_encrypt:
|
||||
st1 {v0.16b}, [x4] // write ivec
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
.Lcbc_abort:
|
||||
ret
|
||||
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
|
||||
@ -1094,6 +1108,8 @@ vpaes_cbc_encrypt:
|
||||
.type vpaes_cbc_decrypt,%function
|
||||
.align 4
|
||||
vpaes_cbc_decrypt:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
|
||||
// only from vpaes_cbc_encrypt which has already signed the return address.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1135,6 +1151,7 @@ vpaes_cbc_decrypt:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
|
||||
.globl vpaes_ctr32_encrypt_blocks
|
||||
@ -1142,6 +1159,7 @@ vpaes_cbc_decrypt:
|
||||
.type vpaes_ctr32_encrypt_blocks,%function
|
||||
.align 4
|
||||
vpaes_ctr32_encrypt_blocks:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1209,7 +1227,9 @@ vpaes_ctr32_encrypt_blocks:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ -27,6 +29,7 @@
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
.Labi_test_trampoline_begin:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
// Stack layout (low to high addresses)
|
||||
// x29,x30 (16 bytes)
|
||||
// d8-d15 (64 bytes)
|
||||
@ -129,6 +132,7 @@ abi_test_trampoline:
|
||||
ldp x27, x28, [sp, #144]
|
||||
|
||||
ldp x29, x30, [sp], #176
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size abi_test_trampoline,.-abi_test_trampoline
|
||||
.type abi_test_clobber_x0, %function
|
||||
@ -136,6 +140,7 @@ abi_test_trampoline:
|
||||
.hidden abi_test_clobber_x0
|
||||
.align 4
|
||||
abi_test_clobber_x0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x0, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x0,.-abi_test_clobber_x0
|
||||
@ -144,6 +149,7 @@ abi_test_clobber_x0:
|
||||
.hidden abi_test_clobber_x1
|
||||
.align 4
|
||||
abi_test_clobber_x1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x1, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x1,.-abi_test_clobber_x1
|
||||
@ -152,6 +158,7 @@ abi_test_clobber_x1:
|
||||
.hidden abi_test_clobber_x2
|
||||
.align 4
|
||||
abi_test_clobber_x2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x2, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x2,.-abi_test_clobber_x2
|
||||
@ -160,6 +167,7 @@ abi_test_clobber_x2:
|
||||
.hidden abi_test_clobber_x3
|
||||
.align 4
|
||||
abi_test_clobber_x3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x3, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x3,.-abi_test_clobber_x3
|
||||
@ -168,6 +176,7 @@ abi_test_clobber_x3:
|
||||
.hidden abi_test_clobber_x4
|
||||
.align 4
|
||||
abi_test_clobber_x4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x4, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x4,.-abi_test_clobber_x4
|
||||
@ -176,6 +185,7 @@ abi_test_clobber_x4:
|
||||
.hidden abi_test_clobber_x5
|
||||
.align 4
|
||||
abi_test_clobber_x5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x5, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x5,.-abi_test_clobber_x5
|
||||
@ -184,6 +194,7 @@ abi_test_clobber_x5:
|
||||
.hidden abi_test_clobber_x6
|
||||
.align 4
|
||||
abi_test_clobber_x6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x6, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x6,.-abi_test_clobber_x6
|
||||
@ -192,6 +203,7 @@ abi_test_clobber_x6:
|
||||
.hidden abi_test_clobber_x7
|
||||
.align 4
|
||||
abi_test_clobber_x7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x7, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x7,.-abi_test_clobber_x7
|
||||
@ -200,6 +212,7 @@ abi_test_clobber_x7:
|
||||
.hidden abi_test_clobber_x8
|
||||
.align 4
|
||||
abi_test_clobber_x8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x8, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x8,.-abi_test_clobber_x8
|
||||
@ -208,6 +221,7 @@ abi_test_clobber_x8:
|
||||
.hidden abi_test_clobber_x9
|
||||
.align 4
|
||||
abi_test_clobber_x9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x9, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x9,.-abi_test_clobber_x9
|
||||
@ -216,6 +230,7 @@ abi_test_clobber_x9:
|
||||
.hidden abi_test_clobber_x10
|
||||
.align 4
|
||||
abi_test_clobber_x10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x10, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x10,.-abi_test_clobber_x10
|
||||
@ -224,6 +239,7 @@ abi_test_clobber_x10:
|
||||
.hidden abi_test_clobber_x11
|
||||
.align 4
|
||||
abi_test_clobber_x11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x11, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x11,.-abi_test_clobber_x11
|
||||
@ -232,6 +248,7 @@ abi_test_clobber_x11:
|
||||
.hidden abi_test_clobber_x12
|
||||
.align 4
|
||||
abi_test_clobber_x12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x12, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x12,.-abi_test_clobber_x12
|
||||
@ -240,6 +257,7 @@ abi_test_clobber_x12:
|
||||
.hidden abi_test_clobber_x13
|
||||
.align 4
|
||||
abi_test_clobber_x13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x13, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x13,.-abi_test_clobber_x13
|
||||
@ -248,6 +266,7 @@ abi_test_clobber_x13:
|
||||
.hidden abi_test_clobber_x14
|
||||
.align 4
|
||||
abi_test_clobber_x14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x14, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x14,.-abi_test_clobber_x14
|
||||
@ -256,6 +275,7 @@ abi_test_clobber_x14:
|
||||
.hidden abi_test_clobber_x15
|
||||
.align 4
|
||||
abi_test_clobber_x15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x15, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x15,.-abi_test_clobber_x15
|
||||
@ -264,6 +284,7 @@ abi_test_clobber_x15:
|
||||
.hidden abi_test_clobber_x16
|
||||
.align 4
|
||||
abi_test_clobber_x16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x16, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x16,.-abi_test_clobber_x16
|
||||
@ -272,6 +293,7 @@ abi_test_clobber_x16:
|
||||
.hidden abi_test_clobber_x17
|
||||
.align 4
|
||||
abi_test_clobber_x17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x17, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x17,.-abi_test_clobber_x17
|
||||
@ -280,6 +302,7 @@ abi_test_clobber_x17:
|
||||
.hidden abi_test_clobber_x19
|
||||
.align 4
|
||||
abi_test_clobber_x19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x19, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x19,.-abi_test_clobber_x19
|
||||
@ -288,6 +311,7 @@ abi_test_clobber_x19:
|
||||
.hidden abi_test_clobber_x20
|
||||
.align 4
|
||||
abi_test_clobber_x20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x20, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x20,.-abi_test_clobber_x20
|
||||
@ -296,6 +320,7 @@ abi_test_clobber_x20:
|
||||
.hidden abi_test_clobber_x21
|
||||
.align 4
|
||||
abi_test_clobber_x21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x21, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x21,.-abi_test_clobber_x21
|
||||
@ -304,6 +329,7 @@ abi_test_clobber_x21:
|
||||
.hidden abi_test_clobber_x22
|
||||
.align 4
|
||||
abi_test_clobber_x22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x22, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x22,.-abi_test_clobber_x22
|
||||
@ -312,6 +338,7 @@ abi_test_clobber_x22:
|
||||
.hidden abi_test_clobber_x23
|
||||
.align 4
|
||||
abi_test_clobber_x23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x23,.-abi_test_clobber_x23
|
||||
@ -320,6 +347,7 @@ abi_test_clobber_x23:
|
||||
.hidden abi_test_clobber_x24
|
||||
.align 4
|
||||
abi_test_clobber_x24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x24,.-abi_test_clobber_x24
|
||||
@ -328,6 +356,7 @@ abi_test_clobber_x24:
|
||||
.hidden abi_test_clobber_x25
|
||||
.align 4
|
||||
abi_test_clobber_x25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x25,.-abi_test_clobber_x25
|
||||
@ -336,6 +365,7 @@ abi_test_clobber_x25:
|
||||
.hidden abi_test_clobber_x26
|
||||
.align 4
|
||||
abi_test_clobber_x26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x26,.-abi_test_clobber_x26
|
||||
@ -344,6 +374,7 @@ abi_test_clobber_x26:
|
||||
.hidden abi_test_clobber_x27
|
||||
.align 4
|
||||
abi_test_clobber_x27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x27,.-abi_test_clobber_x27
|
||||
@ -352,6 +383,7 @@ abi_test_clobber_x27:
|
||||
.hidden abi_test_clobber_x28
|
||||
.align 4
|
||||
abi_test_clobber_x28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x28,.-abi_test_clobber_x28
|
||||
@ -360,6 +392,7 @@ abi_test_clobber_x28:
|
||||
.hidden abi_test_clobber_x29
|
||||
.align 4
|
||||
abi_test_clobber_x29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x29,.-abi_test_clobber_x29
|
||||
@ -368,6 +401,7 @@ abi_test_clobber_x29:
|
||||
.hidden abi_test_clobber_d0
|
||||
.align 4
|
||||
abi_test_clobber_d0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d0, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d0,.-abi_test_clobber_d0
|
||||
@ -376,6 +410,7 @@ abi_test_clobber_d0:
|
||||
.hidden abi_test_clobber_d1
|
||||
.align 4
|
||||
abi_test_clobber_d1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d1, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d1,.-abi_test_clobber_d1
|
||||
@ -384,6 +419,7 @@ abi_test_clobber_d1:
|
||||
.hidden abi_test_clobber_d2
|
||||
.align 4
|
||||
abi_test_clobber_d2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d2, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d2,.-abi_test_clobber_d2
|
||||
@ -392,6 +428,7 @@ abi_test_clobber_d2:
|
||||
.hidden abi_test_clobber_d3
|
||||
.align 4
|
||||
abi_test_clobber_d3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d3, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d3,.-abi_test_clobber_d3
|
||||
@ -400,6 +437,7 @@ abi_test_clobber_d3:
|
||||
.hidden abi_test_clobber_d4
|
||||
.align 4
|
||||
abi_test_clobber_d4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d4, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d4,.-abi_test_clobber_d4
|
||||
@ -408,6 +446,7 @@ abi_test_clobber_d4:
|
||||
.hidden abi_test_clobber_d5
|
||||
.align 4
|
||||
abi_test_clobber_d5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d5, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d5,.-abi_test_clobber_d5
|
||||
@ -416,6 +455,7 @@ abi_test_clobber_d5:
|
||||
.hidden abi_test_clobber_d6
|
||||
.align 4
|
||||
abi_test_clobber_d6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d6, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d6,.-abi_test_clobber_d6
|
||||
@ -424,6 +464,7 @@ abi_test_clobber_d6:
|
||||
.hidden abi_test_clobber_d7
|
||||
.align 4
|
||||
abi_test_clobber_d7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d7, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d7,.-abi_test_clobber_d7
|
||||
@ -432,6 +473,7 @@ abi_test_clobber_d7:
|
||||
.hidden abi_test_clobber_d8
|
||||
.align 4
|
||||
abi_test_clobber_d8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d8, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d8,.-abi_test_clobber_d8
|
||||
@ -440,6 +482,7 @@ abi_test_clobber_d8:
|
||||
.hidden abi_test_clobber_d9
|
||||
.align 4
|
||||
abi_test_clobber_d9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d9, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d9,.-abi_test_clobber_d9
|
||||
@ -448,6 +491,7 @@ abi_test_clobber_d9:
|
||||
.hidden abi_test_clobber_d10
|
||||
.align 4
|
||||
abi_test_clobber_d10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d10, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d10,.-abi_test_clobber_d10
|
||||
@ -456,6 +500,7 @@ abi_test_clobber_d10:
|
||||
.hidden abi_test_clobber_d11
|
||||
.align 4
|
||||
abi_test_clobber_d11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d11, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d11,.-abi_test_clobber_d11
|
||||
@ -464,6 +509,7 @@ abi_test_clobber_d11:
|
||||
.hidden abi_test_clobber_d12
|
||||
.align 4
|
||||
abi_test_clobber_d12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d12, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d12,.-abi_test_clobber_d12
|
||||
@ -472,6 +518,7 @@ abi_test_clobber_d12:
|
||||
.hidden abi_test_clobber_d13
|
||||
.align 4
|
||||
abi_test_clobber_d13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d13, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d13,.-abi_test_clobber_d13
|
||||
@ -480,6 +527,7 @@ abi_test_clobber_d13:
|
||||
.hidden abi_test_clobber_d14
|
||||
.align 4
|
||||
abi_test_clobber_d14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d14, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d14,.-abi_test_clobber_d14
|
||||
@ -488,6 +536,7 @@ abi_test_clobber_d14:
|
||||
.hidden abi_test_clobber_d15
|
||||
.align 4
|
||||
abi_test_clobber_d15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d15, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
@ -496,6 +545,7 @@ abi_test_clobber_d15:
|
||||
.hidden abi_test_clobber_d16
|
||||
.align 4
|
||||
abi_test_clobber_d16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d16, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d16,.-abi_test_clobber_d16
|
||||
@ -504,6 +554,7 @@ abi_test_clobber_d16:
|
||||
.hidden abi_test_clobber_d17
|
||||
.align 4
|
||||
abi_test_clobber_d17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d17, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d17,.-abi_test_clobber_d17
|
||||
@ -512,6 +563,7 @@ abi_test_clobber_d17:
|
||||
.hidden abi_test_clobber_d18
|
||||
.align 4
|
||||
abi_test_clobber_d18:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d18, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d18,.-abi_test_clobber_d18
|
||||
@ -520,6 +572,7 @@ abi_test_clobber_d18:
|
||||
.hidden abi_test_clobber_d19
|
||||
.align 4
|
||||
abi_test_clobber_d19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d19, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d19,.-abi_test_clobber_d19
|
||||
@ -528,6 +581,7 @@ abi_test_clobber_d19:
|
||||
.hidden abi_test_clobber_d20
|
||||
.align 4
|
||||
abi_test_clobber_d20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d20, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d20,.-abi_test_clobber_d20
|
||||
@ -536,6 +590,7 @@ abi_test_clobber_d20:
|
||||
.hidden abi_test_clobber_d21
|
||||
.align 4
|
||||
abi_test_clobber_d21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d21, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d21,.-abi_test_clobber_d21
|
||||
@ -544,6 +599,7 @@ abi_test_clobber_d21:
|
||||
.hidden abi_test_clobber_d22
|
||||
.align 4
|
||||
abi_test_clobber_d22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d22, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d22,.-abi_test_clobber_d22
|
||||
@ -552,6 +608,7 @@ abi_test_clobber_d22:
|
||||
.hidden abi_test_clobber_d23
|
||||
.align 4
|
||||
abi_test_clobber_d23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d23,.-abi_test_clobber_d23
|
||||
@ -560,6 +617,7 @@ abi_test_clobber_d23:
|
||||
.hidden abi_test_clobber_d24
|
||||
.align 4
|
||||
abi_test_clobber_d24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d24,.-abi_test_clobber_d24
|
||||
@ -568,6 +626,7 @@ abi_test_clobber_d24:
|
||||
.hidden abi_test_clobber_d25
|
||||
.align 4
|
||||
abi_test_clobber_d25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d25,.-abi_test_clobber_d25
|
||||
@ -576,6 +635,7 @@ abi_test_clobber_d25:
|
||||
.hidden abi_test_clobber_d26
|
||||
.align 4
|
||||
abi_test_clobber_d26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d26,.-abi_test_clobber_d26
|
||||
@ -584,6 +644,7 @@ abi_test_clobber_d26:
|
||||
.hidden abi_test_clobber_d27
|
||||
.align 4
|
||||
abi_test_clobber_d27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d27,.-abi_test_clobber_d27
|
||||
@ -592,6 +653,7 @@ abi_test_clobber_d27:
|
||||
.hidden abi_test_clobber_d28
|
||||
.align 4
|
||||
abi_test_clobber_d28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d28,.-abi_test_clobber_d28
|
||||
@ -600,6 +662,7 @@ abi_test_clobber_d28:
|
||||
.hidden abi_test_clobber_d29
|
||||
.align 4
|
||||
abi_test_clobber_d29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d29,.-abi_test_clobber_d29
|
||||
@ -608,6 +671,7 @@ abi_test_clobber_d29:
|
||||
.hidden abi_test_clobber_d30
|
||||
.align 4
|
||||
abi_test_clobber_d30:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d30, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d30,.-abi_test_clobber_d30
|
||||
@ -616,6 +680,7 @@ abi_test_clobber_d30:
|
||||
.hidden abi_test_clobber_d31
|
||||
.align 4
|
||||
abi_test_clobber_d31:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d31, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d31,.-abi_test_clobber_d31
|
||||
@ -624,6 +689,7 @@ abi_test_clobber_d31:
|
||||
.hidden abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
abi_test_clobber_v8_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
|
||||
@ -632,6 +698,7 @@ abi_test_clobber_v8_upper:
|
||||
.hidden abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
abi_test_clobber_v9_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
|
||||
@ -640,6 +707,7 @@ abi_test_clobber_v9_upper:
|
||||
.hidden abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
abi_test_clobber_v10_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
|
||||
@ -648,6 +716,7 @@ abi_test_clobber_v10_upper:
|
||||
.hidden abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
abi_test_clobber_v11_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
|
||||
@ -656,6 +725,7 @@ abi_test_clobber_v11_upper:
|
||||
.hidden abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
abi_test_clobber_v12_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
|
||||
@ -664,6 +734,7 @@ abi_test_clobber_v12_upper:
|
||||
.hidden abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
abi_test_clobber_v13_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
|
||||
@ -672,6 +743,7 @@ abi_test_clobber_v13_upper:
|
||||
.hidden abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
abi_test_clobber_v14_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
|
||||
@ -680,8 +752,10 @@ abi_test_clobber_v14_upper:
|
||||
.hidden abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
abi_test_clobber_v15_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -1,998 +0,0 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.section .rodata
|
||||
|
||||
# p434 x 2
|
||||
.Lp434x2:
|
||||
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
|
||||
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
|
||||
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
|
||||
|
||||
# p434 + 1
|
||||
.Lp434p1:
|
||||
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
|
||||
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
|
||||
|
||||
.text
|
||||
.globl sike_mpmul
|
||||
.hidden sike_mpmul
|
||||
.align 4
|
||||
sike_mpmul:
|
||||
stp x29, x30, [sp,#-96]!
|
||||
add x29, sp, #0
|
||||
stp x19, x20, [sp,#16]
|
||||
stp x21, x22, [sp,#32]
|
||||
stp x23, x24, [sp,#48]
|
||||
stp x25, x26, [sp,#64]
|
||||
stp x27, x28, [sp,#80]
|
||||
|
||||
ldp x3, x4, [x0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x10, x11, [x1,#0]
|
||||
ldp x12, x13, [x1,#16]
|
||||
ldp x14, x15, [x1,#32]
|
||||
ldr x16, [x1,#48]
|
||||
|
||||
// x3-x7 <- AH + AL, x7 <- carry
|
||||
adds x3, x3, x7
|
||||
adcs x4, x4, x8
|
||||
adcs x5, x5, x9
|
||||
adcs x6, x6, xzr
|
||||
adc x7, xzr, xzr
|
||||
|
||||
// x10-x13 <- BH + BL, x8 <- carry
|
||||
adds x10, x10, x14
|
||||
adcs x11, x11, x15
|
||||
adcs x12, x12, x16
|
||||
adcs x13, x13, xzr
|
||||
adc x8, xzr, xzr
|
||||
|
||||
// x9 <- combined carry
|
||||
and x9, x7, x8
|
||||
// x7-x8 <- mask
|
||||
sub x7, xzr, x7
|
||||
sub x8, xzr, x8
|
||||
|
||||
// x15-x19 <- masked (BH + BL)
|
||||
and x14, x10, x7
|
||||
and x15, x11, x7
|
||||
and x16, x12, x7
|
||||
and x17, x13, x7
|
||||
|
||||
// x20-x23 <- masked (AH + AL)
|
||||
and x20, x3, x8
|
||||
and x21, x4, x8
|
||||
and x22, x5, x8
|
||||
and x23, x6, x8
|
||||
|
||||
// x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
|
||||
adds x14, x14, x20
|
||||
adcs x15, x15, x21
|
||||
adcs x16, x16, x22
|
||||
adcs x17, x17, x23
|
||||
adc x7, x9, xzr
|
||||
|
||||
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
|
||||
stp x3, x4, [x2,#0]
|
||||
// A0-A1 <- AH + AL, T0 <- mask
|
||||
adds x3, x3, x5
|
||||
adcs x4, x4, x6
|
||||
adc x25, xzr, xzr
|
||||
|
||||
// C6, T1 <- BH + BL, C7 <- mask
|
||||
adds x23, x10, x12
|
||||
adcs x26, x11, x13
|
||||
adc x24, xzr, xzr
|
||||
|
||||
// C0-C1 <- masked (BH + BL)
|
||||
sub x19, xzr, x25
|
||||
sub x20, xzr, x24
|
||||
and x8, x23, x19
|
||||
and x9, x26, x19
|
||||
|
||||
// C4-C5 <- masked (AH + AL), T0 <- combined carry
|
||||
and x21, x3, x20
|
||||
and x22, x4, x20
|
||||
mul x19, x3, x23
|
||||
mul x20, x3, x26
|
||||
and x25, x25, x24
|
||||
|
||||
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
|
||||
adds x8, x21, x8
|
||||
umulh x21, x3, x26
|
||||
adcs x9, x22, x9
|
||||
umulh x22, x3, x23
|
||||
adc x25, x25, xzr
|
||||
|
||||
// C2-C5 <- (AH+AL) x (BH+BL), low part
|
||||
mul x3, x4, x23
|
||||
umulh x23, x4, x23
|
||||
adds x20, x20, x22
|
||||
adc x21, x21, xzr
|
||||
|
||||
mul x24, x4, x26
|
||||
umulh x26, x4, x26
|
||||
adds x20, x20, x3
|
||||
adcs x21, x21, x23
|
||||
adc x22, xzr, xzr
|
||||
|
||||
adds x21, x21, x24
|
||||
adc x22, x22, x26
|
||||
|
||||
ldp x3, x4, [x2,#0]
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
|
||||
adds x21, x8, x21
|
||||
umulh x24, x3, x10
|
||||
umulh x26, x3, x11
|
||||
adcs x22, x9, x22
|
||||
mul x8, x3, x10
|
||||
mul x9, x3, x11
|
||||
adc x25, x25, xzr
|
||||
|
||||
// C0-C1, T1, C7 <- AL x BL
|
||||
mul x3, x4, x10
|
||||
umulh x10, x4, x10
|
||||
adds x9, x9, x24
|
||||
adc x26, x26, xzr
|
||||
|
||||
mul x23, x4, x11
|
||||
umulh x11, x4, x11
|
||||
adds x9, x9, x3
|
||||
adcs x26, x26, x10
|
||||
adc x24, xzr, xzr
|
||||
|
||||
adds x26, x26, x23
|
||||
adc x24, x24, x11
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
|
||||
mul x3, x5, x12
|
||||
umulh x10, x5, x12
|
||||
subs x19, x19, x8
|
||||
sbcs x20, x20, x9
|
||||
sbcs x21, x21, x26
|
||||
mul x4, x5, x13
|
||||
umulh x23, x5, x13
|
||||
sbcs x22, x22, x24
|
||||
sbc x25, x25, xzr
|
||||
|
||||
// A0, A1, C6, B0 <- AH x BH
|
||||
mul x5, x6, x12
|
||||
umulh x12, x6, x12
|
||||
adds x4, x4, x10
|
||||
adc x23, x23, xzr
|
||||
|
||||
mul x11, x6, x13
|
||||
umulh x13, x6, x13
|
||||
adds x4, x4, x5
|
||||
adcs x23, x23, x12
|
||||
adc x10, xzr, xzr
|
||||
|
||||
adds x23, x23, x11
|
||||
adc x10, x10, x13
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x19, x19, x3
|
||||
sbcs x20, x20, x4
|
||||
sbcs x21, x21, x23
|
||||
sbcs x22, x22, x10
|
||||
sbc x25, x25, xzr
|
||||
|
||||
adds x19, x19, x26
|
||||
adcs x20, x20, x24
|
||||
adcs x21, x21, x3
|
||||
adcs x22, x22, x4
|
||||
adcs x23, x25, x23
|
||||
adc x24, x10, xzr
|
||||
|
||||
|
||||
// x15-x19, x7 <- (AH+AL) x (BH+BL), final step
|
||||
adds x14, x14, x21
|
||||
adcs x15, x15, x22
|
||||
adcs x16, x16, x23
|
||||
adcs x17, x17, x24
|
||||
adc x7, x7, xzr
|
||||
|
||||
// Load AL
|
||||
ldp x3, x4, [x0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
// Load BL
|
||||
ldp x10, x11, [x1,#0]
|
||||
ldp x12, x13, [x1,#16]
|
||||
|
||||
// Temporarily store x8 in x2
|
||||
stp x8, x9, [x2,#0]
|
||||
// x21-x28 <- AL x BL
|
||||
// A0-A1 <- AH + AL, T0 <- mask
|
||||
adds x3, x3, x5
|
||||
adcs x4, x4, x6
|
||||
adc x8, xzr, xzr
|
||||
|
||||
// C6, T1 <- BH + BL, C7 <- mask
|
||||
adds x27, x10, x12
|
||||
adcs x9, x11, x13
|
||||
adc x28, xzr, xzr
|
||||
|
||||
// C0-C1 <- masked (BH + BL)
|
||||
sub x23, xzr, x8
|
||||
sub x24, xzr, x28
|
||||
and x21, x27, x23
|
||||
and x22, x9, x23
|
||||
|
||||
// C4-C5 <- masked (AH + AL), T0 <- combined carry
|
||||
and x25, x3, x24
|
||||
and x26, x4, x24
|
||||
mul x23, x3, x27
|
||||
mul x24, x3, x9
|
||||
and x8, x8, x28
|
||||
|
||||
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
|
||||
adds x21, x25, x21
|
||||
umulh x25, x3, x9
|
||||
adcs x22, x26, x22
|
||||
umulh x26, x3, x27
|
||||
adc x8, x8, xzr
|
||||
|
||||
// C2-C5 <- (AH+AL) x (BH+BL), low part
|
||||
mul x3, x4, x27
|
||||
umulh x27, x4, x27
|
||||
adds x24, x24, x26
|
||||
adc x25, x25, xzr
|
||||
|
||||
mul x28, x4, x9
|
||||
umulh x9, x4, x9
|
||||
adds x24, x24, x3
|
||||
adcs x25, x25, x27
|
||||
adc x26, xzr, xzr
|
||||
|
||||
adds x25, x25, x28
|
||||
adc x26, x26, x9
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
|
||||
adds x25, x21, x25
|
||||
umulh x28, x3, x10
|
||||
umulh x9, x3, x11
|
||||
adcs x26, x22, x26
|
||||
mul x21, x3, x10
|
||||
mul x22, x3, x11
|
||||
adc x8, x8, xzr
|
||||
|
||||
// C0-C1, T1, C7 <- AL x BL
|
||||
mul x3, x4, x10
|
||||
umulh x10, x4, x10
|
||||
adds x22, x22, x28
|
||||
adc x9, x9, xzr
|
||||
|
||||
mul x27, x4, x11
|
||||
umulh x11, x4, x11
|
||||
adds x22, x22, x3
|
||||
adcs x9, x9, x10
|
||||
adc x28, xzr, xzr
|
||||
|
||||
adds x9, x9, x27
|
||||
adc x28, x28, x11
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
|
||||
mul x3, x5, x12
|
||||
umulh x10, x5, x12
|
||||
subs x23, x23, x21
|
||||
sbcs x24, x24, x22
|
||||
sbcs x25, x25, x9
|
||||
mul x4, x5, x13
|
||||
umulh x27, x5, x13
|
||||
sbcs x26, x26, x28
|
||||
sbc x8, x8, xzr
|
||||
|
||||
// A0, A1, C6, B0 <- AH x BH
|
||||
mul x5, x6, x12
|
||||
umulh x12, x6, x12
|
||||
adds x4, x4, x10
|
||||
adc x27, x27, xzr
|
||||
|
||||
mul x11, x6, x13
|
||||
umulh x13, x6, x13
|
||||
adds x4, x4, x5
|
||||
adcs x27, x27, x12
|
||||
adc x10, xzr, xzr
|
||||
|
||||
adds x27, x27, x11
|
||||
adc x10, x10, x13
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x23, x23, x3
|
||||
sbcs x24, x24, x4
|
||||
sbcs x25, x25, x27
|
||||
sbcs x26, x26, x10
|
||||
sbc x8, x8, xzr
|
||||
|
||||
adds x23, x23, x9
|
||||
adcs x24, x24, x28
|
||||
adcs x25, x25, x3
|
||||
adcs x26, x26, x4
|
||||
adcs x27, x8, x27
|
||||
adc x28, x10, xzr
|
||||
|
||||
// Restore x8
|
||||
ldp x8, x9, [x2,#0]
|
||||
|
||||
// x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL
|
||||
subs x8, x8, x21
|
||||
sbcs x9, x9, x22
|
||||
sbcs x19, x19, x23
|
||||
sbcs x20, x20, x24
|
||||
sbcs x14, x14, x25
|
||||
sbcs x15, x15, x26
|
||||
sbcs x16, x16, x27
|
||||
sbcs x17, x17, x28
|
||||
sbc x7, x7, xzr
|
||||
|
||||
// Store ALxBL, low
|
||||
stp x21, x22, [x2]
|
||||
stp x23, x24, [x2,#16]
|
||||
|
||||
// Load AH
|
||||
ldp x3, x4, [x0,#32]
|
||||
ldr x5, [x0,#48]
|
||||
// Load BH
|
||||
ldp x10, x11, [x1,#32]
|
||||
ldr x12, [x1,#48]
|
||||
|
||||
adds x8, x8, x25
|
||||
adcs x9, x9, x26
|
||||
adcs x19, x19, x27
|
||||
adcs x20, x20, x28
|
||||
adc x1, xzr, xzr
|
||||
|
||||
add x0, x0, #32
|
||||
// Temporarily store x8,x9 in x2
|
||||
stp x8,x9, [x2,#32]
|
||||
// x21-x28 <- AH x BH
|
||||
|
||||
// A0 * B0
|
||||
mul x21, x3, x10 // C0
|
||||
umulh x24, x3, x10
|
||||
|
||||
// A0 * B1
|
||||
mul x22, x3, x11
|
||||
umulh x23, x3, x11
|
||||
|
||||
// A1 * B0
|
||||
mul x8, x4, x10
|
||||
umulh x9, x4, x10
|
||||
adds x22, x22, x24
|
||||
adc x23, x23, xzr
|
||||
|
||||
// A0 * B2
|
||||
mul x27, x3, x12
|
||||
umulh x28, x3, x12
|
||||
adds x22, x22, x8 // C1
|
||||
adcs x23, x23, x9
|
||||
adc x24, xzr, xzr
|
||||
|
||||
// A2 * B0
|
||||
mul x8, x5, x10
|
||||
umulh x25, x5, x10
|
||||
adds x23, x23, x27
|
||||
adcs x24, x24, x25
|
||||
adc x25, xzr, xzr
|
||||
|
||||
// A1 * B1
|
||||
mul x27, x4, x11
|
||||
umulh x9, x4, x11
|
||||
adds x23, x23, x8
|
||||
adcs x24, x24, x28
|
||||
adc x25, x25, xzr
|
||||
|
||||
// A1 * B2
|
||||
mul x8, x4, x12
|
||||
umulh x28, x4, x12
|
||||
adds x23, x23, x27 // C2
|
||||
adcs x24, x24, x9
|
||||
adc x25, x25, xzr
|
||||
|
||||
// A2 * B1
|
||||
mul x27, x5, x11
|
||||
umulh x9, x5, x11
|
||||
adds x24, x24, x8
|
||||
adcs x25, x25, x28
|
||||
adc x26, xzr, xzr
|
||||
|
||||
// A2 * B2
|
||||
mul x8, x5, x12
|
||||
umulh x28, x5, x12
|
||||
adds x24, x24, x27 // C3
|
||||
adcs x25, x25, x9
|
||||
adc x26, x26, xzr
|
||||
|
||||
adds x25, x25, x8 // C4
|
||||
adc x26, x26, x28 // C5
|
||||
|
||||
// Restore x8,x9
|
||||
ldp x8,x9, [x2,#32]
|
||||
|
||||
neg x1, x1
|
||||
|
||||
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x8, x8, x21
|
||||
sbcs x9, x9, x22
|
||||
sbcs x19, x19, x23
|
||||
sbcs x20, x20, x24
|
||||
sbcs x14, x14, x25
|
||||
sbcs x15, x15, x26
|
||||
sbcs x16, x16, xzr
|
||||
sbcs x17, x17, xzr
|
||||
sbc x7, x7, xzr
|
||||
|
||||
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
|
||||
stp x8, x9, [x2,#32]
|
||||
stp x19, x20, [x2,#48]
|
||||
|
||||
adds x1, x1, #1
|
||||
adcs x14, x14, x21
|
||||
adcs x15, x15, x22
|
||||
adcs x16, x16, x23
|
||||
adcs x17, x17, x24
|
||||
adcs x25, x7, x25
|
||||
adc x26, x26, xzr
|
||||
|
||||
stp x14, x15, [x2,#64]
|
||||
stp x16, x17, [x2,#80]
|
||||
stp x25, x26, [x2,#96]
|
||||
|
||||
ldp x19, x20, [x29,#16]
|
||||
ldp x21, x22, [x29,#32]
|
||||
ldp x23, x24, [x29,#48]
|
||||
ldp x25, x26, [x29,#64]
|
||||
ldp x27, x28, [x29,#80]
|
||||
ldp x29, x30, [sp],#96
|
||||
ret
|
||||
.globl sike_fprdc
|
||||
.hidden sike_fprdc
|
||||
.align 4
|
||||
sike_fprdc:
|
||||
stp x29, x30, [sp, #-96]!
|
||||
add x29, sp, xzr
|
||||
stp x19, x20, [sp,#16]
|
||||
stp x21, x22, [sp,#32]
|
||||
stp x23, x24, [sp,#48]
|
||||
stp x25, x26, [sp,#64]
|
||||
stp x27, x28, [sp,#80]
|
||||
|
||||
ldp x2, x3, [x0,#0] // a[0-1]
|
||||
|
||||
// Load the prime constant
|
||||
adrp x26, .Lp434p1
|
||||
add x26, x26, :lo12:.Lp434p1
|
||||
ldp x23, x24, [x26, #0x0]
|
||||
ldp x25, x26, [x26,#0x10]
|
||||
|
||||
// a[0-1] * p434+1
|
||||
mul x4, x2, x23 // C0
|
||||
umulh x7, x2, x23
|
||||
|
||||
mul x5, x2, x24
|
||||
umulh x6, x2, x24
|
||||
|
||||
mul x10, x3, x23
|
||||
umulh x11, x3, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x2, x25
|
||||
umulh x28, x2, x25
|
||||
adds x5, x5, x10 // C1
|
||||
adcs x6, x6, x11
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x10, x3, x24
|
||||
umulh x11, x3, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x2, x26
|
||||
umulh x28, x2, x26
|
||||
adds x6, x6, x10 // C2
|
||||
adcs x7, x7, x11
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x10, x3, x25
|
||||
umulh x11, x3, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x3, x26
|
||||
umulh x28, x3, x26
|
||||
adds x7, x7, x10 // C3
|
||||
adcs x8, x8, x11
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
|
||||
ldp x10, x11, [x0, #0x18]
|
||||
ldp x12, x13, [x0, #0x28]
|
||||
ldp x14, x15, [x0, #0x38]
|
||||
ldp x16, x17, [x0, #0x48]
|
||||
ldp x19, x20, [x0, #0x58]
|
||||
ldr x21, [x0, #0x68]
|
||||
|
||||
adds x10, x10, x4
|
||||
adcs x11, x11, x5
|
||||
adcs x12, x12, x6
|
||||
adcs x13, x13, x7
|
||||
adcs x14, x14, x8
|
||||
adcs x15, x15, x9
|
||||
adcs x22, x16, xzr
|
||||
adcs x17, x17, xzr
|
||||
adcs x19, x19, xzr
|
||||
adcs x20, x20, xzr
|
||||
adc x21, x21, xzr
|
||||
|
||||
ldr x2, [x0,#0x10] // a[2]
|
||||
// a[2-3] * p434+1
|
||||
mul x4, x2, x23 // C0
|
||||
umulh x7, x2, x23
|
||||
|
||||
mul x5, x2, x24
|
||||
umulh x6, x2, x24
|
||||
|
||||
mul x0, x10, x23
|
||||
umulh x3, x10, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x2, x25
|
||||
umulh x28, x2, x25
|
||||
adds x5, x5, x0 // C1
|
||||
adcs x6, x6, x3
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x0, x10, x24
|
||||
umulh x3, x10, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x2, x26
|
||||
umulh x28, x2, x26
|
||||
adds x6, x6, x0 // C2
|
||||
adcs x7, x7, x3
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x0, x10, x25
|
||||
umulh x3, x10, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x10, x26
|
||||
umulh x28, x10, x26
|
||||
adds x7, x7, x0 // C3
|
||||
adcs x8, x8, x3
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
|
||||
adds x12, x12, x4
|
||||
adcs x13, x13, x5
|
||||
adcs x14, x14, x6
|
||||
adcs x15, x15, x7
|
||||
adcs x16, x22, x8
|
||||
adcs x17, x17, x9
|
||||
adcs x22, x19, xzr
|
||||
adcs x20, x20, xzr
|
||||
adc x21, x21, xzr
|
||||
|
||||
mul x4, x11, x23 // C0
|
||||
umulh x7, x11, x23
|
||||
|
||||
mul x5, x11, x24
|
||||
umulh x6, x11, x24
|
||||
|
||||
mul x10, x12, x23
|
||||
umulh x3, x12, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x11, x25
|
||||
umulh x28, x11, x25
|
||||
adds x5, x5, x10 // C1
|
||||
adcs x6, x6, x3
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x10, x12, x24
|
||||
umulh x3, x12, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x11, x26
|
||||
umulh x28, x11, x26
|
||||
adds x6, x6, x10 // C2
|
||||
adcs x7, x7, x3
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x10, x12, x25
|
||||
umulh x3, x12, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x12, x26
|
||||
umulh x28, x12, x26
|
||||
adds x7, x7, x10 // C3
|
||||
adcs x8, x8, x3
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
adds x14, x14, x4
|
||||
adcs x15, x15, x5
|
||||
adcs x16, x16, x6
|
||||
adcs x17, x17, x7
|
||||
adcs x19, x22, x8
|
||||
adcs x20, x20, x9
|
||||
adc x22, x21, xzr
|
||||
|
||||
stp x14, x15, [x1, #0x0] // C0, C1
|
||||
|
||||
mul x4, x13, x23 // C0
|
||||
umulh x10, x13, x23
|
||||
|
||||
mul x5, x13, x24
|
||||
umulh x27, x13, x24
|
||||
adds x5, x5, x10 // C1
|
||||
adc x10, xzr, xzr
|
||||
|
||||
mul x6, x13, x25
|
||||
umulh x28, x13, x25
|
||||
adds x27, x10, x27
|
||||
adcs x6, x6, x27 // C2
|
||||
adc x10, xzr, xzr
|
||||
|
||||
mul x7, x13, x26
|
||||
umulh x8, x13, x26
|
||||
adds x28, x10, x28
|
||||
adcs x7, x7, x28 // C3
|
||||
adc x8, x8, xzr // C4
|
||||
|
||||
adds x16, x16, x4
|
||||
adcs x17, x17, x5
|
||||
adcs x19, x19, x6
|
||||
adcs x20, x20, x7
|
||||
adc x21, x22, x8
|
||||
|
||||
str x16, [x1, #0x10]
|
||||
stp x17, x19, [x1, #0x18]
|
||||
stp x20, x21, [x1, #0x28]
|
||||
|
||||
ldp x19, x20, [x29,#16]
|
||||
ldp x21, x22, [x29,#32]
|
||||
ldp x23, x24, [x29,#48]
|
||||
ldp x25, x26, [x29,#64]
|
||||
ldp x27, x28, [x29,#80]
|
||||
ldp x29, x30, [sp],#96
|
||||
ret
|
||||
.globl sike_fpadd
|
||||
.hidden sike_fpadd
|
||||
.align 4
|
||||
sike_fpadd:
|
||||
stp x29,x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Add a + b
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
// Subtract 2xp434
|
||||
adrp x17, .Lp434x2
|
||||
add x17, x17, :lo12:.Lp434x2
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x12
|
||||
sbcs x6, x6, x13
|
||||
sbcs x7, x7, x14
|
||||
sbcs x8, x8, x15
|
||||
sbcs x9, x9, x16
|
||||
sbc x0, xzr, xzr // x0 can be reused now
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adc x9, x9, x16
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_fpsub
|
||||
.hidden sike_fpsub
|
||||
.align 4
|
||||
sike_fpsub:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Subtract a - b
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
sbcs x9, x9, x17
|
||||
sbc x0, xzr, xzr
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
adrp x17, .Lp434x2
|
||||
add x17, x17, :lo12:.Lp434x2
|
||||
|
||||
// First half
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adc x9, x9, x16
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_mpadd_asm
|
||||
.hidden sike_mpadd_asm
|
||||
.align 4
|
||||
sike_mpadd_asm:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_mpsubx2_asm
|
||||
.hidden sike_mpsubx2_asm
|
||||
.align 4
|
||||
sike_mpsubx2_asm:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x11, x12, [x1,#32]
|
||||
ldp x13, x14, [x1,#48]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
sbcs x9, x9, x13
|
||||
sbcs x10, x10, x14
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
|
||||
ldp x3, x4, [x0,#64]
|
||||
ldp x5, x6, [x0,#80]
|
||||
ldp x11, x12, [x1,#64]
|
||||
ldp x13, x14, [x1,#80]
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#96]
|
||||
ldp x11, x12, [x1,#96]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
sbc x0, xzr, xzr
|
||||
|
||||
stp x3, x4, [x2,#64]
|
||||
stp x5, x6, [x2,#80]
|
||||
stp x7, x8, [x2,#96]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_mpdblsubx2_asm
|
||||
.hidden sike_mpdblsubx2_asm
|
||||
.align 4
|
||||
sike_mpdblsubx2_asm:
|
||||
stp x29, x30, [sp, #-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x2, #0]
|
||||
ldp x5, x6, [x2,#16]
|
||||
ldp x7, x8, [x2,#32]
|
||||
|
||||
ldp x11, x12, [x0, #0]
|
||||
ldp x13, x14, [x0,#16]
|
||||
ldp x15, x16, [x0,#32]
|
||||
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
|
||||
// x9 stores carry
|
||||
adc x9, xzr, xzr
|
||||
|
||||
ldp x11, x12, [x1, #0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, x9, xzr
|
||||
|
||||
stp x3, x4, [x2, #0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
|
||||
ldp x3, x4, [x2,#48]
|
||||
ldp x5, x6, [x2,#64]
|
||||
ldp x7, x8, [x2,#80]
|
||||
|
||||
ldp x11, x12, [x0,#48]
|
||||
ldp x13, x14, [x0,#64]
|
||||
ldp x15, x16, [x0,#80]
|
||||
|
||||
// x9 = 2 - x9
|
||||
neg x9, x9
|
||||
add x9, x9, #2
|
||||
|
||||
subs x3, x3, x9
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, xzr, xzr
|
||||
|
||||
ldp x11, x12, [x1,#48]
|
||||
ldp x13, x14, [x1,#64]
|
||||
ldp x15, x16, [x1,#80]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, x9, xzr
|
||||
|
||||
stp x3, x4, [x2,#48]
|
||||
stp x5, x6, [x2,#64]
|
||||
stp x7, x8, [x2,#80]
|
||||
|
||||
ldp x3, x4, [x2,#96]
|
||||
ldp x11, x12, [x0,#96]
|
||||
ldp x13, x14, [x1,#96]
|
||||
|
||||
// x9 = 2 - x9
|
||||
neg x9, x9
|
||||
add x9, x9, #2
|
||||
|
||||
subs x3, x3, x9
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
subs x3, x3, x13
|
||||
sbc x4, x4, x14
|
||||
stp x3, x4, [x2,#96]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
@ -1490,3 +1490,4 @@ ChaCha20_neon:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -778,3 +778,4 @@ aes_hw_ctr32_encrypt_blocks:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -974,3 +974,4 @@ bn_mul8x_mont_neon:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -1526,3 +1526,4 @@ bsaes_ctr32_encrypt_blocks:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -31,342 +31,6 @@
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
.type rem_4bit,%object
|
||||
.align 5
|
||||
rem_4bit:
|
||||
.short 0x0000,0x1C20,0x3840,0x2460
|
||||
.short 0x7080,0x6CA0,0x48C0,0x54E0
|
||||
.short 0xE100,0xFD20,0xD940,0xC560
|
||||
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
|
||||
.size rem_4bit,.-rem_4bit
|
||||
|
||||
.type rem_4bit_get,%function
|
||||
rem_4bit_get:
|
||||
#if defined(__thumb2__)
|
||||
adr r2,rem_4bit
|
||||
#else
|
||||
sub r2,pc,#8+32 @ &rem_4bit
|
||||
#endif
|
||||
b .Lrem_4bit_got
|
||||
nop
|
||||
nop
|
||||
.size rem_4bit_get,.-rem_4bit_get
|
||||
|
||||
.globl gcm_ghash_4bit
|
||||
.hidden gcm_ghash_4bit
|
||||
.type gcm_ghash_4bit,%function
|
||||
.align 4
|
||||
gcm_ghash_4bit:
|
||||
#if defined(__thumb2__)
|
||||
adr r12,rem_4bit
|
||||
#else
|
||||
sub r12,pc,#8+48 @ &rem_4bit
|
||||
#endif
|
||||
add r3,r2,r3 @ r3 to point at the end
|
||||
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
|
||||
|
||||
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
|
||||
|
||||
ldrb r12,[r2,#15]
|
||||
ldrb r14,[r0,#15]
|
||||
.Louter:
|
||||
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
add r11,r1,r14
|
||||
ldrb r12,[r2,#14]
|
||||
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
add r14,r14,r14
|
||||
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[sp,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
ldrb r14,[r0,#14]
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16
|
||||
|
||||
.Linner:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[sp,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r12,[r2,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r8,[r0,r3]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r9,[sp,r14]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
eorpl r12,r12,r8
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl .Linner
|
||||
|
||||
ldr r3,[sp,#32] @ re-load r3/end
|
||||
add r2,r2,#16
|
||||
mov r14,r4
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
cmp r2,r3
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#ifdef __thumb2__
|
||||
it ne
|
||||
#endif
|
||||
ldrneb r12,[r2,#15]
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
bne .Louter
|
||||
|
||||
add sp,sp,#36
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size gcm_ghash_4bit,.-gcm_ghash_4bit
|
||||
|
||||
.globl gcm_gmult_4bit
|
||||
.hidden gcm_gmult_4bit
|
||||
.type gcm_gmult_4bit,%function
|
||||
gcm_gmult_4bit:
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
ldrb r12,[r0,#15]
|
||||
b rem_4bit_get
|
||||
.Lrem_4bit_got:
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
ldrb r12,[r0,#14]
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
add r14,r14,r14
|
||||
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
and r14,r12,#0xf0
|
||||
eor r7,r7,r8,lsl#16
|
||||
and r12,r12,#0x0f
|
||||
|
||||
.Loop:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[r2,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r12,[r0,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl .Loop
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size gcm_gmult_4bit,.-gcm_gmult_4bit
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
@ -588,3 +252,4 @@ gcm_ghash_neon:
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -250,3 +250,4 @@ gcm_ghash_v8:
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -1508,3 +1508,4 @@ sha1_block_data_order_armv8:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -2836,3 +2836,4 @@ sha256_block_data_order_armv8:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -1891,3 +1891,4 @@ sha512_block_data_order_neon:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
1236
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
1236
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -31,7 +31,6 @@
|
||||
.hidden abi_test_trampoline
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
.Labi_test_trampoline_begin:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
@ -377,3 +376,4 @@ abi_test_clobber_d15:
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -3667,3 +3667,4 @@ _aesp8_xts_dec5x:
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
#endif // !OPENSSL_NO_ASM && __powerpc64__
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -584,3 +584,4 @@ gcm_ghash_p8:
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM && __powerpc64__
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
1410
contrib/boringssl-cmake/linux-ppc64le/crypto/test/trampoline-ppc.S
Normal file
1410
contrib/boringssl-cmake/linux-ppc64le/crypto/test/trampoline-ppc.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -972,3 +972,4 @@ ChaCha20_ssse3:
|
||||
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
|
||||
.byte 114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
#endif
|
||||
.globl aes_hw_encrypt
|
||||
.hidden aes_hw_encrypt
|
||||
@ -14,7 +14,7 @@
|
||||
.align 16
|
||||
aes_hw_encrypt:
|
||||
.L_aes_hw_encrypt_begin:
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L000pic
|
||||
@ -845,7 +845,7 @@ aes_hw_ctr32_encrypt_blocks:
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L038pic
|
||||
@ -2440,7 +2440,7 @@ _aesni_set_encrypt_key:
|
||||
.align 16
|
||||
aes_hw_set_encrypt_key:
|
||||
.L_aes_hw_set_encrypt_key_begin:
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L116pic
|
||||
@ -2510,3 +2510,4 @@ aes_hw_set_decrypt_key:
|
||||
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
|
||||
.byte 115,108,46,111,114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -993,551 +993,5 @@ bn_sub_words:
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_sub_words,.-.L_bn_sub_words_begin
|
||||
.globl bn_sub_part_words
|
||||
.hidden bn_sub_part_words
|
||||
.type bn_sub_part_words,@function
|
||||
.align 16
|
||||
bn_sub_part_words:
|
||||
.L_bn_sub_part_words_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl 20(%esp),%ebx
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edi
|
||||
movl 32(%esp),%ebp
|
||||
xorl %eax,%eax
|
||||
andl $4294967288,%ebp
|
||||
jz .L029aw_finish
|
||||
.L030aw_loop:
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,4(%ebx)
|
||||
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,8(%ebx)
|
||||
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,12(%ebx)
|
||||
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,16(%ebx)
|
||||
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,20(%ebx)
|
||||
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
|
||||
movl 28(%esi),%ecx
|
||||
movl 28(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,28(%ebx)
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%edi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz .L030aw_loop
|
||||
.L029aw_finish:
|
||||
movl 32(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
.L031aw_end:
|
||||
cmpl $0,36(%esp)
|
||||
je .L032pw_end
|
||||
movl 36(%esp),%ebp
|
||||
cmpl $0,%ebp
|
||||
je .L032pw_end
|
||||
jge .L033pw_pos
|
||||
|
||||
movl $0,%edx
|
||||
subl %ebp,%edx
|
||||
movl %edx,%ebp
|
||||
andl $4294967288,%ebp
|
||||
jz .L034pw_neg_finish
|
||||
.L035pw_neg_loop:
|
||||
|
||||
movl $0,%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,4(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,8(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,12(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,16(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,20(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 28(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,28(%ebx)
|
||||
|
||||
addl $32,%edi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz .L035pw_neg_loop
|
||||
.L034pw_neg_finish:
|
||||
movl 36(%esp),%edx
|
||||
movl $0,%ebp
|
||||
subl %edx,%ebp
|
||||
andl $7,%ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,4(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,8(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,12(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,16(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,20(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
jmp .L032pw_end
|
||||
.L033pw_pos:
|
||||
andl $4294967288,%ebp
|
||||
jz .L036pw_pos_finish
|
||||
.L037pw_pos_loop:
|
||||
|
||||
movl (%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,(%ebx)
|
||||
jnc .L038pw_nc0
|
||||
|
||||
movl 4(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
jnc .L039pw_nc1
|
||||
|
||||
movl 8(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
jnc .L040pw_nc2
|
||||
|
||||
movl 12(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
jnc .L041pw_nc3
|
||||
|
||||
movl 16(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
jnc .L042pw_nc4
|
||||
|
||||
movl 20(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
jnc .L043pw_nc5
|
||||
|
||||
movl 24(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
jnc .L044pw_nc6
|
||||
|
||||
movl 28(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,28(%ebx)
|
||||
jnc .L045pw_nc7
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz .L037pw_pos_loop
|
||||
.L036pw_pos_finish:
|
||||
movl 36(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,(%ebx)
|
||||
jnc .L046pw_tail_nc0
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 4(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
jnc .L047pw_tail_nc1
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 8(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
jnc .L048pw_tail_nc2
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 12(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
jnc .L049pw_tail_nc3
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 16(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
jnc .L050pw_tail_nc4
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 20(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
jnc .L051pw_tail_nc5
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 24(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
jnc .L052pw_tail_nc6
|
||||
movl $1,%eax
|
||||
jmp .L032pw_end
|
||||
.L053pw_nc_loop:
|
||||
movl (%esi),%ecx
|
||||
movl %ecx,(%ebx)
|
||||
.L038pw_nc0:
|
||||
movl 4(%esi),%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
.L039pw_nc1:
|
||||
movl 8(%esi),%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
.L040pw_nc2:
|
||||
movl 12(%esi),%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
.L041pw_nc3:
|
||||
movl 16(%esi),%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
.L042pw_nc4:
|
||||
movl 20(%esi),%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
.L043pw_nc5:
|
||||
movl 24(%esi),%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
.L044pw_nc6:
|
||||
movl 28(%esi),%ecx
|
||||
movl %ecx,28(%ebx)
|
||||
.L045pw_nc7:
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz .L053pw_nc_loop
|
||||
movl 36(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz .L054pw_nc_end
|
||||
movl (%esi),%ecx
|
||||
movl %ecx,(%ebx)
|
||||
.L046pw_tail_nc0:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 4(%esi),%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
.L047pw_tail_nc1:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 8(%esi),%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
.L048pw_tail_nc2:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 12(%esi),%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
.L049pw_tail_nc3:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 16(%esi),%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
.L050pw_tail_nc4:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 20(%esi),%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
.L051pw_tail_nc5:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 24(%esi),%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
.L052pw_tail_nc6:
|
||||
.L054pw_nc_end:
|
||||
movl $0,%eax
|
||||
.L032pw_end:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -1263,3 +1263,4 @@ bn_sqr_comba4:
|
||||
ret
|
||||
.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -291,3 +291,4 @@ gcm_ghash_ssse3:
|
||||
.Llow4_mask:
|
||||
.long 252645135,252645135,252645135,252645135
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -6,711 +6,6 @@
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.globl gcm_gmult_4bit_mmx
|
||||
.hidden gcm_gmult_4bit_mmx
|
||||
.type gcm_gmult_4bit_mmx,@function
|
||||
.align 16
|
||||
gcm_gmult_4bit_mmx:
|
||||
.L_gcm_gmult_4bit_mmx_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
call .L000pic_point
|
||||
.L000pic_point:
|
||||
popl %eax
|
||||
leal .Lrem_4bit-.L000pic_point(%eax),%eax
|
||||
movzbl 15(%edi),%ebx
|
||||
xorl %ecx,%ecx
|
||||
movl %ebx,%edx
|
||||
movb %dl,%cl
|
||||
movl $14,%ebp
|
||||
shlb $4,%cl
|
||||
andl $240,%edx
|
||||
movq 8(%esi,%ecx,1),%mm0
|
||||
movq (%esi,%ecx,1),%mm1
|
||||
movd %mm0,%ebx
|
||||
jmp .L001mmx_loop
|
||||
.align 16
|
||||
.L001mmx_loop:
|
||||
psrlq $4,%mm0
|
||||
andl $15,%ebx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%edx,1),%mm0
|
||||
movb (%edi,%ebp,1),%cl
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
decl %ebp
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%edx,1),%mm1
|
||||
movl %ecx,%edx
|
||||
pxor %mm2,%mm0
|
||||
js .L002mmx_break
|
||||
shlb $4,%cl
|
||||
andl $15,%ebx
|
||||
psrlq $4,%mm0
|
||||
andl $240,%edx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%ecx,1),%mm0
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%ecx,1),%mm1
|
||||
pxor %mm2,%mm0
|
||||
jmp .L001mmx_loop
|
||||
.align 16
|
||||
.L002mmx_break:
|
||||
shlb $4,%cl
|
||||
andl $15,%ebx
|
||||
psrlq $4,%mm0
|
||||
andl $240,%edx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%ecx,1),%mm0
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%ecx,1),%mm1
|
||||
pxor %mm2,%mm0
|
||||
psrlq $4,%mm0
|
||||
andl $15,%ebx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%edx,1),%mm0
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%edx,1),%mm1
|
||||
pxor %mm2,%mm0
|
||||
psrlq $32,%mm0
|
||||
movd %mm1,%edx
|
||||
psrlq $32,%mm1
|
||||
movd %mm0,%ecx
|
||||
movd %mm1,%ebp
|
||||
bswap %ebx
|
||||
bswap %edx
|
||||
bswap %ecx
|
||||
bswap %ebp
|
||||
emms
|
||||
movl %ebx,12(%edi)
|
||||
movl %edx,4(%edi)
|
||||
movl %ecx,8(%edi)
|
||||
movl %ebp,(%edi)
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
|
||||
.globl gcm_ghash_4bit_mmx
|
||||
.hidden gcm_ghash_4bit_mmx
|
||||
.type gcm_ghash_4bit_mmx,@function
|
||||
.align 16
|
||||
gcm_ghash_4bit_mmx:
|
||||
.L_gcm_ghash_4bit_mmx_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%eax
|
||||
movl 24(%esp),%ebx
|
||||
movl 28(%esp),%ecx
|
||||
movl 32(%esp),%edx
|
||||
movl %esp,%ebp
|
||||
call .L003pic_point
|
||||
.L003pic_point:
|
||||
popl %esi
|
||||
leal .Lrem_8bit-.L003pic_point(%esi),%esi
|
||||
subl $544,%esp
|
||||
andl $-64,%esp
|
||||
subl $16,%esp
|
||||
addl %ecx,%edx
|
||||
movl %eax,544(%esp)
|
||||
movl %edx,552(%esp)
|
||||
movl %ebp,556(%esp)
|
||||
addl $128,%ebx
|
||||
leal 144(%esp),%edi
|
||||
leal 400(%esp),%ebp
|
||||
movl -120(%ebx),%edx
|
||||
movq -120(%ebx),%mm0
|
||||
movq -128(%ebx),%mm3
|
||||
shll $4,%edx
|
||||
movb %dl,(%esp)
|
||||
movl -104(%ebx),%edx
|
||||
movq -104(%ebx),%mm2
|
||||
movq -112(%ebx),%mm5
|
||||
movq %mm0,-128(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,(%edi)
|
||||
movq %mm3,%mm7
|
||||
psrlq $4,%mm3
|
||||
shll $4,%edx
|
||||
movb %dl,1(%esp)
|
||||
movl -88(%ebx),%edx
|
||||
movq -88(%ebx),%mm1
|
||||
psllq $60,%mm7
|
||||
movq -96(%ebx),%mm4
|
||||
por %mm7,%mm0
|
||||
movq %mm2,-120(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,8(%edi)
|
||||
movq %mm5,%mm6
|
||||
movq %mm0,-128(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,2(%esp)
|
||||
movl -72(%ebx),%edx
|
||||
movq -72(%ebx),%mm0
|
||||
psllq $60,%mm6
|
||||
movq -80(%ebx),%mm3
|
||||
por %mm6,%mm2
|
||||
movq %mm1,-112(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,16(%edi)
|
||||
movq %mm4,%mm7
|
||||
movq %mm2,-120(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,8(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,3(%esp)
|
||||
movl -56(%ebx),%edx
|
||||
movq -56(%ebx),%mm2
|
||||
psllq $60,%mm7
|
||||
movq -64(%ebx),%mm5
|
||||
por %mm7,%mm1
|
||||
movq %mm0,-104(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,24(%edi)
|
||||
movq %mm3,%mm6
|
||||
movq %mm1,-112(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,16(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,4(%esp)
|
||||
movl -40(%ebx),%edx
|
||||
movq -40(%ebx),%mm1
|
||||
psllq $60,%mm6
|
||||
movq -48(%ebx),%mm4
|
||||
por %mm6,%mm0
|
||||
movq %mm2,-96(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,32(%edi)
|
||||
movq %mm5,%mm7
|
||||
movq %mm0,-104(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,24(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,5(%esp)
|
||||
movl -24(%ebx),%edx
|
||||
movq -24(%ebx),%mm0
|
||||
psllq $60,%mm7
|
||||
movq -32(%ebx),%mm3
|
||||
por %mm7,%mm2
|
||||
movq %mm1,-88(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,40(%edi)
|
||||
movq %mm4,%mm6
|
||||
movq %mm2,-96(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,32(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,6(%esp)
|
||||
movl -8(%ebx),%edx
|
||||
movq -8(%ebx),%mm2
|
||||
psllq $60,%mm6
|
||||
movq -16(%ebx),%mm5
|
||||
por %mm6,%mm1
|
||||
movq %mm0,-80(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,48(%edi)
|
||||
movq %mm3,%mm7
|
||||
movq %mm1,-88(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,40(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,7(%esp)
|
||||
movl 8(%ebx),%edx
|
||||
movq 8(%ebx),%mm1
|
||||
psllq $60,%mm7
|
||||
movq (%ebx),%mm4
|
||||
por %mm7,%mm0
|
||||
movq %mm2,-72(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,56(%edi)
|
||||
movq %mm5,%mm6
|
||||
movq %mm0,-80(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,48(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,8(%esp)
|
||||
movl 24(%ebx),%edx
|
||||
movq 24(%ebx),%mm0
|
||||
psllq $60,%mm6
|
||||
movq 16(%ebx),%mm3
|
||||
por %mm6,%mm2
|
||||
movq %mm1,-64(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,64(%edi)
|
||||
movq %mm4,%mm7
|
||||
movq %mm2,-72(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,56(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,9(%esp)
|
||||
movl 40(%ebx),%edx
|
||||
movq 40(%ebx),%mm2
|
||||
psllq $60,%mm7
|
||||
movq 32(%ebx),%mm5
|
||||
por %mm7,%mm1
|
||||
movq %mm0,-56(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,72(%edi)
|
||||
movq %mm3,%mm6
|
||||
movq %mm1,-64(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,64(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,10(%esp)
|
||||
movl 56(%ebx),%edx
|
||||
movq 56(%ebx),%mm1
|
||||
psllq $60,%mm6
|
||||
movq 48(%ebx),%mm4
|
||||
por %mm6,%mm0
|
||||
movq %mm2,-48(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,80(%edi)
|
||||
movq %mm5,%mm7
|
||||
movq %mm0,-56(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,72(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,11(%esp)
|
||||
movl 72(%ebx),%edx
|
||||
movq 72(%ebx),%mm0
|
||||
psllq $60,%mm7
|
||||
movq 64(%ebx),%mm3
|
||||
por %mm7,%mm2
|
||||
movq %mm1,-40(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,88(%edi)
|
||||
movq %mm4,%mm6
|
||||
movq %mm2,-48(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,80(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,12(%esp)
|
||||
movl 88(%ebx),%edx
|
||||
movq 88(%ebx),%mm2
|
||||
psllq $60,%mm6
|
||||
movq 80(%ebx),%mm5
|
||||
por %mm6,%mm1
|
||||
movq %mm0,-32(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,96(%edi)
|
||||
movq %mm3,%mm7
|
||||
movq %mm1,-40(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,88(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,13(%esp)
|
||||
movl 104(%ebx),%edx
|
||||
movq 104(%ebx),%mm1
|
||||
psllq $60,%mm7
|
||||
movq 96(%ebx),%mm4
|
||||
por %mm7,%mm0
|
||||
movq %mm2,-24(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,104(%edi)
|
||||
movq %mm5,%mm6
|
||||
movq %mm0,-32(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,96(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,14(%esp)
|
||||
movl 120(%ebx),%edx
|
||||
movq 120(%ebx),%mm0
|
||||
psllq $60,%mm6
|
||||
movq 112(%ebx),%mm3
|
||||
por %mm6,%mm2
|
||||
movq %mm1,-16(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,112(%edi)
|
||||
movq %mm4,%mm7
|
||||
movq %mm2,-24(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,104(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,15(%esp)
|
||||
psllq $60,%mm7
|
||||
por %mm7,%mm1
|
||||
movq %mm0,-8(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,120(%edi)
|
||||
movq %mm3,%mm6
|
||||
movq %mm1,-16(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,112(%ebp)
|
||||
psllq $60,%mm6
|
||||
por %mm6,%mm0
|
||||
movq %mm0,-8(%ebp)
|
||||
movq %mm3,120(%ebp)
|
||||
movq (%eax),%mm6
|
||||
movl 8(%eax),%ebx
|
||||
movl 12(%eax),%edx
|
||||
.align 16
|
||||
.L004outer:
|
||||
xorl 12(%ecx),%edx
|
||||
xorl 8(%ecx),%ebx
|
||||
pxor (%ecx),%mm6
|
||||
leal 16(%ecx),%ecx
|
||||
movl %ebx,536(%esp)
|
||||
movq %mm6,528(%esp)
|
||||
movl %ecx,548(%esp)
|
||||
xorl %eax,%eax
|
||||
roll $8,%edx
|
||||
movb %dl,%al
|
||||
movl %eax,%ebp
|
||||
andb $15,%al
|
||||
shrl $4,%ebp
|
||||
pxor %mm0,%mm0
|
||||
roll $8,%edx
|
||||
pxor %mm1,%mm1
|
||||
pxor %mm2,%mm2
|
||||
movq 16(%esp,%eax,8),%mm7
|
||||
movq 144(%esp,%eax,8),%mm6
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
shrl $4,%edi
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 536(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 532(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 528(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 524(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
movzbl %bl,%ebx
|
||||
pxor %mm2,%mm2
|
||||
psllq $4,%mm1
|
||||
movd %mm7,%ecx
|
||||
psrlq $4,%mm7
|
||||
movq %mm6,%mm3
|
||||
psrlq $4,%mm6
|
||||
shll $4,%ecx
|
||||
pxor 16(%esp,%edi,8),%mm7
|
||||
psllq $60,%mm3
|
||||
movzbl %cl,%ecx
|
||||
pxor %mm3,%mm7
|
||||
pxor 144(%esp,%edi,8),%mm6
|
||||
pinsrw $2,(%esi,%ebx,2),%mm0
|
||||
pxor %mm1,%mm6
|
||||
movd %mm7,%edx
|
||||
pinsrw $3,(%esi,%ecx,2),%mm2
|
||||
psllq $12,%mm0
|
||||
pxor %mm0,%mm6
|
||||
psrlq $32,%mm7
|
||||
pxor %mm2,%mm6
|
||||
movl 548(%esp),%ecx
|
||||
movd %mm7,%ebx
|
||||
movq %mm6,%mm3
|
||||
psllw $8,%mm6
|
||||
psrlw $8,%mm3
|
||||
por %mm3,%mm6
|
||||
bswap %edx
|
||||
pshufw $27,%mm6,%mm6
|
||||
bswap %ebx
|
||||
cmpl 552(%esp),%ecx
|
||||
jne .L004outer
|
||||
movl 544(%esp),%eax
|
||||
movl %edx,12(%eax)
|
||||
movl %ebx,8(%eax)
|
||||
movq %mm6,(%eax)
|
||||
movl 556(%esp),%esp
|
||||
emms
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
|
||||
.globl gcm_init_clmul
|
||||
.hidden gcm_init_clmul
|
||||
.type gcm_init_clmul,@function
|
||||
@ -719,10 +14,10 @@ gcm_init_clmul:
|
||||
.L_gcm_init_clmul_begin:
|
||||
movl 4(%esp),%edx
|
||||
movl 8(%esp),%eax
|
||||
call .L005pic
|
||||
.L005pic:
|
||||
call .L000pic
|
||||
.L000pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L005pic(%ecx),%ecx
|
||||
leal .Lbswap-.L000pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm2
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $255,%xmm2,%xmm4
|
||||
@ -789,10 +84,10 @@ gcm_gmult_clmul:
|
||||
.L_gcm_gmult_clmul_begin:
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
call .L006pic
|
||||
.L006pic:
|
||||
call .L001pic
|
||||
.L001pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L006pic(%ecx),%ecx
|
||||
leal .Lbswap-.L001pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
movups (%edx),%xmm2
|
||||
@ -849,16 +144,16 @@ gcm_ghash_clmul:
|
||||
movl 24(%esp),%edx
|
||||
movl 28(%esp),%esi
|
||||
movl 32(%esp),%ebx
|
||||
call .L007pic
|
||||
.L007pic:
|
||||
call .L002pic
|
||||
.L002pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L007pic(%ecx),%ecx
|
||||
leal .Lbswap-.L002pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
movdqu (%edx),%xmm2
|
||||
.byte 102,15,56,0,197
|
||||
subl $16,%ebx
|
||||
jz .L008odd_tail
|
||||
jz .L003odd_tail
|
||||
movdqu (%esi),%xmm3
|
||||
movdqu 16(%esi),%xmm6
|
||||
.byte 102,15,56,0,221
|
||||
@ -875,10 +170,10 @@ gcm_ghash_clmul:
|
||||
movups 16(%edx),%xmm2
|
||||
nop
|
||||
subl $32,%ebx
|
||||
jbe .L009even_tail
|
||||
jmp .L010mod_loop
|
||||
jbe .L004even_tail
|
||||
jmp .L005mod_loop
|
||||
.align 32
|
||||
.L010mod_loop:
|
||||
.L005mod_loop:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
@ -933,8 +228,8 @@ gcm_ghash_clmul:
|
||||
.byte 102,15,58,68,221,0
|
||||
leal 32(%esi),%esi
|
||||
subl $32,%ebx
|
||||
ja .L010mod_loop
|
||||
.L009even_tail:
|
||||
ja .L005mod_loop
|
||||
.L004even_tail:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
@ -973,9 +268,9 @@ gcm_ghash_clmul:
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
testl %ebx,%ebx
|
||||
jnz .L011done
|
||||
jnz .L006done
|
||||
movups (%edx),%xmm2
|
||||
.L008odd_tail:
|
||||
.L003odd_tail:
|
||||
movdqu (%esi),%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
pxor %xmm3,%xmm0
|
||||
@ -1014,7 +309,7 @@ gcm_ghash_clmul:
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
.L011done:
|
||||
.L006done:
|
||||
.byte 102,15,56,0,197
|
||||
movdqu %xmm0,(%eax)
|
||||
popl %edi
|
||||
@ -1027,48 +322,9 @@ gcm_ghash_clmul:
|
||||
.Lbswap:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
|
||||
.align 64
|
||||
.Lrem_8bit:
|
||||
.value 0,450,900,582,1800,1738,1164,1358
|
||||
.value 3600,4050,3476,3158,2328,2266,2716,2910
|
||||
.value 7200,7650,8100,7782,6952,6890,6316,6510
|
||||
.value 4656,5106,4532,4214,5432,5370,5820,6014
|
||||
.value 14400,14722,15300,14854,16200,16010,15564,15630
|
||||
.value 13904,14226,13780,13334,12632,12442,13020,13086
|
||||
.value 9312,9634,10212,9766,9064,8874,8428,8494
|
||||
.value 10864,11186,10740,10294,11640,11450,12028,12094
|
||||
.value 28800,28994,29444,29382,30600,30282,29708,30158
|
||||
.value 32400,32594,32020,31958,31128,30810,31260,31710
|
||||
.value 27808,28002,28452,28390,27560,27242,26668,27118
|
||||
.value 25264,25458,24884,24822,26040,25722,26172,26622
|
||||
.value 18624,18690,19268,19078,20424,19978,19532,19854
|
||||
.value 18128,18194,17748,17558,16856,16410,16988,17310
|
||||
.value 21728,21794,22372,22182,21480,21034,20588,20910
|
||||
.value 23280,23346,22900,22710,24056,23610,24188,24510
|
||||
.value 57600,57538,57988,58182,58888,59338,58764,58446
|
||||
.value 61200,61138,60564,60758,59416,59866,60316,59998
|
||||
.value 64800,64738,65188,65382,64040,64490,63916,63598
|
||||
.value 62256,62194,61620,61814,62520,62970,63420,63102
|
||||
.value 55616,55426,56004,56070,56904,57226,56780,56334
|
||||
.value 55120,54930,54484,54550,53336,53658,54236,53790
|
||||
.value 50528,50338,50916,50982,49768,50090,49644,49198
|
||||
.value 52080,51890,51444,51510,52344,52666,53244,52798
|
||||
.value 37248,36930,37380,37830,38536,38730,38156,38094
|
||||
.value 40848,40530,39956,40406,39064,39258,39708,39646
|
||||
.value 36256,35938,36388,36838,35496,35690,35116,35054
|
||||
.value 33712,33394,32820,33270,33976,34170,34620,34558
|
||||
.value 43456,43010,43588,43910,44744,44810,44364,44174
|
||||
.value 42960,42514,42068,42390,41176,41242,41820,41630
|
||||
.value 46560,46114,46692,47014,45800,45866,45420,45230
|
||||
.value 48112,47666,47220,47542,48376,48442,49020,48830
|
||||
.align 64
|
||||
.Lrem_4bit:
|
||||
.long 0,0,0,471859200,0,943718400,0,610271232
|
||||
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
|
||||
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
|
||||
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
|
||||
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
|
||||
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
|
||||
.byte 0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -685,3 +685,4 @@ md5_block_asm_data_order:
|
||||
ret
|
||||
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -3805,3 +3805,4 @@ _sha1_block_data_order_avx:
|
||||
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
|
||||
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -5564,3 +5564,4 @@ sha256_block_data_order:
|
||||
ret
|
||||
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -2834,3 +2834,4 @@ sha512_block_data_order:
|
||||
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
|
||||
.byte 62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
#endif
|
||||
.align 64
|
||||
.L_vpaes_consts:
|
||||
@ -485,7 +485,7 @@ vpaes_set_encrypt_key:
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L016pic
|
||||
@ -570,7 +570,7 @@ vpaes_encrypt:
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L019pic
|
||||
@ -705,3 +705,4 @@ vpaes_cbc_encrypt:
|
||||
ret
|
||||
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -481,3 +481,4 @@ bn_mul_mont:
|
||||
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
||||
.byte 111,114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -203,3 +203,4 @@ abi_test_clobber_xmm7:
|
||||
ret
|
||||
.size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -1630,3 +1630,4 @@ ChaCha20_8x:
|
||||
.cfi_endproc
|
||||
.size ChaCha20_8x,.-ChaCha20_8x
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -3076,3 +3076,4 @@ aes256gcmsiv_kdf:
|
||||
.cfi_endproc
|
||||
.size aes256gcmsiv_kdf, .-aes256gcmsiv_kdf
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -3935,7 +3935,7 @@ do_length_block:
|
||||
popq %rbp
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_adjust_cfa_offset (8 * 6) + 288 + 32
|
||||
.cfi_adjust_cfa_offset (8 * 7) + 288 + 32
|
||||
|
||||
seal_sse_128:
|
||||
movdqu .chacha20_consts(%rip),%xmm0
|
||||
@ -8984,3 +8984,4 @@ seal_avx2_short_tail:
|
||||
jmp seal_sse_tail_16
|
||||
.cfi_endproc
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -556,12 +556,10 @@ _aesni_ctr32_6x:
|
||||
.align 32
|
||||
aesni_gcm_encrypt:
|
||||
.cfi_startproc
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
.extern BORINGSSL_function_hit
|
||||
.hidden BORINGSSL_function_hit
|
||||
movb $1,BORINGSSL_function_hit+2(%rip)
|
||||
#endif
|
||||
#endif
|
||||
xorq %r10,%r10
|
||||
|
||||
@ -851,3 +849,4 @@ aesni_gcm_encrypt:
|
||||
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -20,12 +20,10 @@
|
||||
.align 16
|
||||
aes_hw_encrypt:
|
||||
.cfi_startproc
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
.extern BORINGSSL_function_hit
|
||||
.hidden BORINGSSL_function_hit
|
||||
movb $1,BORINGSSL_function_hit+1(%rip)
|
||||
#endif
|
||||
#endif
|
||||
movups (%rdi),%xmm2
|
||||
movl 240(%rdx),%eax
|
||||
@ -887,10 +885,8 @@ aes_hw_ecb_encrypt:
|
||||
.align 16
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
.cfi_startproc
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
movb $1,BORINGSSL_function_hit(%rip)
|
||||
#endif
|
||||
#endif
|
||||
cmpq $1,%rdx
|
||||
jne .Lctr32_bulk
|
||||
@ -2111,11 +2107,9 @@ aes_hw_set_decrypt_key:
|
||||
aes_hw_set_encrypt_key:
|
||||
__aesni_set_encrypt_key:
|
||||
.cfi_startproc
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
movb $1,BORINGSSL_function_hit+3(%rip)
|
||||
#endif
|
||||
#endif
|
||||
.byte 0x48,0x83,0xEC,0x08
|
||||
.cfi_adjust_cfa_offset 8
|
||||
movq $-1,%rax
|
||||
@ -2509,3 +2503,4 @@ __aesni_set_encrypt_key:
|
||||
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -424,3 +424,4 @@ gcm_ghash_ssse3:
|
||||
.Llow4_mask:
|
||||
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -14,709 +14,6 @@
|
||||
.text
|
||||
.extern OPENSSL_ia32cap_P
|
||||
.hidden OPENSSL_ia32cap_P
|
||||
|
||||
.globl gcm_gmult_4bit
|
||||
.hidden gcm_gmult_4bit
|
||||
.type gcm_gmult_4bit,@function
|
||||
.align 16
|
||||
gcm_gmult_4bit:
|
||||
.cfi_startproc
|
||||
pushq %rbx
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r15,-56
|
||||
subq $280,%rsp
|
||||
.cfi_adjust_cfa_offset 280
|
||||
.Lgmult_prologue:
|
||||
|
||||
movzbq 15(%rdi),%r8
|
||||
leaq .Lrem_4bit(%rip),%r11
|
||||
xorq %rax,%rax
|
||||
xorq %rbx,%rbx
|
||||
movb %r8b,%al
|
||||
movb %r8b,%bl
|
||||
shlb $4,%al
|
||||
movq $14,%rcx
|
||||
movq 8(%rsi,%rax,1),%r8
|
||||
movq (%rsi,%rax,1),%r9
|
||||
andb $0xf0,%bl
|
||||
movq %r8,%rdx
|
||||
jmp .Loop1
|
||||
|
||||
.align 16
|
||||
.Loop1:
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
movb (%rdi,%rcx,1),%al
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rbx,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rbx,1),%r9
|
||||
movb %al,%bl
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
movq %r8,%rdx
|
||||
shlb $4,%al
|
||||
xorq %r10,%r8
|
||||
decq %rcx
|
||||
js .Lbreak1
|
||||
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
andb $0xf0,%bl
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
movq %r8,%rdx
|
||||
xorq %r10,%r8
|
||||
jmp .Loop1
|
||||
|
||||
.align 16
|
||||
.Lbreak1:
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
andb $0xf0,%bl
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
movq %r8,%rdx
|
||||
xorq %r10,%r8
|
||||
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rbx,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rbx,1),%r9
|
||||
xorq %r10,%r8
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
|
||||
bswapq %r8
|
||||
bswapq %r9
|
||||
movq %r8,8(%rdi)
|
||||
movq %r9,(%rdi)
|
||||
|
||||
leaq 280+48(%rsp),%rsi
|
||||
.cfi_def_cfa %rsi,8
|
||||
movq -8(%rsi),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%rsi),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lgmult_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size gcm_gmult_4bit,.-gcm_gmult_4bit
|
||||
.globl gcm_ghash_4bit
|
||||
.hidden gcm_ghash_4bit
|
||||
.type gcm_ghash_4bit,@function
|
||||
.align 16
|
||||
gcm_ghash_4bit:
|
||||
.cfi_startproc
|
||||
pushq %rbx
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r15,-56
|
||||
subq $280,%rsp
|
||||
.cfi_adjust_cfa_offset 280
|
||||
.Lghash_prologue:
|
||||
movq %rdx,%r14
|
||||
movq %rcx,%r15
|
||||
subq $-128,%rsi
|
||||
leaq 16+128(%rsp),%rbp
|
||||
xorl %edx,%edx
|
||||
movq 0+0-128(%rsi),%r8
|
||||
movq 0+8-128(%rsi),%rax
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq 16+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq 16+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,0(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,0(%rbp)
|
||||
movq 32+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,0-128(%rbp)
|
||||
movq 32+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,1(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,8(%rbp)
|
||||
movq 48+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,8-128(%rbp)
|
||||
movq 48+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,2(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,16(%rbp)
|
||||
movq 64+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,16-128(%rbp)
|
||||
movq 64+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,3(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,24(%rbp)
|
||||
movq 80+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,24-128(%rbp)
|
||||
movq 80+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,4(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,32(%rbp)
|
||||
movq 96+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,32-128(%rbp)
|
||||
movq 96+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,5(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,40(%rbp)
|
||||
movq 112+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,40-128(%rbp)
|
||||
movq 112+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,6(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,48(%rbp)
|
||||
movq 128+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,48-128(%rbp)
|
||||
movq 128+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,7(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,56(%rbp)
|
||||
movq 144+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,56-128(%rbp)
|
||||
movq 144+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,8(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,64(%rbp)
|
||||
movq 160+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,64-128(%rbp)
|
||||
movq 160+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,9(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,72(%rbp)
|
||||
movq 176+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,72-128(%rbp)
|
||||
movq 176+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,10(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,80(%rbp)
|
||||
movq 192+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,80-128(%rbp)
|
||||
movq 192+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,11(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,88(%rbp)
|
||||
movq 208+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,88-128(%rbp)
|
||||
movq 208+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,12(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,96(%rbp)
|
||||
movq 224+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,96-128(%rbp)
|
||||
movq 224+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,13(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,104(%rbp)
|
||||
movq 240+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,104-128(%rbp)
|
||||
movq 240+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,14(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,112(%rbp)
|
||||
shlb $4,%dl
|
||||
movq %rax,112-128(%rbp)
|
||||
shlq $60,%r10
|
||||
movb %dl,15(%rsp)
|
||||
orq %r10,%rbx
|
||||
movq %r9,120(%rbp)
|
||||
movq %rbx,120-128(%rbp)
|
||||
addq $-128,%rsi
|
||||
movq 8(%rdi),%r8
|
||||
movq 0(%rdi),%r9
|
||||
addq %r14,%r15
|
||||
leaq .Lrem_8bit(%rip),%r11
|
||||
jmp .Louter_loop
|
||||
.align 16
|
||||
.Louter_loop:
|
||||
xorq (%r14),%r9
|
||||
movq 8(%r14),%rdx
|
||||
leaq 16(%r14),%r14
|
||||
xorq %r8,%rdx
|
||||
movq %r9,(%rdi)
|
||||
movq %rdx,8(%rdi)
|
||||
shrq $32,%rdx
|
||||
xorq %rax,%rax
|
||||
roll $8,%edx
|
||||
movb %dl,%al
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
shrl $4,%ebx
|
||||
roll $8,%edx
|
||||
movq 8(%rsi,%rax,1),%r8
|
||||
movq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl 8(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl 4(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl 0(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
andl $240,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl -4(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
shlq $48,%r12
|
||||
xorq %r10,%r8
|
||||
xorq %r12,%r9
|
||||
movzbq %r8b,%r13
|
||||
shrq $4,%r8
|
||||
movq %r9,%r10
|
||||
shlb $4,%r13b
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rcx,1),%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rcx,1),%r9
|
||||
xorq %r10,%r8
|
||||
shlq $48,%r13
|
||||
bswapq %r8
|
||||
xorq %r13,%r9
|
||||
bswapq %r9
|
||||
cmpq %r15,%r14
|
||||
jb .Louter_loop
|
||||
movq %r8,8(%rdi)
|
||||
movq %r9,(%rdi)
|
||||
|
||||
leaq 280+48(%rsp),%rsi
|
||||
.cfi_def_cfa %rsi,8
|
||||
movq -48(%rsi),%r15
|
||||
.cfi_restore %r15
|
||||
movq -40(%rsi),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%rsi),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%rsi),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%rsi),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%rsi),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq 0(%rsi),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lghash_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size gcm_ghash_4bit,.-gcm_ghash_4bit
|
||||
.globl gcm_init_clmul
|
||||
.hidden gcm_init_clmul
|
||||
.type gcm_init_clmul,@function
|
||||
@ -1822,50 +1119,9 @@ gcm_ghash_avx:
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
|
||||
.L7_mask:
|
||||
.long 7,0,7,0
|
||||
.L7_mask_poly:
|
||||
.long 7,0,450,0
|
||||
.align 64
|
||||
.type .Lrem_4bit,@object
|
||||
.Lrem_4bit:
|
||||
.long 0,0,0,471859200,0,943718400,0,610271232
|
||||
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
|
||||
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
|
||||
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
|
||||
.type .Lrem_8bit,@object
|
||||
.Lrem_8bit:
|
||||
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
|
||||
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
|
||||
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
|
||||
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
|
||||
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
|
||||
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
|
||||
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
|
||||
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
|
||||
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
|
||||
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
|
||||
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
|
||||
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
|
||||
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
|
||||
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
|
||||
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
|
||||
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
|
||||
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
|
||||
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
|
||||
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
|
||||
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
|
||||
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
|
||||
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
|
||||
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
|
||||
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
|
||||
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
|
||||
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
|
||||
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
|
||||
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
|
||||
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
|
||||
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
|
||||
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
|
||||
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -699,3 +699,4 @@ md5_block_asm_data_order:
|
||||
.cfi_endproc
|
||||
.size md5_block_asm_data_order,.-md5_block_asm_data_order
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -4540,3 +4540,4 @@ ecp_nistz256_point_add_affinex:
|
||||
.cfi_endproc
|
||||
.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -340,3 +340,4 @@ beeu_mod_inverse_vartime:
|
||||
|
||||
.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -60,3 +60,4 @@ CRYPTO_rdrand_multiple8_buf:
|
||||
.cfi_endproc
|
||||
.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -1746,3 +1746,4 @@ rsaz_1024_gather5_avx2:
|
||||
.long 4,4,4,4, 4,4,4,4
|
||||
.align 64
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -3970,3 +3970,4 @@ sha256_block_data_order_avx:
|
||||
.cfi_endproc
|
||||
.size sha256_block_data_order_avx,.-sha256_block_data_order_avx
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -2989,3 +2989,4 @@ sha512_block_data_order_avx:
|
||||
.cfi_endproc
|
||||
.size sha512_block_data_order_avx,.-sha512_block_data_order_avx
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -812,12 +812,10 @@ _vpaes_schedule_mangle:
|
||||
.align 16
|
||||
vpaes_set_encrypt_key:
|
||||
.cfi_startproc
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
.extern BORINGSSL_function_hit
|
||||
.hidden BORINGSSL_function_hit
|
||||
movb $1,BORINGSSL_function_hit+5(%rip)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
movl %esi,%eax
|
||||
@ -863,12 +861,10 @@ vpaes_set_decrypt_key:
|
||||
.align 16
|
||||
vpaes_encrypt:
|
||||
.cfi_startproc
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
.extern BORINGSSL_function_hit
|
||||
.hidden BORINGSSL_function_hit
|
||||
movb $1,BORINGSSL_function_hit+4(%rip)
|
||||
#endif
|
||||
#endif
|
||||
movdqu (%rdi),%xmm0
|
||||
call _vpaes_preheat
|
||||
@ -1134,3 +1130,4 @@ _vpaes_consts:
|
||||
.align 64
|
||||
.size _vpaes_consts,.-_vpaes_consts
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -1257,3 +1257,4 @@ bn_mulx4x_mont:
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 16
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -3787,3 +3787,4 @@ bn_gather5:
|
||||
.long 2,2, 2,2
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -515,3 +515,4 @@ abi_test_set_direction_flag:
|
||||
.byte 0xf3,0xc3
|
||||
.size abi_test_set_direction_flag,.-abi_test_set_direction_flag
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -6,14 +6,14 @@
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
#endif
|
||||
.globl _aes_hw_encrypt
|
||||
.private_extern _aes_hw_encrypt
|
||||
.align 4
|
||||
_aes_hw_encrypt:
|
||||
L_aes_hw_encrypt_begin:
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call L000pic
|
||||
@ -818,7 +818,7 @@ L_aes_hw_ctr32_encrypt_blocks_begin:
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call L038pic
|
||||
@ -2403,7 +2403,7 @@ L097bad_keybits:
|
||||
.align 4
|
||||
_aes_hw_set_encrypt_key:
|
||||
L_aes_hw_set_encrypt_key_begin:
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call L116pic
|
||||
|
@ -981,551 +981,6 @@ L028aw_end:
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _bn_sub_part_words
|
||||
.private_extern _bn_sub_part_words
|
||||
.align 4
|
||||
_bn_sub_part_words:
|
||||
L_bn_sub_part_words_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl 20(%esp),%ebx
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edi
|
||||
movl 32(%esp),%ebp
|
||||
xorl %eax,%eax
|
||||
andl $4294967288,%ebp
|
||||
jz L029aw_finish
|
||||
L030aw_loop:
|
||||
# Round 0
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
# Round 1
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,4(%ebx)
|
||||
# Round 2
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,8(%ebx)
|
||||
# Round 3
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,12(%ebx)
|
||||
# Round 4
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,16(%ebx)
|
||||
# Round 5
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,20(%ebx)
|
||||
# Round 6
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
# Round 7
|
||||
movl 28(%esi),%ecx
|
||||
movl 28(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,28(%ebx)
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%edi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz L030aw_loop
|
||||
L029aw_finish:
|
||||
movl 32(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz L031aw_end
|
||||
# Tail Round 0
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz L031aw_end
|
||||
# Tail Round 1
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz L031aw_end
|
||||
# Tail Round 2
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz L031aw_end
|
||||
# Tail Round 3
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz L031aw_end
|
||||
# Tail Round 4
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz L031aw_end
|
||||
# Tail Round 5
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz L031aw_end
|
||||
# Tail Round 6
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
L031aw_end:
|
||||
cmpl $0,36(%esp)
|
||||
je L032pw_end
|
||||
movl 36(%esp),%ebp
|
||||
cmpl $0,%ebp
|
||||
je L032pw_end
|
||||
jge L033pw_pos
|
||||
# pw_neg
|
||||
movl $0,%edx
|
||||
subl %ebp,%edx
|
||||
movl %edx,%ebp
|
||||
andl $4294967288,%ebp
|
||||
jz L034pw_neg_finish
|
||||
L035pw_neg_loop:
|
||||
# dl<0 Round 0
|
||||
movl $0,%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
# dl<0 Round 1
|
||||
movl $0,%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,4(%ebx)
|
||||
# dl<0 Round 2
|
||||
movl $0,%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,8(%ebx)
|
||||
# dl<0 Round 3
|
||||
movl $0,%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,12(%ebx)
|
||||
# dl<0 Round 4
|
||||
movl $0,%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,16(%ebx)
|
||||
# dl<0 Round 5
|
||||
movl $0,%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,20(%ebx)
|
||||
# dl<0 Round 6
|
||||
movl $0,%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
# dl<0 Round 7
|
||||
movl $0,%ecx
|
||||
movl 28(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,28(%ebx)
|
||||
|
||||
addl $32,%edi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz L035pw_neg_loop
|
||||
L034pw_neg_finish:
|
||||
movl 36(%esp),%edx
|
||||
movl $0,%ebp
|
||||
subl %edx,%ebp
|
||||
andl $7,%ebp
|
||||
jz L032pw_end
|
||||
# dl<0 Tail Round 0
|
||||
movl $0,%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,(%ebx)
|
||||
jz L032pw_end
|
||||
# dl<0 Tail Round 1
|
||||
movl $0,%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,4(%ebx)
|
||||
jz L032pw_end
|
||||
# dl<0 Tail Round 2
|
||||
movl $0,%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,8(%ebx)
|
||||
jz L032pw_end
|
||||
# dl<0 Tail Round 3
|
||||
movl $0,%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,12(%ebx)
|
||||
jz L032pw_end
|
||||
# dl<0 Tail Round 4
|
||||
movl $0,%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,16(%ebx)
|
||||
jz L032pw_end
|
||||
# dl<0 Tail Round 5
|
||||
movl $0,%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,20(%ebx)
|
||||
jz L032pw_end
|
||||
# dl<0 Tail Round 6
|
||||
movl $0,%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
jmp L032pw_end
|
||||
L033pw_pos:
|
||||
andl $4294967288,%ebp
|
||||
jz L036pw_pos_finish
|
||||
L037pw_pos_loop:
|
||||
# dl>0 Round 0
|
||||
movl (%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,(%ebx)
|
||||
jnc L038pw_nc0
|
||||
# dl>0 Round 1
|
||||
movl 4(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
jnc L039pw_nc1
|
||||
# dl>0 Round 2
|
||||
movl 8(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
jnc L040pw_nc2
|
||||
# dl>0 Round 3
|
||||
movl 12(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
jnc L041pw_nc3
|
||||
# dl>0 Round 4
|
||||
movl 16(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
jnc L042pw_nc4
|
||||
# dl>0 Round 5
|
||||
movl 20(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
jnc L043pw_nc5
|
||||
# dl>0 Round 6
|
||||
movl 24(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
jnc L044pw_nc6
|
||||
# dl>0 Round 7
|
||||
movl 28(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,28(%ebx)
|
||||
jnc L045pw_nc7
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz L037pw_pos_loop
|
||||
L036pw_pos_finish:
|
||||
movl 36(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz L032pw_end
|
||||
# dl>0 Tail Round 0
|
||||
movl (%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,(%ebx)
|
||||
jnc L046pw_tail_nc0
|
||||
decl %ebp
|
||||
jz L032pw_end
|
||||
# dl>0 Tail Round 1
|
||||
movl 4(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
jnc L047pw_tail_nc1
|
||||
decl %ebp
|
||||
jz L032pw_end
|
||||
# dl>0 Tail Round 2
|
||||
movl 8(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
jnc L048pw_tail_nc2
|
||||
decl %ebp
|
||||
jz L032pw_end
|
||||
# dl>0 Tail Round 3
|
||||
movl 12(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
jnc L049pw_tail_nc3
|
||||
decl %ebp
|
||||
jz L032pw_end
|
||||
# dl>0 Tail Round 4
|
||||
movl 16(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
jnc L050pw_tail_nc4
|
||||
decl %ebp
|
||||
jz L032pw_end
|
||||
# dl>0 Tail Round 5
|
||||
movl 20(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
jnc L051pw_tail_nc5
|
||||
decl %ebp
|
||||
jz L032pw_end
|
||||
# dl>0 Tail Round 6
|
||||
movl 24(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
jnc L052pw_tail_nc6
|
||||
movl $1,%eax
|
||||
jmp L032pw_end
|
||||
L053pw_nc_loop:
|
||||
movl (%esi),%ecx
|
||||
movl %ecx,(%ebx)
|
||||
L038pw_nc0:
|
||||
movl 4(%esi),%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
L039pw_nc1:
|
||||
movl 8(%esi),%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
L040pw_nc2:
|
||||
movl 12(%esi),%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
L041pw_nc3:
|
||||
movl 16(%esi),%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
L042pw_nc4:
|
||||
movl 20(%esi),%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
L043pw_nc5:
|
||||
movl 24(%esi),%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
L044pw_nc6:
|
||||
movl 28(%esi),%ecx
|
||||
movl %ecx,28(%ebx)
|
||||
L045pw_nc7:
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz L053pw_nc_loop
|
||||
movl 36(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz L054pw_nc_end
|
||||
movl (%esi),%ecx
|
||||
movl %ecx,(%ebx)
|
||||
L046pw_tail_nc0:
|
||||
decl %ebp
|
||||
jz L054pw_nc_end
|
||||
movl 4(%esi),%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
L047pw_tail_nc1:
|
||||
decl %ebp
|
||||
jz L054pw_nc_end
|
||||
movl 8(%esi),%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
L048pw_tail_nc2:
|
||||
decl %ebp
|
||||
jz L054pw_nc_end
|
||||
movl 12(%esi),%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
L049pw_tail_nc3:
|
||||
decl %ebp
|
||||
jz L054pw_nc_end
|
||||
movl 16(%esi),%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
L050pw_tail_nc4:
|
||||
decl %ebp
|
||||
jz L054pw_nc_end
|
||||
movl 20(%esi),%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
L051pw_tail_nc5:
|
||||
decl %ebp
|
||||
jz L054pw_nc_end
|
||||
movl 24(%esi),%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
L052pw_tail_nc6:
|
||||
L054pw_nc_end:
|
||||
movl $0,%eax
|
||||
L032pw_end:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.section __IMPORT,__pointers,non_lazy_symbol_pointers
|
||||
L_OPENSSL_ia32cap_P$non_lazy_ptr:
|
||||
.indirect_symbol _OPENSSL_ia32cap_P
|
||||
|
@ -6,707 +6,6 @@
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.globl _gcm_gmult_4bit_mmx
|
||||
.private_extern _gcm_gmult_4bit_mmx
|
||||
.align 4
|
||||
_gcm_gmult_4bit_mmx:
|
||||
L_gcm_gmult_4bit_mmx_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
call L000pic_point
|
||||
L000pic_point:
|
||||
popl %eax
|
||||
leal Lrem_4bit-L000pic_point(%eax),%eax
|
||||
movzbl 15(%edi),%ebx
|
||||
xorl %ecx,%ecx
|
||||
movl %ebx,%edx
|
||||
movb %dl,%cl
|
||||
movl $14,%ebp
|
||||
shlb $4,%cl
|
||||
andl $240,%edx
|
||||
movq 8(%esi,%ecx,1),%mm0
|
||||
movq (%esi,%ecx,1),%mm1
|
||||
movd %mm0,%ebx
|
||||
jmp L001mmx_loop
|
||||
.align 4,0x90
|
||||
L001mmx_loop:
|
||||
psrlq $4,%mm0
|
||||
andl $15,%ebx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%edx,1),%mm0
|
||||
movb (%edi,%ebp,1),%cl
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
decl %ebp
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%edx,1),%mm1
|
||||
movl %ecx,%edx
|
||||
pxor %mm2,%mm0
|
||||
js L002mmx_break
|
||||
shlb $4,%cl
|
||||
andl $15,%ebx
|
||||
psrlq $4,%mm0
|
||||
andl $240,%edx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%ecx,1),%mm0
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%ecx,1),%mm1
|
||||
pxor %mm2,%mm0
|
||||
jmp L001mmx_loop
|
||||
.align 4,0x90
|
||||
L002mmx_break:
|
||||
shlb $4,%cl
|
||||
andl $15,%ebx
|
||||
psrlq $4,%mm0
|
||||
andl $240,%edx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%ecx,1),%mm0
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%ecx,1),%mm1
|
||||
pxor %mm2,%mm0
|
||||
psrlq $4,%mm0
|
||||
andl $15,%ebx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%edx,1),%mm0
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%edx,1),%mm1
|
||||
pxor %mm2,%mm0
|
||||
psrlq $32,%mm0
|
||||
movd %mm1,%edx
|
||||
psrlq $32,%mm1
|
||||
movd %mm0,%ecx
|
||||
movd %mm1,%ebp
|
||||
bswap %ebx
|
||||
bswap %edx
|
||||
bswap %ecx
|
||||
bswap %ebp
|
||||
emms
|
||||
movl %ebx,12(%edi)
|
||||
movl %edx,4(%edi)
|
||||
movl %ecx,8(%edi)
|
||||
movl %ebp,(%edi)
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _gcm_ghash_4bit_mmx
|
||||
.private_extern _gcm_ghash_4bit_mmx
|
||||
.align 4
|
||||
_gcm_ghash_4bit_mmx:
|
||||
L_gcm_ghash_4bit_mmx_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%eax
|
||||
movl 24(%esp),%ebx
|
||||
movl 28(%esp),%ecx
|
||||
movl 32(%esp),%edx
|
||||
movl %esp,%ebp
|
||||
call L003pic_point
|
||||
L003pic_point:
|
||||
popl %esi
|
||||
leal Lrem_8bit-L003pic_point(%esi),%esi
|
||||
subl $544,%esp
|
||||
andl $-64,%esp
|
||||
subl $16,%esp
|
||||
addl %ecx,%edx
|
||||
movl %eax,544(%esp)
|
||||
movl %edx,552(%esp)
|
||||
movl %ebp,556(%esp)
|
||||
addl $128,%ebx
|
||||
leal 144(%esp),%edi
|
||||
leal 400(%esp),%ebp
|
||||
movl -120(%ebx),%edx
|
||||
movq -120(%ebx),%mm0
|
||||
movq -128(%ebx),%mm3
|
||||
shll $4,%edx
|
||||
movb %dl,(%esp)
|
||||
movl -104(%ebx),%edx
|
||||
movq -104(%ebx),%mm2
|
||||
movq -112(%ebx),%mm5
|
||||
movq %mm0,-128(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,(%edi)
|
||||
movq %mm3,%mm7
|
||||
psrlq $4,%mm3
|
||||
shll $4,%edx
|
||||
movb %dl,1(%esp)
|
||||
movl -88(%ebx),%edx
|
||||
movq -88(%ebx),%mm1
|
||||
psllq $60,%mm7
|
||||
movq -96(%ebx),%mm4
|
||||
por %mm7,%mm0
|
||||
movq %mm2,-120(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,8(%edi)
|
||||
movq %mm5,%mm6
|
||||
movq %mm0,-128(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,2(%esp)
|
||||
movl -72(%ebx),%edx
|
||||
movq -72(%ebx),%mm0
|
||||
psllq $60,%mm6
|
||||
movq -80(%ebx),%mm3
|
||||
por %mm6,%mm2
|
||||
movq %mm1,-112(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,16(%edi)
|
||||
movq %mm4,%mm7
|
||||
movq %mm2,-120(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,8(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,3(%esp)
|
||||
movl -56(%ebx),%edx
|
||||
movq -56(%ebx),%mm2
|
||||
psllq $60,%mm7
|
||||
movq -64(%ebx),%mm5
|
||||
por %mm7,%mm1
|
||||
movq %mm0,-104(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,24(%edi)
|
||||
movq %mm3,%mm6
|
||||
movq %mm1,-112(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,16(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,4(%esp)
|
||||
movl -40(%ebx),%edx
|
||||
movq -40(%ebx),%mm1
|
||||
psllq $60,%mm6
|
||||
movq -48(%ebx),%mm4
|
||||
por %mm6,%mm0
|
||||
movq %mm2,-96(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,32(%edi)
|
||||
movq %mm5,%mm7
|
||||
movq %mm0,-104(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,24(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,5(%esp)
|
||||
movl -24(%ebx),%edx
|
||||
movq -24(%ebx),%mm0
|
||||
psllq $60,%mm7
|
||||
movq -32(%ebx),%mm3
|
||||
por %mm7,%mm2
|
||||
movq %mm1,-88(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,40(%edi)
|
||||
movq %mm4,%mm6
|
||||
movq %mm2,-96(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,32(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,6(%esp)
|
||||
movl -8(%ebx),%edx
|
||||
movq -8(%ebx),%mm2
|
||||
psllq $60,%mm6
|
||||
movq -16(%ebx),%mm5
|
||||
por %mm6,%mm1
|
||||
movq %mm0,-80(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,48(%edi)
|
||||
movq %mm3,%mm7
|
||||
movq %mm1,-88(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,40(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,7(%esp)
|
||||
movl 8(%ebx),%edx
|
||||
movq 8(%ebx),%mm1
|
||||
psllq $60,%mm7
|
||||
movq (%ebx),%mm4
|
||||
por %mm7,%mm0
|
||||
movq %mm2,-72(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,56(%edi)
|
||||
movq %mm5,%mm6
|
||||
movq %mm0,-80(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,48(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,8(%esp)
|
||||
movl 24(%ebx),%edx
|
||||
movq 24(%ebx),%mm0
|
||||
psllq $60,%mm6
|
||||
movq 16(%ebx),%mm3
|
||||
por %mm6,%mm2
|
||||
movq %mm1,-64(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,64(%edi)
|
||||
movq %mm4,%mm7
|
||||
movq %mm2,-72(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,56(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,9(%esp)
|
||||
movl 40(%ebx),%edx
|
||||
movq 40(%ebx),%mm2
|
||||
psllq $60,%mm7
|
||||
movq 32(%ebx),%mm5
|
||||
por %mm7,%mm1
|
||||
movq %mm0,-56(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,72(%edi)
|
||||
movq %mm3,%mm6
|
||||
movq %mm1,-64(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,64(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,10(%esp)
|
||||
movl 56(%ebx),%edx
|
||||
movq 56(%ebx),%mm1
|
||||
psllq $60,%mm6
|
||||
movq 48(%ebx),%mm4
|
||||
por %mm6,%mm0
|
||||
movq %mm2,-48(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,80(%edi)
|
||||
movq %mm5,%mm7
|
||||
movq %mm0,-56(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,72(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,11(%esp)
|
||||
movl 72(%ebx),%edx
|
||||
movq 72(%ebx),%mm0
|
||||
psllq $60,%mm7
|
||||
movq 64(%ebx),%mm3
|
||||
por %mm7,%mm2
|
||||
movq %mm1,-40(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,88(%edi)
|
||||
movq %mm4,%mm6
|
||||
movq %mm2,-48(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,80(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,12(%esp)
|
||||
movl 88(%ebx),%edx
|
||||
movq 88(%ebx),%mm2
|
||||
psllq $60,%mm6
|
||||
movq 80(%ebx),%mm5
|
||||
por %mm6,%mm1
|
||||
movq %mm0,-32(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,96(%edi)
|
||||
movq %mm3,%mm7
|
||||
movq %mm1,-40(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,88(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,13(%esp)
|
||||
movl 104(%ebx),%edx
|
||||
movq 104(%ebx),%mm1
|
||||
psllq $60,%mm7
|
||||
movq 96(%ebx),%mm4
|
||||
por %mm7,%mm0
|
||||
movq %mm2,-24(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,104(%edi)
|
||||
movq %mm5,%mm6
|
||||
movq %mm0,-32(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,96(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,14(%esp)
|
||||
movl 120(%ebx),%edx
|
||||
movq 120(%ebx),%mm0
|
||||
psllq $60,%mm6
|
||||
movq 112(%ebx),%mm3
|
||||
por %mm6,%mm2
|
||||
movq %mm1,-16(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,112(%edi)
|
||||
movq %mm4,%mm7
|
||||
movq %mm2,-24(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,104(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,15(%esp)
|
||||
psllq $60,%mm7
|
||||
por %mm7,%mm1
|
||||
movq %mm0,-8(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,120(%edi)
|
||||
movq %mm3,%mm6
|
||||
movq %mm1,-16(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,112(%ebp)
|
||||
psllq $60,%mm6
|
||||
por %mm6,%mm0
|
||||
movq %mm0,-8(%ebp)
|
||||
movq %mm3,120(%ebp)
|
||||
movq (%eax),%mm6
|
||||
movl 8(%eax),%ebx
|
||||
movl 12(%eax),%edx
|
||||
.align 4,0x90
|
||||
L004outer:
|
||||
xorl 12(%ecx),%edx
|
||||
xorl 8(%ecx),%ebx
|
||||
pxor (%ecx),%mm6
|
||||
leal 16(%ecx),%ecx
|
||||
movl %ebx,536(%esp)
|
||||
movq %mm6,528(%esp)
|
||||
movl %ecx,548(%esp)
|
||||
xorl %eax,%eax
|
||||
roll $8,%edx
|
||||
movb %dl,%al
|
||||
movl %eax,%ebp
|
||||
andb $15,%al
|
||||
shrl $4,%ebp
|
||||
pxor %mm0,%mm0
|
||||
roll $8,%edx
|
||||
pxor %mm1,%mm1
|
||||
pxor %mm2,%mm2
|
||||
movq 16(%esp,%eax,8),%mm7
|
||||
movq 144(%esp,%eax,8),%mm6
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
shrl $4,%edi
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 536(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 532(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 528(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 524(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
movzbl %bl,%ebx
|
||||
pxor %mm2,%mm2
|
||||
psllq $4,%mm1
|
||||
movd %mm7,%ecx
|
||||
psrlq $4,%mm7
|
||||
movq %mm6,%mm3
|
||||
psrlq $4,%mm6
|
||||
shll $4,%ecx
|
||||
pxor 16(%esp,%edi,8),%mm7
|
||||
psllq $60,%mm3
|
||||
movzbl %cl,%ecx
|
||||
pxor %mm3,%mm7
|
||||
pxor 144(%esp,%edi,8),%mm6
|
||||
pinsrw $2,(%esi,%ebx,2),%mm0
|
||||
pxor %mm1,%mm6
|
||||
movd %mm7,%edx
|
||||
pinsrw $3,(%esi,%ecx,2),%mm2
|
||||
psllq $12,%mm0
|
||||
pxor %mm0,%mm6
|
||||
psrlq $32,%mm7
|
||||
pxor %mm2,%mm6
|
||||
movl 548(%esp),%ecx
|
||||
movd %mm7,%ebx
|
||||
movq %mm6,%mm3
|
||||
psllw $8,%mm6
|
||||
psrlw $8,%mm3
|
||||
por %mm3,%mm6
|
||||
bswap %edx
|
||||
pshufw $27,%mm6,%mm6
|
||||
bswap %ebx
|
||||
cmpl 552(%esp),%ecx
|
||||
jne L004outer
|
||||
movl 544(%esp),%eax
|
||||
movl %edx,12(%eax)
|
||||
movl %ebx,8(%eax)
|
||||
movq %mm6,(%eax)
|
||||
movl 556(%esp),%esp
|
||||
emms
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.globl _gcm_init_clmul
|
||||
.private_extern _gcm_init_clmul
|
||||
.align 4
|
||||
@ -714,10 +13,10 @@ _gcm_init_clmul:
|
||||
L_gcm_init_clmul_begin:
|
||||
movl 4(%esp),%edx
|
||||
movl 8(%esp),%eax
|
||||
call L005pic
|
||||
L005pic:
|
||||
call L000pic
|
||||
L000pic:
|
||||
popl %ecx
|
||||
leal Lbswap-L005pic(%ecx),%ecx
|
||||
leal Lbswap-L000pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm2
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $255,%xmm2,%xmm4
|
||||
@ -782,10 +81,10 @@ _gcm_gmult_clmul:
|
||||
L_gcm_gmult_clmul_begin:
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
call L006pic
|
||||
L006pic:
|
||||
call L001pic
|
||||
L001pic:
|
||||
popl %ecx
|
||||
leal Lbswap-L006pic(%ecx),%ecx
|
||||
leal Lbswap-L001pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
movups (%edx),%xmm2
|
||||
@ -840,16 +139,16 @@ L_gcm_ghash_clmul_begin:
|
||||
movl 24(%esp),%edx
|
||||
movl 28(%esp),%esi
|
||||
movl 32(%esp),%ebx
|
||||
call L007pic
|
||||
L007pic:
|
||||
call L002pic
|
||||
L002pic:
|
||||
popl %ecx
|
||||
leal Lbswap-L007pic(%ecx),%ecx
|
||||
leal Lbswap-L002pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
movdqu (%edx),%xmm2
|
||||
.byte 102,15,56,0,197
|
||||
subl $16,%ebx
|
||||
jz L008odd_tail
|
||||
jz L003odd_tail
|
||||
movdqu (%esi),%xmm3
|
||||
movdqu 16(%esi),%xmm6
|
||||
.byte 102,15,56,0,221
|
||||
@ -866,10 +165,10 @@ L007pic:
|
||||
movups 16(%edx),%xmm2
|
||||
nop
|
||||
subl $32,%ebx
|
||||
jbe L009even_tail
|
||||
jmp L010mod_loop
|
||||
jbe L004even_tail
|
||||
jmp L005mod_loop
|
||||
.align 5,0x90
|
||||
L010mod_loop:
|
||||
L005mod_loop:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
@ -924,8 +223,8 @@ L010mod_loop:
|
||||
.byte 102,15,58,68,221,0
|
||||
leal 32(%esi),%esi
|
||||
subl $32,%ebx
|
||||
ja L010mod_loop
|
||||
L009even_tail:
|
||||
ja L005mod_loop
|
||||
L004even_tail:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
@ -964,9 +263,9 @@ L009even_tail:
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
testl %ebx,%ebx
|
||||
jnz L011done
|
||||
jnz L006done
|
||||
movups (%edx),%xmm2
|
||||
L008odd_tail:
|
||||
L003odd_tail:
|
||||
movdqu (%esi),%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
pxor %xmm3,%xmm0
|
||||
@ -1005,7 +304,7 @@ L008odd_tail:
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
L011done:
|
||||
L006done:
|
||||
.byte 102,15,56,0,197
|
||||
movdqu %xmm0,(%eax)
|
||||
popl %edi
|
||||
@ -1017,46 +316,6 @@ L011done:
|
||||
Lbswap:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
|
||||
.align 6,0x90
|
||||
Lrem_8bit:
|
||||
.value 0,450,900,582,1800,1738,1164,1358
|
||||
.value 3600,4050,3476,3158,2328,2266,2716,2910
|
||||
.value 7200,7650,8100,7782,6952,6890,6316,6510
|
||||
.value 4656,5106,4532,4214,5432,5370,5820,6014
|
||||
.value 14400,14722,15300,14854,16200,16010,15564,15630
|
||||
.value 13904,14226,13780,13334,12632,12442,13020,13086
|
||||
.value 9312,9634,10212,9766,9064,8874,8428,8494
|
||||
.value 10864,11186,10740,10294,11640,11450,12028,12094
|
||||
.value 28800,28994,29444,29382,30600,30282,29708,30158
|
||||
.value 32400,32594,32020,31958,31128,30810,31260,31710
|
||||
.value 27808,28002,28452,28390,27560,27242,26668,27118
|
||||
.value 25264,25458,24884,24822,26040,25722,26172,26622
|
||||
.value 18624,18690,19268,19078,20424,19978,19532,19854
|
||||
.value 18128,18194,17748,17558,16856,16410,16988,17310
|
||||
.value 21728,21794,22372,22182,21480,21034,20588,20910
|
||||
.value 23280,23346,22900,22710,24056,23610,24188,24510
|
||||
.value 57600,57538,57988,58182,58888,59338,58764,58446
|
||||
.value 61200,61138,60564,60758,59416,59866,60316,59998
|
||||
.value 64800,64738,65188,65382,64040,64490,63916,63598
|
||||
.value 62256,62194,61620,61814,62520,62970,63420,63102
|
||||
.value 55616,55426,56004,56070,56904,57226,56780,56334
|
||||
.value 55120,54930,54484,54550,53336,53658,54236,53790
|
||||
.value 50528,50338,50916,50982,49768,50090,49644,49198
|
||||
.value 52080,51890,51444,51510,52344,52666,53244,52798
|
||||
.value 37248,36930,37380,37830,38536,38730,38156,38094
|
||||
.value 40848,40530,39956,40406,39064,39258,39708,39646
|
||||
.value 36256,35938,36388,36838,35496,35690,35116,35054
|
||||
.value 33712,33394,32820,33270,33976,34170,34620,34558
|
||||
.value 43456,43010,43588,43910,44744,44810,44364,44174
|
||||
.value 42960,42514,42068,42390,41176,41242,41820,41630
|
||||
.value 46560,46114,46692,47014,45800,45866,45420,45230
|
||||
.value 48112,47666,47220,47542,48376,48442,49020,48830
|
||||
.align 6,0x90
|
||||
Lrem_4bit:
|
||||
.long 0,0,0,471859200,0,943718400,0,610271232
|
||||
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
|
||||
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
|
||||
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
|
||||
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
|
||||
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
#endif
|
||||
.align 6,0x90
|
||||
L_vpaes_consts:
|
||||
@ -468,7 +468,7 @@ L_vpaes_set_encrypt_key_begin:
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call L016pic
|
||||
@ -549,7 +549,7 @@ L_vpaes_encrypt_begin:
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call L019pic
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -556,11 +556,9 @@ L$handle_ctr32_2:
|
||||
.p2align 5
|
||||
_aesni_gcm_encrypt:
|
||||
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
|
||||
movb $1,_BORINGSSL_function_hit+2(%rip)
|
||||
#endif
|
||||
#endif
|
||||
xorq %r10,%r10
|
||||
|
||||
|
@ -19,11 +19,9 @@
|
||||
.p2align 4
|
||||
_aes_hw_encrypt:
|
||||
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
|
||||
movb $1,_BORINGSSL_function_hit+1(%rip)
|
||||
#endif
|
||||
#endif
|
||||
movups (%rdi),%xmm2
|
||||
movl 240(%rdx),%eax
|
||||
@ -885,10 +883,8 @@ L$ecb_ret:
|
||||
.p2align 4
|
||||
_aes_hw_ctr32_encrypt_blocks:
|
||||
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
movb $1,_BORINGSSL_function_hit(%rip)
|
||||
#endif
|
||||
#endif
|
||||
cmpq $1,%rdx
|
||||
jne L$ctr32_bulk
|
||||
@ -2109,11 +2105,9 @@ L$SEH_end_set_decrypt_key:
|
||||
_aes_hw_set_encrypt_key:
|
||||
__aesni_set_encrypt_key:
|
||||
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
movb $1,_BORINGSSL_function_hit+3(%rip)
|
||||
#endif
|
||||
#endif
|
||||
.byte 0x48,0x83,0xEC,0x08
|
||||
|
||||
movq $-1,%rax
|
||||
|
@ -13,697 +13,6 @@
|
||||
#endif
|
||||
.text
|
||||
|
||||
|
||||
.globl _gcm_gmult_4bit
|
||||
.private_extern _gcm_gmult_4bit
|
||||
|
||||
.p2align 4
|
||||
_gcm_gmult_4bit:
|
||||
|
||||
pushq %rbx
|
||||
|
||||
pushq %rbp
|
||||
|
||||
pushq %r12
|
||||
|
||||
pushq %r13
|
||||
|
||||
pushq %r14
|
||||
|
||||
pushq %r15
|
||||
|
||||
subq $280,%rsp
|
||||
|
||||
L$gmult_prologue:
|
||||
|
||||
movzbq 15(%rdi),%r8
|
||||
leaq L$rem_4bit(%rip),%r11
|
||||
xorq %rax,%rax
|
||||
xorq %rbx,%rbx
|
||||
movb %r8b,%al
|
||||
movb %r8b,%bl
|
||||
shlb $4,%al
|
||||
movq $14,%rcx
|
||||
movq 8(%rsi,%rax,1),%r8
|
||||
movq (%rsi,%rax,1),%r9
|
||||
andb $0xf0,%bl
|
||||
movq %r8,%rdx
|
||||
jmp L$oop1
|
||||
|
||||
.p2align 4
|
||||
L$oop1:
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
movb (%rdi,%rcx,1),%al
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rbx,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rbx,1),%r9
|
||||
movb %al,%bl
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
movq %r8,%rdx
|
||||
shlb $4,%al
|
||||
xorq %r10,%r8
|
||||
decq %rcx
|
||||
js L$break1
|
||||
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
andb $0xf0,%bl
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
movq %r8,%rdx
|
||||
xorq %r10,%r8
|
||||
jmp L$oop1
|
||||
|
||||
.p2align 4
|
||||
L$break1:
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
andb $0xf0,%bl
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
movq %r8,%rdx
|
||||
xorq %r10,%r8
|
||||
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rbx,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rbx,1),%r9
|
||||
xorq %r10,%r8
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
|
||||
bswapq %r8
|
||||
bswapq %r9
|
||||
movq %r8,8(%rdi)
|
||||
movq %r9,(%rdi)
|
||||
|
||||
leaq 280+48(%rsp),%rsi
|
||||
|
||||
movq -8(%rsi),%rbx
|
||||
|
||||
leaq (%rsi),%rsp
|
||||
|
||||
L$gmult_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
|
||||
|
||||
.globl _gcm_ghash_4bit
|
||||
.private_extern _gcm_ghash_4bit
|
||||
|
||||
.p2align 4
|
||||
_gcm_ghash_4bit:
|
||||
|
||||
pushq %rbx
|
||||
|
||||
pushq %rbp
|
||||
|
||||
pushq %r12
|
||||
|
||||
pushq %r13
|
||||
|
||||
pushq %r14
|
||||
|
||||
pushq %r15
|
||||
|
||||
subq $280,%rsp
|
||||
|
||||
L$ghash_prologue:
|
||||
movq %rdx,%r14
|
||||
movq %rcx,%r15
|
||||
subq $-128,%rsi
|
||||
leaq 16+128(%rsp),%rbp
|
||||
xorl %edx,%edx
|
||||
movq 0+0-128(%rsi),%r8
|
||||
movq 0+8-128(%rsi),%rax
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq 16+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq 16+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,0(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,0(%rbp)
|
||||
movq 32+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,0-128(%rbp)
|
||||
movq 32+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,1(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,8(%rbp)
|
||||
movq 48+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,8-128(%rbp)
|
||||
movq 48+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,2(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,16(%rbp)
|
||||
movq 64+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,16-128(%rbp)
|
||||
movq 64+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,3(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,24(%rbp)
|
||||
movq 80+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,24-128(%rbp)
|
||||
movq 80+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,4(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,32(%rbp)
|
||||
movq 96+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,32-128(%rbp)
|
||||
movq 96+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,5(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,40(%rbp)
|
||||
movq 112+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,40-128(%rbp)
|
||||
movq 112+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,6(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,48(%rbp)
|
||||
movq 128+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,48-128(%rbp)
|
||||
movq 128+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,7(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,56(%rbp)
|
||||
movq 144+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,56-128(%rbp)
|
||||
movq 144+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,8(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,64(%rbp)
|
||||
movq 160+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,64-128(%rbp)
|
||||
movq 160+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,9(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,72(%rbp)
|
||||
movq 176+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,72-128(%rbp)
|
||||
movq 176+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,10(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,80(%rbp)
|
||||
movq 192+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,80-128(%rbp)
|
||||
movq 192+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,11(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,88(%rbp)
|
||||
movq 208+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,88-128(%rbp)
|
||||
movq 208+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,12(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,96(%rbp)
|
||||
movq 224+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,96-128(%rbp)
|
||||
movq 224+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,13(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,104(%rbp)
|
||||
movq 240+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,104-128(%rbp)
|
||||
movq 240+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,14(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,112(%rbp)
|
||||
shlb $4,%dl
|
||||
movq %rax,112-128(%rbp)
|
||||
shlq $60,%r10
|
||||
movb %dl,15(%rsp)
|
||||
orq %r10,%rbx
|
||||
movq %r9,120(%rbp)
|
||||
movq %rbx,120-128(%rbp)
|
||||
addq $-128,%rsi
|
||||
movq 8(%rdi),%r8
|
||||
movq 0(%rdi),%r9
|
||||
addq %r14,%r15
|
||||
leaq L$rem_8bit(%rip),%r11
|
||||
jmp L$outer_loop
|
||||
.p2align 4
|
||||
L$outer_loop:
|
||||
xorq (%r14),%r9
|
||||
movq 8(%r14),%rdx
|
||||
leaq 16(%r14),%r14
|
||||
xorq %r8,%rdx
|
||||
movq %r9,(%rdi)
|
||||
movq %rdx,8(%rdi)
|
||||
shrq $32,%rdx
|
||||
xorq %rax,%rax
|
||||
roll $8,%edx
|
||||
movb %dl,%al
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
shrl $4,%ebx
|
||||
roll $8,%edx
|
||||
movq 8(%rsi,%rax,1),%r8
|
||||
movq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl 8(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl 4(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl 0(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
andl $240,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl -4(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
shlq $48,%r12
|
||||
xorq %r10,%r8
|
||||
xorq %r12,%r9
|
||||
movzbq %r8b,%r13
|
||||
shrq $4,%r8
|
||||
movq %r9,%r10
|
||||
shlb $4,%r13b
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rcx,1),%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rcx,1),%r9
|
||||
xorq %r10,%r8
|
||||
shlq $48,%r13
|
||||
bswapq %r8
|
||||
xorq %r13,%r9
|
||||
bswapq %r9
|
||||
cmpq %r15,%r14
|
||||
jb L$outer_loop
|
||||
movq %r8,8(%rdi)
|
||||
movq %r9,(%rdi)
|
||||
|
||||
leaq 280+48(%rsp),%rsi
|
||||
|
||||
movq -48(%rsi),%r15
|
||||
|
||||
movq -40(%rsi),%r14
|
||||
|
||||
movq -32(%rsi),%r13
|
||||
|
||||
movq -24(%rsi),%r12
|
||||
|
||||
movq -16(%rsi),%rbp
|
||||
|
||||
movq -8(%rsi),%rbx
|
||||
|
||||
leaq 0(%rsi),%rsp
|
||||
|
||||
L$ghash_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
|
||||
|
||||
.globl _gcm_init_clmul
|
||||
.private_extern _gcm_init_clmul
|
||||
|
||||
@ -1809,50 +1118,8 @@ L$0x1c2_polynomial:
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
|
||||
L$7_mask:
|
||||
.long 7,0,7,0
|
||||
L$7_mask_poly:
|
||||
.long 7,0,450,0
|
||||
.p2align 6
|
||||
|
||||
L$rem_4bit:
|
||||
.long 0,0,0,471859200,0,943718400,0,610271232
|
||||
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
|
||||
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
|
||||
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
|
||||
|
||||
L$rem_8bit:
|
||||
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
|
||||
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
|
||||
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
|
||||
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
|
||||
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
|
||||
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
|
||||
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
|
||||
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
|
||||
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
|
||||
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
|
||||
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
|
||||
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
|
||||
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
|
||||
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
|
||||
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
|
||||
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
|
||||
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
|
||||
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
|
||||
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
|
||||
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
|
||||
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
|
||||
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
|
||||
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
|
||||
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
|
||||
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
|
||||
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
|
||||
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
|
||||
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
|
||||
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
|
||||
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
|
||||
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
|
||||
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.p2align 6
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -812,11 +812,9 @@ L$schedule_mangle_both:
|
||||
.p2align 4
|
||||
_vpaes_set_encrypt_key:
|
||||
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
|
||||
movb $1,_BORINGSSL_function_hit+5(%rip)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
movl %esi,%eax
|
||||
@ -862,11 +860,9 @@ _vpaes_set_decrypt_key:
|
||||
.p2align 4
|
||||
_vpaes_encrypt:
|
||||
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
|
||||
movb $1,_BORINGSSL_function_hit+4(%rip)
|
||||
#endif
|
||||
#endif
|
||||
movdqu (%rdi),%xmm0
|
||||
call _vpaes_preheat
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -21,14 +21,14 @@ section .text code align=64
|
||||
section .text code
|
||||
%endif
|
||||
;extern _OPENSSL_ia32cap_P
|
||||
%ifndef NDEBUG
|
||||
%ifdef BORINGSSL_DISPATCH_TEST
|
||||
extern _BORINGSSL_function_hit
|
||||
%endif
|
||||
global _aes_hw_encrypt
|
||||
align 16
|
||||
_aes_hw_encrypt:
|
||||
L$_aes_hw_encrypt_begin:
|
||||
%ifndef NDEBUG
|
||||
%ifdef BORINGSSL_DISPATCH_TEST
|
||||
push ebx
|
||||
push edx
|
||||
call L$000pic
|
||||
@ -820,7 +820,7 @@ L$_aes_hw_ctr32_encrypt_blocks_begin:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
%ifndef NDEBUG
|
||||
%ifdef BORINGSSL_DISPATCH_TEST
|
||||
push ebx
|
||||
push edx
|
||||
call L$038pic
|
||||
@ -2400,7 +2400,7 @@ global _aes_hw_set_encrypt_key
|
||||
align 16
|
||||
_aes_hw_set_encrypt_key:
|
||||
L$_aes_hw_set_encrypt_key_begin:
|
||||
%ifndef NDEBUG
|
||||
%ifdef BORINGSSL_DISPATCH_TEST
|
||||
push ebx
|
||||
push edx
|
||||
call L$116pic
|
||||
|
@ -981,549 +981,5 @@ L$025aw_end:
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
global _bn_sub_part_words
|
||||
align 16
|
||||
_bn_sub_part_words:
|
||||
L$_bn_sub_part_words_begin:
|
||||
push ebp
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
;
|
||||
mov ebx,DWORD [20+esp]
|
||||
mov esi,DWORD [24+esp]
|
||||
mov edi,DWORD [28+esp]
|
||||
mov ebp,DWORD [32+esp]
|
||||
xor eax,eax
|
||||
and ebp,4294967288
|
||||
jz NEAR L$026aw_finish
|
||||
L$027aw_loop:
|
||||
; Round 0
|
||||
mov ecx,DWORD [esi]
|
||||
mov edx,DWORD [edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [ebx],ecx
|
||||
; Round 1
|
||||
mov ecx,DWORD [4+esi]
|
||||
mov edx,DWORD [4+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [4+ebx],ecx
|
||||
; Round 2
|
||||
mov ecx,DWORD [8+esi]
|
||||
mov edx,DWORD [8+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [8+ebx],ecx
|
||||
; Round 3
|
||||
mov ecx,DWORD [12+esi]
|
||||
mov edx,DWORD [12+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [12+ebx],ecx
|
||||
; Round 4
|
||||
mov ecx,DWORD [16+esi]
|
||||
mov edx,DWORD [16+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [16+ebx],ecx
|
||||
; Round 5
|
||||
mov ecx,DWORD [20+esi]
|
||||
mov edx,DWORD [20+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [20+ebx],ecx
|
||||
; Round 6
|
||||
mov ecx,DWORD [24+esi]
|
||||
mov edx,DWORD [24+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [24+ebx],ecx
|
||||
; Round 7
|
||||
mov ecx,DWORD [28+esi]
|
||||
mov edx,DWORD [28+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [28+ebx],ecx
|
||||
;
|
||||
add esi,32
|
||||
add edi,32
|
||||
add ebx,32
|
||||
sub ebp,8
|
||||
jnz NEAR L$027aw_loop
|
||||
L$026aw_finish:
|
||||
mov ebp,DWORD [32+esp]
|
||||
and ebp,7
|
||||
jz NEAR L$028aw_end
|
||||
; Tail Round 0
|
||||
mov ecx,DWORD [esi]
|
||||
mov edx,DWORD [edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [ebx],ecx
|
||||
add esi,4
|
||||
add edi,4
|
||||
add ebx,4
|
||||
dec ebp
|
||||
jz NEAR L$028aw_end
|
||||
; Tail Round 1
|
||||
mov ecx,DWORD [esi]
|
||||
mov edx,DWORD [edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [ebx],ecx
|
||||
add esi,4
|
||||
add edi,4
|
||||
add ebx,4
|
||||
dec ebp
|
||||
jz NEAR L$028aw_end
|
||||
; Tail Round 2
|
||||
mov ecx,DWORD [esi]
|
||||
mov edx,DWORD [edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [ebx],ecx
|
||||
add esi,4
|
||||
add edi,4
|
||||
add ebx,4
|
||||
dec ebp
|
||||
jz NEAR L$028aw_end
|
||||
; Tail Round 3
|
||||
mov ecx,DWORD [esi]
|
||||
mov edx,DWORD [edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [ebx],ecx
|
||||
add esi,4
|
||||
add edi,4
|
||||
add ebx,4
|
||||
dec ebp
|
||||
jz NEAR L$028aw_end
|
||||
; Tail Round 4
|
||||
mov ecx,DWORD [esi]
|
||||
mov edx,DWORD [edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [ebx],ecx
|
||||
add esi,4
|
||||
add edi,4
|
||||
add ebx,4
|
||||
dec ebp
|
||||
jz NEAR L$028aw_end
|
||||
; Tail Round 5
|
||||
mov ecx,DWORD [esi]
|
||||
mov edx,DWORD [edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [ebx],ecx
|
||||
add esi,4
|
||||
add edi,4
|
||||
add ebx,4
|
||||
dec ebp
|
||||
jz NEAR L$028aw_end
|
||||
; Tail Round 6
|
||||
mov ecx,DWORD [esi]
|
||||
mov edx,DWORD [edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [ebx],ecx
|
||||
add esi,4
|
||||
add edi,4
|
||||
add ebx,4
|
||||
L$028aw_end:
|
||||
cmp DWORD [36+esp],0
|
||||
je NEAR L$029pw_end
|
||||
mov ebp,DWORD [36+esp]
|
||||
cmp ebp,0
|
||||
je NEAR L$029pw_end
|
||||
jge NEAR L$030pw_pos
|
||||
; pw_neg
|
||||
mov edx,0
|
||||
sub edx,ebp
|
||||
mov ebp,edx
|
||||
and ebp,4294967288
|
||||
jz NEAR L$031pw_neg_finish
|
||||
L$032pw_neg_loop:
|
||||
; dl<0 Round 0
|
||||
mov ecx,0
|
||||
mov edx,DWORD [edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [ebx],ecx
|
||||
; dl<0 Round 1
|
||||
mov ecx,0
|
||||
mov edx,DWORD [4+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [4+ebx],ecx
|
||||
; dl<0 Round 2
|
||||
mov ecx,0
|
||||
mov edx,DWORD [8+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [8+ebx],ecx
|
||||
; dl<0 Round 3
|
||||
mov ecx,0
|
||||
mov edx,DWORD [12+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [12+ebx],ecx
|
||||
; dl<0 Round 4
|
||||
mov ecx,0
|
||||
mov edx,DWORD [16+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [16+ebx],ecx
|
||||
; dl<0 Round 5
|
||||
mov ecx,0
|
||||
mov edx,DWORD [20+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [20+ebx],ecx
|
||||
; dl<0 Round 6
|
||||
mov ecx,0
|
||||
mov edx,DWORD [24+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [24+ebx],ecx
|
||||
; dl<0 Round 7
|
||||
mov ecx,0
|
||||
mov edx,DWORD [28+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [28+ebx],ecx
|
||||
;
|
||||
add edi,32
|
||||
add ebx,32
|
||||
sub ebp,8
|
||||
jnz NEAR L$032pw_neg_loop
|
||||
L$031pw_neg_finish:
|
||||
mov edx,DWORD [36+esp]
|
||||
mov ebp,0
|
||||
sub ebp,edx
|
||||
and ebp,7
|
||||
jz NEAR L$029pw_end
|
||||
; dl<0 Tail Round 0
|
||||
mov ecx,0
|
||||
mov edx,DWORD [edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
dec ebp
|
||||
mov DWORD [ebx],ecx
|
||||
jz NEAR L$029pw_end
|
||||
; dl<0 Tail Round 1
|
||||
mov ecx,0
|
||||
mov edx,DWORD [4+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
dec ebp
|
||||
mov DWORD [4+ebx],ecx
|
||||
jz NEAR L$029pw_end
|
||||
; dl<0 Tail Round 2
|
||||
mov ecx,0
|
||||
mov edx,DWORD [8+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
dec ebp
|
||||
mov DWORD [8+ebx],ecx
|
||||
jz NEAR L$029pw_end
|
||||
; dl<0 Tail Round 3
|
||||
mov ecx,0
|
||||
mov edx,DWORD [12+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
dec ebp
|
||||
mov DWORD [12+ebx],ecx
|
||||
jz NEAR L$029pw_end
|
||||
; dl<0 Tail Round 4
|
||||
mov ecx,0
|
||||
mov edx,DWORD [16+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
dec ebp
|
||||
mov DWORD [16+ebx],ecx
|
||||
jz NEAR L$029pw_end
|
||||
; dl<0 Tail Round 5
|
||||
mov ecx,0
|
||||
mov edx,DWORD [20+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
dec ebp
|
||||
mov DWORD [20+ebx],ecx
|
||||
jz NEAR L$029pw_end
|
||||
; dl<0 Tail Round 6
|
||||
mov ecx,0
|
||||
mov edx,DWORD [24+edi]
|
||||
sub ecx,eax
|
||||
mov eax,0
|
||||
adc eax,eax
|
||||
sub ecx,edx
|
||||
adc eax,0
|
||||
mov DWORD [24+ebx],ecx
|
||||
jmp NEAR L$029pw_end
|
||||
L$030pw_pos:
|
||||
and ebp,4294967288
|
||||
jz NEAR L$033pw_pos_finish
|
||||
L$034pw_pos_loop:
|
||||
; dl>0 Round 0
|
||||
mov ecx,DWORD [esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [ebx],ecx
|
||||
jnc NEAR L$035pw_nc0
|
||||
; dl>0 Round 1
|
||||
mov ecx,DWORD [4+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [4+ebx],ecx
|
||||
jnc NEAR L$036pw_nc1
|
||||
; dl>0 Round 2
|
||||
mov ecx,DWORD [8+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [8+ebx],ecx
|
||||
jnc NEAR L$037pw_nc2
|
||||
; dl>0 Round 3
|
||||
mov ecx,DWORD [12+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [12+ebx],ecx
|
||||
jnc NEAR L$038pw_nc3
|
||||
; dl>0 Round 4
|
||||
mov ecx,DWORD [16+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [16+ebx],ecx
|
||||
jnc NEAR L$039pw_nc4
|
||||
; dl>0 Round 5
|
||||
mov ecx,DWORD [20+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [20+ebx],ecx
|
||||
jnc NEAR L$040pw_nc5
|
||||
; dl>0 Round 6
|
||||
mov ecx,DWORD [24+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [24+ebx],ecx
|
||||
jnc NEAR L$041pw_nc6
|
||||
; dl>0 Round 7
|
||||
mov ecx,DWORD [28+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [28+ebx],ecx
|
||||
jnc NEAR L$042pw_nc7
|
||||
;
|
||||
add esi,32
|
||||
add ebx,32
|
||||
sub ebp,8
|
||||
jnz NEAR L$034pw_pos_loop
|
||||
L$033pw_pos_finish:
|
||||
mov ebp,DWORD [36+esp]
|
||||
and ebp,7
|
||||
jz NEAR L$029pw_end
|
||||
; dl>0 Tail Round 0
|
||||
mov ecx,DWORD [esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [ebx],ecx
|
||||
jnc NEAR L$043pw_tail_nc0
|
||||
dec ebp
|
||||
jz NEAR L$029pw_end
|
||||
; dl>0 Tail Round 1
|
||||
mov ecx,DWORD [4+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [4+ebx],ecx
|
||||
jnc NEAR L$044pw_tail_nc1
|
||||
dec ebp
|
||||
jz NEAR L$029pw_end
|
||||
; dl>0 Tail Round 2
|
||||
mov ecx,DWORD [8+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [8+ebx],ecx
|
||||
jnc NEAR L$045pw_tail_nc2
|
||||
dec ebp
|
||||
jz NEAR L$029pw_end
|
||||
; dl>0 Tail Round 3
|
||||
mov ecx,DWORD [12+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [12+ebx],ecx
|
||||
jnc NEAR L$046pw_tail_nc3
|
||||
dec ebp
|
||||
jz NEAR L$029pw_end
|
||||
; dl>0 Tail Round 4
|
||||
mov ecx,DWORD [16+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [16+ebx],ecx
|
||||
jnc NEAR L$047pw_tail_nc4
|
||||
dec ebp
|
||||
jz NEAR L$029pw_end
|
||||
; dl>0 Tail Round 5
|
||||
mov ecx,DWORD [20+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [20+ebx],ecx
|
||||
jnc NEAR L$048pw_tail_nc5
|
||||
dec ebp
|
||||
jz NEAR L$029pw_end
|
||||
; dl>0 Tail Round 6
|
||||
mov ecx,DWORD [24+esi]
|
||||
sub ecx,eax
|
||||
mov DWORD [24+ebx],ecx
|
||||
jnc NEAR L$049pw_tail_nc6
|
||||
mov eax,1
|
||||
jmp NEAR L$029pw_end
|
||||
L$050pw_nc_loop:
|
||||
mov ecx,DWORD [esi]
|
||||
mov DWORD [ebx],ecx
|
||||
L$035pw_nc0:
|
||||
mov ecx,DWORD [4+esi]
|
||||
mov DWORD [4+ebx],ecx
|
||||
L$036pw_nc1:
|
||||
mov ecx,DWORD [8+esi]
|
||||
mov DWORD [8+ebx],ecx
|
||||
L$037pw_nc2:
|
||||
mov ecx,DWORD [12+esi]
|
||||
mov DWORD [12+ebx],ecx
|
||||
L$038pw_nc3:
|
||||
mov ecx,DWORD [16+esi]
|
||||
mov DWORD [16+ebx],ecx
|
||||
L$039pw_nc4:
|
||||
mov ecx,DWORD [20+esi]
|
||||
mov DWORD [20+ebx],ecx
|
||||
L$040pw_nc5:
|
||||
mov ecx,DWORD [24+esi]
|
||||
mov DWORD [24+ebx],ecx
|
||||
L$041pw_nc6:
|
||||
mov ecx,DWORD [28+esi]
|
||||
mov DWORD [28+ebx],ecx
|
||||
L$042pw_nc7:
|
||||
;
|
||||
add esi,32
|
||||
add ebx,32
|
||||
sub ebp,8
|
||||
jnz NEAR L$050pw_nc_loop
|
||||
mov ebp,DWORD [36+esp]
|
||||
and ebp,7
|
||||
jz NEAR L$051pw_nc_end
|
||||
mov ecx,DWORD [esi]
|
||||
mov DWORD [ebx],ecx
|
||||
L$043pw_tail_nc0:
|
||||
dec ebp
|
||||
jz NEAR L$051pw_nc_end
|
||||
mov ecx,DWORD [4+esi]
|
||||
mov DWORD [4+ebx],ecx
|
||||
L$044pw_tail_nc1:
|
||||
dec ebp
|
||||
jz NEAR L$051pw_nc_end
|
||||
mov ecx,DWORD [8+esi]
|
||||
mov DWORD [8+ebx],ecx
|
||||
L$045pw_tail_nc2:
|
||||
dec ebp
|
||||
jz NEAR L$051pw_nc_end
|
||||
mov ecx,DWORD [12+esi]
|
||||
mov DWORD [12+ebx],ecx
|
||||
L$046pw_tail_nc3:
|
||||
dec ebp
|
||||
jz NEAR L$051pw_nc_end
|
||||
mov ecx,DWORD [16+esi]
|
||||
mov DWORD [16+ebx],ecx
|
||||
L$047pw_tail_nc4:
|
||||
dec ebp
|
||||
jz NEAR L$051pw_nc_end
|
||||
mov ecx,DWORD [20+esi]
|
||||
mov DWORD [20+ebx],ecx
|
||||
L$048pw_tail_nc5:
|
||||
dec ebp
|
||||
jz NEAR L$051pw_nc_end
|
||||
mov ecx,DWORD [24+esi]
|
||||
mov DWORD [24+ebx],ecx
|
||||
L$049pw_tail_nc6:
|
||||
L$051pw_nc_end:
|
||||
mov eax,0
|
||||
L$029pw_end:
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
segment .bss
|
||||
common _OPENSSL_ia32cap_P 16
|
||||
|
@ -20,715 +20,16 @@ section .text code align=64
|
||||
%else
|
||||
section .text code
|
||||
%endif
|
||||
global _gcm_gmult_4bit_mmx
|
||||
align 16
|
||||
_gcm_gmult_4bit_mmx:
|
||||
L$_gcm_gmult_4bit_mmx_begin:
|
||||
push ebp
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
mov edi,DWORD [20+esp]
|
||||
mov esi,DWORD [24+esp]
|
||||
call L$000pic_point
|
||||
L$000pic_point:
|
||||
pop eax
|
||||
lea eax,[(L$rem_4bit-L$000pic_point)+eax]
|
||||
movzx ebx,BYTE [15+edi]
|
||||
xor ecx,ecx
|
||||
mov edx,ebx
|
||||
mov cl,dl
|
||||
mov ebp,14
|
||||
shl cl,4
|
||||
and edx,240
|
||||
movq mm0,[8+ecx*1+esi]
|
||||
movq mm1,[ecx*1+esi]
|
||||
movd ebx,mm0
|
||||
jmp NEAR L$001mmx_loop
|
||||
align 16
|
||||
L$001mmx_loop:
|
||||
psrlq mm0,4
|
||||
and ebx,15
|
||||
movq mm2,mm1
|
||||
psrlq mm1,4
|
||||
pxor mm0,[8+edx*1+esi]
|
||||
mov cl,BYTE [ebp*1+edi]
|
||||
psllq mm2,60
|
||||
pxor mm1,[ebx*8+eax]
|
||||
dec ebp
|
||||
movd ebx,mm0
|
||||
pxor mm1,[edx*1+esi]
|
||||
mov edx,ecx
|
||||
pxor mm0,mm2
|
||||
js NEAR L$002mmx_break
|
||||
shl cl,4
|
||||
and ebx,15
|
||||
psrlq mm0,4
|
||||
and edx,240
|
||||
movq mm2,mm1
|
||||
psrlq mm1,4
|
||||
pxor mm0,[8+ecx*1+esi]
|
||||
psllq mm2,60
|
||||
pxor mm1,[ebx*8+eax]
|
||||
movd ebx,mm0
|
||||
pxor mm1,[ecx*1+esi]
|
||||
pxor mm0,mm2
|
||||
jmp NEAR L$001mmx_loop
|
||||
align 16
|
||||
L$002mmx_break:
|
||||
shl cl,4
|
||||
and ebx,15
|
||||
psrlq mm0,4
|
||||
and edx,240
|
||||
movq mm2,mm1
|
||||
psrlq mm1,4
|
||||
pxor mm0,[8+ecx*1+esi]
|
||||
psllq mm2,60
|
||||
pxor mm1,[ebx*8+eax]
|
||||
movd ebx,mm0
|
||||
pxor mm1,[ecx*1+esi]
|
||||
pxor mm0,mm2
|
||||
psrlq mm0,4
|
||||
and ebx,15
|
||||
movq mm2,mm1
|
||||
psrlq mm1,4
|
||||
pxor mm0,[8+edx*1+esi]
|
||||
psllq mm2,60
|
||||
pxor mm1,[ebx*8+eax]
|
||||
movd ebx,mm0
|
||||
pxor mm1,[edx*1+esi]
|
||||
pxor mm0,mm2
|
||||
psrlq mm0,32
|
||||
movd edx,mm1
|
||||
psrlq mm1,32
|
||||
movd ecx,mm0
|
||||
movd ebp,mm1
|
||||
bswap ebx
|
||||
bswap edx
|
||||
bswap ecx
|
||||
bswap ebp
|
||||
emms
|
||||
mov DWORD [12+edi],ebx
|
||||
mov DWORD [4+edi],edx
|
||||
mov DWORD [8+edi],ecx
|
||||
mov DWORD [edi],ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
global _gcm_ghash_4bit_mmx
|
||||
align 16
|
||||
_gcm_ghash_4bit_mmx:
|
||||
L$_gcm_ghash_4bit_mmx_begin:
|
||||
push ebp
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
mov eax,DWORD [20+esp]
|
||||
mov ebx,DWORD [24+esp]
|
||||
mov ecx,DWORD [28+esp]
|
||||
mov edx,DWORD [32+esp]
|
||||
mov ebp,esp
|
||||
call L$003pic_point
|
||||
L$003pic_point:
|
||||
pop esi
|
||||
lea esi,[(L$rem_8bit-L$003pic_point)+esi]
|
||||
sub esp,544
|
||||
and esp,-64
|
||||
sub esp,16
|
||||
add edx,ecx
|
||||
mov DWORD [544+esp],eax
|
||||
mov DWORD [552+esp],edx
|
||||
mov DWORD [556+esp],ebp
|
||||
add ebx,128
|
||||
lea edi,[144+esp]
|
||||
lea ebp,[400+esp]
|
||||
mov edx,DWORD [ebx-120]
|
||||
movq mm0,[ebx-120]
|
||||
movq mm3,[ebx-128]
|
||||
shl edx,4
|
||||
mov BYTE [esp],dl
|
||||
mov edx,DWORD [ebx-104]
|
||||
movq mm2,[ebx-104]
|
||||
movq mm5,[ebx-112]
|
||||
movq [edi-128],mm0
|
||||
psrlq mm0,4
|
||||
movq [edi],mm3
|
||||
movq mm7,mm3
|
||||
psrlq mm3,4
|
||||
shl edx,4
|
||||
mov BYTE [1+esp],dl
|
||||
mov edx,DWORD [ebx-88]
|
||||
movq mm1,[ebx-88]
|
||||
psllq mm7,60
|
||||
movq mm4,[ebx-96]
|
||||
por mm0,mm7
|
||||
movq [edi-120],mm2
|
||||
psrlq mm2,4
|
||||
movq [8+edi],mm5
|
||||
movq mm6,mm5
|
||||
movq [ebp-128],mm0
|
||||
psrlq mm5,4
|
||||
movq [ebp],mm3
|
||||
shl edx,4
|
||||
mov BYTE [2+esp],dl
|
||||
mov edx,DWORD [ebx-72]
|
||||
movq mm0,[ebx-72]
|
||||
psllq mm6,60
|
||||
movq mm3,[ebx-80]
|
||||
por mm2,mm6
|
||||
movq [edi-112],mm1
|
||||
psrlq mm1,4
|
||||
movq [16+edi],mm4
|
||||
movq mm7,mm4
|
||||
movq [ebp-120],mm2
|
||||
psrlq mm4,4
|
||||
movq [8+ebp],mm5
|
||||
shl edx,4
|
||||
mov BYTE [3+esp],dl
|
||||
mov edx,DWORD [ebx-56]
|
||||
movq mm2,[ebx-56]
|
||||
psllq mm7,60
|
||||
movq mm5,[ebx-64]
|
||||
por mm1,mm7
|
||||
movq [edi-104],mm0
|
||||
psrlq mm0,4
|
||||
movq [24+edi],mm3
|
||||
movq mm6,mm3
|
||||
movq [ebp-112],mm1
|
||||
psrlq mm3,4
|
||||
movq [16+ebp],mm4
|
||||
shl edx,4
|
||||
mov BYTE [4+esp],dl
|
||||
mov edx,DWORD [ebx-40]
|
||||
movq mm1,[ebx-40]
|
||||
psllq mm6,60
|
||||
movq mm4,[ebx-48]
|
||||
por mm0,mm6
|
||||
movq [edi-96],mm2
|
||||
psrlq mm2,4
|
||||
movq [32+edi],mm5
|
||||
movq mm7,mm5
|
||||
movq [ebp-104],mm0
|
||||
psrlq mm5,4
|
||||
movq [24+ebp],mm3
|
||||
shl edx,4
|
||||
mov BYTE [5+esp],dl
|
||||
mov edx,DWORD [ebx-24]
|
||||
movq mm0,[ebx-24]
|
||||
psllq mm7,60
|
||||
movq mm3,[ebx-32]
|
||||
por mm2,mm7
|
||||
movq [edi-88],mm1
|
||||
psrlq mm1,4
|
||||
movq [40+edi],mm4
|
||||
movq mm6,mm4
|
||||
movq [ebp-96],mm2
|
||||
psrlq mm4,4
|
||||
movq [32+ebp],mm5
|
||||
shl edx,4
|
||||
mov BYTE [6+esp],dl
|
||||
mov edx,DWORD [ebx-8]
|
||||
movq mm2,[ebx-8]
|
||||
psllq mm6,60
|
||||
movq mm5,[ebx-16]
|
||||
por mm1,mm6
|
||||
movq [edi-80],mm0
|
||||
psrlq mm0,4
|
||||
movq [48+edi],mm3
|
||||
movq mm7,mm3
|
||||
movq [ebp-88],mm1
|
||||
psrlq mm3,4
|
||||
movq [40+ebp],mm4
|
||||
shl edx,4
|
||||
mov BYTE [7+esp],dl
|
||||
mov edx,DWORD [8+ebx]
|
||||
movq mm1,[8+ebx]
|
||||
psllq mm7,60
|
||||
movq mm4,[ebx]
|
||||
por mm0,mm7
|
||||
movq [edi-72],mm2
|
||||
psrlq mm2,4
|
||||
movq [56+edi],mm5
|
||||
movq mm6,mm5
|
||||
movq [ebp-80],mm0
|
||||
psrlq mm5,4
|
||||
movq [48+ebp],mm3
|
||||
shl edx,4
|
||||
mov BYTE [8+esp],dl
|
||||
mov edx,DWORD [24+ebx]
|
||||
movq mm0,[24+ebx]
|
||||
psllq mm6,60
|
||||
movq mm3,[16+ebx]
|
||||
por mm2,mm6
|
||||
movq [edi-64],mm1
|
||||
psrlq mm1,4
|
||||
movq [64+edi],mm4
|
||||
movq mm7,mm4
|
||||
movq [ebp-72],mm2
|
||||
psrlq mm4,4
|
||||
movq [56+ebp],mm5
|
||||
shl edx,4
|
||||
mov BYTE [9+esp],dl
|
||||
mov edx,DWORD [40+ebx]
|
||||
movq mm2,[40+ebx]
|
||||
psllq mm7,60
|
||||
movq mm5,[32+ebx]
|
||||
por mm1,mm7
|
||||
movq [edi-56],mm0
|
||||
psrlq mm0,4
|
||||
movq [72+edi],mm3
|
||||
movq mm6,mm3
|
||||
movq [ebp-64],mm1
|
||||
psrlq mm3,4
|
||||
movq [64+ebp],mm4
|
||||
shl edx,4
|
||||
mov BYTE [10+esp],dl
|
||||
mov edx,DWORD [56+ebx]
|
||||
movq mm1,[56+ebx]
|
||||
psllq mm6,60
|
||||
movq mm4,[48+ebx]
|
||||
por mm0,mm6
|
||||
movq [edi-48],mm2
|
||||
psrlq mm2,4
|
||||
movq [80+edi],mm5
|
||||
movq mm7,mm5
|
||||
movq [ebp-56],mm0
|
||||
psrlq mm5,4
|
||||
movq [72+ebp],mm3
|
||||
shl edx,4
|
||||
mov BYTE [11+esp],dl
|
||||
mov edx,DWORD [72+ebx]
|
||||
movq mm0,[72+ebx]
|
||||
psllq mm7,60
|
||||
movq mm3,[64+ebx]
|
||||
por mm2,mm7
|
||||
movq [edi-40],mm1
|
||||
psrlq mm1,4
|
||||
movq [88+edi],mm4
|
||||
movq mm6,mm4
|
||||
movq [ebp-48],mm2
|
||||
psrlq mm4,4
|
||||
movq [80+ebp],mm5
|
||||
shl edx,4
|
||||
mov BYTE [12+esp],dl
|
||||
mov edx,DWORD [88+ebx]
|
||||
movq mm2,[88+ebx]
|
||||
psllq mm6,60
|
||||
movq mm5,[80+ebx]
|
||||
por mm1,mm6
|
||||
movq [edi-32],mm0
|
||||
psrlq mm0,4
|
||||
movq [96+edi],mm3
|
||||
movq mm7,mm3
|
||||
movq [ebp-40],mm1
|
||||
psrlq mm3,4
|
||||
movq [88+ebp],mm4
|
||||
shl edx,4
|
||||
mov BYTE [13+esp],dl
|
||||
mov edx,DWORD [104+ebx]
|
||||
movq mm1,[104+ebx]
|
||||
psllq mm7,60
|
||||
movq mm4,[96+ebx]
|
||||
por mm0,mm7
|
||||
movq [edi-24],mm2
|
||||
psrlq mm2,4
|
||||
movq [104+edi],mm5
|
||||
movq mm6,mm5
|
||||
movq [ebp-32],mm0
|
||||
psrlq mm5,4
|
||||
movq [96+ebp],mm3
|
||||
shl edx,4
|
||||
mov BYTE [14+esp],dl
|
||||
mov edx,DWORD [120+ebx]
|
||||
movq mm0,[120+ebx]
|
||||
psllq mm6,60
|
||||
movq mm3,[112+ebx]
|
||||
por mm2,mm6
|
||||
movq [edi-16],mm1
|
||||
psrlq mm1,4
|
||||
movq [112+edi],mm4
|
||||
movq mm7,mm4
|
||||
movq [ebp-24],mm2
|
||||
psrlq mm4,4
|
||||
movq [104+ebp],mm5
|
||||
shl edx,4
|
||||
mov BYTE [15+esp],dl
|
||||
psllq mm7,60
|
||||
por mm1,mm7
|
||||
movq [edi-8],mm0
|
||||
psrlq mm0,4
|
||||
movq [120+edi],mm3
|
||||
movq mm6,mm3
|
||||
movq [ebp-16],mm1
|
||||
psrlq mm3,4
|
||||
movq [112+ebp],mm4
|
||||
psllq mm6,60
|
||||
por mm0,mm6
|
||||
movq [ebp-8],mm0
|
||||
movq [120+ebp],mm3
|
||||
movq mm6,[eax]
|
||||
mov ebx,DWORD [8+eax]
|
||||
mov edx,DWORD [12+eax]
|
||||
align 16
|
||||
L$004outer:
|
||||
xor edx,DWORD [12+ecx]
|
||||
xor ebx,DWORD [8+ecx]
|
||||
pxor mm6,[ecx]
|
||||
lea ecx,[16+ecx]
|
||||
mov DWORD [536+esp],ebx
|
||||
movq [528+esp],mm6
|
||||
mov DWORD [548+esp],ecx
|
||||
xor eax,eax
|
||||
rol edx,8
|
||||
mov al,dl
|
||||
mov ebp,eax
|
||||
and al,15
|
||||
shr ebp,4
|
||||
pxor mm0,mm0
|
||||
rol edx,8
|
||||
pxor mm1,mm1
|
||||
pxor mm2,mm2
|
||||
movq mm7,[16+eax*8+esp]
|
||||
movq mm6,[144+eax*8+esp]
|
||||
mov al,dl
|
||||
movd ebx,mm7
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov edi,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+ebp*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
shr edi,4
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+ebp*8+esp]
|
||||
xor bl,BYTE [ebp*1+esp]
|
||||
mov al,dl
|
||||
movd ecx,mm7
|
||||
movzx ebx,bl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov ebp,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+edi*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
shr ebp,4
|
||||
pinsrw mm2,WORD [ebx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+edi*8+esp]
|
||||
xor cl,BYTE [edi*1+esp]
|
||||
mov al,dl
|
||||
mov edx,DWORD [536+esp]
|
||||
movd ebx,mm7
|
||||
movzx ecx,cl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov edi,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+ebp*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm2
|
||||
shr edi,4
|
||||
pinsrw mm1,WORD [ecx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+ebp*8+esp]
|
||||
xor bl,BYTE [ebp*1+esp]
|
||||
mov al,dl
|
||||
movd ecx,mm7
|
||||
movzx ebx,bl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov ebp,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+edi*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm1
|
||||
shr ebp,4
|
||||
pinsrw mm0,WORD [ebx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+edi*8+esp]
|
||||
xor cl,BYTE [edi*1+esp]
|
||||
mov al,dl
|
||||
movd ebx,mm7
|
||||
movzx ecx,cl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov edi,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+ebp*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm0
|
||||
shr edi,4
|
||||
pinsrw mm2,WORD [ecx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+ebp*8+esp]
|
||||
xor bl,BYTE [ebp*1+esp]
|
||||
mov al,dl
|
||||
movd ecx,mm7
|
||||
movzx ebx,bl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov ebp,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+edi*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm2
|
||||
shr ebp,4
|
||||
pinsrw mm1,WORD [ebx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+edi*8+esp]
|
||||
xor cl,BYTE [edi*1+esp]
|
||||
mov al,dl
|
||||
mov edx,DWORD [532+esp]
|
||||
movd ebx,mm7
|
||||
movzx ecx,cl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov edi,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+ebp*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm1
|
||||
shr edi,4
|
||||
pinsrw mm0,WORD [ecx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+ebp*8+esp]
|
||||
xor bl,BYTE [ebp*1+esp]
|
||||
mov al,dl
|
||||
movd ecx,mm7
|
||||
movzx ebx,bl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov ebp,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+edi*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm0
|
||||
shr ebp,4
|
||||
pinsrw mm2,WORD [ebx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+edi*8+esp]
|
||||
xor cl,BYTE [edi*1+esp]
|
||||
mov al,dl
|
||||
movd ebx,mm7
|
||||
movzx ecx,cl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov edi,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+ebp*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm2
|
||||
shr edi,4
|
||||
pinsrw mm1,WORD [ecx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+ebp*8+esp]
|
||||
xor bl,BYTE [ebp*1+esp]
|
||||
mov al,dl
|
||||
movd ecx,mm7
|
||||
movzx ebx,bl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov ebp,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+edi*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm1
|
||||
shr ebp,4
|
||||
pinsrw mm0,WORD [ebx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+edi*8+esp]
|
||||
xor cl,BYTE [edi*1+esp]
|
||||
mov al,dl
|
||||
mov edx,DWORD [528+esp]
|
||||
movd ebx,mm7
|
||||
movzx ecx,cl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov edi,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+ebp*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm0
|
||||
shr edi,4
|
||||
pinsrw mm2,WORD [ecx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+ebp*8+esp]
|
||||
xor bl,BYTE [ebp*1+esp]
|
||||
mov al,dl
|
||||
movd ecx,mm7
|
||||
movzx ebx,bl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov ebp,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+edi*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm2
|
||||
shr ebp,4
|
||||
pinsrw mm1,WORD [ebx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+edi*8+esp]
|
||||
xor cl,BYTE [edi*1+esp]
|
||||
mov al,dl
|
||||
movd ebx,mm7
|
||||
movzx ecx,cl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov edi,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+ebp*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm1
|
||||
shr edi,4
|
||||
pinsrw mm0,WORD [ecx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+ebp*8+esp]
|
||||
xor bl,BYTE [ebp*1+esp]
|
||||
mov al,dl
|
||||
movd ecx,mm7
|
||||
movzx ebx,bl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov ebp,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+edi*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm0
|
||||
shr ebp,4
|
||||
pinsrw mm2,WORD [ebx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
rol edx,8
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+edi*8+esp]
|
||||
xor cl,BYTE [edi*1+esp]
|
||||
mov al,dl
|
||||
mov edx,DWORD [524+esp]
|
||||
movd ebx,mm7
|
||||
movzx ecx,cl
|
||||
psrlq mm7,8
|
||||
movq mm3,mm6
|
||||
mov edi,eax
|
||||
psrlq mm6,8
|
||||
pxor mm7,[272+ebp*8+esp]
|
||||
and al,15
|
||||
psllq mm3,56
|
||||
pxor mm6,mm2
|
||||
shr edi,4
|
||||
pinsrw mm1,WORD [ecx*2+esi],2
|
||||
pxor mm7,[16+eax*8+esp]
|
||||
pxor mm6,[144+eax*8+esp]
|
||||
xor bl,BYTE [ebp*1+esp]
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[400+ebp*8+esp]
|
||||
movzx ebx,bl
|
||||
pxor mm2,mm2
|
||||
psllq mm1,4
|
||||
movd ecx,mm7
|
||||
psrlq mm7,4
|
||||
movq mm3,mm6
|
||||
psrlq mm6,4
|
||||
shl ecx,4
|
||||
pxor mm7,[16+edi*8+esp]
|
||||
psllq mm3,60
|
||||
movzx ecx,cl
|
||||
pxor mm7,mm3
|
||||
pxor mm6,[144+edi*8+esp]
|
||||
pinsrw mm0,WORD [ebx*2+esi],2
|
||||
pxor mm6,mm1
|
||||
movd edx,mm7
|
||||
pinsrw mm2,WORD [ecx*2+esi],3
|
||||
psllq mm0,12
|
||||
pxor mm6,mm0
|
||||
psrlq mm7,32
|
||||
pxor mm6,mm2
|
||||
mov ecx,DWORD [548+esp]
|
||||
movd ebx,mm7
|
||||
movq mm3,mm6
|
||||
psllw mm6,8
|
||||
psrlw mm3,8
|
||||
por mm6,mm3
|
||||
bswap edx
|
||||
pshufw mm6,mm6,27
|
||||
bswap ebx
|
||||
cmp ecx,DWORD [552+esp]
|
||||
jne NEAR L$004outer
|
||||
mov eax,DWORD [544+esp]
|
||||
mov DWORD [12+eax],edx
|
||||
mov DWORD [8+eax],ebx
|
||||
movq [eax],mm6
|
||||
mov esp,DWORD [556+esp]
|
||||
emms
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
global _gcm_init_clmul
|
||||
align 16
|
||||
_gcm_init_clmul:
|
||||
L$_gcm_init_clmul_begin:
|
||||
mov edx,DWORD [4+esp]
|
||||
mov eax,DWORD [8+esp]
|
||||
call L$005pic
|
||||
L$005pic:
|
||||
call L$000pic
|
||||
L$000pic:
|
||||
pop ecx
|
||||
lea ecx,[(L$bswap-L$005pic)+ecx]
|
||||
lea ecx,[(L$bswap-L$000pic)+ecx]
|
||||
movdqu xmm2,[eax]
|
||||
pshufd xmm2,xmm2,78
|
||||
pshufd xmm4,xmm2,255
|
||||
@ -792,10 +93,10 @@ _gcm_gmult_clmul:
|
||||
L$_gcm_gmult_clmul_begin:
|
||||
mov eax,DWORD [4+esp]
|
||||
mov edx,DWORD [8+esp]
|
||||
call L$006pic
|
||||
L$006pic:
|
||||
call L$001pic
|
||||
L$001pic:
|
||||
pop ecx
|
||||
lea ecx,[(L$bswap-L$006pic)+ecx]
|
||||
lea ecx,[(L$bswap-L$001pic)+ecx]
|
||||
movdqu xmm0,[eax]
|
||||
movdqa xmm5,[ecx]
|
||||
movups xmm2,[edx]
|
||||
@ -849,16 +150,16 @@ L$_gcm_ghash_clmul_begin:
|
||||
mov edx,DWORD [24+esp]
|
||||
mov esi,DWORD [28+esp]
|
||||
mov ebx,DWORD [32+esp]
|
||||
call L$007pic
|
||||
L$007pic:
|
||||
call L$002pic
|
||||
L$002pic:
|
||||
pop ecx
|
||||
lea ecx,[(L$bswap-L$007pic)+ecx]
|
||||
lea ecx,[(L$bswap-L$002pic)+ecx]
|
||||
movdqu xmm0,[eax]
|
||||
movdqa xmm5,[ecx]
|
||||
movdqu xmm2,[edx]
|
||||
db 102,15,56,0,197
|
||||
sub ebx,16
|
||||
jz NEAR L$008odd_tail
|
||||
jz NEAR L$003odd_tail
|
||||
movdqu xmm3,[esi]
|
||||
movdqu xmm6,[16+esi]
|
||||
db 102,15,56,0,221
|
||||
@ -875,10 +176,10 @@ db 102,15,58,68,221,0
|
||||
movups xmm2,[16+edx]
|
||||
nop
|
||||
sub ebx,32
|
||||
jbe NEAR L$009even_tail
|
||||
jmp NEAR L$010mod_loop
|
||||
jbe NEAR L$004even_tail
|
||||
jmp NEAR L$005mod_loop
|
||||
align 32
|
||||
L$010mod_loop:
|
||||
L$005mod_loop:
|
||||
pshufd xmm4,xmm0,78
|
||||
movdqa xmm1,xmm0
|
||||
pxor xmm4,xmm0
|
||||
@ -933,8 +234,8 @@ db 102,15,58,68,250,17
|
||||
db 102,15,58,68,221,0
|
||||
lea esi,[32+esi]
|
||||
sub ebx,32
|
||||
ja NEAR L$010mod_loop
|
||||
L$009even_tail:
|
||||
ja NEAR L$005mod_loop
|
||||
L$004even_tail:
|
||||
pshufd xmm4,xmm0,78
|
||||
movdqa xmm1,xmm0
|
||||
pxor xmm4,xmm0
|
||||
@ -973,9 +274,9 @@ db 102,15,58,68,229,16
|
||||
psrlq xmm0,1
|
||||
pxor xmm0,xmm1
|
||||
test ebx,ebx
|
||||
jnz NEAR L$011done
|
||||
jnz NEAR L$006done
|
||||
movups xmm2,[edx]
|
||||
L$008odd_tail:
|
||||
L$003odd_tail:
|
||||
movdqu xmm3,[esi]
|
||||
db 102,15,56,0,221
|
||||
pxor xmm0,xmm3
|
||||
@ -1014,7 +315,7 @@ db 102,15,58,68,220,0
|
||||
pxor xmm0,xmm4
|
||||
psrlq xmm0,1
|
||||
pxor xmm0,xmm1
|
||||
L$011done:
|
||||
L$006done:
|
||||
db 102,15,56,0,197
|
||||
movdqu [eax],xmm0
|
||||
pop edi
|
||||
@ -1026,46 +327,6 @@ align 64
|
||||
L$bswap:
|
||||
db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
|
||||
align 64
|
||||
L$rem_8bit:
|
||||
dw 0,450,900,582,1800,1738,1164,1358
|
||||
dw 3600,4050,3476,3158,2328,2266,2716,2910
|
||||
dw 7200,7650,8100,7782,6952,6890,6316,6510
|
||||
dw 4656,5106,4532,4214,5432,5370,5820,6014
|
||||
dw 14400,14722,15300,14854,16200,16010,15564,15630
|
||||
dw 13904,14226,13780,13334,12632,12442,13020,13086
|
||||
dw 9312,9634,10212,9766,9064,8874,8428,8494
|
||||
dw 10864,11186,10740,10294,11640,11450,12028,12094
|
||||
dw 28800,28994,29444,29382,30600,30282,29708,30158
|
||||
dw 32400,32594,32020,31958,31128,30810,31260,31710
|
||||
dw 27808,28002,28452,28390,27560,27242,26668,27118
|
||||
dw 25264,25458,24884,24822,26040,25722,26172,26622
|
||||
dw 18624,18690,19268,19078,20424,19978,19532,19854
|
||||
dw 18128,18194,17748,17558,16856,16410,16988,17310
|
||||
dw 21728,21794,22372,22182,21480,21034,20588,20910
|
||||
dw 23280,23346,22900,22710,24056,23610,24188,24510
|
||||
dw 57600,57538,57988,58182,58888,59338,58764,58446
|
||||
dw 61200,61138,60564,60758,59416,59866,60316,59998
|
||||
dw 64800,64738,65188,65382,64040,64490,63916,63598
|
||||
dw 62256,62194,61620,61814,62520,62970,63420,63102
|
||||
dw 55616,55426,56004,56070,56904,57226,56780,56334
|
||||
dw 55120,54930,54484,54550,53336,53658,54236,53790
|
||||
dw 50528,50338,50916,50982,49768,50090,49644,49198
|
||||
dw 52080,51890,51444,51510,52344,52666,53244,52798
|
||||
dw 37248,36930,37380,37830,38536,38730,38156,38094
|
||||
dw 40848,40530,39956,40406,39064,39258,39708,39646
|
||||
dw 36256,35938,36388,36838,35496,35690,35116,35054
|
||||
dw 33712,33394,32820,33270,33976,34170,34620,34558
|
||||
dw 43456,43010,43588,43910,44744,44810,44364,44174
|
||||
dw 42960,42514,42068,42390,41176,41242,41820,41630
|
||||
dw 46560,46114,46692,47014,45800,45866,45420,45230
|
||||
dw 48112,47666,47220,47542,48376,48442,49020,48830
|
||||
align 64
|
||||
L$rem_4bit:
|
||||
dd 0,0,0,471859200,0,943718400,0,610271232
|
||||
dd 0,1887436800,0,1822425088,0,1220542464,0,1423966208
|
||||
dd 0,3774873600,0,4246732800,0,3644850176,0,3311403008
|
||||
dd 0,2441084928,0,2376073216,0,2847932416,0,3051356160
|
||||
db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
|
||||
db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
|
||||
db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
|
||||
|
@ -20,7 +20,7 @@ section .text code align=64
|
||||
%else
|
||||
section .text code
|
||||
%endif
|
||||
%ifndef NDEBUG
|
||||
%ifdef BORINGSSL_DISPATCH_TEST
|
||||
extern _BORINGSSL_function_hit
|
||||
%endif
|
||||
align 64
|
||||
@ -474,7 +474,7 @@ L$_vpaes_set_encrypt_key_begin:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
%ifndef NDEBUG
|
||||
%ifdef BORINGSSL_DISPATCH_TEST
|
||||
push ebx
|
||||
push edx
|
||||
call L$016pic
|
||||
@ -553,7 +553,7 @@ L$_vpaes_encrypt_begin:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
%ifndef NDEBUG
|
||||
%ifdef BORINGSSL_DISPATCH_TEST
|
||||
push ebx
|
||||
push edx
|
||||
call L$019pic
|
||||
|
@ -9,6 +9,8 @@ default rel
|
||||
%ifdef BORINGSSL_PREFIX
|
||||
%include "boringssl_prefix_symbols_nasm.inc"
|
||||
%endif
|
||||
section .text code align=64
|
||||
|
||||
global dummy_chacha20_poly1305_asm
|
||||
|
||||
dummy_chacha20_poly1305_asm:
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user