Merge branch 'master' into dictinct_in_order_optimization

This commit is contained in:
mergify[bot] 2022-06-24 22:52:14 +00:00 committed by GitHub
commit b65cf4e1fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
193 changed files with 13657 additions and 36996 deletions

View File

@ -1,102 +1,21 @@
#!/bin/bash -e
if [[ -n $1 ]]; then
SCALE=$1
else
SCALE=100
fi
TABLE="hits_${SCALE}m_obfuscated"
DATASET="${TABLE}_v1.tar.xz"
TABLE="hits_100m_obfuscated"
QUERIES_FILE="queries.sql"
TRIES=3
# Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'.
mkdir -p clickhouse-benchmark
pushd clickhouse-benchmark
FASTER_DOWNLOAD=wget
if command -v axel >/dev/null; then
FASTER_DOWNLOAD=axel
else
echo "It's recommended to install 'axel' for faster downloads."
# Download the binary
if [[ ! -x clickhouse ]]; then
curl https://clickhouse.com/ | sh
fi
if command -v pixz >/dev/null; then
TAR_PARAMS='-Ipixz'
else
echo "It's recommended to install 'pixz' for faster decompression of the dataset."
fi
mkdir -p clickhouse-benchmark-$SCALE
pushd clickhouse-benchmark-$SCALE
OS=$(uname -s)
ARCH=$(uname -m)
DIR=
if [ "${OS}" = "Linux" ]
then
if [ "${ARCH}" = "x86_64" ]
then
DIR="amd64"
elif [ "${ARCH}" = "aarch64" ]
then
DIR="aarch64"
elif [ "${ARCH}" = "powerpc64le" ]
then
DIR="powerpc64le"
fi
elif [ "${OS}" = "FreeBSD" ]
then
if [ "${ARCH}" = "x86_64" ]
then
DIR="freebsd"
elif [ "${ARCH}" = "aarch64" ]
then
DIR="freebsd-aarch64"
elif [ "${ARCH}" = "powerpc64le" ]
then
DIR="freebsd-powerpc64le"
fi
elif [ "${OS}" = "Darwin" ]
then
if [ "${ARCH}" = "x86_64" ]
then
DIR="macos"
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
then
DIR="macos-aarch64"
fi
fi
if [ -z "${DIR}" ]
then
echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported."
exit 1
fi
URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo
echo "Will download ${URL}"
echo
curl -O "${URL}" && chmod a+x clickhouse || exit 1
echo
echo "Successfully downloaded the ClickHouse binary"
chmod a+x clickhouse
if [[ ! -f $QUERIES_FILE ]]; then
wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/$QUERIES_FILE"
fi
if [[ ! -d data ]]; then
if [[ ! -f $DATASET ]]; then
$FASTER_DOWNLOAD "https://datasets.clickhouse.com/hits/partitions/$DATASET"
fi
tar $TAR_PARAMS --strip-components=1 --directory=. -x -v -f $DATASET
fi
uptime
echo "Starting clickhouse-server"
@ -114,10 +33,20 @@ echo "Waiting for clickhouse-server to start"
for i in {1..30}; do
sleep 1
./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM $TABLE" 2>/dev/null && break || echo '.'
./clickhouse client --query "SELECT 'Ok.'" 2>/dev/null && break || echo -n '.'
if [[ $i == 30 ]]; then exit 1; fi
done
echo "Will download the dataset"
./clickhouse client --max_insert_threads $(nproc || 4) --progress --query "
CREATE OR REPLACE TABLE ${TABLE} ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime)
AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')"
./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM ${TABLE}"
echo "Will prepare the dataset"
./clickhouse client --query "OPTIMIZE TABLE ${TABLE} FINAL"
echo
echo "Will perform benchmark. Results:"
echo
@ -133,7 +62,7 @@ cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(./clickhouse client --max_memory_usage 100G --time --format=Null --query="$query" 2>&1 ||:)
RES=$(./clickhouse client --time --format=Null --query="$query" 2>&1 ||:)
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
done
@ -180,10 +109,10 @@ else
cat /proc/meminfo | grep MemTotal
echo '----RAID Info-------------------'
cat /proc/mdstat
#echo '----PCI-------------------------'
#lspci
#echo '----All Hardware Info-----------'
#lshw
echo '--------------------------------'
fi
echo
echo "Instance type from IMDS (if available):"
curl --connect-timeout 1 http://169.254.169.254/latest/meta-data/instance-type
echo

2
contrib/boringssl vendored

@ -1 +1 @@
Subproject commit 9c0715ce459de443e7b08f270a518c1702f1a380
Subproject commit c1e01a441d6db234f4f12e63a7657d1f9e6db9c1

View File

@ -159,14 +159,12 @@ set(
ios-aarch64/crypto/fipsmodule/sha512-armv8.S
ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
ios-aarch64/crypto/test/trampoline-armv8.S
ios-aarch64/crypto/third_party/sike/asm/fp-armv8.S
)
set(
CRYPTO_ios_arm_SOURCES
ios-arm/crypto/chacha/chacha-armv4.S
ios-arm/crypto/fipsmodule/aes-armv4.S
ios-arm/crypto/fipsmodule/aesv8-armx32.S
ios-arm/crypto/fipsmodule/armv4-mont.S
ios-arm/crypto/fipsmodule/bsaes-armv7.S
@ -192,14 +190,12 @@ set(
linux-aarch64/crypto/fipsmodule/sha512-armv8.S
linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
linux-aarch64/crypto/test/trampoline-armv8.S
linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S
)
set(
CRYPTO_linux_arm_SOURCES
linux-arm/crypto/chacha/chacha-armv4.S
linux-arm/crypto/fipsmodule/aes-armv4.S
linux-arm/crypto/fipsmodule/aesv8-armx32.S
linux-arm/crypto/fipsmodule/armv4-mont.S
linux-arm/crypto/fipsmodule/bsaes-armv7.S
@ -219,13 +215,13 @@ set(
linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
linux-ppc64le/crypto/test/trampoline-ppc.S
)
set(
CRYPTO_linux_x86_SOURCES
linux-x86/crypto/chacha/chacha-x86.S
linux-x86/crypto/fipsmodule/aes-586.S
linux-x86/crypto/fipsmodule/aesni-x86.S
linux-x86/crypto/fipsmodule/bn-586.S
linux-x86/crypto/fipsmodule/co-586.S
@ -246,7 +242,6 @@ set(
linux-x86_64/crypto/chacha/chacha-x86_64.S
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
linux-x86_64/crypto/fipsmodule/aes-x86_64.S
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
@ -263,7 +258,6 @@ set(
linux-x86_64/crypto/fipsmodule/x86_64-mont.S
linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
linux-x86_64/crypto/test/trampoline-x86_64.S
linux-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S"
)
@ -271,7 +265,6 @@ set(
CRYPTO_mac_x86_SOURCES
mac-x86/crypto/chacha/chacha-x86.S
mac-x86/crypto/fipsmodule/aes-586.S
mac-x86/crypto/fipsmodule/aesni-x86.S
mac-x86/crypto/fipsmodule/bn-586.S
mac-x86/crypto/fipsmodule/co-586.S
@ -292,7 +285,6 @@ set(
mac-x86_64/crypto/chacha/chacha-x86_64.S
mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
mac-x86_64/crypto/fipsmodule/aes-x86_64.S
mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
@ -309,7 +301,6 @@ set(
mac-x86_64/crypto/fipsmodule/x86_64-mont.S
mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
mac-x86_64/crypto/test/trampoline-x86_64.S
mac-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
)
set(
@ -331,7 +322,6 @@ set(
CRYPTO_win_x86_SOURCES
win-x86/crypto/chacha/chacha-x86.asm
win-x86/crypto/fipsmodule/aes-586.asm
win-x86/crypto/fipsmodule/aesni-x86.asm
win-x86/crypto/fipsmodule/bn-586.asm
win-x86/crypto/fipsmodule/co-586.asm
@ -352,7 +342,6 @@ set(
win-x86_64/crypto/chacha/chacha-x86_64.asm
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
win-x86_64/crypto/fipsmodule/aes-x86_64.asm
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
@ -369,7 +358,6 @@ set(
win-x86_64/crypto/fipsmodule/x86_64-mont.asm
win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
win-x86_64/crypto/test/trampoline-x86_64.asm
win-x86_64/crypto/third_party/sike/asm/fp-x86_64.asm
)
if(APPLE AND ARCH STREQUAL "aarch64")
@ -401,6 +389,7 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c"
@ -430,6 +419,7 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c"
"${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c"
"${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c"
@ -454,20 +444,22 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c"
"${BORINGSSL_SOURCE_DIR}/crypto/crypto.c"
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c"
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/check.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/dh.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/dh_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/params.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c"
"${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c"
@ -492,8 +484,8 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c"
"${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c"
"${BORINGSSL_SOURCE_DIR}/crypto/mem.c"
@ -519,6 +511,7 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c"
@ -532,15 +525,18 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c"
"${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c"
"${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c"
@ -606,19 +602,11 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pku.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_sxnet.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c"
"${BORINGSSL_SOURCE_DIR}/third_party/fiat/curve25519.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/asm/fp_generic.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/curve_params.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/fpx.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/isogeny.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/sike.c"
)
add_library(
@ -631,6 +619,8 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc"
@ -653,7 +643,6 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc"
@ -674,7 +663,9 @@ add_executable(
"${BORINGSSL_SOURCE_DIR}/tool/client.cc"
"${BORINGSSL_SOURCE_DIR}/tool/const.cc"
"${BORINGSSL_SOURCE_DIR}/tool/digest.cc"
"${BORINGSSL_SOURCE_DIR}/tool/fd.cc"
"${BORINGSSL_SOURCE_DIR}/tool/file.cc"
"${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc"
"${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc"
"${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc"
"${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc"

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -33,6 +33,7 @@ Lone:
.align 5
_ChaCha20_ctr32:
AARCH64_VALID_CALL_TARGET
cbz x2,Labort
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x5,:pg_hi21_nc:_OPENSSL_armcap_P
@ -46,6 +47,7 @@ _ChaCha20_ctr32:
b.ne ChaCha20_neon
Lshort:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -258,6 +260,7 @@ Loop:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
Labort:
ret
@ -314,12 +317,14 @@ Loop_tail:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
ChaCha20_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -700,6 +705,7 @@ Loop_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
Ltail_neon:
@ -809,11 +815,13 @@ Ldone_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
ChaCha20_512_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -1977,6 +1985,7 @@ Ldone_512_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
#endif // !OPENSSL_NO_ASM

View File

@ -32,6 +32,8 @@ Lrcon:
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
@ -200,6 +202,7 @@ Lenc_key_abort:
.align 5
_aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl Lenc_key
@ -233,6 +236,7 @@ Loop_imc:
eor x0,x0,x0 // return value
Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _aes_hw_encrypt
@ -240,6 +244,7 @@ Ldec_key_abort:
.align 5
_aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@ -270,6 +275,7 @@ Loop_enc:
.align 5
_aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@ -300,6 +306,8 @@ Loop_dec:
.align 5
_aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
@ -591,6 +599,8 @@ Lcbc_abort:
.align 5
_aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]

View File

@ -12,6 +12,8 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl _bn_mul_mont
@ -19,6 +21,7 @@
.align 5
_bn_mul_mont:
AARCH64_SIGN_LINK_REGISTER
tst x5,#7
b.eq __bn_sqr8x_mont
tst x5,#3
@ -216,11 +219,14 @@ Lcond_copy:
mov x0,#1
ldp x23,x24,[x29,#48]
ldr x29,[sp],#64
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
__bn_sqr8x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp x1,x2
b.ne __bn_mul4x_mont
Lsqr8x_mont:
@ -974,11 +980,16 @@ Lsqr8x_done:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
__bn_mul4x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
// return address.
stp x29,x30,[sp,#-128]!
add x29,sp,#0
stp x19,x20,[sp,#16]
@ -1412,6 +1423,8 @@ Lmul4x_done:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0

View File

@ -12,6 +12,8 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl _gcm_init_neon
@ -19,6 +21,7 @@
.align 4
_gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
@ -44,6 +47,7 @@ _gcm_init_neon:
.align 4
_gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
@ -63,6 +67,7 @@ _gcm_gmult_neon:
.align 4
_gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]

View File

@ -21,6 +21,7 @@
.align 4
_gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
@ -72,6 +73,7 @@ _gcm_init_v8:
.align 4
_gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
@ -114,6 +116,7 @@ _gcm_gmult_v8:
.align 4
_gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have

View File

@ -22,6 +22,8 @@
.align 6
_sha1_block_data_order:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
#else
@ -1089,6 +1091,8 @@ Loop:
.align 6
sha1_block_armv8:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
Lv8_entry:
stp x29,x30,[sp,#-16]!
add x29,sp,#0

View File

@ -63,6 +63,7 @@
.align 6
_sha256_block_data_order:
AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
@ -73,6 +74,7 @@ _sha256_block_data_order:
tst w16,#ARMV8_SHA256
b.ne Lv8_entry
#endif
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@ -1033,6 +1035,7 @@ Loop_16_xx:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -1067,6 +1070,7 @@ LK256:
.align 6
sha256_block_armv8:
Lv8_entry:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]!
add x29,sp,#0

View File

@ -63,6 +63,7 @@
.align 6
_sha512_block_data_order:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@ -1023,6 +1024,7 @@ Loop_16_xx:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret

View File

@ -12,6 +12,8 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.section __TEXT,__const
@ -214,6 +216,7 @@ Lenc_entry:
.align 4
_vpaes_encrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -223,6 +226,7 @@ _vpaes_encrypt:
st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -451,6 +455,7 @@ Ldec_entry:
.align 4
_vpaes_decrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -460,6 +465,7 @@ _vpaes_decrypt:
st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -629,6 +635,7 @@ _vpaes_key_preheat:
.align 4
_vpaes_schedule_core:
AARCH64_SIGN_LINK_REGISTER
stp x29, x30, [sp,#-16]!
add x29,sp,#0
@ -798,6 +805,7 @@ Lschedule_mangle_last_dec:
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
ldp x29, x30, [sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -1011,6 +1019,7 @@ Lschedule_mangle_both:
.align 4
_vpaes_set_encrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1026,6 +1035,7 @@ _vpaes_set_encrypt_key:
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -1034,6 +1044,7 @@ _vpaes_set_encrypt_key:
.align 4
_vpaes_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1053,6 +1064,7 @@ _vpaes_set_decrypt_key:
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _vpaes_cbc_encrypt
@ -1060,6 +1072,7 @@ _vpaes_set_decrypt_key:
.align 4
_vpaes_cbc_encrypt:
AARCH64_SIGN_LINK_REGISTER
cbz x2, Lcbc_abort
cmp w5, #0 // check direction
b.eq vpaes_cbc_decrypt
@ -1086,6 +1099,7 @@ Lcbc_enc_loop:
st1 {v0.16b}, [x4] // write ivec
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
Lcbc_abort:
ret
@ -1093,6 +1107,8 @@ Lcbc_abort:
.align 4
vpaes_cbc_decrypt:
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
// only from vpaes_cbc_encrypt which has already signed the return address.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1134,6 +1150,7 @@ Lcbc_dec_done:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _vpaes_ctr32_encrypt_blocks
@ -1141,6 +1158,7 @@ Lcbc_dec_done:
.align 4
_vpaes_ctr32_encrypt_blocks:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1208,6 +1226,7 @@ Lctr32_done:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
#endif // !OPENSSL_NO_ASM

View File

@ -12,6 +12,8 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ -26,6 +28,7 @@
.align 4
_abi_test_trampoline:
Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
@ -128,6 +131,7 @@ Lx29_ok:
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -135,6 +139,7 @@ Lx29_ok:
.private_extern _abi_test_clobber_x0
.align 4
_abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr
ret
@ -143,6 +148,7 @@ _abi_test_clobber_x0:
.private_extern _abi_test_clobber_x1
.align 4
_abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr
ret
@ -151,6 +157,7 @@ _abi_test_clobber_x1:
.private_extern _abi_test_clobber_x2
.align 4
_abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr
ret
@ -159,6 +166,7 @@ _abi_test_clobber_x2:
.private_extern _abi_test_clobber_x3
.align 4
_abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr
ret
@ -167,6 +175,7 @@ _abi_test_clobber_x3:
.private_extern _abi_test_clobber_x4
.align 4
_abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr
ret
@ -175,6 +184,7 @@ _abi_test_clobber_x4:
.private_extern _abi_test_clobber_x5
.align 4
_abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr
ret
@ -183,6 +193,7 @@ _abi_test_clobber_x5:
.private_extern _abi_test_clobber_x6
.align 4
_abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr
ret
@ -191,6 +202,7 @@ _abi_test_clobber_x6:
.private_extern _abi_test_clobber_x7
.align 4
_abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr
ret
@ -199,6 +211,7 @@ _abi_test_clobber_x7:
.private_extern _abi_test_clobber_x8
.align 4
_abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr
ret
@ -207,6 +220,7 @@ _abi_test_clobber_x8:
.private_extern _abi_test_clobber_x9
.align 4
_abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr
ret
@ -215,6 +229,7 @@ _abi_test_clobber_x9:
.private_extern _abi_test_clobber_x10
.align 4
_abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr
ret
@ -223,6 +238,7 @@ _abi_test_clobber_x10:
.private_extern _abi_test_clobber_x11
.align 4
_abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr
ret
@ -231,6 +247,7 @@ _abi_test_clobber_x11:
.private_extern _abi_test_clobber_x12
.align 4
_abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr
ret
@ -239,6 +256,7 @@ _abi_test_clobber_x12:
.private_extern _abi_test_clobber_x13
.align 4
_abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr
ret
@ -247,6 +265,7 @@ _abi_test_clobber_x13:
.private_extern _abi_test_clobber_x14
.align 4
_abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr
ret
@ -255,6 +274,7 @@ _abi_test_clobber_x14:
.private_extern _abi_test_clobber_x15
.align 4
_abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr
ret
@ -263,6 +283,7 @@ _abi_test_clobber_x15:
.private_extern _abi_test_clobber_x16
.align 4
_abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr
ret
@ -271,6 +292,7 @@ _abi_test_clobber_x16:
.private_extern _abi_test_clobber_x17
.align 4
_abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr
ret
@ -279,6 +301,7 @@ _abi_test_clobber_x17:
.private_extern _abi_test_clobber_x19
.align 4
_abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr
ret
@ -287,6 +310,7 @@ _abi_test_clobber_x19:
.private_extern _abi_test_clobber_x20
.align 4
_abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr
ret
@ -295,6 +319,7 @@ _abi_test_clobber_x20:
.private_extern _abi_test_clobber_x21
.align 4
_abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr
ret
@ -303,6 +328,7 @@ _abi_test_clobber_x21:
.private_extern _abi_test_clobber_x22
.align 4
_abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr
ret
@ -311,6 +337,7 @@ _abi_test_clobber_x22:
.private_extern _abi_test_clobber_x23
.align 4
_abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr
ret
@ -319,6 +346,7 @@ _abi_test_clobber_x23:
.private_extern _abi_test_clobber_x24
.align 4
_abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr
ret
@ -327,6 +355,7 @@ _abi_test_clobber_x24:
.private_extern _abi_test_clobber_x25
.align 4
_abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr
ret
@ -335,6 +364,7 @@ _abi_test_clobber_x25:
.private_extern _abi_test_clobber_x26
.align 4
_abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr
ret
@ -343,6 +373,7 @@ _abi_test_clobber_x26:
.private_extern _abi_test_clobber_x27
.align 4
_abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr
ret
@ -351,6 +382,7 @@ _abi_test_clobber_x27:
.private_extern _abi_test_clobber_x28
.align 4
_abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr
ret
@ -359,6 +391,7 @@ _abi_test_clobber_x28:
.private_extern _abi_test_clobber_x29
.align 4
_abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr
ret
@ -367,6 +400,7 @@ _abi_test_clobber_x29:
.private_extern _abi_test_clobber_d0
.align 4
_abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr
ret
@ -375,6 +409,7 @@ _abi_test_clobber_d0:
.private_extern _abi_test_clobber_d1
.align 4
_abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr
ret
@ -383,6 +418,7 @@ _abi_test_clobber_d1:
.private_extern _abi_test_clobber_d2
.align 4
_abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr
ret
@ -391,6 +427,7 @@ _abi_test_clobber_d2:
.private_extern _abi_test_clobber_d3
.align 4
_abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr
ret
@ -399,6 +436,7 @@ _abi_test_clobber_d3:
.private_extern _abi_test_clobber_d4
.align 4
_abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr
ret
@ -407,6 +445,7 @@ _abi_test_clobber_d4:
.private_extern _abi_test_clobber_d5
.align 4
_abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr
ret
@ -415,6 +454,7 @@ _abi_test_clobber_d5:
.private_extern _abi_test_clobber_d6
.align 4
_abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr
ret
@ -423,6 +463,7 @@ _abi_test_clobber_d6:
.private_extern _abi_test_clobber_d7
.align 4
_abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr
ret
@ -431,6 +472,7 @@ _abi_test_clobber_d7:
.private_extern _abi_test_clobber_d8
.align 4
_abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr
ret
@ -439,6 +481,7 @@ _abi_test_clobber_d8:
.private_extern _abi_test_clobber_d9
.align 4
_abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr
ret
@ -447,6 +490,7 @@ _abi_test_clobber_d9:
.private_extern _abi_test_clobber_d10
.align 4
_abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr
ret
@ -455,6 +499,7 @@ _abi_test_clobber_d10:
.private_extern _abi_test_clobber_d11
.align 4
_abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr
ret
@ -463,6 +508,7 @@ _abi_test_clobber_d11:
.private_extern _abi_test_clobber_d12
.align 4
_abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr
ret
@ -471,6 +517,7 @@ _abi_test_clobber_d12:
.private_extern _abi_test_clobber_d13
.align 4
_abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr
ret
@ -479,6 +526,7 @@ _abi_test_clobber_d13:
.private_extern _abi_test_clobber_d14
.align 4
_abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr
ret
@ -487,6 +535,7 @@ _abi_test_clobber_d14:
.private_extern _abi_test_clobber_d15
.align 4
_abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr
ret
@ -495,6 +544,7 @@ _abi_test_clobber_d15:
.private_extern _abi_test_clobber_d16
.align 4
_abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr
ret
@ -503,6 +553,7 @@ _abi_test_clobber_d16:
.private_extern _abi_test_clobber_d17
.align 4
_abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr
ret
@ -511,6 +562,7 @@ _abi_test_clobber_d17:
.private_extern _abi_test_clobber_d18
.align 4
_abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr
ret
@ -519,6 +571,7 @@ _abi_test_clobber_d18:
.private_extern _abi_test_clobber_d19
.align 4
_abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr
ret
@ -527,6 +580,7 @@ _abi_test_clobber_d19:
.private_extern _abi_test_clobber_d20
.align 4
_abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr
ret
@ -535,6 +589,7 @@ _abi_test_clobber_d20:
.private_extern _abi_test_clobber_d21
.align 4
_abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr
ret
@ -543,6 +598,7 @@ _abi_test_clobber_d21:
.private_extern _abi_test_clobber_d22
.align 4
_abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr
ret
@ -551,6 +607,7 @@ _abi_test_clobber_d22:
.private_extern _abi_test_clobber_d23
.align 4
_abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr
ret
@ -559,6 +616,7 @@ _abi_test_clobber_d23:
.private_extern _abi_test_clobber_d24
.align 4
_abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr
ret
@ -567,6 +625,7 @@ _abi_test_clobber_d24:
.private_extern _abi_test_clobber_d25
.align 4
_abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr
ret
@ -575,6 +634,7 @@ _abi_test_clobber_d25:
.private_extern _abi_test_clobber_d26
.align 4
_abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr
ret
@ -583,6 +643,7 @@ _abi_test_clobber_d26:
.private_extern _abi_test_clobber_d27
.align 4
_abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr
ret
@ -591,6 +652,7 @@ _abi_test_clobber_d27:
.private_extern _abi_test_clobber_d28
.align 4
_abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr
ret
@ -599,6 +661,7 @@ _abi_test_clobber_d28:
.private_extern _abi_test_clobber_d29
.align 4
_abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr
ret
@ -607,6 +670,7 @@ _abi_test_clobber_d29:
.private_extern _abi_test_clobber_d30
.align 4
_abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr
ret
@ -615,6 +679,7 @@ _abi_test_clobber_d30:
.private_extern _abi_test_clobber_d31
.align 4
_abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr
ret
@ -623,6 +688,7 @@ _abi_test_clobber_d31:
.private_extern _abi_test_clobber_v8_upper
.align 4
_abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr
ret
@ -631,6 +697,7 @@ _abi_test_clobber_v8_upper:
.private_extern _abi_test_clobber_v9_upper
.align 4
_abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr
ret
@ -639,6 +706,7 @@ _abi_test_clobber_v9_upper:
.private_extern _abi_test_clobber_v10_upper
.align 4
_abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr
ret
@ -647,6 +715,7 @@ _abi_test_clobber_v10_upper:
.private_extern _abi_test_clobber_v11_upper
.align 4
_abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr
ret
@ -655,6 +724,7 @@ _abi_test_clobber_v11_upper:
.private_extern _abi_test_clobber_v12_upper
.align 4
_abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr
ret
@ -663,6 +733,7 @@ _abi_test_clobber_v12_upper:
.private_extern _abi_test_clobber_v13_upper
.align 4
_abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr
ret
@ -671,6 +742,7 @@ _abi_test_clobber_v13_upper:
.private_extern _abi_test_clobber_v14_upper
.align 4
_abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr
ret
@ -679,6 +751,7 @@ _abi_test_clobber_v14_upper:
.private_extern _abi_test_clobber_v15_upper
.align 4
_abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr
ret

View File

@ -1,996 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section __TEXT,__const
# p434 x 2
Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
# p434 + 1
Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
.text
.globl _sike_mpmul
.private_extern _sike_mpmul
.align 4
_sike_mpmul:
stp x29, x30, [sp,#-96]!
add x29, sp, #0
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
ldp x14, x15, [x1,#32]
ldr x16, [x1,#48]
// x3-x7 <- AH + AL, x7 <- carry
adds x3, x3, x7
adcs x4, x4, x8
adcs x5, x5, x9
adcs x6, x6, xzr
adc x7, xzr, xzr
// x10-x13 <- BH + BL, x8 <- carry
adds x10, x10, x14
adcs x11, x11, x15
adcs x12, x12, x16
adcs x13, x13, xzr
adc x8, xzr, xzr
// x9 <- combined carry
and x9, x7, x8
// x7-x8 <- mask
sub x7, xzr, x7
sub x8, xzr, x8
// x15-x19 <- masked (BH + BL)
and x14, x10, x7
and x15, x11, x7
and x16, x12, x7
and x17, x13, x7
// x20-x23 <- masked (AH + AL)
and x20, x3, x8
and x21, x4, x8
and x22, x5, x8
and x23, x6, x8
// x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
adds x14, x14, x20
adcs x15, x15, x21
adcs x16, x16, x22
adcs x17, x17, x23
adc x7, x9, xzr
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
stp x3, x4, [x2,#0]
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x25, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x23, x10, x12
adcs x26, x11, x13
adc x24, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x19, xzr, x25
sub x20, xzr, x24
and x8, x23, x19
and x9, x26, x19
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x21, x3, x20
and x22, x4, x20
mul x19, x3, x23
mul x20, x3, x26
and x25, x25, x24
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x8, x21, x8
umulh x21, x3, x26
adcs x9, x22, x9
umulh x22, x3, x23
adc x25, x25, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x23
umulh x23, x4, x23
adds x20, x20, x22
adc x21, x21, xzr
mul x24, x4, x26
umulh x26, x4, x26
adds x20, x20, x3
adcs x21, x21, x23
adc x22, xzr, xzr
adds x21, x21, x24
adc x22, x22, x26
ldp x3, x4, [x2,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x21, x8, x21
umulh x24, x3, x10
umulh x26, x3, x11
adcs x22, x9, x22
mul x8, x3, x10
mul x9, x3, x11
adc x25, x25, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x9, x9, x24
adc x26, x26, xzr
mul x23, x4, x11
umulh x11, x4, x11
adds x9, x9, x3
adcs x26, x26, x10
adc x24, xzr, xzr
adds x26, x26, x23
adc x24, x24, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x19, x19, x8
sbcs x20, x20, x9
sbcs x21, x21, x26
mul x4, x5, x13
umulh x23, x5, x13
sbcs x22, x22, x24
sbc x25, x25, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x23, x23, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x23, x23, x12
adc x10, xzr, xzr
adds x23, x23, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x19, x19, x3
sbcs x20, x20, x4
sbcs x21, x21, x23
sbcs x22, x22, x10
sbc x25, x25, xzr
adds x19, x19, x26
adcs x20, x20, x24
adcs x21, x21, x3
adcs x22, x22, x4
adcs x23, x25, x23
adc x24, x10, xzr
// x15-x19, x7 <- (AH+AL) x (BH+BL), final step
adds x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adc x7, x7, xzr
// Load AL
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
// Load BL
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
// Temporarily store x8 in x2
stp x8, x9, [x2,#0]
// x21-x28 <- AL x BL
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x8, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x27, x10, x12
adcs x9, x11, x13
adc x28, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x23, xzr, x8
sub x24, xzr, x28
and x21, x27, x23
and x22, x9, x23
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x25, x3, x24
and x26, x4, x24
mul x23, x3, x27
mul x24, x3, x9
and x8, x8, x28
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x21, x25, x21
umulh x25, x3, x9
adcs x22, x26, x22
umulh x26, x3, x27
adc x8, x8, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x27
umulh x27, x4, x27
adds x24, x24, x26
adc x25, x25, xzr
mul x28, x4, x9
umulh x9, x4, x9
adds x24, x24, x3
adcs x25, x25, x27
adc x26, xzr, xzr
adds x25, x25, x28
adc x26, x26, x9
ldp x3, x4, [x0,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x25, x21, x25
umulh x28, x3, x10
umulh x9, x3, x11
adcs x26, x22, x26
mul x21, x3, x10
mul x22, x3, x11
adc x8, x8, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x22, x22, x28
adc x9, x9, xzr
mul x27, x4, x11
umulh x11, x4, x11
adds x22, x22, x3
adcs x9, x9, x10
adc x28, xzr, xzr
adds x9, x9, x27
adc x28, x28, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x23, x23, x21
sbcs x24, x24, x22
sbcs x25, x25, x9
mul x4, x5, x13
umulh x27, x5, x13
sbcs x26, x26, x28
sbc x8, x8, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x27, x27, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x27, x27, x12
adc x10, xzr, xzr
adds x27, x27, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x23, x23, x3
sbcs x24, x24, x4
sbcs x25, x25, x27
sbcs x26, x26, x10
sbc x8, x8, xzr
adds x23, x23, x9
adcs x24, x24, x28
adcs x25, x25, x3
adcs x26, x26, x4
adcs x27, x8, x27
adc x28, x10, xzr
// Restore x8
ldp x8, x9, [x2,#0]
// x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, x27
sbcs x17, x17, x28
sbc x7, x7, xzr
// Store ALxBL, low
stp x21, x22, [x2]
stp x23, x24, [x2,#16]
// Load AH
ldp x3, x4, [x0,#32]
ldr x5, [x0,#48]
// Load BH
ldp x10, x11, [x1,#32]
ldr x12, [x1,#48]
adds x8, x8, x25
adcs x9, x9, x26
adcs x19, x19, x27
adcs x20, x20, x28
adc x1, xzr, xzr
add x0, x0, #32
// Temporarily store x8,x9 in x2
stp x8,x9, [x2,#32]
// x21-x28 <- AH x BH
// A0 * B0
mul x21, x3, x10 // C0
umulh x24, x3, x10
// A0 * B1
mul x22, x3, x11
umulh x23, x3, x11
// A1 * B0
mul x8, x4, x10
umulh x9, x4, x10
adds x22, x22, x24
adc x23, x23, xzr
// A0 * B2
mul x27, x3, x12
umulh x28, x3, x12
adds x22, x22, x8 // C1
adcs x23, x23, x9
adc x24, xzr, xzr
// A2 * B0
mul x8, x5, x10
umulh x25, x5, x10
adds x23, x23, x27
adcs x24, x24, x25
adc x25, xzr, xzr
// A1 * B1
mul x27, x4, x11
umulh x9, x4, x11
adds x23, x23, x8
adcs x24, x24, x28
adc x25, x25, xzr
// A1 * B2
mul x8, x4, x12
umulh x28, x4, x12
adds x23, x23, x27 // C2
adcs x24, x24, x9
adc x25, x25, xzr
// A2 * B1
mul x27, x5, x11
umulh x9, x5, x11
adds x24, x24, x8
adcs x25, x25, x28
adc x26, xzr, xzr
// A2 * B2
mul x8, x5, x12
umulh x28, x5, x12
adds x24, x24, x27 // C3
adcs x25, x25, x9
adc x26, x26, xzr
adds x25, x25, x8 // C4
adc x26, x26, x28 // C5
// Restore x8,x9
ldp x8,x9, [x2,#32]
neg x1, x1
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, xzr
sbcs x17, x17, xzr
sbc x7, x7, xzr
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
stp x8, x9, [x2,#32]
stp x19, x20, [x2,#48]
adds x1, x1, #1
adcs x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adcs x25, x7, x25
adc x26, x26, xzr
stp x14, x15, [x2,#64]
stp x16, x17, [x2,#80]
stp x25, x26, [x2,#96]
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
.globl _sike_fprdc
.private_extern _sike_fprdc
.align 4
_sike_fprdc:
stp x29, x30, [sp, #-96]!
add x29, sp, xzr
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x2, x3, [x0,#0] // a[0-1]
// Load the prime constant
adrp x26, Lp434p1@PAGE
add x26, x26, Lp434p1@PAGEOFF
ldp x23, x24, [x26, #0x0]
ldp x25, x26, [x26,#0x10]
// a[0-1] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x10, x3, x23
umulh x11, x3, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x10 // C1
adcs x6, x6, x11
adc x7, xzr, xzr
mul x10, x3, x24
umulh x11, x3, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x10 // C2
adcs x7, x7, x11
adc x8, x8, xzr
mul x10, x3, x25
umulh x11, x3, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x3, x26
umulh x28, x3, x26
adds x7, x7, x10 // C3
adcs x8, x8, x11
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
ldp x10, x11, [x0, #0x18]
ldp x12, x13, [x0, #0x28]
ldp x14, x15, [x0, #0x38]
ldp x16, x17, [x0, #0x48]
ldp x19, x20, [x0, #0x58]
ldr x21, [x0, #0x68]
adds x10, x10, x4
adcs x11, x11, x5
adcs x12, x12, x6
adcs x13, x13, x7
adcs x14, x14, x8
adcs x15, x15, x9
adcs x22, x16, xzr
adcs x17, x17, xzr
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
ldr x2, [x0,#0x10] // a[2]
// a[2-3] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x0, x10, x23
umulh x3, x10, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x0 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x0, x10, x24
umulh x3, x10, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x0 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x0, x10, x25
umulh x3, x10, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x10, x26
umulh x28, x10, x26
adds x7, x7, x0 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x12, x12, x4
adcs x13, x13, x5
adcs x14, x14, x6
adcs x15, x15, x7
adcs x16, x22, x8
adcs x17, x17, x9
adcs x22, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
mul x4, x11, x23 // C0
umulh x7, x11, x23
mul x5, x11, x24
umulh x6, x11, x24
mul x10, x12, x23
umulh x3, x12, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x11, x25
umulh x28, x11, x25
adds x5, x5, x10 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x10, x12, x24
umulh x3, x12, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x11, x26
umulh x28, x11, x26
adds x6, x6, x10 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x10, x12, x25
umulh x3, x12, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x12, x26
umulh x28, x12, x26
adds x7, x7, x10 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x14, x14, x4
adcs x15, x15, x5
adcs x16, x16, x6
adcs x17, x17, x7
adcs x19, x22, x8
adcs x20, x20, x9
adc x22, x21, xzr
stp x14, x15, [x1, #0x0] // C0, C1
mul x4, x13, x23 // C0
umulh x10, x13, x23
mul x5, x13, x24
umulh x27, x13, x24
adds x5, x5, x10 // C1
adc x10, xzr, xzr
mul x6, x13, x25
umulh x28, x13, x25
adds x27, x10, x27
adcs x6, x6, x27 // C2
adc x10, xzr, xzr
mul x7, x13, x26
umulh x8, x13, x26
adds x28, x10, x28
adcs x7, x7, x28 // C3
adc x8, x8, xzr // C4
adds x16, x16, x4
adcs x17, x17, x5
adcs x19, x19, x6
adcs x20, x20, x7
adc x21, x22, x8
str x16, [x1, #0x10]
stp x17, x19, [x1, #0x18]
stp x20, x21, [x1, #0x28]
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
.globl _sike_fpadd
.private_extern _sike_fpadd
.align 4
_sike_fpadd:
stp x29,x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
// Subtract 2xp434
adrp x17, Lp434x2@PAGE
add x17, x17, Lp434x2@PAGEOFF
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14
sbcs x8, x8, x15
sbcs x9, x9, x16
sbc x0, xzr, xzr // x0 can be reused now
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl _sike_fpsub
.private_extern _sike_fpsub
.align 4
_sike_fpsub:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
sbc x0, xzr, xzr
// Add 2xp434 anded with the mask in x0
adrp x17, Lp434x2@PAGE
add x17, x17, Lp434x2@PAGEOFF
// First half
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl _sike_mpadd_asm
.private_extern _sike_mpadd_asm
.align 4
_sike_mpadd_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl _sike_mpsubx2_asm
.private_extern _sike_mpsubx2_asm
.align 4
_sike_mpsubx2_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x11, x12, [x1,#32]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbcs x9, x9, x13
sbcs x10, x10, x14
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbc x0, xzr, xzr
stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
ldp x29, x30, [sp],#16
ret
.globl _sike_mpdblsubx2_asm
.private_extern _sike_mpdblsubx2_asm
.align 4
_sike_mpdblsubx2_asm:
stp x29, x30, [sp, #-16]!
add x29, sp, #0
ldp x3, x4, [x2, #0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x11, x12, [x0, #0]
ldp x13, x14, [x0,#16]
ldp x15, x16, [x0,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
// x9 stores carry
adc x9, xzr, xzr
ldp x11, x12, [x1, #0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2, #0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
ldp x3, x4, [x2,#48]
ldp x5, x6, [x2,#64]
ldp x7, x8, [x2,#80]
ldp x11, x12, [x0,#48]
ldp x13, x14, [x0,#64]
ldp x15, x16, [x0,#80]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, xzr, xzr
ldp x11, x12, [x1,#48]
ldp x13, x14, [x1,#64]
ldp x15, x16, [x1,#80]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2,#48]
stp x5, x6, [x2,#64]
stp x7, x8, [x2,#80]
ldp x3, x4, [x2,#96]
ldp x11, x12, [x0,#96]
ldp x13, x14, [x1,#96]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
subs x3, x3, x13
sbc x4, x4, x14
stp x3, x4, [x2,#96]
ldp x29, x30, [sp],#16
ret
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

View File

@ -30,348 +30,6 @@
#else
.code 32
#endif
.align 5
rem_4bit:
.short 0x0000,0x1C20,0x3840,0x2460
.short 0x7080,0x6CA0,0x48C0,0x54E0
.short 0xE100,0xFD20,0xD940,0xC560
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
#ifdef __thumb2__
.thumb_func rem_4bit_get
#endif
rem_4bit_get:
#if defined(__thumb2__)
adr r2,rem_4bit
#else
sub r2,pc,#8+32 @ &rem_4bit
#endif
b Lrem_4bit_got
nop
nop
.globl _gcm_ghash_4bit
.private_extern _gcm_ghash_4bit
#ifdef __thumb2__
.thumb_func _gcm_ghash_4bit
#endif
.align 4
_gcm_ghash_4bit:
#if defined(__thumb2__)
adr r12,rem_4bit
#else
sub r12,pc,#8+48 @ &rem_4bit
#endif
add r3,r2,r3 @ r3 to point at the end
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
ldrb r12,[r2,#15]
ldrb r14,[r0,#15]
Louter:
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
add r11,r1,r14
ldrb r12,[r2,#14]
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[sp,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
ldrb r14,[r0,#14]
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
eor r7,r7,r8,lsl#16
Linner:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[sp,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r2,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r8,[r0,r3]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r9,[sp,r14]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
#ifdef __thumb2__
it pl
#endif
eorpl r12,r12,r8
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
bpl Linner
ldr r3,[sp,#32] @ re-load r3/end
add r2,r2,#16
mov r14,r4
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
cmp r2,r3
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#ifdef __thumb2__
it ne
#endif
ldrneb r12,[r2,#15]
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
bne Louter
add sp,sp,#36
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.globl _gcm_gmult_4bit
.private_extern _gcm_gmult_4bit
#ifdef __thumb2__
.thumb_func _gcm_gmult_4bit
#endif
_gcm_gmult_4bit:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldrb r12,[r0,#15]
b rem_4bit_get
Lrem_4bit_got:
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
ldrb r12,[r0,#14]
add r11,r1,r14
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
and r14,r12,#0xf0
eor r7,r7,r8,lsl#16
and r12,r12,#0x0f
Loop:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[r2,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r0,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
bpl Loop
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7

File diff suppressed because it is too large Load Diff

View File

@ -30,7 +30,6 @@
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
Labi_test_trampoline_begin:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

View File

@ -34,6 +34,7 @@
.type ChaCha20_ctr32,%function
.align 5
ChaCha20_ctr32:
AARCH64_VALID_CALL_TARGET
cbz x2,.Labort
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x5,:pg_hi21_nc:OPENSSL_armcap_P
@ -47,6 +48,7 @@ ChaCha20_ctr32:
b.ne ChaCha20_neon
.Lshort:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -259,6 +261,7 @@ ChaCha20_ctr32:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
.Labort:
ret
@ -315,12 +318,14 @@ ChaCha20_ctr32:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.size ChaCha20_ctr32,.-ChaCha20_ctr32
.type ChaCha20_neon,%function
.align 5
ChaCha20_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -701,6 +706,7 @@ ChaCha20_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.Ltail_neon:
@ -810,11 +816,13 @@ ChaCha20_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.size ChaCha20_neon,.-ChaCha20_neon
.type ChaCha20_512_neon,%function
.align 5
ChaCha20_512_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -1978,7 +1986,9 @@ ChaCha20_512_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.size ChaCha20_512_neon,.-ChaCha20_512_neon
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -33,6 +33,8 @@
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
@ -201,6 +203,7 @@ aes_hw_set_encrypt_key:
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
@ -234,6 +237,7 @@ aes_hw_set_decrypt_key:
eor x0,x0,x0 // return value
.Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
@ -241,6 +245,7 @@ aes_hw_set_decrypt_key:
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@ -271,6 +276,7 @@ aes_hw_encrypt:
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@ -301,6 +307,8 @@ aes_hw_decrypt:
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
@ -592,6 +600,8 @@ aes_hw_cbc_encrypt:
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
@ -772,3 +782,4 @@ aes_hw_ctr32_encrypt_blocks:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,6 +13,8 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl bn_mul_mont
@ -20,6 +22,7 @@
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
AARCH64_SIGN_LINK_REGISTER
tst x5,#7
b.eq __bn_sqr8x_mont
tst x5,#3
@ -217,11 +220,14 @@ bn_mul_mont:
mov x0,#1
ldp x23,x24,[x29,#48]
ldr x29,[sp],#64
AARCH64_VALIDATE_LINK_REGISTER
ret
.size bn_mul_mont,.-bn_mul_mont
.type __bn_sqr8x_mont,%function
.align 5
__bn_sqr8x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp x1,x2
b.ne __bn_mul4x_mont
.Lsqr8x_mont:
@ -975,11 +981,16 @@ __bn_sqr8x_mont:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
.type __bn_mul4x_mont,%function
.align 5
__bn_mul4x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
// return address.
stp x29,x30,[sp,#-128]!
add x29,sp,#0
stp x19,x20,[sp,#16]
@ -1413,6 +1424,8 @@ __bn_mul4x_mont:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.size __bn_mul4x_mont,.-__bn_mul4x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@ -1420,3 +1433,4 @@ __bn_mul4x_mont:
.align 4
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,6 +13,8 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl gcm_init_neon
@ -20,6 +22,7 @@
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
@ -45,6 +48,7 @@ gcm_init_neon:
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
@ -64,6 +68,7 @@ gcm_gmult_neon:
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
@ -338,3 +343,4 @@ gcm_ghash_neon:
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -22,6 +22,7 @@
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
@ -73,6 +74,7 @@ gcm_init_v8:
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
@ -115,6 +117,7 @@ gcm_gmult_v8:
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
@ -246,3 +249,4 @@ gcm_ghash_v8:
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -23,6 +23,8 @@
.type sha1_block_data_order,%function
.align 6
sha1_block_data_order:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
#else
@ -1090,6 +1092,8 @@ sha1_block_data_order:
.type sha1_block_armv8,%function
.align 6
sha1_block_armv8:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
.Lv8_entry:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -1232,3 +1236,4 @@ sha1_block_armv8:
.hidden OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -64,6 +64,7 @@
.type sha256_block_data_order,%function
.align 6
sha256_block_data_order:
AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
@ -74,6 +75,7 @@ sha256_block_data_order:
tst w16,#ARMV8_SHA256
b.ne .Lv8_entry
#endif
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@ -1034,6 +1036,7 @@ sha256_block_data_order:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret
.size sha256_block_data_order,.-sha256_block_data_order
@ -1068,6 +1071,7 @@ sha256_block_data_order:
.align 6
sha256_block_armv8:
.Lv8_entry:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -1210,3 +1214,4 @@ sha256_block_armv8:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -64,6 +64,7 @@
.type sha512_block_data_order,%function
.align 6
sha512_block_data_order:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@ -1024,6 +1025,7 @@ sha512_block_data_order:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret
.size sha512_block_data_order,.-sha512_block_data_order
@ -1082,3 +1084,4 @@ sha512_block_data_order:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,6 +13,8 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.section .rodata
.type _vpaes_consts,%object
@ -215,6 +217,7 @@ _vpaes_encrypt_core:
.type vpaes_encrypt,%function
.align 4
vpaes_encrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -224,6 +227,7 @@ vpaes_encrypt:
st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_encrypt,.-vpaes_encrypt
@ -452,6 +456,7 @@ _vpaes_decrypt_core:
.type vpaes_decrypt,%function
.align 4
vpaes_decrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -461,6 +466,7 @@ vpaes_decrypt:
st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_decrypt,.-vpaes_decrypt
@ -630,6 +636,7 @@ _vpaes_key_preheat:
.type _vpaes_schedule_core,%function
.align 4
_vpaes_schedule_core:
AARCH64_SIGN_LINK_REGISTER
stp x29, x30, [sp,#-16]!
add x29,sp,#0
@ -799,6 +806,7 @@ _vpaes_schedule_core:
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
ldp x29, x30, [sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size _vpaes_schedule_core,.-_vpaes_schedule_core
@ -1012,6 +1020,7 @@ _vpaes_schedule_mangle:
.type vpaes_set_encrypt_key,%function
.align 4
vpaes_set_encrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1027,6 +1036,7 @@ vpaes_set_encrypt_key:
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
@ -1035,6 +1045,7 @@ vpaes_set_encrypt_key:
.type vpaes_set_decrypt_key,%function
.align 4
vpaes_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1054,6 +1065,7 @@ vpaes_set_decrypt_key:
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
.globl vpaes_cbc_encrypt
@ -1061,6 +1073,7 @@ vpaes_set_decrypt_key:
.type vpaes_cbc_encrypt,%function
.align 4
vpaes_cbc_encrypt:
AARCH64_SIGN_LINK_REGISTER
cbz x2, .Lcbc_abort
cmp w5, #0 // check direction
b.eq vpaes_cbc_decrypt
@ -1087,6 +1100,7 @@ vpaes_cbc_encrypt:
st1 {v0.16b}, [x4] // write ivec
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
.Lcbc_abort:
ret
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
@ -1094,6 +1108,8 @@ vpaes_cbc_encrypt:
.type vpaes_cbc_decrypt,%function
.align 4
vpaes_cbc_decrypt:
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
// only from vpaes_cbc_encrypt which has already signed the return address.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1135,6 +1151,7 @@ vpaes_cbc_decrypt:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
.globl vpaes_ctr32_encrypt_blocks
@ -1142,6 +1159,7 @@ vpaes_cbc_decrypt:
.type vpaes_ctr32_encrypt_blocks,%function
.align 4
vpaes_ctr32_encrypt_blocks:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1209,7 +1227,9 @@ vpaes_ctr32_encrypt_blocks:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,6 +13,8 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ -27,6 +29,7 @@
.align 4
abi_test_trampoline:
.Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
@ -129,6 +132,7 @@ abi_test_trampoline:
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret
.size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_x0, %function
@ -136,6 +140,7 @@ abi_test_trampoline:
.hidden abi_test_clobber_x0
.align 4
abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr
ret
.size abi_test_clobber_x0,.-abi_test_clobber_x0
@ -144,6 +149,7 @@ abi_test_clobber_x0:
.hidden abi_test_clobber_x1
.align 4
abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr
ret
.size abi_test_clobber_x1,.-abi_test_clobber_x1
@ -152,6 +158,7 @@ abi_test_clobber_x1:
.hidden abi_test_clobber_x2
.align 4
abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr
ret
.size abi_test_clobber_x2,.-abi_test_clobber_x2
@ -160,6 +167,7 @@ abi_test_clobber_x2:
.hidden abi_test_clobber_x3
.align 4
abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr
ret
.size abi_test_clobber_x3,.-abi_test_clobber_x3
@ -168,6 +176,7 @@ abi_test_clobber_x3:
.hidden abi_test_clobber_x4
.align 4
abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr
ret
.size abi_test_clobber_x4,.-abi_test_clobber_x4
@ -176,6 +185,7 @@ abi_test_clobber_x4:
.hidden abi_test_clobber_x5
.align 4
abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr
ret
.size abi_test_clobber_x5,.-abi_test_clobber_x5
@ -184,6 +194,7 @@ abi_test_clobber_x5:
.hidden abi_test_clobber_x6
.align 4
abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr
ret
.size abi_test_clobber_x6,.-abi_test_clobber_x6
@ -192,6 +203,7 @@ abi_test_clobber_x6:
.hidden abi_test_clobber_x7
.align 4
abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr
ret
.size abi_test_clobber_x7,.-abi_test_clobber_x7
@ -200,6 +212,7 @@ abi_test_clobber_x7:
.hidden abi_test_clobber_x8
.align 4
abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr
ret
.size abi_test_clobber_x8,.-abi_test_clobber_x8
@ -208,6 +221,7 @@ abi_test_clobber_x8:
.hidden abi_test_clobber_x9
.align 4
abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr
ret
.size abi_test_clobber_x9,.-abi_test_clobber_x9
@ -216,6 +230,7 @@ abi_test_clobber_x9:
.hidden abi_test_clobber_x10
.align 4
abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr
ret
.size abi_test_clobber_x10,.-abi_test_clobber_x10
@ -224,6 +239,7 @@ abi_test_clobber_x10:
.hidden abi_test_clobber_x11
.align 4
abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr
ret
.size abi_test_clobber_x11,.-abi_test_clobber_x11
@ -232,6 +248,7 @@ abi_test_clobber_x11:
.hidden abi_test_clobber_x12
.align 4
abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr
ret
.size abi_test_clobber_x12,.-abi_test_clobber_x12
@ -240,6 +257,7 @@ abi_test_clobber_x12:
.hidden abi_test_clobber_x13
.align 4
abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr
ret
.size abi_test_clobber_x13,.-abi_test_clobber_x13
@ -248,6 +266,7 @@ abi_test_clobber_x13:
.hidden abi_test_clobber_x14
.align 4
abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr
ret
.size abi_test_clobber_x14,.-abi_test_clobber_x14
@ -256,6 +275,7 @@ abi_test_clobber_x14:
.hidden abi_test_clobber_x15
.align 4
abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr
ret
.size abi_test_clobber_x15,.-abi_test_clobber_x15
@ -264,6 +284,7 @@ abi_test_clobber_x15:
.hidden abi_test_clobber_x16
.align 4
abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr
ret
.size abi_test_clobber_x16,.-abi_test_clobber_x16
@ -272,6 +293,7 @@ abi_test_clobber_x16:
.hidden abi_test_clobber_x17
.align 4
abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr
ret
.size abi_test_clobber_x17,.-abi_test_clobber_x17
@ -280,6 +302,7 @@ abi_test_clobber_x17:
.hidden abi_test_clobber_x19
.align 4
abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr
ret
.size abi_test_clobber_x19,.-abi_test_clobber_x19
@ -288,6 +311,7 @@ abi_test_clobber_x19:
.hidden abi_test_clobber_x20
.align 4
abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr
ret
.size abi_test_clobber_x20,.-abi_test_clobber_x20
@ -296,6 +320,7 @@ abi_test_clobber_x20:
.hidden abi_test_clobber_x21
.align 4
abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr
ret
.size abi_test_clobber_x21,.-abi_test_clobber_x21
@ -304,6 +329,7 @@ abi_test_clobber_x21:
.hidden abi_test_clobber_x22
.align 4
abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr
ret
.size abi_test_clobber_x22,.-abi_test_clobber_x22
@ -312,6 +338,7 @@ abi_test_clobber_x22:
.hidden abi_test_clobber_x23
.align 4
abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr
ret
.size abi_test_clobber_x23,.-abi_test_clobber_x23
@ -320,6 +347,7 @@ abi_test_clobber_x23:
.hidden abi_test_clobber_x24
.align 4
abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr
ret
.size abi_test_clobber_x24,.-abi_test_clobber_x24
@ -328,6 +356,7 @@ abi_test_clobber_x24:
.hidden abi_test_clobber_x25
.align 4
abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr
ret
.size abi_test_clobber_x25,.-abi_test_clobber_x25
@ -336,6 +365,7 @@ abi_test_clobber_x25:
.hidden abi_test_clobber_x26
.align 4
abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr
ret
.size abi_test_clobber_x26,.-abi_test_clobber_x26
@ -344,6 +374,7 @@ abi_test_clobber_x26:
.hidden abi_test_clobber_x27
.align 4
abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr
ret
.size abi_test_clobber_x27,.-abi_test_clobber_x27
@ -352,6 +383,7 @@ abi_test_clobber_x27:
.hidden abi_test_clobber_x28
.align 4
abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr
ret
.size abi_test_clobber_x28,.-abi_test_clobber_x28
@ -360,6 +392,7 @@ abi_test_clobber_x28:
.hidden abi_test_clobber_x29
.align 4
abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr
ret
.size abi_test_clobber_x29,.-abi_test_clobber_x29
@ -368,6 +401,7 @@ abi_test_clobber_x29:
.hidden abi_test_clobber_d0
.align 4
abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr
ret
.size abi_test_clobber_d0,.-abi_test_clobber_d0
@ -376,6 +410,7 @@ abi_test_clobber_d0:
.hidden abi_test_clobber_d1
.align 4
abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr
ret
.size abi_test_clobber_d1,.-abi_test_clobber_d1
@ -384,6 +419,7 @@ abi_test_clobber_d1:
.hidden abi_test_clobber_d2
.align 4
abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr
ret
.size abi_test_clobber_d2,.-abi_test_clobber_d2
@ -392,6 +428,7 @@ abi_test_clobber_d2:
.hidden abi_test_clobber_d3
.align 4
abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr
ret
.size abi_test_clobber_d3,.-abi_test_clobber_d3
@ -400,6 +437,7 @@ abi_test_clobber_d3:
.hidden abi_test_clobber_d4
.align 4
abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr
ret
.size abi_test_clobber_d4,.-abi_test_clobber_d4
@ -408,6 +446,7 @@ abi_test_clobber_d4:
.hidden abi_test_clobber_d5
.align 4
abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr
ret
.size abi_test_clobber_d5,.-abi_test_clobber_d5
@ -416,6 +455,7 @@ abi_test_clobber_d5:
.hidden abi_test_clobber_d6
.align 4
abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr
ret
.size abi_test_clobber_d6,.-abi_test_clobber_d6
@ -424,6 +464,7 @@ abi_test_clobber_d6:
.hidden abi_test_clobber_d7
.align 4
abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr
ret
.size abi_test_clobber_d7,.-abi_test_clobber_d7
@ -432,6 +473,7 @@ abi_test_clobber_d7:
.hidden abi_test_clobber_d8
.align 4
abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr
ret
.size abi_test_clobber_d8,.-abi_test_clobber_d8
@ -440,6 +482,7 @@ abi_test_clobber_d8:
.hidden abi_test_clobber_d9
.align 4
abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr
ret
.size abi_test_clobber_d9,.-abi_test_clobber_d9
@ -448,6 +491,7 @@ abi_test_clobber_d9:
.hidden abi_test_clobber_d10
.align 4
abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr
ret
.size abi_test_clobber_d10,.-abi_test_clobber_d10
@ -456,6 +500,7 @@ abi_test_clobber_d10:
.hidden abi_test_clobber_d11
.align 4
abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr
ret
.size abi_test_clobber_d11,.-abi_test_clobber_d11
@ -464,6 +509,7 @@ abi_test_clobber_d11:
.hidden abi_test_clobber_d12
.align 4
abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr
ret
.size abi_test_clobber_d12,.-abi_test_clobber_d12
@ -472,6 +518,7 @@ abi_test_clobber_d12:
.hidden abi_test_clobber_d13
.align 4
abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr
ret
.size abi_test_clobber_d13,.-abi_test_clobber_d13
@ -480,6 +527,7 @@ abi_test_clobber_d13:
.hidden abi_test_clobber_d14
.align 4
abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr
ret
.size abi_test_clobber_d14,.-abi_test_clobber_d14
@ -488,6 +536,7 @@ abi_test_clobber_d14:
.hidden abi_test_clobber_d15
.align 4
abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr
ret
.size abi_test_clobber_d15,.-abi_test_clobber_d15
@ -496,6 +545,7 @@ abi_test_clobber_d15:
.hidden abi_test_clobber_d16
.align 4
abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr
ret
.size abi_test_clobber_d16,.-abi_test_clobber_d16
@ -504,6 +554,7 @@ abi_test_clobber_d16:
.hidden abi_test_clobber_d17
.align 4
abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr
ret
.size abi_test_clobber_d17,.-abi_test_clobber_d17
@ -512,6 +563,7 @@ abi_test_clobber_d17:
.hidden abi_test_clobber_d18
.align 4
abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr
ret
.size abi_test_clobber_d18,.-abi_test_clobber_d18
@ -520,6 +572,7 @@ abi_test_clobber_d18:
.hidden abi_test_clobber_d19
.align 4
abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr
ret
.size abi_test_clobber_d19,.-abi_test_clobber_d19
@ -528,6 +581,7 @@ abi_test_clobber_d19:
.hidden abi_test_clobber_d20
.align 4
abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr
ret
.size abi_test_clobber_d20,.-abi_test_clobber_d20
@ -536,6 +590,7 @@ abi_test_clobber_d20:
.hidden abi_test_clobber_d21
.align 4
abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr
ret
.size abi_test_clobber_d21,.-abi_test_clobber_d21
@ -544,6 +599,7 @@ abi_test_clobber_d21:
.hidden abi_test_clobber_d22
.align 4
abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr
ret
.size abi_test_clobber_d22,.-abi_test_clobber_d22
@ -552,6 +608,7 @@ abi_test_clobber_d22:
.hidden abi_test_clobber_d23
.align 4
abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr
ret
.size abi_test_clobber_d23,.-abi_test_clobber_d23
@ -560,6 +617,7 @@ abi_test_clobber_d23:
.hidden abi_test_clobber_d24
.align 4
abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr
ret
.size abi_test_clobber_d24,.-abi_test_clobber_d24
@ -568,6 +626,7 @@ abi_test_clobber_d24:
.hidden abi_test_clobber_d25
.align 4
abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr
ret
.size abi_test_clobber_d25,.-abi_test_clobber_d25
@ -576,6 +635,7 @@ abi_test_clobber_d25:
.hidden abi_test_clobber_d26
.align 4
abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr
ret
.size abi_test_clobber_d26,.-abi_test_clobber_d26
@ -584,6 +644,7 @@ abi_test_clobber_d26:
.hidden abi_test_clobber_d27
.align 4
abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr
ret
.size abi_test_clobber_d27,.-abi_test_clobber_d27
@ -592,6 +653,7 @@ abi_test_clobber_d27:
.hidden abi_test_clobber_d28
.align 4
abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr
ret
.size abi_test_clobber_d28,.-abi_test_clobber_d28
@ -600,6 +662,7 @@ abi_test_clobber_d28:
.hidden abi_test_clobber_d29
.align 4
abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr
ret
.size abi_test_clobber_d29,.-abi_test_clobber_d29
@ -608,6 +671,7 @@ abi_test_clobber_d29:
.hidden abi_test_clobber_d30
.align 4
abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr
ret
.size abi_test_clobber_d30,.-abi_test_clobber_d30
@ -616,6 +680,7 @@ abi_test_clobber_d30:
.hidden abi_test_clobber_d31
.align 4
abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr
ret
.size abi_test_clobber_d31,.-abi_test_clobber_d31
@ -624,6 +689,7 @@ abi_test_clobber_d31:
.hidden abi_test_clobber_v8_upper
.align 4
abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr
ret
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
@ -632,6 +698,7 @@ abi_test_clobber_v8_upper:
.hidden abi_test_clobber_v9_upper
.align 4
abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr
ret
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
@ -640,6 +707,7 @@ abi_test_clobber_v9_upper:
.hidden abi_test_clobber_v10_upper
.align 4
abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr
ret
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
@ -648,6 +716,7 @@ abi_test_clobber_v10_upper:
.hidden abi_test_clobber_v11_upper
.align 4
abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr
ret
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
@ -656,6 +725,7 @@ abi_test_clobber_v11_upper:
.hidden abi_test_clobber_v12_upper
.align 4
abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr
ret
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
@ -664,6 +734,7 @@ abi_test_clobber_v12_upper:
.hidden abi_test_clobber_v13_upper
.align 4
abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr
ret
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
@ -672,6 +743,7 @@ abi_test_clobber_v13_upper:
.hidden abi_test_clobber_v14_upper
.align 4
abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr
ret
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
@ -680,8 +752,10 @@ abi_test_clobber_v14_upper:
.hidden abi_test_clobber_v15_upper
.align 4
abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr
ret
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1,998 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section .rodata
# p434 x 2
.Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
# p434 + 1
.Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
.text
.globl sike_mpmul
.hidden sike_mpmul
.align 4
sike_mpmul:
stp x29, x30, [sp,#-96]!
add x29, sp, #0
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
ldp x14, x15, [x1,#32]
ldr x16, [x1,#48]
// x3-x7 <- AH + AL, x7 <- carry
adds x3, x3, x7
adcs x4, x4, x8
adcs x5, x5, x9
adcs x6, x6, xzr
adc x7, xzr, xzr
// x10-x13 <- BH + BL, x8 <- carry
adds x10, x10, x14
adcs x11, x11, x15
adcs x12, x12, x16
adcs x13, x13, xzr
adc x8, xzr, xzr
// x9 <- combined carry
and x9, x7, x8
// x7-x8 <- mask
sub x7, xzr, x7
sub x8, xzr, x8
// x15-x19 <- masked (BH + BL)
and x14, x10, x7
and x15, x11, x7
and x16, x12, x7
and x17, x13, x7
// x20-x23 <- masked (AH + AL)
and x20, x3, x8
and x21, x4, x8
and x22, x5, x8
and x23, x6, x8
// x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
adds x14, x14, x20
adcs x15, x15, x21
adcs x16, x16, x22
adcs x17, x17, x23
adc x7, x9, xzr
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
stp x3, x4, [x2,#0]
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x25, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x23, x10, x12
adcs x26, x11, x13
adc x24, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x19, xzr, x25
sub x20, xzr, x24
and x8, x23, x19
and x9, x26, x19
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x21, x3, x20
and x22, x4, x20
mul x19, x3, x23
mul x20, x3, x26
and x25, x25, x24
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x8, x21, x8
umulh x21, x3, x26
adcs x9, x22, x9
umulh x22, x3, x23
adc x25, x25, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x23
umulh x23, x4, x23
adds x20, x20, x22
adc x21, x21, xzr
mul x24, x4, x26
umulh x26, x4, x26
adds x20, x20, x3
adcs x21, x21, x23
adc x22, xzr, xzr
adds x21, x21, x24
adc x22, x22, x26
ldp x3, x4, [x2,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x21, x8, x21
umulh x24, x3, x10
umulh x26, x3, x11
adcs x22, x9, x22
mul x8, x3, x10
mul x9, x3, x11
adc x25, x25, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x9, x9, x24
adc x26, x26, xzr
mul x23, x4, x11
umulh x11, x4, x11
adds x9, x9, x3
adcs x26, x26, x10
adc x24, xzr, xzr
adds x26, x26, x23
adc x24, x24, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x19, x19, x8
sbcs x20, x20, x9
sbcs x21, x21, x26
mul x4, x5, x13
umulh x23, x5, x13
sbcs x22, x22, x24
sbc x25, x25, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x23, x23, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x23, x23, x12
adc x10, xzr, xzr
adds x23, x23, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x19, x19, x3
sbcs x20, x20, x4
sbcs x21, x21, x23
sbcs x22, x22, x10
sbc x25, x25, xzr
adds x19, x19, x26
adcs x20, x20, x24
adcs x21, x21, x3
adcs x22, x22, x4
adcs x23, x25, x23
adc x24, x10, xzr
// x15-x19, x7 <- (AH+AL) x (BH+BL), final step
adds x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adc x7, x7, xzr
// Load AL
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
// Load BL
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
// Temporarily store x8 in x2
stp x8, x9, [x2,#0]
// x21-x28 <- AL x BL
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x8, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x27, x10, x12
adcs x9, x11, x13
adc x28, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x23, xzr, x8
sub x24, xzr, x28
and x21, x27, x23
and x22, x9, x23
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x25, x3, x24
and x26, x4, x24
mul x23, x3, x27
mul x24, x3, x9
and x8, x8, x28
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x21, x25, x21
umulh x25, x3, x9
adcs x22, x26, x22
umulh x26, x3, x27
adc x8, x8, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x27
umulh x27, x4, x27
adds x24, x24, x26
adc x25, x25, xzr
mul x28, x4, x9
umulh x9, x4, x9
adds x24, x24, x3
adcs x25, x25, x27
adc x26, xzr, xzr
adds x25, x25, x28
adc x26, x26, x9
ldp x3, x4, [x0,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x25, x21, x25
umulh x28, x3, x10
umulh x9, x3, x11
adcs x26, x22, x26
mul x21, x3, x10
mul x22, x3, x11
adc x8, x8, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x22, x22, x28
adc x9, x9, xzr
mul x27, x4, x11
umulh x11, x4, x11
adds x22, x22, x3
adcs x9, x9, x10
adc x28, xzr, xzr
adds x9, x9, x27
adc x28, x28, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x23, x23, x21
sbcs x24, x24, x22
sbcs x25, x25, x9
mul x4, x5, x13
umulh x27, x5, x13
sbcs x26, x26, x28
sbc x8, x8, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x27, x27, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x27, x27, x12
adc x10, xzr, xzr
adds x27, x27, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x23, x23, x3
sbcs x24, x24, x4
sbcs x25, x25, x27
sbcs x26, x26, x10
sbc x8, x8, xzr
adds x23, x23, x9
adcs x24, x24, x28
adcs x25, x25, x3
adcs x26, x26, x4
adcs x27, x8, x27
adc x28, x10, xzr
// Restore x8
ldp x8, x9, [x2,#0]
// x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, x27
sbcs x17, x17, x28
sbc x7, x7, xzr
// Store ALxBL, low
stp x21, x22, [x2]
stp x23, x24, [x2,#16]
// Load AH
ldp x3, x4, [x0,#32]
ldr x5, [x0,#48]
// Load BH
ldp x10, x11, [x1,#32]
ldr x12, [x1,#48]
adds x8, x8, x25
adcs x9, x9, x26
adcs x19, x19, x27
adcs x20, x20, x28
adc x1, xzr, xzr
add x0, x0, #32
// Temporarily store x8,x9 in x2
stp x8,x9, [x2,#32]
// x21-x28 <- AH x BH
// A0 * B0
mul x21, x3, x10 // C0
umulh x24, x3, x10
// A0 * B1
mul x22, x3, x11
umulh x23, x3, x11
// A1 * B0
mul x8, x4, x10
umulh x9, x4, x10
adds x22, x22, x24
adc x23, x23, xzr
// A0 * B2
mul x27, x3, x12
umulh x28, x3, x12
adds x22, x22, x8 // C1
adcs x23, x23, x9
adc x24, xzr, xzr
// A2 * B0
mul x8, x5, x10
umulh x25, x5, x10
adds x23, x23, x27
adcs x24, x24, x25
adc x25, xzr, xzr
// A1 * B1
mul x27, x4, x11
umulh x9, x4, x11
adds x23, x23, x8
adcs x24, x24, x28
adc x25, x25, xzr
// A1 * B2
mul x8, x4, x12
umulh x28, x4, x12
adds x23, x23, x27 // C2
adcs x24, x24, x9
adc x25, x25, xzr
// A2 * B1
mul x27, x5, x11
umulh x9, x5, x11
adds x24, x24, x8
adcs x25, x25, x28
adc x26, xzr, xzr
// A2 * B2
mul x8, x5, x12
umulh x28, x5, x12
adds x24, x24, x27 // C3
adcs x25, x25, x9
adc x26, x26, xzr
adds x25, x25, x8 // C4
adc x26, x26, x28 // C5
// Restore x8,x9
ldp x8,x9, [x2,#32]
neg x1, x1
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, xzr
sbcs x17, x17, xzr
sbc x7, x7, xzr
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
stp x8, x9, [x2,#32]
stp x19, x20, [x2,#48]
adds x1, x1, #1
adcs x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adcs x25, x7, x25
adc x26, x26, xzr
stp x14, x15, [x2,#64]
stp x16, x17, [x2,#80]
stp x25, x26, [x2,#96]
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
.globl sike_fprdc
.hidden sike_fprdc
.align 4
sike_fprdc:
stp x29, x30, [sp, #-96]!
add x29, sp, xzr
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x2, x3, [x0,#0] // a[0-1]
// Load the prime constant
adrp x26, .Lp434p1
add x26, x26, :lo12:.Lp434p1
ldp x23, x24, [x26, #0x0]
ldp x25, x26, [x26,#0x10]
// a[0-1] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x10, x3, x23
umulh x11, x3, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x10 // C1
adcs x6, x6, x11
adc x7, xzr, xzr
mul x10, x3, x24
umulh x11, x3, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x10 // C2
adcs x7, x7, x11
adc x8, x8, xzr
mul x10, x3, x25
umulh x11, x3, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x3, x26
umulh x28, x3, x26
adds x7, x7, x10 // C3
adcs x8, x8, x11
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
ldp x10, x11, [x0, #0x18]
ldp x12, x13, [x0, #0x28]
ldp x14, x15, [x0, #0x38]
ldp x16, x17, [x0, #0x48]
ldp x19, x20, [x0, #0x58]
ldr x21, [x0, #0x68]
adds x10, x10, x4
adcs x11, x11, x5
adcs x12, x12, x6
adcs x13, x13, x7
adcs x14, x14, x8
adcs x15, x15, x9
adcs x22, x16, xzr
adcs x17, x17, xzr
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
ldr x2, [x0,#0x10] // a[2]
// a[2-3] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x0, x10, x23
umulh x3, x10, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x0 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x0, x10, x24
umulh x3, x10, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x0 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x0, x10, x25
umulh x3, x10, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x10, x26
umulh x28, x10, x26
adds x7, x7, x0 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x12, x12, x4
adcs x13, x13, x5
adcs x14, x14, x6
adcs x15, x15, x7
adcs x16, x22, x8
adcs x17, x17, x9
adcs x22, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
mul x4, x11, x23 // C0
umulh x7, x11, x23
mul x5, x11, x24
umulh x6, x11, x24
mul x10, x12, x23
umulh x3, x12, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x11, x25
umulh x28, x11, x25
adds x5, x5, x10 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x10, x12, x24
umulh x3, x12, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x11, x26
umulh x28, x11, x26
adds x6, x6, x10 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x10, x12, x25
umulh x3, x12, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x12, x26
umulh x28, x12, x26
adds x7, x7, x10 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x14, x14, x4
adcs x15, x15, x5
adcs x16, x16, x6
adcs x17, x17, x7
adcs x19, x22, x8
adcs x20, x20, x9
adc x22, x21, xzr
stp x14, x15, [x1, #0x0] // C0, C1
mul x4, x13, x23 // C0
umulh x10, x13, x23
mul x5, x13, x24
umulh x27, x13, x24
adds x5, x5, x10 // C1
adc x10, xzr, xzr
mul x6, x13, x25
umulh x28, x13, x25
adds x27, x10, x27
adcs x6, x6, x27 // C2
adc x10, xzr, xzr
mul x7, x13, x26
umulh x8, x13, x26
adds x28, x10, x28
adcs x7, x7, x28 // C3
adc x8, x8, xzr // C4
adds x16, x16, x4
adcs x17, x17, x5
adcs x19, x19, x6
adcs x20, x20, x7
adc x21, x22, x8
str x16, [x1, #0x10]
stp x17, x19, [x1, #0x18]
stp x20, x21, [x1, #0x28]
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
.globl sike_fpadd
.hidden sike_fpadd
.align 4
sike_fpadd:
stp x29,x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
// Subtract 2xp434
adrp x17, .Lp434x2
add x17, x17, :lo12:.Lp434x2
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14
sbcs x8, x8, x15
sbcs x9, x9, x16
sbc x0, xzr, xzr // x0 can be reused now
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl sike_fpsub
.hidden sike_fpsub
.align 4
sike_fpsub:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
sbc x0, xzr, xzr
// Add 2xp434 anded with the mask in x0
adrp x17, .Lp434x2
add x17, x17, :lo12:.Lp434x2
// First half
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl sike_mpadd_asm
.hidden sike_mpadd_asm
.align 4
sike_mpadd_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl sike_mpsubx2_asm
.hidden sike_mpsubx2_asm
.align 4
sike_mpsubx2_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x11, x12, [x1,#32]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbcs x9, x9, x13
sbcs x10, x10, x14
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbc x0, xzr, xzr
stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
ldp x29, x30, [sp],#16
ret
.globl sike_mpdblsubx2_asm
.hidden sike_mpdblsubx2_asm
.align 4
sike_mpdblsubx2_asm:
stp x29, x30, [sp, #-16]!
add x29, sp, #0
ldp x3, x4, [x2, #0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x11, x12, [x0, #0]
ldp x13, x14, [x0,#16]
ldp x15, x16, [x0,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
// x9 stores carry
adc x9, xzr, xzr
ldp x11, x12, [x1, #0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2, #0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
ldp x3, x4, [x2,#48]
ldp x5, x6, [x2,#64]
ldp x7, x8, [x2,#80]
ldp x11, x12, [x0,#48]
ldp x13, x14, [x0,#64]
ldp x15, x16, [x0,#80]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, xzr, xzr
ldp x11, x12, [x1,#48]
ldp x13, x14, [x1,#64]
ldp x15, x16, [x1,#80]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2,#48]
stp x5, x6, [x2,#64]
stp x7, x8, [x2,#80]
ldp x3, x4, [x2,#96]
ldp x11, x12, [x0,#96]
ldp x13, x14, [x1,#96]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
subs x3, x3, x13
sbc x4, x4, x14
stp x3, x4, [x2,#96]
ldp x29, x30, [sp],#16
ret
#endif
#endif // !OPENSSL_NO_ASM

View File

@ -1490,3 +1490,4 @@ ChaCha20_neon:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

View File

@ -778,3 +778,4 @@ aes_hw_ctr32_encrypt_blocks:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -974,3 +974,4 @@ bn_mul8x_mont_neon:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1526,3 +1526,4 @@ bsaes_ctr32_encrypt_blocks:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -31,342 +31,6 @@
#else
.code 32
#endif
.type rem_4bit,%object
.align 5
rem_4bit:
.short 0x0000,0x1C20,0x3840,0x2460
.short 0x7080,0x6CA0,0x48C0,0x54E0
.short 0xE100,0xFD20,0xD940,0xC560
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
.size rem_4bit,.-rem_4bit
.type rem_4bit_get,%function
rem_4bit_get:
#if defined(__thumb2__)
adr r2,rem_4bit
#else
sub r2,pc,#8+32 @ &rem_4bit
#endif
b .Lrem_4bit_got
nop
nop
.size rem_4bit_get,.-rem_4bit_get
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,%function
.align 4
gcm_ghash_4bit:
#if defined(__thumb2__)
adr r12,rem_4bit
#else
sub r12,pc,#8+48 @ &rem_4bit
#endif
add r3,r2,r3 @ r3 to point at the end
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
ldrb r12,[r2,#15]
ldrb r14,[r0,#15]
.Louter:
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
add r11,r1,r14
ldrb r12,[r2,#14]
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[sp,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
ldrb r14,[r0,#14]
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
eor r7,r7,r8,lsl#16
.Linner:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[sp,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r2,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r8,[r0,r3]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r9,[sp,r14]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
#ifdef __thumb2__
it pl
#endif
eorpl r12,r12,r8
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
bpl .Linner
ldr r3,[sp,#32] @ re-load r3/end
add r2,r2,#16
mov r14,r4
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
cmp r2,r3
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#ifdef __thumb2__
it ne
#endif
ldrneb r12,[r2,#15]
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
bne .Louter
add sp,sp,#36
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type gcm_gmult_4bit,%function
gcm_gmult_4bit:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldrb r12,[r0,#15]
b rem_4bit_get
.Lrem_4bit_got:
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
ldrb r12,[r0,#14]
add r11,r1,r14
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
and r14,r12,#0xf0
eor r7,r7,r8,lsl#16
and r12,r12,#0x0f
.Loop:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[r2,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r0,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
bpl .Loop
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size gcm_gmult_4bit,.-gcm_gmult_4bit
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
@ -588,3 +252,4 @@ gcm_ghash_neon:
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -250,3 +250,4 @@ gcm_ghash_v8:
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1508,3 +1508,4 @@ sha1_block_data_order_armv8:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -2836,3 +2836,4 @@ sha256_block_data_order_armv8:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1891,3 +1891,4 @@ sha512_block_data_order_neon:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

View File

@ -31,7 +31,6 @@
.hidden abi_test_trampoline
.align 4
abi_test_trampoline:
.Labi_test_trampoline_begin:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
@ -377,3 +376,4 @@ abi_test_clobber_d15:
.size abi_test_clobber_d15,.-abi_test_clobber_d15
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -3667,3 +3667,4 @@ _aesp8_xts_dec5x:
.long 0
.byte 0,12,0x14,0,0,0,0,0
#endif // !OPENSSL_NO_ASM && __powerpc64__
.section .note.GNU-stack,"",@progbits

View File

@ -584,3 +584,4 @@ gcm_ghash_p8:
.align 2
.align 2
#endif // !OPENSSL_NO_ASM && __powerpc64__
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -972,3 +972,4 @@ ChaCha20_ssse3:
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
#endif
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
@ -14,7 +14,7 @@
.align 16
aes_hw_encrypt:
.L_aes_hw_encrypt_begin:
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call .L000pic
@ -845,7 +845,7 @@ aes_hw_ctr32_encrypt_blocks:
pushl %ebx
pushl %esi
pushl %edi
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call .L038pic
@ -2440,7 +2440,7 @@ _aesni_set_encrypt_key:
.align 16
aes_hw_set_encrypt_key:
.L_aes_hw_set_encrypt_key_begin:
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call .L116pic
@ -2510,3 +2510,4 @@ aes_hw_set_decrypt_key:
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -993,551 +993,5 @@ bn_sub_words:
popl %ebp
ret
.size bn_sub_words,.-.L_bn_sub_words_begin
.globl bn_sub_part_words
.hidden bn_sub_part_words
.type bn_sub_part_words,@function
.align 16
bn_sub_part_words:
.L_bn_sub_part_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz .L029aw_finish
.L030aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl 28(%esi),%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L030aw_loop
.L029aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
.L031aw_end:
cmpl $0,36(%esp)
je .L032pw_end
movl 36(%esp),%ebp
cmpl $0,%ebp
je .L032pw_end
jge .L033pw_pos
movl $0,%edx
subl %ebp,%edx
movl %edx,%ebp
andl $4294967288,%ebp
jz .L034pw_neg_finish
.L035pw_neg_loop:
movl $0,%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl $0,%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl $0,%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl $0,%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl $0,%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl $0,%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl $0,%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl $0,%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L035pw_neg_loop
.L034pw_neg_finish:
movl 36(%esp),%edx
movl $0,%ebp
subl %edx,%ebp
andl $7,%ebp
jz .L032pw_end
movl $0,%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
jmp .L032pw_end
.L033pw_pos:
andl $4294967288,%ebp
jz .L036pw_pos_finish
.L037pw_pos_loop:
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
jnc .L038pw_nc0
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
jnc .L039pw_nc1
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
jnc .L040pw_nc2
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
jnc .L041pw_nc3
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
jnc .L042pw_nc4
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
jnc .L043pw_nc5
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
jnc .L044pw_nc6
movl 28(%esi),%ecx
subl %eax,%ecx
movl %ecx,28(%ebx)
jnc .L045pw_nc7
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
jnz .L037pw_pos_loop
.L036pw_pos_finish:
movl 36(%esp),%ebp
andl $7,%ebp
jz .L032pw_end
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
jnc .L046pw_tail_nc0
decl %ebp
jz .L032pw_end
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
jnc .L047pw_tail_nc1
decl %ebp
jz .L032pw_end
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
jnc .L048pw_tail_nc2
decl %ebp
jz .L032pw_end
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
jnc .L049pw_tail_nc3
decl %ebp
jz .L032pw_end
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
jnc .L050pw_tail_nc4
decl %ebp
jz .L032pw_end
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
jnc .L051pw_tail_nc5
decl %ebp
jz .L032pw_end
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
jnc .L052pw_tail_nc6
movl $1,%eax
jmp .L032pw_end
.L053pw_nc_loop:
movl (%esi),%ecx
movl %ecx,(%ebx)
.L038pw_nc0:
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
.L039pw_nc1:
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
.L040pw_nc2:
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
.L041pw_nc3:
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
.L042pw_nc4:
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
.L043pw_nc5:
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
.L044pw_nc6:
movl 28(%esi),%ecx
movl %ecx,28(%ebx)
.L045pw_nc7:
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
jnz .L053pw_nc_loop
movl 36(%esp),%ebp
andl $7,%ebp
jz .L054pw_nc_end
movl (%esi),%ecx
movl %ecx,(%ebx)
.L046pw_tail_nc0:
decl %ebp
jz .L054pw_nc_end
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
.L047pw_tail_nc1:
decl %ebp
jz .L054pw_nc_end
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
.L048pw_tail_nc2:
decl %ebp
jz .L054pw_nc_end
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
.L049pw_tail_nc3:
decl %ebp
jz .L054pw_nc_end
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
.L050pw_tail_nc4:
decl %ebp
jz .L054pw_nc_end
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
.L051pw_tail_nc5:
decl %ebp
jz .L054pw_nc_end
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
.L052pw_tail_nc6:
.L054pw_nc_end:
movl $0,%eax
.L032pw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1263,3 +1263,4 @@ bn_sqr_comba4:
ret
.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -291,3 +291,4 @@ gcm_ghash_ssse3:
.Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -6,711 +6,6 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_gmult_4bit_mmx
.hidden gcm_gmult_4bit_mmx
.type gcm_gmult_4bit_mmx,@function
.align 16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
call .L000pic_point
.L000pic_point:
popl %eax
leal .Lrem_4bit-.L000pic_point(%eax),%eax
movzbl 15(%edi),%ebx
xorl %ecx,%ecx
movl %ebx,%edx
movb %dl,%cl
movl $14,%ebp
shlb $4,%cl
andl $240,%edx
movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1
movd %mm0,%ebx
jmp .L001mmx_loop
.align 16
.L001mmx_loop:
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb (%edi,%ebp,1),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
decl %ebp
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
pxor %mm2,%mm0
js .L002mmx_break
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
jmp .L001mmx_loop
.align 16
.L002mmx_break:
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
pxor %mm2,%mm0
psrlq $32,%mm0
movd %mm1,%edx
psrlq $32,%mm1
movd %mm0,%ecx
movd %mm1,%ebp
bswap %ebx
bswap %edx
bswap %ecx
bswap %ebp
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
movl %ecx,8(%edi)
movl %ebp,(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
.globl gcm_ghash_4bit_mmx
.hidden gcm_ghash_4bit_mmx
.type gcm_ghash_4bit_mmx,@function
.align 16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%ebx
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl %esp,%ebp
call .L003pic_point
.L003pic_point:
popl %esi
leal .Lrem_8bit-.L003pic_point(%esi),%esi
subl $544,%esp
andl $-64,%esp
subl $16,%esp
addl %ecx,%edx
movl %eax,544(%esp)
movl %edx,552(%esp)
movl %ebp,556(%esp)
addl $128,%ebx
leal 144(%esp),%edi
leal 400(%esp),%ebp
movl -120(%ebx),%edx
movq -120(%ebx),%mm0
movq -128(%ebx),%mm3
shll $4,%edx
movb %dl,(%esp)
movl -104(%ebx),%edx
movq -104(%ebx),%mm2
movq -112(%ebx),%mm5
movq %mm0,-128(%edi)
psrlq $4,%mm0
movq %mm3,(%edi)
movq %mm3,%mm7
psrlq $4,%mm3
shll $4,%edx
movb %dl,1(%esp)
movl -88(%ebx),%edx
movq -88(%ebx),%mm1
psllq $60,%mm7
movq -96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-120(%edi)
psrlq $4,%mm2
movq %mm5,8(%edi)
movq %mm5,%mm6
movq %mm0,-128(%ebp)
psrlq $4,%mm5
movq %mm3,(%ebp)
shll $4,%edx
movb %dl,2(%esp)
movl -72(%ebx),%edx
movq -72(%ebx),%mm0
psllq $60,%mm6
movq -80(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-112(%edi)
psrlq $4,%mm1
movq %mm4,16(%edi)
movq %mm4,%mm7
movq %mm2,-120(%ebp)
psrlq $4,%mm4
movq %mm5,8(%ebp)
shll $4,%edx
movb %dl,3(%esp)
movl -56(%ebx),%edx
movq -56(%ebx),%mm2
psllq $60,%mm7
movq -64(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-104(%edi)
psrlq $4,%mm0
movq %mm3,24(%edi)
movq %mm3,%mm6
movq %mm1,-112(%ebp)
psrlq $4,%mm3
movq %mm4,16(%ebp)
shll $4,%edx
movb %dl,4(%esp)
movl -40(%ebx),%edx
movq -40(%ebx),%mm1
psllq $60,%mm6
movq -48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-96(%edi)
psrlq $4,%mm2
movq %mm5,32(%edi)
movq %mm5,%mm7
movq %mm0,-104(%ebp)
psrlq $4,%mm5
movq %mm3,24(%ebp)
shll $4,%edx
movb %dl,5(%esp)
movl -24(%ebx),%edx
movq -24(%ebx),%mm0
psllq $60,%mm7
movq -32(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-88(%edi)
psrlq $4,%mm1
movq %mm4,40(%edi)
movq %mm4,%mm6
movq %mm2,-96(%ebp)
psrlq $4,%mm4
movq %mm5,32(%ebp)
shll $4,%edx
movb %dl,6(%esp)
movl -8(%ebx),%edx
movq -8(%ebx),%mm2
psllq $60,%mm6
movq -16(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-80(%edi)
psrlq $4,%mm0
movq %mm3,48(%edi)
movq %mm3,%mm7
movq %mm1,-88(%ebp)
psrlq $4,%mm3
movq %mm4,40(%ebp)
shll $4,%edx
movb %dl,7(%esp)
movl 8(%ebx),%edx
movq 8(%ebx),%mm1
psllq $60,%mm7
movq (%ebx),%mm4
por %mm7,%mm0
movq %mm2,-72(%edi)
psrlq $4,%mm2
movq %mm5,56(%edi)
movq %mm5,%mm6
movq %mm0,-80(%ebp)
psrlq $4,%mm5
movq %mm3,48(%ebp)
shll $4,%edx
movb %dl,8(%esp)
movl 24(%ebx),%edx
movq 24(%ebx),%mm0
psllq $60,%mm6
movq 16(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-64(%edi)
psrlq $4,%mm1
movq %mm4,64(%edi)
movq %mm4,%mm7
movq %mm2,-72(%ebp)
psrlq $4,%mm4
movq %mm5,56(%ebp)
shll $4,%edx
movb %dl,9(%esp)
movl 40(%ebx),%edx
movq 40(%ebx),%mm2
psllq $60,%mm7
movq 32(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-56(%edi)
psrlq $4,%mm0
movq %mm3,72(%edi)
movq %mm3,%mm6
movq %mm1,-64(%ebp)
psrlq $4,%mm3
movq %mm4,64(%ebp)
shll $4,%edx
movb %dl,10(%esp)
movl 56(%ebx),%edx
movq 56(%ebx),%mm1
psllq $60,%mm6
movq 48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-48(%edi)
psrlq $4,%mm2
movq %mm5,80(%edi)
movq %mm5,%mm7
movq %mm0,-56(%ebp)
psrlq $4,%mm5
movq %mm3,72(%ebp)
shll $4,%edx
movb %dl,11(%esp)
movl 72(%ebx),%edx
movq 72(%ebx),%mm0
psllq $60,%mm7
movq 64(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-40(%edi)
psrlq $4,%mm1
movq %mm4,88(%edi)
movq %mm4,%mm6
movq %mm2,-48(%ebp)
psrlq $4,%mm4
movq %mm5,80(%ebp)
shll $4,%edx
movb %dl,12(%esp)
movl 88(%ebx),%edx
movq 88(%ebx),%mm2
psllq $60,%mm6
movq 80(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-32(%edi)
psrlq $4,%mm0
movq %mm3,96(%edi)
movq %mm3,%mm7
movq %mm1,-40(%ebp)
psrlq $4,%mm3
movq %mm4,88(%ebp)
shll $4,%edx
movb %dl,13(%esp)
movl 104(%ebx),%edx
movq 104(%ebx),%mm1
psllq $60,%mm7
movq 96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-24(%edi)
psrlq $4,%mm2
movq %mm5,104(%edi)
movq %mm5,%mm6
movq %mm0,-32(%ebp)
psrlq $4,%mm5
movq %mm3,96(%ebp)
shll $4,%edx
movb %dl,14(%esp)
movl 120(%ebx),%edx
movq 120(%ebx),%mm0
psllq $60,%mm6
movq 112(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-16(%edi)
psrlq $4,%mm1
movq %mm4,112(%edi)
movq %mm4,%mm7
movq %mm2,-24(%ebp)
psrlq $4,%mm4
movq %mm5,104(%ebp)
shll $4,%edx
movb %dl,15(%esp)
psllq $60,%mm7
por %mm7,%mm1
movq %mm0,-8(%edi)
psrlq $4,%mm0
movq %mm3,120(%edi)
movq %mm3,%mm6
movq %mm1,-16(%ebp)
psrlq $4,%mm3
movq %mm4,112(%ebp)
psllq $60,%mm6
por %mm6,%mm0
movq %mm0,-8(%ebp)
movq %mm3,120(%ebp)
movq (%eax),%mm6
movl 8(%eax),%ebx
movl 12(%eax),%edx
.align 16
.L004outer:
xorl 12(%ecx),%edx
xorl 8(%ecx),%ebx
pxor (%ecx),%mm6
leal 16(%ecx),%ecx
movl %ebx,536(%esp)
movq %mm6,528(%esp)
movl %ecx,548(%esp)
xorl %eax,%eax
roll $8,%edx
movb %dl,%al
movl %eax,%ebp
andb $15,%al
shrl $4,%ebp
pxor %mm0,%mm0
roll $8,%edx
pxor %mm1,%mm1
pxor %mm2,%mm2
movq 16(%esp,%eax,8),%mm7
movq 144(%esp,%eax,8),%mm6
movb %dl,%al
movd %mm7,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%edi
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 536(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 532(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 528(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 524(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
pxor 144(%esp,%eax,8),%mm6
xorb (%esp,%ebp,1),%bl
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
movzbl %bl,%ebx
pxor %mm2,%mm2
psllq $4,%mm1
movd %mm7,%ecx
psrlq $4,%mm7
movq %mm6,%mm3
psrlq $4,%mm6
shll $4,%ecx
pxor 16(%esp,%edi,8),%mm7
psllq $60,%mm3
movzbl %cl,%ecx
pxor %mm3,%mm7
pxor 144(%esp,%edi,8),%mm6
pinsrw $2,(%esi,%ebx,2),%mm0
pxor %mm1,%mm6
movd %mm7,%edx
pinsrw $3,(%esi,%ecx,2),%mm2
psllq $12,%mm0
pxor %mm0,%mm6
psrlq $32,%mm7
pxor %mm2,%mm6
movl 548(%esp),%ecx
movd %mm7,%ebx
movq %mm6,%mm3
psllw $8,%mm6
psrlw $8,%mm3
por %mm3,%mm6
bswap %edx
pshufw $27,%mm6,%mm6
bswap %ebx
cmpl 552(%esp),%ecx
jne .L004outer
movl 544(%esp),%eax
movl %edx,12(%eax)
movl %ebx,8(%eax)
movq %mm6,(%eax)
movl 556(%esp),%esp
emms
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
@ -719,10 +14,10 @@ gcm_init_clmul:
.L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L005pic
.L005pic:
call .L000pic
.L000pic:
popl %ecx
leal .Lbswap-.L005pic(%ecx),%ecx
leal .Lbswap-.L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
@ -789,10 +84,10 @@ gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L006pic
.L006pic:
call .L001pic
.L001pic:
popl %ecx
leal .Lbswap-.L006pic(%ecx),%ecx
leal .Lbswap-.L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
@ -849,16 +144,16 @@ gcm_ghash_clmul:
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call .L007pic
.L007pic:
call .L002pic
.L002pic:
popl %ecx
leal .Lbswap-.L007pic(%ecx),%ecx
leal .Lbswap-.L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz .L008odd_tail
jz .L003odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
@ -875,10 +170,10 @@ gcm_ghash_clmul:
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe .L009even_tail
jmp .L010mod_loop
jbe .L004even_tail
jmp .L005mod_loop
.align 32
.L010mod_loop:
.L005mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
@ -933,8 +228,8 @@ gcm_ghash_clmul:
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja .L010mod_loop
.L009even_tail:
ja .L005mod_loop
.L004even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
@ -973,9 +268,9 @@ gcm_ghash_clmul:
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L011done
jnz .L006done
movups (%edx),%xmm2
.L008odd_tail:
.L003odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
@ -1014,7 +309,7 @@ gcm_ghash_clmul:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.L011done:
.L006done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
@ -1027,48 +322,9 @@ gcm_ghash_clmul:
.Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 64
.Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
.align 64
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -685,3 +685,4 @@ md5_block_asm_data_order:
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -3805,3 +3805,4 @@ _sha1_block_data_order_avx:
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -5564,3 +5564,4 @@ sha256_block_data_order:
ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -2834,3 +2834,4 @@ sha512_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -6,7 +6,7 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
#endif
.align 64
.L_vpaes_consts:
@ -485,7 +485,7 @@ vpaes_set_encrypt_key:
pushl %ebx
pushl %esi
pushl %edi
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call .L016pic
@ -570,7 +570,7 @@ vpaes_encrypt:
pushl %ebx
pushl %esi
pushl %edi
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call .L019pic
@ -705,3 +705,4 @@ vpaes_cbc_encrypt:
ret
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -481,3 +481,4 @@ bn_mul_mont:
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -203,3 +203,4 @@ abi_test_clobber_xmm7:
ret
.size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1630,3 +1630,4 @@ ChaCha20_8x:
.cfi_endproc
.size ChaCha20_8x,.-ChaCha20_8x
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -3076,3 +3076,4 @@ aes256gcmsiv_kdf:
.cfi_endproc
.size aes256gcmsiv_kdf, .-aes256gcmsiv_kdf
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -3935,7 +3935,7 @@ do_length_block:
popq %rbp
.cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3
.cfi_adjust_cfa_offset (8 * 6) + 288 + 32
.cfi_adjust_cfa_offset (8 * 7) + 288 + 32
seal_sse_128:
movdqu .chacha20_consts(%rip),%xmm0
@ -8984,3 +8984,4 @@ seal_avx2_short_tail:
jmp seal_sse_tail_16
.cfi_endproc
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -556,12 +556,10 @@ _aesni_ctr32_6x:
.align 32
aesni_gcm_encrypt:
.cfi_startproc
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+2(%rip)
#endif
#endif
xorq %r10,%r10
@ -851,3 +849,4 @@ aesni_gcm_encrypt:
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -20,12 +20,10 @@
.align 16
aes_hw_encrypt:
.cfi_startproc
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+1(%rip)
#endif
#endif
movups (%rdi),%xmm2
movl 240(%rdx),%eax
@ -887,10 +885,8 @@ aes_hw_ecb_encrypt:
.align 16
aes_hw_ctr32_encrypt_blocks:
.cfi_startproc
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,BORINGSSL_function_hit(%rip)
#endif
#endif
cmpq $1,%rdx
jne .Lctr32_bulk
@ -2111,11 +2107,9 @@ aes_hw_set_decrypt_key:
aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,BORINGSSL_function_hit+3(%rip)
#endif
#endif
.byte 0x48,0x83,0xEC,0x08
.cfi_adjust_cfa_offset 8
movq $-1,%rax
@ -2509,3 +2503,4 @@ __aesni_set_encrypt_key:
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -424,3 +424,4 @@ gcm_ghash_ssse3:
.Llow4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -14,709 +14,6 @@
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
.globl gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:
movzbq 15(%rdi),%r8
leaq .Lrem_4bit(%rip),%r11
xorq %rax,%rax
xorq %rbx,%rbx
movb %r8b,%al
movb %r8b,%bl
shlb $4,%al
movq $14,%rcx
movq 8(%rsi,%rax,1),%r8
movq (%rsi,%rax,1),%r9
andb $0xf0,%bl
movq %r8,%rdx
jmp .Loop1
.align 16
.Loop1:
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
movb (%rdi,%rcx,1),%al
shrq $4,%r9
xorq 8(%rsi,%rbx,1),%r8
shlq $60,%r10
xorq (%rsi,%rbx,1),%r9
movb %al,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
shlb $4,%al
xorq %r10,%r8
decq %rcx
js .Lbreak1
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rax,1),%r8
shlq $60,%r10
xorq (%rsi,%rax,1),%r9
andb $0xf0,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
xorq %r10,%r8
jmp .Loop1
.align 16
.Lbreak1:
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rax,1),%r8
shlq $60,%r10
xorq (%rsi,%rax,1),%r9
andb $0xf0,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
xorq %r10,%r8
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rbx,1),%r8
shlq $60,%r10
xorq (%rsi,%rbx,1),%r9
xorq %r10,%r8
xorq (%r11,%rdx,8),%r9
bswapq %r8
bswapq %r9
movq %r8,8(%rdi)
movq %r9,(%rdi)
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
movq %rdx,%r14
movq %rcx,%r15
subq $-128,%rsi
leaq 16+128(%rsp),%rbp
xorl %edx,%edx
movq 0+0-128(%rsi),%r8
movq 0+8-128(%rsi),%rax
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq 16+0-128(%rsi),%r9
shlb $4,%dl
movq 16+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,0(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,0(%rbp)
movq 32+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,0-128(%rbp)
movq 32+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,1(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,8(%rbp)
movq 48+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,8-128(%rbp)
movq 48+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,2(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,16(%rbp)
movq 64+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,16-128(%rbp)
movq 64+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,3(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,24(%rbp)
movq 80+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,24-128(%rbp)
movq 80+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,4(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,32(%rbp)
movq 96+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,32-128(%rbp)
movq 96+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,5(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,40(%rbp)
movq 112+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,40-128(%rbp)
movq 112+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,6(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,48(%rbp)
movq 128+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,48-128(%rbp)
movq 128+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,7(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,56(%rbp)
movq 144+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,56-128(%rbp)
movq 144+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,8(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,64(%rbp)
movq 160+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,64-128(%rbp)
movq 160+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,9(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,72(%rbp)
movq 176+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,72-128(%rbp)
movq 176+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,10(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,80(%rbp)
movq 192+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,80-128(%rbp)
movq 192+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,11(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,88(%rbp)
movq 208+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,88-128(%rbp)
movq 208+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,12(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,96(%rbp)
movq 224+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,96-128(%rbp)
movq 224+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,13(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,104(%rbp)
movq 240+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,104-128(%rbp)
movq 240+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,14(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,112(%rbp)
shlb $4,%dl
movq %rax,112-128(%rbp)
shlq $60,%r10
movb %dl,15(%rsp)
orq %r10,%rbx
movq %r9,120(%rbp)
movq %rbx,120-128(%rbp)
addq $-128,%rsi
movq 8(%rdi),%r8
movq 0(%rdi),%r9
addq %r14,%r15
leaq .Lrem_8bit(%rip),%r11
jmp .Louter_loop
.align 16
.Louter_loop:
xorq (%r14),%r9
movq 8(%r14),%rdx
leaq 16(%r14),%r14
xorq %r8,%rdx
movq %r9,(%rdi)
movq %rdx,8(%rdi)
shrq $32,%rdx
xorq %rax,%rax
roll $8,%edx
movb %dl,%al
movzbl %dl,%ebx
shlb $4,%al
shrl $4,%ebx
roll $8,%edx
movq 8(%rsi,%rax,1),%r8
movq (%rsi,%rax,1),%r9
movb %dl,%al
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
xorq %r8,%r12
movq %r9,%r10
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 8(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 4(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 0(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
andl $240,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl -4(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
movzwq (%r11,%r12,2),%r12
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
shlq $48,%r12
xorq %r10,%r8
xorq %r12,%r9
movzbq %r8b,%r13
shrq $4,%r8
movq %r9,%r10
shlb $4,%r13b
shrq $4,%r9
xorq 8(%rsi,%rcx,1),%r8
movzwq (%r11,%r13,2),%r13
shlq $60,%r10
xorq (%rsi,%rcx,1),%r9
xorq %r10,%r8
shlq $48,%r13
bswapq %r8
xorq %r13,%r9
bswapq %r9
cmpq %r15,%r14
jb .Louter_loop
movq %r8,8(%rdi)
movq %r9,(%rdi)
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
@ -1822,50 +1119,9 @@ gcm_ghash_avx:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
.align 64
.type .Lrem_4bit,@object
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type .Lrem_8bit,@object
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -699,3 +699,4 @@ md5_block_asm_data_order:
.cfi_endproc
.size md5_block_asm_data_order,.-md5_block_asm_data_order
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -4540,3 +4540,4 @@ ecp_nistz256_point_add_affinex:
.cfi_endproc
.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -340,3 +340,4 @@ beeu_mod_inverse_vartime:
.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -60,3 +60,4 @@ CRYPTO_rdrand_multiple8_buf:
.cfi_endproc
.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1746,3 +1746,4 @@ rsaz_1024_gather5_avx2:
.long 4,4,4,4, 4,4,4,4
.align 64
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -3970,3 +3970,4 @@ sha256_block_data_order_avx:
.cfi_endproc
.size sha256_block_data_order_avx,.-sha256_block_data_order_avx
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -2989,3 +2989,4 @@ sha512_block_data_order_avx:
.cfi_endproc
.size sha512_block_data_order_avx,.-sha512_block_data_order_avx
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -812,12 +812,10 @@ _vpaes_schedule_mangle:
.align 16
vpaes_set_encrypt_key:
.cfi_startproc
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+5(%rip)
#endif
#endif
movl %esi,%eax
@ -863,12 +861,10 @@ vpaes_set_decrypt_key:
.align 16
vpaes_encrypt:
.cfi_startproc
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+4(%rip)
#endif
#endif
movdqu (%rdi),%xmm0
call _vpaes_preheat
@ -1134,3 +1130,4 @@ _vpaes_consts:
.align 64
.size _vpaes_consts,.-_vpaes_consts
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1257,3 +1257,4 @@ bn_mulx4x_mont:
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -3787,3 +3787,4 @@ bn_gather5:
.long 2,2, 2,2
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -515,3 +515,4 @@ abi_test_set_direction_flag:
.byte 0xf3,0xc3
.size abi_test_set_direction_flag,.-abi_test_set_direction_flag
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -6,14 +6,14 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
#endif
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
.align 4
_aes_hw_encrypt:
L_aes_hw_encrypt_begin:
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call L000pic
@ -818,7 +818,7 @@ L_aes_hw_ctr32_encrypt_blocks_begin:
pushl %ebx
pushl %esi
pushl %edi
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call L038pic
@ -2403,7 +2403,7 @@ L097bad_keybits:
.align 4
_aes_hw_set_encrypt_key:
L_aes_hw_set_encrypt_key_begin:
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call L116pic

View File

@ -981,551 +981,6 @@ L028aw_end:
popl %ebx
popl %ebp
ret
.globl _bn_sub_part_words
.private_extern _bn_sub_part_words
.align 4
_bn_sub_part_words:
L_bn_sub_part_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz L029aw_finish
L030aw_loop:
# Round 0
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
# Round 1
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
# Round 2
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
# Round 3
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
# Round 4
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
# Round 5
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
# Round 6
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
# Round 7
movl 28(%esi),%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz L030aw_loop
L029aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
jz L031aw_end
# Tail Round 0
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 1
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 2
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 3
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 4
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 5
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 6
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
L031aw_end:
cmpl $0,36(%esp)
je L032pw_end
movl 36(%esp),%ebp
cmpl $0,%ebp
je L032pw_end
jge L033pw_pos
# pw_neg
movl $0,%edx
subl %ebp,%edx
movl %edx,%ebp
andl $4294967288,%ebp
jz L034pw_neg_finish
L035pw_neg_loop:
# dl<0 Round 0
movl $0,%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
# dl<0 Round 1
movl $0,%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
# dl<0 Round 2
movl $0,%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
# dl<0 Round 3
movl $0,%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
# dl<0 Round 4
movl $0,%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
# dl<0 Round 5
movl $0,%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
# dl<0 Round 6
movl $0,%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
# dl<0 Round 7
movl $0,%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz L035pw_neg_loop
L034pw_neg_finish:
movl 36(%esp),%edx
movl $0,%ebp
subl %edx,%ebp
andl $7,%ebp
jz L032pw_end
# dl<0 Tail Round 0
movl $0,%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz L032pw_end
# dl<0 Tail Round 1
movl $0,%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz L032pw_end
# dl<0 Tail Round 2
movl $0,%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz L032pw_end
# dl<0 Tail Round 3
movl $0,%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz L032pw_end
# dl<0 Tail Round 4
movl $0,%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz L032pw_end
# dl<0 Tail Round 5
movl $0,%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz L032pw_end
# dl<0 Tail Round 6
movl $0,%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
jmp L032pw_end
L033pw_pos:
andl $4294967288,%ebp
jz L036pw_pos_finish
L037pw_pos_loop:
# dl>0 Round 0
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
jnc L038pw_nc0
# dl>0 Round 1
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
jnc L039pw_nc1
# dl>0 Round 2
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
jnc L040pw_nc2
# dl>0 Round 3
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
jnc L041pw_nc3
# dl>0 Round 4
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
jnc L042pw_nc4
# dl>0 Round 5
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
jnc L043pw_nc5
# dl>0 Round 6
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
jnc L044pw_nc6
# dl>0 Round 7
movl 28(%esi),%ecx
subl %eax,%ecx
movl %ecx,28(%ebx)
jnc L045pw_nc7
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
jnz L037pw_pos_loop
L036pw_pos_finish:
movl 36(%esp),%ebp
andl $7,%ebp
jz L032pw_end
# dl>0 Tail Round 0
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
jnc L046pw_tail_nc0
decl %ebp
jz L032pw_end
# dl>0 Tail Round 1
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
jnc L047pw_tail_nc1
decl %ebp
jz L032pw_end
# dl>0 Tail Round 2
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
jnc L048pw_tail_nc2
decl %ebp
jz L032pw_end
# dl>0 Tail Round 3
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
jnc L049pw_tail_nc3
decl %ebp
jz L032pw_end
# dl>0 Tail Round 4
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
jnc L050pw_tail_nc4
decl %ebp
jz L032pw_end
# dl>0 Tail Round 5
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
jnc L051pw_tail_nc5
decl %ebp
jz L032pw_end
# dl>0 Tail Round 6
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
jnc L052pw_tail_nc6
movl $1,%eax
jmp L032pw_end
L053pw_nc_loop:
movl (%esi),%ecx
movl %ecx,(%ebx)
L038pw_nc0:
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
L039pw_nc1:
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
L040pw_nc2:
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
L041pw_nc3:
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
L042pw_nc4:
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
L043pw_nc5:
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
L044pw_nc6:
movl 28(%esi),%ecx
movl %ecx,28(%ebx)
L045pw_nc7:
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
jnz L053pw_nc_loop
movl 36(%esp),%ebp
andl $7,%ebp
jz L054pw_nc_end
movl (%esi),%ecx
movl %ecx,(%ebx)
L046pw_tail_nc0:
decl %ebp
jz L054pw_nc_end
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
L047pw_tail_nc1:
decl %ebp
jz L054pw_nc_end
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
L048pw_tail_nc2:
decl %ebp
jz L054pw_nc_end
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
L049pw_tail_nc3:
decl %ebp
jz L054pw_nc_end
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
L050pw_tail_nc4:
decl %ebp
jz L054pw_nc_end
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
L051pw_tail_nc5:
decl %ebp
jz L054pw_nc_end
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
L052pw_tail_nc6:
L054pw_nc_end:
movl $0,%eax
L032pw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P

View File

@ -6,707 +6,6 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _gcm_gmult_4bit_mmx
.private_extern _gcm_gmult_4bit_mmx
.align 4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
call L000pic_point
L000pic_point:
popl %eax
leal Lrem_4bit-L000pic_point(%eax),%eax
movzbl 15(%edi),%ebx
xorl %ecx,%ecx
movl %ebx,%edx
movb %dl,%cl
movl $14,%ebp
shlb $4,%cl
andl $240,%edx
movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1
movd %mm0,%ebx
jmp L001mmx_loop
.align 4,0x90
L001mmx_loop:
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb (%edi,%ebp,1),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
decl %ebp
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
pxor %mm2,%mm0
js L002mmx_break
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
jmp L001mmx_loop
.align 4,0x90
L002mmx_break:
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
pxor %mm2,%mm0
psrlq $32,%mm0
movd %mm1,%edx
psrlq $32,%mm1
movd %mm0,%ecx
movd %mm1,%ebp
bswap %ebx
bswap %edx
bswap %ecx
bswap %ebp
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
movl %ecx,8(%edi)
movl %ebp,(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_ghash_4bit_mmx
.private_extern _gcm_ghash_4bit_mmx
.align 4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%ebx
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl %esp,%ebp
call L003pic_point
L003pic_point:
popl %esi
leal Lrem_8bit-L003pic_point(%esi),%esi
subl $544,%esp
andl $-64,%esp
subl $16,%esp
addl %ecx,%edx
movl %eax,544(%esp)
movl %edx,552(%esp)
movl %ebp,556(%esp)
addl $128,%ebx
leal 144(%esp),%edi
leal 400(%esp),%ebp
movl -120(%ebx),%edx
movq -120(%ebx),%mm0
movq -128(%ebx),%mm3
shll $4,%edx
movb %dl,(%esp)
movl -104(%ebx),%edx
movq -104(%ebx),%mm2
movq -112(%ebx),%mm5
movq %mm0,-128(%edi)
psrlq $4,%mm0
movq %mm3,(%edi)
movq %mm3,%mm7
psrlq $4,%mm3
shll $4,%edx
movb %dl,1(%esp)
movl -88(%ebx),%edx
movq -88(%ebx),%mm1
psllq $60,%mm7
movq -96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-120(%edi)
psrlq $4,%mm2
movq %mm5,8(%edi)
movq %mm5,%mm6
movq %mm0,-128(%ebp)
psrlq $4,%mm5
movq %mm3,(%ebp)
shll $4,%edx
movb %dl,2(%esp)
movl -72(%ebx),%edx
movq -72(%ebx),%mm0
psllq $60,%mm6
movq -80(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-112(%edi)
psrlq $4,%mm1
movq %mm4,16(%edi)
movq %mm4,%mm7
movq %mm2,-120(%ebp)
psrlq $4,%mm4
movq %mm5,8(%ebp)
shll $4,%edx
movb %dl,3(%esp)
movl -56(%ebx),%edx
movq -56(%ebx),%mm2
psllq $60,%mm7
movq -64(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-104(%edi)
psrlq $4,%mm0
movq %mm3,24(%edi)
movq %mm3,%mm6
movq %mm1,-112(%ebp)
psrlq $4,%mm3
movq %mm4,16(%ebp)
shll $4,%edx
movb %dl,4(%esp)
movl -40(%ebx),%edx
movq -40(%ebx),%mm1
psllq $60,%mm6
movq -48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-96(%edi)
psrlq $4,%mm2
movq %mm5,32(%edi)
movq %mm5,%mm7
movq %mm0,-104(%ebp)
psrlq $4,%mm5
movq %mm3,24(%ebp)
shll $4,%edx
movb %dl,5(%esp)
movl -24(%ebx),%edx
movq -24(%ebx),%mm0
psllq $60,%mm7
movq -32(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-88(%edi)
psrlq $4,%mm1
movq %mm4,40(%edi)
movq %mm4,%mm6
movq %mm2,-96(%ebp)
psrlq $4,%mm4
movq %mm5,32(%ebp)
shll $4,%edx
movb %dl,6(%esp)
movl -8(%ebx),%edx
movq -8(%ebx),%mm2
psllq $60,%mm6
movq -16(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-80(%edi)
psrlq $4,%mm0
movq %mm3,48(%edi)
movq %mm3,%mm7
movq %mm1,-88(%ebp)
psrlq $4,%mm3
movq %mm4,40(%ebp)
shll $4,%edx
movb %dl,7(%esp)
movl 8(%ebx),%edx
movq 8(%ebx),%mm1
psllq $60,%mm7
movq (%ebx),%mm4
por %mm7,%mm0
movq %mm2,-72(%edi)
psrlq $4,%mm2
movq %mm5,56(%edi)
movq %mm5,%mm6
movq %mm0,-80(%ebp)
psrlq $4,%mm5
movq %mm3,48(%ebp)
shll $4,%edx
movb %dl,8(%esp)
movl 24(%ebx),%edx
movq 24(%ebx),%mm0
psllq $60,%mm6
movq 16(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-64(%edi)
psrlq $4,%mm1
movq %mm4,64(%edi)
movq %mm4,%mm7
movq %mm2,-72(%ebp)
psrlq $4,%mm4
movq %mm5,56(%ebp)
shll $4,%edx
movb %dl,9(%esp)
movl 40(%ebx),%edx
movq 40(%ebx),%mm2
psllq $60,%mm7
movq 32(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-56(%edi)
psrlq $4,%mm0
movq %mm3,72(%edi)
movq %mm3,%mm6
movq %mm1,-64(%ebp)
psrlq $4,%mm3
movq %mm4,64(%ebp)
shll $4,%edx
movb %dl,10(%esp)
movl 56(%ebx),%edx
movq 56(%ebx),%mm1
psllq $60,%mm6
movq 48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-48(%edi)
psrlq $4,%mm2
movq %mm5,80(%edi)
movq %mm5,%mm7
movq %mm0,-56(%ebp)
psrlq $4,%mm5
movq %mm3,72(%ebp)
shll $4,%edx
movb %dl,11(%esp)
movl 72(%ebx),%edx
movq 72(%ebx),%mm0
psllq $60,%mm7
movq 64(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-40(%edi)
psrlq $4,%mm1
movq %mm4,88(%edi)
movq %mm4,%mm6
movq %mm2,-48(%ebp)
psrlq $4,%mm4
movq %mm5,80(%ebp)
shll $4,%edx
movb %dl,12(%esp)
movl 88(%ebx),%edx
movq 88(%ebx),%mm2
psllq $60,%mm6
movq 80(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-32(%edi)
psrlq $4,%mm0
movq %mm3,96(%edi)
movq %mm3,%mm7
movq %mm1,-40(%ebp)
psrlq $4,%mm3
movq %mm4,88(%ebp)
shll $4,%edx
movb %dl,13(%esp)
movl 104(%ebx),%edx
movq 104(%ebx),%mm1
psllq $60,%mm7
movq 96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-24(%edi)
psrlq $4,%mm2
movq %mm5,104(%edi)
movq %mm5,%mm6
movq %mm0,-32(%ebp)
psrlq $4,%mm5
movq %mm3,96(%ebp)
shll $4,%edx
movb %dl,14(%esp)
movl 120(%ebx),%edx
movq 120(%ebx),%mm0
psllq $60,%mm6
movq 112(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-16(%edi)
psrlq $4,%mm1
movq %mm4,112(%edi)
movq %mm4,%mm7
movq %mm2,-24(%ebp)
psrlq $4,%mm4
movq %mm5,104(%ebp)
shll $4,%edx
movb %dl,15(%esp)
psllq $60,%mm7
por %mm7,%mm1
movq %mm0,-8(%edi)
psrlq $4,%mm0
movq %mm3,120(%edi)
movq %mm3,%mm6
movq %mm1,-16(%ebp)
psrlq $4,%mm3
movq %mm4,112(%ebp)
psllq $60,%mm6
por %mm6,%mm0
movq %mm0,-8(%ebp)
movq %mm3,120(%ebp)
movq (%eax),%mm6
movl 8(%eax),%ebx
movl 12(%eax),%edx
.align 4,0x90
L004outer:
xorl 12(%ecx),%edx
xorl 8(%ecx),%ebx
pxor (%ecx),%mm6
leal 16(%ecx),%ecx
movl %ebx,536(%esp)
movq %mm6,528(%esp)
movl %ecx,548(%esp)
xorl %eax,%eax
roll $8,%edx
movb %dl,%al
movl %eax,%ebp
andb $15,%al
shrl $4,%ebp
pxor %mm0,%mm0
roll $8,%edx
pxor %mm1,%mm1
pxor %mm2,%mm2
movq 16(%esp,%eax,8),%mm7
movq 144(%esp,%eax,8),%mm6
movb %dl,%al
movd %mm7,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%edi
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 536(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 532(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 528(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 524(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
pxor 144(%esp,%eax,8),%mm6
xorb (%esp,%ebp,1),%bl
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
movzbl %bl,%ebx
pxor %mm2,%mm2
psllq $4,%mm1
movd %mm7,%ecx
psrlq $4,%mm7
movq %mm6,%mm3
psrlq $4,%mm6
shll $4,%ecx
pxor 16(%esp,%edi,8),%mm7
psllq $60,%mm3
movzbl %cl,%ecx
pxor %mm3,%mm7
pxor 144(%esp,%edi,8),%mm6
pinsrw $2,(%esi,%ebx,2),%mm0
pxor %mm1,%mm6
movd %mm7,%edx
pinsrw $3,(%esi,%ecx,2),%mm2
psllq $12,%mm0
pxor %mm0,%mm6
psrlq $32,%mm7
pxor %mm2,%mm6
movl 548(%esp),%ecx
movd %mm7,%ebx
movq %mm6,%mm3
psllw $8,%mm6
psrlw $8,%mm3
por %mm3,%mm6
bswap %edx
pshufw $27,%mm6,%mm6
bswap %ebx
cmpl 552(%esp),%ecx
jne L004outer
movl 544(%esp),%eax
movl %edx,12(%eax)
movl %ebx,8(%eax)
movq %mm6,(%eax)
movl 556(%esp),%esp
emms
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
.align 4
@ -714,10 +13,10 @@ _gcm_init_clmul:
L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call L005pic
L005pic:
call L000pic
L000pic:
popl %ecx
leal Lbswap-L005pic(%ecx),%ecx
leal Lbswap-L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
@ -782,10 +81,10 @@ _gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call L006pic
L006pic:
call L001pic
L001pic:
popl %ecx
leal Lbswap-L006pic(%ecx),%ecx
leal Lbswap-L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
@ -840,16 +139,16 @@ L_gcm_ghash_clmul_begin:
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call L007pic
L007pic:
call L002pic
L002pic:
popl %ecx
leal Lbswap-L007pic(%ecx),%ecx
leal Lbswap-L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz L008odd_tail
jz L003odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
@ -866,10 +165,10 @@ L007pic:
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe L009even_tail
jmp L010mod_loop
jbe L004even_tail
jmp L005mod_loop
.align 5,0x90
L010mod_loop:
L005mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
@ -924,8 +223,8 @@ L010mod_loop:
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja L010mod_loop
L009even_tail:
ja L005mod_loop
L004even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
@ -964,9 +263,9 @@ L009even_tail:
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz L011done
jnz L006done
movups (%edx),%xmm2
L008odd_tail:
L003odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
@ -1005,7 +304,7 @@ L008odd_tail:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
L011done:
L006done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
@ -1017,46 +316,6 @@ L011done:
Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 6,0x90
Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
.align 6,0x90
Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62

View File

@ -6,7 +6,7 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
#endif
.align 6,0x90
L_vpaes_consts:
@ -468,7 +468,7 @@ L_vpaes_set_encrypt_key_begin:
pushl %ebx
pushl %esi
pushl %edi
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call L016pic
@ -549,7 +549,7 @@ L_vpaes_encrypt_begin:
pushl %ebx
pushl %esi
pushl %edi
#ifndef NDEBUG
#ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx
pushl %edx
call L019pic

View File

@ -556,11 +556,9 @@ L$handle_ctr32_2:
.p2align 5
_aesni_gcm_encrypt:
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+2(%rip)
#endif
#endif
xorq %r10,%r10

View File

@ -19,11 +19,9 @@
.p2align 4
_aes_hw_encrypt:
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+1(%rip)
#endif
#endif
movups (%rdi),%xmm2
movl 240(%rdx),%eax
@ -885,10 +883,8 @@ L$ecb_ret:
.p2align 4
_aes_hw_ctr32_encrypt_blocks:
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit(%rip)
#endif
#endif
cmpq $1,%rdx
jne L$ctr32_bulk
@ -2109,11 +2105,9 @@ L$SEH_end_set_decrypt_key:
_aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+3(%rip)
#endif
#endif
.byte 0x48,0x83,0xEC,0x08
movq $-1,%rax

View File

@ -13,697 +13,6 @@
#endif
.text
.globl _gcm_gmult_4bit
.private_extern _gcm_gmult_4bit
.p2align 4
_gcm_gmult_4bit:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $280,%rsp
L$gmult_prologue:
movzbq 15(%rdi),%r8
leaq L$rem_4bit(%rip),%r11
xorq %rax,%rax
xorq %rbx,%rbx
movb %r8b,%al
movb %r8b,%bl
shlb $4,%al
movq $14,%rcx
movq 8(%rsi,%rax,1),%r8
movq (%rsi,%rax,1),%r9
andb $0xf0,%bl
movq %r8,%rdx
jmp L$oop1
.p2align 4
L$oop1:
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
movb (%rdi,%rcx,1),%al
shrq $4,%r9
xorq 8(%rsi,%rbx,1),%r8
shlq $60,%r10
xorq (%rsi,%rbx,1),%r9
movb %al,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
shlb $4,%al
xorq %r10,%r8
decq %rcx
js L$break1
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rax,1),%r8
shlq $60,%r10
xorq (%rsi,%rax,1),%r9
andb $0xf0,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
xorq %r10,%r8
jmp L$oop1
.p2align 4
L$break1:
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rax,1),%r8
shlq $60,%r10
xorq (%rsi,%rax,1),%r9
andb $0xf0,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
xorq %r10,%r8
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rbx,1),%r8
shlq $60,%r10
xorq (%rsi,%rbx,1),%r9
xorq %r10,%r8
xorq (%r11,%rdx,8),%r9
bswapq %r8
bswapq %r9
movq %r8,8(%rdi)
movq %r9,(%rdi)
leaq 280+48(%rsp),%rsi
movq -8(%rsi),%rbx
leaq (%rsi),%rsp
L$gmult_epilogue:
.byte 0xf3,0xc3
.globl _gcm_ghash_4bit
.private_extern _gcm_ghash_4bit
.p2align 4
_gcm_ghash_4bit:
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $280,%rsp
L$ghash_prologue:
movq %rdx,%r14
movq %rcx,%r15
subq $-128,%rsi
leaq 16+128(%rsp),%rbp
xorl %edx,%edx
movq 0+0-128(%rsi),%r8
movq 0+8-128(%rsi),%rax
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq 16+0-128(%rsi),%r9
shlb $4,%dl
movq 16+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,0(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,0(%rbp)
movq 32+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,0-128(%rbp)
movq 32+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,1(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,8(%rbp)
movq 48+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,8-128(%rbp)
movq 48+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,2(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,16(%rbp)
movq 64+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,16-128(%rbp)
movq 64+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,3(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,24(%rbp)
movq 80+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,24-128(%rbp)
movq 80+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,4(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,32(%rbp)
movq 96+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,32-128(%rbp)
movq 96+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,5(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,40(%rbp)
movq 112+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,40-128(%rbp)
movq 112+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,6(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,48(%rbp)
movq 128+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,48-128(%rbp)
movq 128+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,7(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,56(%rbp)
movq 144+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,56-128(%rbp)
movq 144+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,8(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,64(%rbp)
movq 160+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,64-128(%rbp)
movq 160+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,9(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,72(%rbp)
movq 176+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,72-128(%rbp)
movq 176+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,10(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,80(%rbp)
movq 192+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,80-128(%rbp)
movq 192+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,11(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,88(%rbp)
movq 208+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,88-128(%rbp)
movq 208+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,12(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,96(%rbp)
movq 224+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,96-128(%rbp)
movq 224+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,13(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,104(%rbp)
movq 240+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,104-128(%rbp)
movq 240+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,14(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,112(%rbp)
shlb $4,%dl
movq %rax,112-128(%rbp)
shlq $60,%r10
movb %dl,15(%rsp)
orq %r10,%rbx
movq %r9,120(%rbp)
movq %rbx,120-128(%rbp)
addq $-128,%rsi
movq 8(%rdi),%r8
movq 0(%rdi),%r9
addq %r14,%r15
leaq L$rem_8bit(%rip),%r11
jmp L$outer_loop
.p2align 4
L$outer_loop:
xorq (%r14),%r9
movq 8(%r14),%rdx
leaq 16(%r14),%r14
xorq %r8,%rdx
movq %r9,(%rdi)
movq %rdx,8(%rdi)
shrq $32,%rdx
xorq %rax,%rax
roll $8,%edx
movb %dl,%al
movzbl %dl,%ebx
shlb $4,%al
shrl $4,%ebx
roll $8,%edx
movq 8(%rsi,%rax,1),%r8
movq (%rsi,%rax,1),%r9
movb %dl,%al
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
xorq %r8,%r12
movq %r9,%r10
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 8(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 4(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 0(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
andl $240,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl -4(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
movzwq (%r11,%r12,2),%r12
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
shlq $48,%r12
xorq %r10,%r8
xorq %r12,%r9
movzbq %r8b,%r13
shrq $4,%r8
movq %r9,%r10
shlb $4,%r13b
shrq $4,%r9
xorq 8(%rsi,%rcx,1),%r8
movzwq (%r11,%r13,2),%r13
shlq $60,%r10
xorq (%rsi,%rcx,1),%r9
xorq %r10,%r8
shlq $48,%r13
bswapq %r8
xorq %r13,%r9
bswapq %r9
cmpq %r15,%r14
jb L$outer_loop
movq %r8,8(%rdi)
movq %r9,(%rdi)
leaq 280+48(%rsp),%rsi
movq -48(%rsi),%r15
movq -40(%rsi),%r14
movq -32(%rsi),%r13
movq -24(%rsi),%r12
movq -16(%rsi),%rbp
movq -8(%rsi),%rbx
leaq 0(%rsi),%rsp
L$ghash_epilogue:
.byte 0xf3,0xc3
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
@ -1809,50 +1118,8 @@ L$0x1c2_polynomial:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
L$7_mask:
.long 7,0,7,0
L$7_mask_poly:
.long 7,0,450,0
.p2align 6
L$rem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
L$rem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
#endif

View File

@ -812,11 +812,9 @@ L$schedule_mangle_both:
.p2align 4
_vpaes_set_encrypt_key:
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+5(%rip)
#endif
#endif
movl %esi,%eax
@ -862,11 +860,9 @@ _vpaes_set_decrypt_key:
.p2align 4
_vpaes_encrypt:
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+4(%rip)
#endif
#endif
movdqu (%rdi),%xmm0
call _vpaes_preheat

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -21,14 +21,14 @@ section .text code align=64
section .text code
%endif
;extern _OPENSSL_ia32cap_P
%ifndef NDEBUG
%ifdef BORINGSSL_DISPATCH_TEST
extern _BORINGSSL_function_hit
%endif
global _aes_hw_encrypt
align 16
_aes_hw_encrypt:
L$_aes_hw_encrypt_begin:
%ifndef NDEBUG
%ifdef BORINGSSL_DISPATCH_TEST
push ebx
push edx
call L$000pic
@ -820,7 +820,7 @@ L$_aes_hw_ctr32_encrypt_blocks_begin:
push ebx
push esi
push edi
%ifndef NDEBUG
%ifdef BORINGSSL_DISPATCH_TEST
push ebx
push edx
call L$038pic
@ -2400,7 +2400,7 @@ global _aes_hw_set_encrypt_key
align 16
_aes_hw_set_encrypt_key:
L$_aes_hw_set_encrypt_key_begin:
%ifndef NDEBUG
%ifdef BORINGSSL_DISPATCH_TEST
push ebx
push edx
call L$116pic

View File

@ -981,549 +981,5 @@ L$025aw_end:
pop ebx
pop ebp
ret
global _bn_sub_part_words
align 16
_bn_sub_part_words:
L$_bn_sub_part_words_begin:
push ebp
push ebx
push esi
push edi
;
mov ebx,DWORD [20+esp]
mov esi,DWORD [24+esp]
mov edi,DWORD [28+esp]
mov ebp,DWORD [32+esp]
xor eax,eax
and ebp,4294967288
jz NEAR L$026aw_finish
L$027aw_loop:
; Round 0
mov ecx,DWORD [esi]
mov edx,DWORD [edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [ebx],ecx
; Round 1
mov ecx,DWORD [4+esi]
mov edx,DWORD [4+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [4+ebx],ecx
; Round 2
mov ecx,DWORD [8+esi]
mov edx,DWORD [8+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [8+ebx],ecx
; Round 3
mov ecx,DWORD [12+esi]
mov edx,DWORD [12+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [12+ebx],ecx
; Round 4
mov ecx,DWORD [16+esi]
mov edx,DWORD [16+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [16+ebx],ecx
; Round 5
mov ecx,DWORD [20+esi]
mov edx,DWORD [20+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [20+ebx],ecx
; Round 6
mov ecx,DWORD [24+esi]
mov edx,DWORD [24+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [24+ebx],ecx
; Round 7
mov ecx,DWORD [28+esi]
mov edx,DWORD [28+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [28+ebx],ecx
;
add esi,32
add edi,32
add ebx,32
sub ebp,8
jnz NEAR L$027aw_loop
L$026aw_finish:
mov ebp,DWORD [32+esp]
and ebp,7
jz NEAR L$028aw_end
; Tail Round 0
mov ecx,DWORD [esi]
mov edx,DWORD [edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [ebx],ecx
add esi,4
add edi,4
add ebx,4
dec ebp
jz NEAR L$028aw_end
; Tail Round 1
mov ecx,DWORD [esi]
mov edx,DWORD [edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [ebx],ecx
add esi,4
add edi,4
add ebx,4
dec ebp
jz NEAR L$028aw_end
; Tail Round 2
mov ecx,DWORD [esi]
mov edx,DWORD [edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [ebx],ecx
add esi,4
add edi,4
add ebx,4
dec ebp
jz NEAR L$028aw_end
; Tail Round 3
mov ecx,DWORD [esi]
mov edx,DWORD [edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [ebx],ecx
add esi,4
add edi,4
add ebx,4
dec ebp
jz NEAR L$028aw_end
; Tail Round 4
mov ecx,DWORD [esi]
mov edx,DWORD [edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [ebx],ecx
add esi,4
add edi,4
add ebx,4
dec ebp
jz NEAR L$028aw_end
; Tail Round 5
mov ecx,DWORD [esi]
mov edx,DWORD [edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [ebx],ecx
add esi,4
add edi,4
add ebx,4
dec ebp
jz NEAR L$028aw_end
; Tail Round 6
mov ecx,DWORD [esi]
mov edx,DWORD [edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [ebx],ecx
add esi,4
add edi,4
add ebx,4
L$028aw_end:
cmp DWORD [36+esp],0
je NEAR L$029pw_end
mov ebp,DWORD [36+esp]
cmp ebp,0
je NEAR L$029pw_end
jge NEAR L$030pw_pos
; pw_neg
mov edx,0
sub edx,ebp
mov ebp,edx
and ebp,4294967288
jz NEAR L$031pw_neg_finish
L$032pw_neg_loop:
; dl<0 Round 0
mov ecx,0
mov edx,DWORD [edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [ebx],ecx
; dl<0 Round 1
mov ecx,0
mov edx,DWORD [4+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [4+ebx],ecx
; dl<0 Round 2
mov ecx,0
mov edx,DWORD [8+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [8+ebx],ecx
; dl<0 Round 3
mov ecx,0
mov edx,DWORD [12+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [12+ebx],ecx
; dl<0 Round 4
mov ecx,0
mov edx,DWORD [16+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [16+ebx],ecx
; dl<0 Round 5
mov ecx,0
mov edx,DWORD [20+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [20+ebx],ecx
; dl<0 Round 6
mov ecx,0
mov edx,DWORD [24+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [24+ebx],ecx
; dl<0 Round 7
mov ecx,0
mov edx,DWORD [28+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [28+ebx],ecx
;
add edi,32
add ebx,32
sub ebp,8
jnz NEAR L$032pw_neg_loop
L$031pw_neg_finish:
mov edx,DWORD [36+esp]
mov ebp,0
sub ebp,edx
and ebp,7
jz NEAR L$029pw_end
; dl<0 Tail Round 0
mov ecx,0
mov edx,DWORD [edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
dec ebp
mov DWORD [ebx],ecx
jz NEAR L$029pw_end
; dl<0 Tail Round 1
mov ecx,0
mov edx,DWORD [4+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
dec ebp
mov DWORD [4+ebx],ecx
jz NEAR L$029pw_end
; dl<0 Tail Round 2
mov ecx,0
mov edx,DWORD [8+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
dec ebp
mov DWORD [8+ebx],ecx
jz NEAR L$029pw_end
; dl<0 Tail Round 3
mov ecx,0
mov edx,DWORD [12+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
dec ebp
mov DWORD [12+ebx],ecx
jz NEAR L$029pw_end
; dl<0 Tail Round 4
mov ecx,0
mov edx,DWORD [16+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
dec ebp
mov DWORD [16+ebx],ecx
jz NEAR L$029pw_end
; dl<0 Tail Round 5
mov ecx,0
mov edx,DWORD [20+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
dec ebp
mov DWORD [20+ebx],ecx
jz NEAR L$029pw_end
; dl<0 Tail Round 6
mov ecx,0
mov edx,DWORD [24+edi]
sub ecx,eax
mov eax,0
adc eax,eax
sub ecx,edx
adc eax,0
mov DWORD [24+ebx],ecx
jmp NEAR L$029pw_end
L$030pw_pos:
and ebp,4294967288
jz NEAR L$033pw_pos_finish
L$034pw_pos_loop:
; dl>0 Round 0
mov ecx,DWORD [esi]
sub ecx,eax
mov DWORD [ebx],ecx
jnc NEAR L$035pw_nc0
; dl>0 Round 1
mov ecx,DWORD [4+esi]
sub ecx,eax
mov DWORD [4+ebx],ecx
jnc NEAR L$036pw_nc1
; dl>0 Round 2
mov ecx,DWORD [8+esi]
sub ecx,eax
mov DWORD [8+ebx],ecx
jnc NEAR L$037pw_nc2
; dl>0 Round 3
mov ecx,DWORD [12+esi]
sub ecx,eax
mov DWORD [12+ebx],ecx
jnc NEAR L$038pw_nc3
; dl>0 Round 4
mov ecx,DWORD [16+esi]
sub ecx,eax
mov DWORD [16+ebx],ecx
jnc NEAR L$039pw_nc4
; dl>0 Round 5
mov ecx,DWORD [20+esi]
sub ecx,eax
mov DWORD [20+ebx],ecx
jnc NEAR L$040pw_nc5
; dl>0 Round 6
mov ecx,DWORD [24+esi]
sub ecx,eax
mov DWORD [24+ebx],ecx
jnc NEAR L$041pw_nc6
; dl>0 Round 7
mov ecx,DWORD [28+esi]
sub ecx,eax
mov DWORD [28+ebx],ecx
jnc NEAR L$042pw_nc7
;
add esi,32
add ebx,32
sub ebp,8
jnz NEAR L$034pw_pos_loop
L$033pw_pos_finish:
mov ebp,DWORD [36+esp]
and ebp,7
jz NEAR L$029pw_end
; dl>0 Tail Round 0
mov ecx,DWORD [esi]
sub ecx,eax
mov DWORD [ebx],ecx
jnc NEAR L$043pw_tail_nc0
dec ebp
jz NEAR L$029pw_end
; dl>0 Tail Round 1
mov ecx,DWORD [4+esi]
sub ecx,eax
mov DWORD [4+ebx],ecx
jnc NEAR L$044pw_tail_nc1
dec ebp
jz NEAR L$029pw_end
; dl>0 Tail Round 2
mov ecx,DWORD [8+esi]
sub ecx,eax
mov DWORD [8+ebx],ecx
jnc NEAR L$045pw_tail_nc2
dec ebp
jz NEAR L$029pw_end
; dl>0 Tail Round 3
mov ecx,DWORD [12+esi]
sub ecx,eax
mov DWORD [12+ebx],ecx
jnc NEAR L$046pw_tail_nc3
dec ebp
jz NEAR L$029pw_end
; dl>0 Tail Round 4
mov ecx,DWORD [16+esi]
sub ecx,eax
mov DWORD [16+ebx],ecx
jnc NEAR L$047pw_tail_nc4
dec ebp
jz NEAR L$029pw_end
; dl>0 Tail Round 5
mov ecx,DWORD [20+esi]
sub ecx,eax
mov DWORD [20+ebx],ecx
jnc NEAR L$048pw_tail_nc5
dec ebp
jz NEAR L$029pw_end
; dl>0 Tail Round 6
mov ecx,DWORD [24+esi]
sub ecx,eax
mov DWORD [24+ebx],ecx
jnc NEAR L$049pw_tail_nc6
mov eax,1
jmp NEAR L$029pw_end
L$050pw_nc_loop:
mov ecx,DWORD [esi]
mov DWORD [ebx],ecx
L$035pw_nc0:
mov ecx,DWORD [4+esi]
mov DWORD [4+ebx],ecx
L$036pw_nc1:
mov ecx,DWORD [8+esi]
mov DWORD [8+ebx],ecx
L$037pw_nc2:
mov ecx,DWORD [12+esi]
mov DWORD [12+ebx],ecx
L$038pw_nc3:
mov ecx,DWORD [16+esi]
mov DWORD [16+ebx],ecx
L$039pw_nc4:
mov ecx,DWORD [20+esi]
mov DWORD [20+ebx],ecx
L$040pw_nc5:
mov ecx,DWORD [24+esi]
mov DWORD [24+ebx],ecx
L$041pw_nc6:
mov ecx,DWORD [28+esi]
mov DWORD [28+ebx],ecx
L$042pw_nc7:
;
add esi,32
add ebx,32
sub ebp,8
jnz NEAR L$050pw_nc_loop
mov ebp,DWORD [36+esp]
and ebp,7
jz NEAR L$051pw_nc_end
mov ecx,DWORD [esi]
mov DWORD [ebx],ecx
L$043pw_tail_nc0:
dec ebp
jz NEAR L$051pw_nc_end
mov ecx,DWORD [4+esi]
mov DWORD [4+ebx],ecx
L$044pw_tail_nc1:
dec ebp
jz NEAR L$051pw_nc_end
mov ecx,DWORD [8+esi]
mov DWORD [8+ebx],ecx
L$045pw_tail_nc2:
dec ebp
jz NEAR L$051pw_nc_end
mov ecx,DWORD [12+esi]
mov DWORD [12+ebx],ecx
L$046pw_tail_nc3:
dec ebp
jz NEAR L$051pw_nc_end
mov ecx,DWORD [16+esi]
mov DWORD [16+ebx],ecx
L$047pw_tail_nc4:
dec ebp
jz NEAR L$051pw_nc_end
mov ecx,DWORD [20+esi]
mov DWORD [20+ebx],ecx
L$048pw_tail_nc5:
dec ebp
jz NEAR L$051pw_nc_end
mov ecx,DWORD [24+esi]
mov DWORD [24+ebx],ecx
L$049pw_tail_nc6:
L$051pw_nc_end:
mov eax,0
L$029pw_end:
pop edi
pop esi
pop ebx
pop ebp
ret
segment .bss
common _OPENSSL_ia32cap_P 16

View File

@ -20,715 +20,16 @@ section .text code align=64
%else
section .text code
%endif
global _gcm_gmult_4bit_mmx
align 16
_gcm_gmult_4bit_mmx:
L$_gcm_gmult_4bit_mmx_begin:
push ebp
push ebx
push esi
push edi
mov edi,DWORD [20+esp]
mov esi,DWORD [24+esp]
call L$000pic_point
L$000pic_point:
pop eax
lea eax,[(L$rem_4bit-L$000pic_point)+eax]
movzx ebx,BYTE [15+edi]
xor ecx,ecx
mov edx,ebx
mov cl,dl
mov ebp,14
shl cl,4
and edx,240
movq mm0,[8+ecx*1+esi]
movq mm1,[ecx*1+esi]
movd ebx,mm0
jmp NEAR L$001mmx_loop
align 16
L$001mmx_loop:
psrlq mm0,4
and ebx,15
movq mm2,mm1
psrlq mm1,4
pxor mm0,[8+edx*1+esi]
mov cl,BYTE [ebp*1+edi]
psllq mm2,60
pxor mm1,[ebx*8+eax]
dec ebp
movd ebx,mm0
pxor mm1,[edx*1+esi]
mov edx,ecx
pxor mm0,mm2
js NEAR L$002mmx_break
shl cl,4
and ebx,15
psrlq mm0,4
and edx,240
movq mm2,mm1
psrlq mm1,4
pxor mm0,[8+ecx*1+esi]
psllq mm2,60
pxor mm1,[ebx*8+eax]
movd ebx,mm0
pxor mm1,[ecx*1+esi]
pxor mm0,mm2
jmp NEAR L$001mmx_loop
align 16
L$002mmx_break:
shl cl,4
and ebx,15
psrlq mm0,4
and edx,240
movq mm2,mm1
psrlq mm1,4
pxor mm0,[8+ecx*1+esi]
psllq mm2,60
pxor mm1,[ebx*8+eax]
movd ebx,mm0
pxor mm1,[ecx*1+esi]
pxor mm0,mm2
psrlq mm0,4
and ebx,15
movq mm2,mm1
psrlq mm1,4
pxor mm0,[8+edx*1+esi]
psllq mm2,60
pxor mm1,[ebx*8+eax]
movd ebx,mm0
pxor mm1,[edx*1+esi]
pxor mm0,mm2
psrlq mm0,32
movd edx,mm1
psrlq mm1,32
movd ecx,mm0
movd ebp,mm1
bswap ebx
bswap edx
bswap ecx
bswap ebp
emms
mov DWORD [12+edi],ebx
mov DWORD [4+edi],edx
mov DWORD [8+edi],ecx
mov DWORD [edi],ebp
pop edi
pop esi
pop ebx
pop ebp
ret
global _gcm_ghash_4bit_mmx
align 16
_gcm_ghash_4bit_mmx:
L$_gcm_ghash_4bit_mmx_begin:
push ebp
push ebx
push esi
push edi
mov eax,DWORD [20+esp]
mov ebx,DWORD [24+esp]
mov ecx,DWORD [28+esp]
mov edx,DWORD [32+esp]
mov ebp,esp
call L$003pic_point
L$003pic_point:
pop esi
lea esi,[(L$rem_8bit-L$003pic_point)+esi]
sub esp,544
and esp,-64
sub esp,16
add edx,ecx
mov DWORD [544+esp],eax
mov DWORD [552+esp],edx
mov DWORD [556+esp],ebp
add ebx,128
lea edi,[144+esp]
lea ebp,[400+esp]
mov edx,DWORD [ebx-120]
movq mm0,[ebx-120]
movq mm3,[ebx-128]
shl edx,4
mov BYTE [esp],dl
mov edx,DWORD [ebx-104]
movq mm2,[ebx-104]
movq mm5,[ebx-112]
movq [edi-128],mm0
psrlq mm0,4
movq [edi],mm3
movq mm7,mm3
psrlq mm3,4
shl edx,4
mov BYTE [1+esp],dl
mov edx,DWORD [ebx-88]
movq mm1,[ebx-88]
psllq mm7,60
movq mm4,[ebx-96]
por mm0,mm7
movq [edi-120],mm2
psrlq mm2,4
movq [8+edi],mm5
movq mm6,mm5
movq [ebp-128],mm0
psrlq mm5,4
movq [ebp],mm3
shl edx,4
mov BYTE [2+esp],dl
mov edx,DWORD [ebx-72]
movq mm0,[ebx-72]
psllq mm6,60
movq mm3,[ebx-80]
por mm2,mm6
movq [edi-112],mm1
psrlq mm1,4
movq [16+edi],mm4
movq mm7,mm4
movq [ebp-120],mm2
psrlq mm4,4
movq [8+ebp],mm5
shl edx,4
mov BYTE [3+esp],dl
mov edx,DWORD [ebx-56]
movq mm2,[ebx-56]
psllq mm7,60
movq mm5,[ebx-64]
por mm1,mm7
movq [edi-104],mm0
psrlq mm0,4
movq [24+edi],mm3
movq mm6,mm3
movq [ebp-112],mm1
psrlq mm3,4
movq [16+ebp],mm4
shl edx,4
mov BYTE [4+esp],dl
mov edx,DWORD [ebx-40]
movq mm1,[ebx-40]
psllq mm6,60
movq mm4,[ebx-48]
por mm0,mm6
movq [edi-96],mm2
psrlq mm2,4
movq [32+edi],mm5
movq mm7,mm5
movq [ebp-104],mm0
psrlq mm5,4
movq [24+ebp],mm3
shl edx,4
mov BYTE [5+esp],dl
mov edx,DWORD [ebx-24]
movq mm0,[ebx-24]
psllq mm7,60
movq mm3,[ebx-32]
por mm2,mm7
movq [edi-88],mm1
psrlq mm1,4
movq [40+edi],mm4
movq mm6,mm4
movq [ebp-96],mm2
psrlq mm4,4
movq [32+ebp],mm5
shl edx,4
mov BYTE [6+esp],dl
mov edx,DWORD [ebx-8]
movq mm2,[ebx-8]
psllq mm6,60
movq mm5,[ebx-16]
por mm1,mm6
movq [edi-80],mm0
psrlq mm0,4
movq [48+edi],mm3
movq mm7,mm3
movq [ebp-88],mm1
psrlq mm3,4
movq [40+ebp],mm4
shl edx,4
mov BYTE [7+esp],dl
mov edx,DWORD [8+ebx]
movq mm1,[8+ebx]
psllq mm7,60
movq mm4,[ebx]
por mm0,mm7
movq [edi-72],mm2
psrlq mm2,4
movq [56+edi],mm5
movq mm6,mm5
movq [ebp-80],mm0
psrlq mm5,4
movq [48+ebp],mm3
shl edx,4
mov BYTE [8+esp],dl
mov edx,DWORD [24+ebx]
movq mm0,[24+ebx]
psllq mm6,60
movq mm3,[16+ebx]
por mm2,mm6
movq [edi-64],mm1
psrlq mm1,4
movq [64+edi],mm4
movq mm7,mm4
movq [ebp-72],mm2
psrlq mm4,4
movq [56+ebp],mm5
shl edx,4
mov BYTE [9+esp],dl
mov edx,DWORD [40+ebx]
movq mm2,[40+ebx]
psllq mm7,60
movq mm5,[32+ebx]
por mm1,mm7
movq [edi-56],mm0
psrlq mm0,4
movq [72+edi],mm3
movq mm6,mm3
movq [ebp-64],mm1
psrlq mm3,4
movq [64+ebp],mm4
shl edx,4
mov BYTE [10+esp],dl
mov edx,DWORD [56+ebx]
movq mm1,[56+ebx]
psllq mm6,60
movq mm4,[48+ebx]
por mm0,mm6
movq [edi-48],mm2
psrlq mm2,4
movq [80+edi],mm5
movq mm7,mm5
movq [ebp-56],mm0
psrlq mm5,4
movq [72+ebp],mm3
shl edx,4
mov BYTE [11+esp],dl
mov edx,DWORD [72+ebx]
movq mm0,[72+ebx]
psllq mm7,60
movq mm3,[64+ebx]
por mm2,mm7
movq [edi-40],mm1
psrlq mm1,4
movq [88+edi],mm4
movq mm6,mm4
movq [ebp-48],mm2
psrlq mm4,4
movq [80+ebp],mm5
shl edx,4
mov BYTE [12+esp],dl
mov edx,DWORD [88+ebx]
movq mm2,[88+ebx]
psllq mm6,60
movq mm5,[80+ebx]
por mm1,mm6
movq [edi-32],mm0
psrlq mm0,4
movq [96+edi],mm3
movq mm7,mm3
movq [ebp-40],mm1
psrlq mm3,4
movq [88+ebp],mm4
shl edx,4
mov BYTE [13+esp],dl
mov edx,DWORD [104+ebx]
movq mm1,[104+ebx]
psllq mm7,60
movq mm4,[96+ebx]
por mm0,mm7
movq [edi-24],mm2
psrlq mm2,4
movq [104+edi],mm5
movq mm6,mm5
movq [ebp-32],mm0
psrlq mm5,4
movq [96+ebp],mm3
shl edx,4
mov BYTE [14+esp],dl
mov edx,DWORD [120+ebx]
movq mm0,[120+ebx]
psllq mm6,60
movq mm3,[112+ebx]
por mm2,mm6
movq [edi-16],mm1
psrlq mm1,4
movq [112+edi],mm4
movq mm7,mm4
movq [ebp-24],mm2
psrlq mm4,4
movq [104+ebp],mm5
shl edx,4
mov BYTE [15+esp],dl
psllq mm7,60
por mm1,mm7
movq [edi-8],mm0
psrlq mm0,4
movq [120+edi],mm3
movq mm6,mm3
movq [ebp-16],mm1
psrlq mm3,4
movq [112+ebp],mm4
psllq mm6,60
por mm0,mm6
movq [ebp-8],mm0
movq [120+ebp],mm3
movq mm6,[eax]
mov ebx,DWORD [8+eax]
mov edx,DWORD [12+eax]
align 16
L$004outer:
xor edx,DWORD [12+ecx]
xor ebx,DWORD [8+ecx]
pxor mm6,[ecx]
lea ecx,[16+ecx]
mov DWORD [536+esp],ebx
movq [528+esp],mm6
mov DWORD [548+esp],ecx
xor eax,eax
rol edx,8
mov al,dl
mov ebp,eax
and al,15
shr ebp,4
pxor mm0,mm0
rol edx,8
pxor mm1,mm1
pxor mm2,mm2
movq mm7,[16+eax*8+esp]
movq mm6,[144+eax*8+esp]
mov al,dl
movd ebx,mm7
psrlq mm7,8
movq mm3,mm6
mov edi,eax
psrlq mm6,8
pxor mm7,[272+ebp*8+esp]
and al,15
psllq mm3,56
shr edi,4
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+ebp*8+esp]
xor bl,BYTE [ebp*1+esp]
mov al,dl
movd ecx,mm7
movzx ebx,bl
psrlq mm7,8
movq mm3,mm6
mov ebp,eax
psrlq mm6,8
pxor mm7,[272+edi*8+esp]
and al,15
psllq mm3,56
shr ebp,4
pinsrw mm2,WORD [ebx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+edi*8+esp]
xor cl,BYTE [edi*1+esp]
mov al,dl
mov edx,DWORD [536+esp]
movd ebx,mm7
movzx ecx,cl
psrlq mm7,8
movq mm3,mm6
mov edi,eax
psrlq mm6,8
pxor mm7,[272+ebp*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm2
shr edi,4
pinsrw mm1,WORD [ecx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+ebp*8+esp]
xor bl,BYTE [ebp*1+esp]
mov al,dl
movd ecx,mm7
movzx ebx,bl
psrlq mm7,8
movq mm3,mm6
mov ebp,eax
psrlq mm6,8
pxor mm7,[272+edi*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm1
shr ebp,4
pinsrw mm0,WORD [ebx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+edi*8+esp]
xor cl,BYTE [edi*1+esp]
mov al,dl
movd ebx,mm7
movzx ecx,cl
psrlq mm7,8
movq mm3,mm6
mov edi,eax
psrlq mm6,8
pxor mm7,[272+ebp*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm0
shr edi,4
pinsrw mm2,WORD [ecx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+ebp*8+esp]
xor bl,BYTE [ebp*1+esp]
mov al,dl
movd ecx,mm7
movzx ebx,bl
psrlq mm7,8
movq mm3,mm6
mov ebp,eax
psrlq mm6,8
pxor mm7,[272+edi*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm2
shr ebp,4
pinsrw mm1,WORD [ebx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+edi*8+esp]
xor cl,BYTE [edi*1+esp]
mov al,dl
mov edx,DWORD [532+esp]
movd ebx,mm7
movzx ecx,cl
psrlq mm7,8
movq mm3,mm6
mov edi,eax
psrlq mm6,8
pxor mm7,[272+ebp*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm1
shr edi,4
pinsrw mm0,WORD [ecx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+ebp*8+esp]
xor bl,BYTE [ebp*1+esp]
mov al,dl
movd ecx,mm7
movzx ebx,bl
psrlq mm7,8
movq mm3,mm6
mov ebp,eax
psrlq mm6,8
pxor mm7,[272+edi*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm0
shr ebp,4
pinsrw mm2,WORD [ebx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+edi*8+esp]
xor cl,BYTE [edi*1+esp]
mov al,dl
movd ebx,mm7
movzx ecx,cl
psrlq mm7,8
movq mm3,mm6
mov edi,eax
psrlq mm6,8
pxor mm7,[272+ebp*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm2
shr edi,4
pinsrw mm1,WORD [ecx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+ebp*8+esp]
xor bl,BYTE [ebp*1+esp]
mov al,dl
movd ecx,mm7
movzx ebx,bl
psrlq mm7,8
movq mm3,mm6
mov ebp,eax
psrlq mm6,8
pxor mm7,[272+edi*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm1
shr ebp,4
pinsrw mm0,WORD [ebx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+edi*8+esp]
xor cl,BYTE [edi*1+esp]
mov al,dl
mov edx,DWORD [528+esp]
movd ebx,mm7
movzx ecx,cl
psrlq mm7,8
movq mm3,mm6
mov edi,eax
psrlq mm6,8
pxor mm7,[272+ebp*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm0
shr edi,4
pinsrw mm2,WORD [ecx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+ebp*8+esp]
xor bl,BYTE [ebp*1+esp]
mov al,dl
movd ecx,mm7
movzx ebx,bl
psrlq mm7,8
movq mm3,mm6
mov ebp,eax
psrlq mm6,8
pxor mm7,[272+edi*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm2
shr ebp,4
pinsrw mm1,WORD [ebx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+edi*8+esp]
xor cl,BYTE [edi*1+esp]
mov al,dl
movd ebx,mm7
movzx ecx,cl
psrlq mm7,8
movq mm3,mm6
mov edi,eax
psrlq mm6,8
pxor mm7,[272+ebp*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm1
shr edi,4
pinsrw mm0,WORD [ecx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+ebp*8+esp]
xor bl,BYTE [ebp*1+esp]
mov al,dl
movd ecx,mm7
movzx ebx,bl
psrlq mm7,8
movq mm3,mm6
mov ebp,eax
psrlq mm6,8
pxor mm7,[272+edi*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm0
shr ebp,4
pinsrw mm2,WORD [ebx*2+esi],2
pxor mm7,[16+eax*8+esp]
rol edx,8
pxor mm6,[144+eax*8+esp]
pxor mm7,mm3
pxor mm6,[400+edi*8+esp]
xor cl,BYTE [edi*1+esp]
mov al,dl
mov edx,DWORD [524+esp]
movd ebx,mm7
movzx ecx,cl
psrlq mm7,8
movq mm3,mm6
mov edi,eax
psrlq mm6,8
pxor mm7,[272+ebp*8+esp]
and al,15
psllq mm3,56
pxor mm6,mm2
shr edi,4
pinsrw mm1,WORD [ecx*2+esi],2
pxor mm7,[16+eax*8+esp]
pxor mm6,[144+eax*8+esp]
xor bl,BYTE [ebp*1+esp]
pxor mm7,mm3
pxor mm6,[400+ebp*8+esp]
movzx ebx,bl
pxor mm2,mm2
psllq mm1,4
movd ecx,mm7
psrlq mm7,4
movq mm3,mm6
psrlq mm6,4
shl ecx,4
pxor mm7,[16+edi*8+esp]
psllq mm3,60
movzx ecx,cl
pxor mm7,mm3
pxor mm6,[144+edi*8+esp]
pinsrw mm0,WORD [ebx*2+esi],2
pxor mm6,mm1
movd edx,mm7
pinsrw mm2,WORD [ecx*2+esi],3
psllq mm0,12
pxor mm6,mm0
psrlq mm7,32
pxor mm6,mm2
mov ecx,DWORD [548+esp]
movd ebx,mm7
movq mm3,mm6
psllw mm6,8
psrlw mm3,8
por mm6,mm3
bswap edx
pshufw mm6,mm6,27
bswap ebx
cmp ecx,DWORD [552+esp]
jne NEAR L$004outer
mov eax,DWORD [544+esp]
mov DWORD [12+eax],edx
mov DWORD [8+eax],ebx
movq [eax],mm6
mov esp,DWORD [556+esp]
emms
pop edi
pop esi
pop ebx
pop ebp
ret
global _gcm_init_clmul
align 16
_gcm_init_clmul:
L$_gcm_init_clmul_begin:
mov edx,DWORD [4+esp]
mov eax,DWORD [8+esp]
call L$005pic
L$005pic:
call L$000pic
L$000pic:
pop ecx
lea ecx,[(L$bswap-L$005pic)+ecx]
lea ecx,[(L$bswap-L$000pic)+ecx]
movdqu xmm2,[eax]
pshufd xmm2,xmm2,78
pshufd xmm4,xmm2,255
@ -792,10 +93,10 @@ _gcm_gmult_clmul:
L$_gcm_gmult_clmul_begin:
mov eax,DWORD [4+esp]
mov edx,DWORD [8+esp]
call L$006pic
L$006pic:
call L$001pic
L$001pic:
pop ecx
lea ecx,[(L$bswap-L$006pic)+ecx]
lea ecx,[(L$bswap-L$001pic)+ecx]
movdqu xmm0,[eax]
movdqa xmm5,[ecx]
movups xmm2,[edx]
@ -849,16 +150,16 @@ L$_gcm_ghash_clmul_begin:
mov edx,DWORD [24+esp]
mov esi,DWORD [28+esp]
mov ebx,DWORD [32+esp]
call L$007pic
L$007pic:
call L$002pic
L$002pic:
pop ecx
lea ecx,[(L$bswap-L$007pic)+ecx]
lea ecx,[(L$bswap-L$002pic)+ecx]
movdqu xmm0,[eax]
movdqa xmm5,[ecx]
movdqu xmm2,[edx]
db 102,15,56,0,197
sub ebx,16
jz NEAR L$008odd_tail
jz NEAR L$003odd_tail
movdqu xmm3,[esi]
movdqu xmm6,[16+esi]
db 102,15,56,0,221
@ -875,10 +176,10 @@ db 102,15,58,68,221,0
movups xmm2,[16+edx]
nop
sub ebx,32
jbe NEAR L$009even_tail
jmp NEAR L$010mod_loop
jbe NEAR L$004even_tail
jmp NEAR L$005mod_loop
align 32
L$010mod_loop:
L$005mod_loop:
pshufd xmm4,xmm0,78
movdqa xmm1,xmm0
pxor xmm4,xmm0
@ -933,8 +234,8 @@ db 102,15,58,68,250,17
db 102,15,58,68,221,0
lea esi,[32+esi]
sub ebx,32
ja NEAR L$010mod_loop
L$009even_tail:
ja NEAR L$005mod_loop
L$004even_tail:
pshufd xmm4,xmm0,78
movdqa xmm1,xmm0
pxor xmm4,xmm0
@ -973,9 +274,9 @@ db 102,15,58,68,229,16
psrlq xmm0,1
pxor xmm0,xmm1
test ebx,ebx
jnz NEAR L$011done
jnz NEAR L$006done
movups xmm2,[edx]
L$008odd_tail:
L$003odd_tail:
movdqu xmm3,[esi]
db 102,15,56,0,221
pxor xmm0,xmm3
@ -1014,7 +315,7 @@ db 102,15,58,68,220,0
pxor xmm0,xmm4
psrlq xmm0,1
pxor xmm0,xmm1
L$011done:
L$006done:
db 102,15,56,0,197
movdqu [eax],xmm0
pop edi
@ -1026,46 +327,6 @@ align 64
L$bswap:
db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
align 64
L$rem_8bit:
dw 0,450,900,582,1800,1738,1164,1358
dw 3600,4050,3476,3158,2328,2266,2716,2910
dw 7200,7650,8100,7782,6952,6890,6316,6510
dw 4656,5106,4532,4214,5432,5370,5820,6014
dw 14400,14722,15300,14854,16200,16010,15564,15630
dw 13904,14226,13780,13334,12632,12442,13020,13086
dw 9312,9634,10212,9766,9064,8874,8428,8494
dw 10864,11186,10740,10294,11640,11450,12028,12094
dw 28800,28994,29444,29382,30600,30282,29708,30158
dw 32400,32594,32020,31958,31128,30810,31260,31710
dw 27808,28002,28452,28390,27560,27242,26668,27118
dw 25264,25458,24884,24822,26040,25722,26172,26622
dw 18624,18690,19268,19078,20424,19978,19532,19854
dw 18128,18194,17748,17558,16856,16410,16988,17310
dw 21728,21794,22372,22182,21480,21034,20588,20910
dw 23280,23346,22900,22710,24056,23610,24188,24510
dw 57600,57538,57988,58182,58888,59338,58764,58446
dw 61200,61138,60564,60758,59416,59866,60316,59998
dw 64800,64738,65188,65382,64040,64490,63916,63598
dw 62256,62194,61620,61814,62520,62970,63420,63102
dw 55616,55426,56004,56070,56904,57226,56780,56334
dw 55120,54930,54484,54550,53336,53658,54236,53790
dw 50528,50338,50916,50982,49768,50090,49644,49198
dw 52080,51890,51444,51510,52344,52666,53244,52798
dw 37248,36930,37380,37830,38536,38730,38156,38094
dw 40848,40530,39956,40406,39064,39258,39708,39646
dw 36256,35938,36388,36838,35496,35690,35116,35054
dw 33712,33394,32820,33270,33976,34170,34620,34558
dw 43456,43010,43588,43910,44744,44810,44364,44174
dw 42960,42514,42068,42390,41176,41242,41820,41630
dw 46560,46114,46692,47014,45800,45866,45420,45230
dw 48112,47666,47220,47542,48376,48442,49020,48830
align 64
L$rem_4bit:
dd 0,0,0,471859200,0,943718400,0,610271232
dd 0,1887436800,0,1822425088,0,1220542464,0,1423966208
dd 0,3774873600,0,4246732800,0,3644850176,0,3311403008
dd 0,2441084928,0,2376073216,0,2847932416,0,3051356160
db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62

View File

@ -20,7 +20,7 @@ section .text code align=64
%else
section .text code
%endif
%ifndef NDEBUG
%ifdef BORINGSSL_DISPATCH_TEST
extern _BORINGSSL_function_hit
%endif
align 64
@ -474,7 +474,7 @@ L$_vpaes_set_encrypt_key_begin:
push ebx
push esi
push edi
%ifndef NDEBUG
%ifdef BORINGSSL_DISPATCH_TEST
push ebx
push edx
call L$016pic
@ -553,7 +553,7 @@ L$_vpaes_encrypt_begin:
push ebx
push esi
push edi
%ifndef NDEBUG
%ifdef BORINGSSL_DISPATCH_TEST
push ebx
push edx
call L$019pic

View File

@ -9,6 +9,8 @@ default rel
%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section .text code align=64
global dummy_chacha20_poly1305_asm
dummy_chacha20_poly1305_asm:

Some files were not shown because too many files have changed in this diff Show More