Merge branch 'dev_intel_iaa_deflate' of https://github.com/jinjunzh/ClickHouse into dev_intel_iaa_deflate

This commit is contained in:
jinjunzh 2022-05-05 12:00:30 -04:00
commit fd5085c290
1106 changed files with 63157 additions and 24973 deletions

View File

@ -4,7 +4,7 @@ env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
on: # yamllint disable-line rule:truthy
pull_request:
types:
- synchronize
@ -15,6 +15,7 @@ on: # yamllint disable-line rule:truthy
paths:
- 'docs/**'
- 'website/**'
- 'docker/docs/**'
jobs:
CheckLabels:
runs-on: [self-hosted, style-checker]

View File

@ -44,7 +44,7 @@ enable_language(C CXX ASM)
include (cmake/arch.cmake)
include (cmake/target.cmake)
include (cmake/tools.cmake)
include (cmake/analysis.cmake)
include (cmake/clang_tidy.cmake)
include (cmake/git_status.cmake)
# Ignore export() since we don't use it,
@ -417,7 +417,7 @@ if (COMPILER_CLANG)
endif ()
elseif (ENABLE_THINLTO)
message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with CLang")
message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with Clang")
endif ()
# Turns on all external libs like s3, kafka, ODBC, ...

View File

@ -19,6 +19,31 @@ if (NOT DEFINED ENV{CLION_IDE} AND NOT DEFINED ENV{XCODE_IDE})
endif ()
endif()
# Check if environment is polluted.
# The configuration is aborted when any of the CFLAGS / CXXFLAGS / LDFLAGS environment variables is defined,
# or when any of the CMake compiler/linker flag variables (or their *_INIT counterparts) is set to a value
# that CMake does not treat as false.
if (DEFINED ENV{CFLAGS} OR DEFINED ENV{CXXFLAGS} OR DEFINED ENV{LDFLAGS}
OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_SHARED_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS
OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_SHARED_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT)
# Print the current values first, so the user can see which variable triggered the check.
# Note: the *_INIT variables are tested above but their values are not printed here.
message("CFLAGS: $ENV{CFLAGS}")
message("CXXFLAGS: $ENV{CXXFLAGS}")
message("LDFLAGS: $ENV{LDFLAGS}")
message("CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}")
message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
message("CMAKE_EXE_LINKER_FLAGS: ${CMAKE_EXE_LINKER_FLAGS}")
message("CMAKE_SHARED_LINKER_FLAGS: ${CMAKE_SHARED_LINKER_FLAGS}")
message("CMAKE_MODULE_LINKER_FLAGS: ${CMAKE_MODULE_LINKER_FLAGS}")
# FATAL_ERROR makes CMake stop processing; the text explains how to recover (clean the variables and the build directory).
message(FATAL_ERROR "
Some of the variables like CFLAGS, CXXFLAGS, LDFLAGS are not empty.
It is not possible to build ClickHouse with custom flags.
These variables can be set up by previous invocation of some other build tools.
You should cleanup these variables and start over again.
Run the `env` command to check the details.
You will also need to remove the contents of the build directory.
Note: if you don't like this behavior, you can manually edit the cmake files, but please don't complain to developers.")
endif()
# Default toolchain - this is needed to avoid dependency on OS files.
execute_process(COMMAND uname -s OUTPUT_VARIABLE OS)

View File

@ -3,9 +3,6 @@ if (USE_CLANG_TIDY)
endif ()
add_subdirectory (base)
add_subdirectory (daemon)
add_subdirectory (loggers)
add_subdirectory (pcg-random)
add_subdirectory (widechar_width)
add_subdirectory (readpassphrase)
add_subdirectory (bridge)

View File

@ -19,6 +19,7 @@ set (SRCS
errnoToString.cpp
StringRef.cpp
safeExit.cpp
throwError.cpp
)
if (ENABLE_REPLXX)

View File

@ -37,7 +37,8 @@ struct StringRef
size_t size = 0;
/// Non-constexpr due to reinterpret_cast.
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
template <typename CharT>
requires (sizeof(CharT) == 1)
StringRef(const CharT * data_, size_t size_) : data(reinterpret_cast<const char *>(data_)), size(size_)
{
/// Sanity check for overflowed values.
@ -51,6 +52,8 @@ struct StringRef
constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {} /// NOLINT
constexpr StringRef() = default;
bool empty() const { return size == 0; }
std::string toString() const { return std::string(data, size); }
explicit operator std::string() const { return toString(); }

View File

@ -21,10 +21,12 @@ public:
return *this;
}
template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
template <typename G>
requires std::is_convertible_v<G, F>
constexpr basic_scope_guard(basic_scope_guard<G> && src) : function{src.release()} {}
template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
template <typename G>
requires std::is_convertible_v<G, F>
constexpr basic_scope_guard & operator=(basic_scope_guard<G> && src)
{
if (this != &src)
@ -35,10 +37,12 @@ public:
return *this;
}
template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
template <typename G>
requires std::is_convertible_v<G, F>
constexpr basic_scope_guard(const G & function_) : function{function_} {}
template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
template <typename G>
requires std::is_convertible_v<G, F>
constexpr basic_scope_guard(G && function_) : function{std::move(function_)} {}
~basic_scope_guard() { invoke(); }
@ -64,7 +68,8 @@ public:
return std::exchange(function, {});
}
template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
template <typename G>
requires std::is_convertible_v<G, F>
basic_scope_guard<F> & join(basic_scope_guard<G> && other)
{
if (other.function)

8
base/base/throwError.cpp Normal file
View File

@ -0,0 +1,8 @@
#include <base/throwError.h>
#include <stdexcept>
/// Raises a std::runtime_error whose message is the C string `err`.
/// Never returns to the caller; control always leaves via the thrown exception.
[[noreturn]] void throwError(const char * err)
{
    /// Build the exception object first, then throw it.
    const std::runtime_error failure{err};
    throw failure;
}

View File

@ -1,15 +1,8 @@
#pragma once
#include <stdexcept>
/// Throw DB::Exception-like exception before its definition.
/// DB::Exception is derived from Poco::Exception, which is derived from std::exception.
/// DB::Exception is generally caught as Poco::Exception. std::exception generally has other catch blocks and could lead to other outcomes.
/// DB::Exception is not defined yet. It would be better to throw Poco::Exception, but we do not want to include any big header here, even <string>.
/// So we throw some std::exception instead, in the hope that its catch block is the same as the DB::Exception one.
template <typename T>
[[noreturn]] inline void throwError(const T & err)
{
throw std::runtime_error(err);
}
[[noreturn]] void throwError(const char * err);

View File

@ -1,13 +0,0 @@
# Defines the `bridge` library, built from the single source file IBridge.cpp.
add_library (bridge
IBridge.cpp
)
# NOTE(review): the parent directory is added as a PUBLIC include directory of the `daemon` target,
# not of `bridge` itself — confirm whether this was intentional.
target_include_directories (daemon PUBLIC ..)
# Libraries that `bridge` links against; PRIVATE means they are not propagated to targets that link `bridge`.
target_link_libraries (bridge
PRIVATE
daemon
dbms
Poco::Data
Poco::Data::ODBC
)

View File

@ -2,7 +2,8 @@
option (ENABLE_CLANG_TIDY "Use clang-tidy static analyzer" OFF)
if (ENABLE_CLANG_TIDY)
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-13" "clang-tidy-12" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8")
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
if (CLANG_TIDY_PATH)
message(STATUS
@ -18,11 +19,7 @@ if (ENABLE_CLANG_TIDY)
# The variable CMAKE_CXX_CLANG_TIDY will be set inside src and base directories with non third-party code.
# set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
elseif (FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION)
message(FATAL_ERROR "clang-tidy is not found")
else ()
message(STATUS
"clang-tidy is not found.
This is normal - the tool is only used for code static analysis and isn't essential for the build.")
message(${RECONFIGURE_MESSAGE_LEVEL} "clang-tidy is not found")
endif ()
endif ()

View File

@ -158,12 +158,8 @@ elseif (COMPILER_GCC)
add_cxx_compile_options(-Wsizeof-array-argument)
# Warn for suspicious length parameters to certain string and memory built-in functions if the argument uses sizeof
add_cxx_compile_options(-Wsizeof-pointer-memaccess)
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9)
# Warn about overriding virtual functions that are not marked with the override keyword
add_cxx_compile_options(-Wsuggest-override)
endif ()
# Warn about overriding virtual functions that are not marked with the override keyword
add_cxx_compile_options(-Wsuggest-override)
# Warn whenever a switch statement has an index of boolean type and the case values are outside the range of a boolean type
add_cxx_compile_options(-Wswitch-bool)
# Warn if a self-comparison always evaluates to true or false
@ -178,41 +174,36 @@ elseif (COMPILER_GCC)
# Warn when a literal 0 is used as null pointer constant.
add_cxx_compile_options(-Wzero-as-null-pointer-constant)
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10)
# XXX: gcc10 stuck with this option while compiling GatherUtils code
# (anyway there are builds with clang, that will warn)
add_cxx_compile_options(-Wno-sequence-point)
# XXX: gcc10 false positive with this warning in MergeTreePartition.cpp
# inlined from 'void writeHexByteLowercase(UInt8, void*)' at ../src/Common/hex.h:39:11,
# inlined from 'DB::String DB::MergeTreePartition::getID(const DB::Block&) const' at ../src/Storages/MergeTree/MergeTreePartition.cpp:85:30:
# ../contrib/libc-headers/x86_64-linux-gnu/bits/string_fortified.h:34:33: error: writing 2 bytes into a region of size 0 [-Werror=stringop-overflow=]
# 34 | return __builtin___memcpy_chk (__dest, __src, __len, __bos0 (__dest));
# For some reason (bug in gcc?) macro 'GCC diagnostic ignored "-Wstringop-overflow"' doesn't help.
add_cxx_compile_options(-Wno-stringop-overflow)
endif()
# The following warnings are generally useful but had to be disabled because of compiler bugs with older GCCs.
# XXX: We should try again on more recent GCCs (--> see CMake variable GCC_MINIMUM_VERSION).
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
# reinterpretAs.cpp:182:31: error: void* memcpy(void*, const void*, size_t) copying an object of non-trivial type
# using ToFieldType = using FieldType = using UUID = struct StrongTypedef<wide::integer<128, unsigned int>, DB::UUIDTag>
# {aka struct StrongTypedef<wide::integer<128, unsigned int>, DB::UUIDTag>} from an array of const char8_t
add_cxx_compile_options(-Wno-error=class-memaccess)
# Maybe false positive...
# In file included from /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/memory:673,
# In function void std::__1::__libcpp_operator_delete(_Args ...) [with _Args = {void*, long unsigned int}],
# inlined from void std::__1::__do_deallocate_handle_size(void*, size_t, _Args ...) [with _Args = {}] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:271:34,
# inlined from void std::__1::__libcpp_deallocate(void*, size_t, size_t) at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:285:41,
# inlined from constexpr void std::__1::allocator<_Tp>::deallocate(_Tp*, size_t) [with _Tp = char] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/memory:849:39,
# inlined from static constexpr void std::__1::allocator_traits<_Alloc>::deallocate(std::__1::allocator_traits<_Alloc>::allocator_type&, std::__1::allocator_traits<_Alloc>::pointer, std::__1::allocator_traits<_Alloc>::size_type) [with _Alloc = std::__1::allocator<char>] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/__memory/allocator_traits.h:476:24,
# inlined from std::__1::basic_string<_CharT, _Traits, _Allocator>::~basic_string() [with _CharT = char; _Traits = std::__1::char_traits<char>; _Allocator = std::__1::allocator<char>] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/string:2219:35,
# inlined from std::__1::basic_string<_CharT, _Traits, _Allocator>::~basic_string() [with _CharT = char; _Traits = std::__1::char_traits<char>; _Allocator = std::__1::allocator<char>] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/string:2213:1,
# inlined from DB::JSONBuilder::JSONMap::Pair::~Pair() at /home/jakalletti/ClickHouse/ClickHouse/src/Common/JSONBuilder.h:90:12,
# inlined from void DB::JSONBuilder::JSONMap::add(std::__1::string, DB::JSONBuilder::ItemPtr) at /home/jakalletti/ClickHouse/ClickHouse/src/Common/JSONBuilder.h:97:68,
# inlined from virtual void DB::ExpressionStep::describeActions(DB::JSONBuilder::JSONMap&) const at /home/jakalletti/ClickHouse/ClickHouse/src/Processors/QueryPlan/ExpressionStep.cpp:102:12:
# /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:247:20: error: void operator delete(void*, size_t) called on a pointer to an unallocated object 7598543875853023301 [-Werror=free-nonheap-object]
add_cxx_compile_options(-Wno-error=free-nonheap-object)
# AggregateFunctionAvg.h:203:100: error: this pointer is null [-Werror=nonnull]
add_cxx_compile_options(-Wno-error=nonnull)
endif()
# gcc10 stuck with this option while compiling GatherUtils code, anyway there are builds with clang that will warn
add_cxx_compile_options(-Wno-sequence-point)
# gcc10 false positive with this warning in MergeTreePartition.cpp
# inlined from 'void writeHexByteLowercase(UInt8, void*)' at ../src/Common/hex.h:39:11,
# inlined from 'DB::String DB::MergeTreePartition::getID(const DB::Block&) const' at ../src/Storages/MergeTree/MergeTreePartition.cpp:85:30:
# ../contrib/libc-headers/x86_64-linux-gnu/bits/string_fortified.h:34:33: error: writing 2 bytes into a region of size 0 [-Werror=stringop-overflow=]
# 34 | return __builtin___memcpy_chk (__dest, __src, __len, __bos0 (__dest));
# For some reason (bug in gcc?) macro 'GCC diagnostic ignored "-Wstringop-overflow"' doesn't help.
add_cxx_compile_options(-Wno-stringop-overflow)
# reinterpretAs.cpp:182:31: error: void* memcpy(void*, const void*, size_t) copying an object of non-trivial type
# using ToFieldType = using FieldType = using UUID = struct StrongTypedef<wide::integer<128, unsigned int>, DB::UUIDTag>
# {aka struct StrongTypedef<wide::integer<128, unsigned int>, DB::UUIDTag>} from an array of const char8_t
add_cxx_compile_options(-Wno-error=class-memaccess)
# Maybe false positive...
# In file included from /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/memory:673,
# In function void std::__1::__libcpp_operator_delete(_Args ...) [with _Args = {void*, long unsigned int}],
# inlined from void std::__1::__do_deallocate_handle_size(void*, size_t, _Args ...) [with _Args = {}] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:271:34,
# inlined from void std::__1::__libcpp_deallocate(void*, size_t, size_t) at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:285:41,
# inlined from constexpr void std::__1::allocator<_Tp>::deallocate(_Tp*, size_t) [with _Tp = char] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/memory:849:39,
# inlined from static constexpr void std::__1::allocator_traits<_Alloc>::deallocate(std::__1::allocator_traits<_Alloc>::allocator_type&, std::__1::allocator_traits<_Alloc>::pointer, std::__1::allocator_traits<_Alloc>::size_type) [with _Alloc = std::__1::allocator<char>] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/__memory/allocator_traits.h:476:24,
# inlined from std::__1::basic_string<_CharT, _Traits, _Allocator>::~basic_string() [with _CharT = char; _Traits = std::__1::char_traits<char>; _Allocator = std::__1::allocator<char>] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/string:2219:35,
# inlined from std::__1::basic_string<_CharT, _Traits, _Allocator>::~basic_string() [with _CharT = char; _Traits = std::__1::char_traits<char>; _Allocator = std::__1::allocator<char>] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/string:2213:1,
# inlined from DB::JSONBuilder::JSONMap::Pair::~Pair() at /home/jakalletti/ClickHouse/ClickHouse/src/Common/JSONBuilder.h:90:12,
# inlined from void DB::JSONBuilder::JSONMap::add(std::__1::string, DB::JSONBuilder::ItemPtr) at /home/jakalletti/ClickHouse/ClickHouse/src/Common/JSONBuilder.h:97:68,
# inlined from virtual void DB::ExpressionStep::describeActions(DB::JSONBuilder::JSONMap&) const at /home/jakalletti/ClickHouse/ClickHouse/src/Processors/QueryPlan/ExpressionStep.cpp:102:12:
# /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:247:20: error: void operator delete(void*, size_t) called on a pointer to an unallocated object 7598543875853023301 [-Werror=free-nonheap-object]
add_cxx_compile_options(-Wno-error=free-nonheap-object)
# AggregateFunctionAvg.h:203:100: error: this pointer is null [-Werror=nonnull]
add_cxx_compile_options(-Wno-error=nonnull)
endif ()

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 1707a7572aa66ec5d0a2dbe2bf5effa3352e6b2d
Subproject commit 24a13f15cf0838b93f3b1beb62ed010dffdb2117

2
contrib/boringssl vendored

@ -1 +1 @@
Subproject commit c1e01a441d6db234f4f12e63a7657d1f9e6db9c1
Subproject commit 9c0715ce459de443e7b08f270a518c1702f1a380

View File

@ -154,12 +154,14 @@ set(
ios-aarch64/crypto/fipsmodule/sha512-armv8.S
ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
ios-aarch64/crypto/test/trampoline-armv8.S
ios-aarch64/crypto/third_party/sike/asm/fp-armv8.S
)
set(
CRYPTO_ios_arm_SOURCES
ios-arm/crypto/chacha/chacha-armv4.S
ios-arm/crypto/fipsmodule/aes-armv4.S
ios-arm/crypto/fipsmodule/aesv8-armx32.S
ios-arm/crypto/fipsmodule/armv4-mont.S
ios-arm/crypto/fipsmodule/bsaes-armv7.S
@ -185,12 +187,14 @@ set(
linux-aarch64/crypto/fipsmodule/sha512-armv8.S
linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
linux-aarch64/crypto/test/trampoline-armv8.S
linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S
)
set(
CRYPTO_linux_arm_SOURCES
linux-arm/crypto/chacha/chacha-armv4.S
linux-arm/crypto/fipsmodule/aes-armv4.S
linux-arm/crypto/fipsmodule/aesv8-armx32.S
linux-arm/crypto/fipsmodule/armv4-mont.S
linux-arm/crypto/fipsmodule/bsaes-armv7.S
@ -210,13 +214,13 @@ set(
linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
linux-ppc64le/crypto/test/trampoline-ppc.S
)
set(
CRYPTO_linux_x86_SOURCES
linux-x86/crypto/chacha/chacha-x86.S
linux-x86/crypto/fipsmodule/aes-586.S
linux-x86/crypto/fipsmodule/aesni-x86.S
linux-x86/crypto/fipsmodule/bn-586.S
linux-x86/crypto/fipsmodule/co-586.S
@ -237,6 +241,7 @@ set(
linux-x86_64/crypto/chacha/chacha-x86_64.S
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
linux-x86_64/crypto/fipsmodule/aes-x86_64.S
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
@ -253,6 +258,7 @@ set(
linux-x86_64/crypto/fipsmodule/x86_64-mont.S
linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
linux-x86_64/crypto/test/trampoline-x86_64.S
linux-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S"
)
@ -260,6 +266,7 @@ set(
CRYPTO_mac_x86_SOURCES
mac-x86/crypto/chacha/chacha-x86.S
mac-x86/crypto/fipsmodule/aes-586.S
mac-x86/crypto/fipsmodule/aesni-x86.S
mac-x86/crypto/fipsmodule/bn-586.S
mac-x86/crypto/fipsmodule/co-586.S
@ -280,6 +287,7 @@ set(
mac-x86_64/crypto/chacha/chacha-x86_64.S
mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
mac-x86_64/crypto/fipsmodule/aes-x86_64.S
mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
@ -296,6 +304,7 @@ set(
mac-x86_64/crypto/fipsmodule/x86_64-mont.S
mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
mac-x86_64/crypto/test/trampoline-x86_64.S
mac-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
)
set(
@ -317,6 +326,7 @@ set(
CRYPTO_win_x86_SOURCES
win-x86/crypto/chacha/chacha-x86.asm
win-x86/crypto/fipsmodule/aes-586.asm
win-x86/crypto/fipsmodule/aesni-x86.asm
win-x86/crypto/fipsmodule/bn-586.asm
win-x86/crypto/fipsmodule/co-586.asm
@ -337,6 +347,7 @@ set(
win-x86_64/crypto/chacha/chacha-x86_64.asm
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
win-x86_64/crypto/fipsmodule/aes-x86_64.asm
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
@ -353,6 +364,7 @@ set(
win-x86_64/crypto/fipsmodule/x86_64-mont.asm
win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
win-x86_64/crypto/test/trampoline-x86_64.asm
win-x86_64/crypto/third_party/sike/asm/fp-x86_64.asm
)
if(APPLE AND ARCH STREQUAL "aarch64")
@ -384,7 +396,6 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c"
@ -414,7 +425,6 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c"
"${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c"
"${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c"
@ -439,22 +449,20 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c"
"${BORINGSSL_SOURCE_DIR}/crypto/crypto.c"
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c"
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/check.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/dh.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/dh_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/params.c"
"${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c"
@ -479,8 +487,8 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c"
"${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c"
"${BORINGSSL_SOURCE_DIR}/crypto/mem.c"
@ -506,7 +514,6 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c"
@ -520,18 +527,15 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c"
"${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c"
"${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c"
@ -597,11 +601,19 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pku.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_sxnet.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c"
"${BORINGSSL_SOURCE_DIR}/third_party/fiat/curve25519.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/asm/fp_generic.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/curve_params.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/fpx.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/isogeny.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/sike.c"
)
add_library(
@ -614,8 +626,6 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc"
@ -638,6 +648,7 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc"
@ -658,9 +669,7 @@ add_executable(
"${BORINGSSL_SOURCE_DIR}/tool/client.cc"
"${BORINGSSL_SOURCE_DIR}/tool/const.cc"
"${BORINGSSL_SOURCE_DIR}/tool/digest.cc"
"${BORINGSSL_SOURCE_DIR}/tool/fd.cc"
"${BORINGSSL_SOURCE_DIR}/tool/file.cc"
"${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc"
"${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc"
"${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc"
"${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc"

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -33,7 +33,6 @@ Lone:
.align 5
_ChaCha20_ctr32:
AARCH64_VALID_CALL_TARGET
cbz x2,Labort
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x5,:pg_hi21_nc:_OPENSSL_armcap_P
@ -47,7 +46,6 @@ _ChaCha20_ctr32:
b.ne ChaCha20_neon
Lshort:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -260,7 +258,6 @@ Loop:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
Labort:
ret
@ -317,14 +314,12 @@ Loop_tail:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
ChaCha20_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -705,7 +700,6 @@ Loop_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
Ltail_neon:
@ -815,13 +809,11 @@ Ldone_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
ChaCha20_512_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -1985,7 +1977,6 @@ Ldone_512_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
#endif // !OPENSSL_NO_ASM

View File

@ -32,8 +32,6 @@ Lrcon:
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
@ -202,7 +200,6 @@ Lenc_key_abort:
.align 5
_aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl Lenc_key
@ -236,7 +233,6 @@ Loop_imc:
eor x0,x0,x0 // return value
Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _aes_hw_encrypt
@ -244,7 +240,6 @@ Ldec_key_abort:
.align 5
_aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@ -275,7 +270,6 @@ Loop_enc:
.align 5
_aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@ -306,8 +300,6 @@ Loop_dec:
.align 5
_aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
@ -599,8 +591,6 @@ Lcbc_abort:
.align 5
_aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]

View File

@ -12,8 +12,6 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl _bn_mul_mont
@ -21,7 +19,6 @@
.align 5
_bn_mul_mont:
AARCH64_SIGN_LINK_REGISTER
tst x5,#7
b.eq __bn_sqr8x_mont
tst x5,#3
@ -219,14 +216,11 @@ Lcond_copy:
mov x0,#1
ldp x23,x24,[x29,#48]
ldr x29,[sp],#64
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
__bn_sqr8x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp x1,x2
b.ne __bn_mul4x_mont
Lsqr8x_mont:
@ -980,16 +974,11 @@ Lsqr8x_done:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.align 5
__bn_mul4x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
// return address.
stp x29,x30,[sp,#-128]!
add x29,sp,#0
stp x19,x20,[sp,#16]
@ -1423,8 +1412,6 @@ Lmul4x_done:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0

View File

@ -12,8 +12,6 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl _gcm_init_neon
@ -21,7 +19,6 @@
.align 4
_gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
@ -47,7 +44,6 @@ _gcm_init_neon:
.align 4
_gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
@ -67,7 +63,6 @@ _gcm_gmult_neon:
.align 4
_gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]

View File

@ -21,7 +21,6 @@
.align 4
_gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
@ -73,7 +72,6 @@ _gcm_init_v8:
.align 4
_gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
@ -116,7 +114,6 @@ _gcm_gmult_v8:
.align 4
_gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have

View File

@ -22,8 +22,6 @@
.align 6
_sha1_block_data_order:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
#else
@ -1091,8 +1089,6 @@ Loop:
.align 6
sha1_block_armv8:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
Lv8_entry:
stp x29,x30,[sp,#-16]!
add x29,sp,#0

View File

@ -63,7 +63,6 @@
.align 6
_sha256_block_data_order:
AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
@ -74,7 +73,6 @@ _sha256_block_data_order:
tst w16,#ARMV8_SHA256
b.ne Lv8_entry
#endif
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@ -1035,7 +1033,6 @@ Loop_16_xx:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -1070,7 +1067,6 @@ LK256:
.align 6
sha256_block_armv8:
Lv8_entry:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]!
add x29,sp,#0

View File

@ -63,7 +63,6 @@
.align 6
_sha512_block_data_order:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@ -1024,7 +1023,6 @@ Loop_16_xx:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret

View File

@ -12,8 +12,6 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.section __TEXT,__const
@ -216,7 +214,6 @@ Lenc_entry:
.align 4
_vpaes_encrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -226,7 +223,6 @@ _vpaes_encrypt:
st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -455,7 +451,6 @@ Ldec_entry:
.align 4
_vpaes_decrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -465,7 +460,6 @@ _vpaes_decrypt:
st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -635,7 +629,6 @@ _vpaes_key_preheat:
.align 4
_vpaes_schedule_core:
AARCH64_SIGN_LINK_REGISTER
stp x29, x30, [sp,#-16]!
add x29,sp,#0
@ -805,7 +798,6 @@ Lschedule_mangle_last_dec:
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
ldp x29, x30, [sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -1019,7 +1011,6 @@ Lschedule_mangle_both:
.align 4
_vpaes_set_encrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1035,7 +1026,6 @@ _vpaes_set_encrypt_key:
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -1044,7 +1034,6 @@ _vpaes_set_encrypt_key:
.align 4
_vpaes_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1064,7 +1053,6 @@ _vpaes_set_decrypt_key:
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _vpaes_cbc_encrypt
@ -1072,7 +1060,6 @@ _vpaes_set_decrypt_key:
.align 4
_vpaes_cbc_encrypt:
AARCH64_SIGN_LINK_REGISTER
cbz x2, Lcbc_abort
cmp w5, #0 // check direction
b.eq vpaes_cbc_decrypt
@ -1099,7 +1086,6 @@ Lcbc_enc_loop:
st1 {v0.16b}, [x4] // write ivec
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
Lcbc_abort:
ret
@ -1107,8 +1093,6 @@ Lcbc_abort:
.align 4
vpaes_cbc_decrypt:
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
// only from vpaes_cbc_encrypt which has already signed the return address.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1150,7 +1134,6 @@ Lcbc_dec_done:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _vpaes_ctr32_encrypt_blocks
@ -1158,7 +1141,6 @@ Lcbc_dec_done:
.align 4
_vpaes_ctr32_encrypt_blocks:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1226,7 +1208,6 @@ Lctr32_done:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
#endif // !OPENSSL_NO_ASM

View File

@ -12,8 +12,6 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ -28,7 +26,6 @@
.align 4
_abi_test_trampoline:
Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
@ -131,7 +128,6 @@ Lx29_ok:
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret
@ -139,7 +135,6 @@ Lx29_ok:
.private_extern _abi_test_clobber_x0
.align 4
_abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr
ret
@ -148,7 +143,6 @@ _abi_test_clobber_x0:
.private_extern _abi_test_clobber_x1
.align 4
_abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr
ret
@ -157,7 +151,6 @@ _abi_test_clobber_x1:
.private_extern _abi_test_clobber_x2
.align 4
_abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr
ret
@ -166,7 +159,6 @@ _abi_test_clobber_x2:
.private_extern _abi_test_clobber_x3
.align 4
_abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr
ret
@ -175,7 +167,6 @@ _abi_test_clobber_x3:
.private_extern _abi_test_clobber_x4
.align 4
_abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr
ret
@ -184,7 +175,6 @@ _abi_test_clobber_x4:
.private_extern _abi_test_clobber_x5
.align 4
_abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr
ret
@ -193,7 +183,6 @@ _abi_test_clobber_x5:
.private_extern _abi_test_clobber_x6
.align 4
_abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr
ret
@ -202,7 +191,6 @@ _abi_test_clobber_x6:
.private_extern _abi_test_clobber_x7
.align 4
_abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr
ret
@ -211,7 +199,6 @@ _abi_test_clobber_x7:
.private_extern _abi_test_clobber_x8
.align 4
_abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr
ret
@ -220,7 +207,6 @@ _abi_test_clobber_x8:
.private_extern _abi_test_clobber_x9
.align 4
_abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr
ret
@ -229,7 +215,6 @@ _abi_test_clobber_x9:
.private_extern _abi_test_clobber_x10
.align 4
_abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr
ret
@ -238,7 +223,6 @@ _abi_test_clobber_x10:
.private_extern _abi_test_clobber_x11
.align 4
_abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr
ret
@ -247,7 +231,6 @@ _abi_test_clobber_x11:
.private_extern _abi_test_clobber_x12
.align 4
_abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr
ret
@ -256,7 +239,6 @@ _abi_test_clobber_x12:
.private_extern _abi_test_clobber_x13
.align 4
_abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr
ret
@ -265,7 +247,6 @@ _abi_test_clobber_x13:
.private_extern _abi_test_clobber_x14
.align 4
_abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr
ret
@ -274,7 +255,6 @@ _abi_test_clobber_x14:
.private_extern _abi_test_clobber_x15
.align 4
_abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr
ret
@ -283,7 +263,6 @@ _abi_test_clobber_x15:
.private_extern _abi_test_clobber_x16
.align 4
_abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr
ret
@ -292,7 +271,6 @@ _abi_test_clobber_x16:
.private_extern _abi_test_clobber_x17
.align 4
_abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr
ret
@ -301,7 +279,6 @@ _abi_test_clobber_x17:
.private_extern _abi_test_clobber_x19
.align 4
_abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr
ret
@ -310,7 +287,6 @@ _abi_test_clobber_x19:
.private_extern _abi_test_clobber_x20
.align 4
_abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr
ret
@ -319,7 +295,6 @@ _abi_test_clobber_x20:
.private_extern _abi_test_clobber_x21
.align 4
_abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr
ret
@ -328,7 +303,6 @@ _abi_test_clobber_x21:
.private_extern _abi_test_clobber_x22
.align 4
_abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr
ret
@ -337,7 +311,6 @@ _abi_test_clobber_x22:
.private_extern _abi_test_clobber_x23
.align 4
_abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr
ret
@ -346,7 +319,6 @@ _abi_test_clobber_x23:
.private_extern _abi_test_clobber_x24
.align 4
_abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr
ret
@ -355,7 +327,6 @@ _abi_test_clobber_x24:
.private_extern _abi_test_clobber_x25
.align 4
_abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr
ret
@ -364,7 +335,6 @@ _abi_test_clobber_x25:
.private_extern _abi_test_clobber_x26
.align 4
_abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr
ret
@ -373,7 +343,6 @@ _abi_test_clobber_x26:
.private_extern _abi_test_clobber_x27
.align 4
_abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr
ret
@ -382,7 +351,6 @@ _abi_test_clobber_x27:
.private_extern _abi_test_clobber_x28
.align 4
_abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr
ret
@ -391,7 +359,6 @@ _abi_test_clobber_x28:
.private_extern _abi_test_clobber_x29
.align 4
_abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr
ret
@ -400,7 +367,6 @@ _abi_test_clobber_x29:
.private_extern _abi_test_clobber_d0
.align 4
_abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr
ret
@ -409,7 +375,6 @@ _abi_test_clobber_d0:
.private_extern _abi_test_clobber_d1
.align 4
_abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr
ret
@ -418,7 +383,6 @@ _abi_test_clobber_d1:
.private_extern _abi_test_clobber_d2
.align 4
_abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr
ret
@ -427,7 +391,6 @@ _abi_test_clobber_d2:
.private_extern _abi_test_clobber_d3
.align 4
_abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr
ret
@ -436,7 +399,6 @@ _abi_test_clobber_d3:
.private_extern _abi_test_clobber_d4
.align 4
_abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr
ret
@ -445,7 +407,6 @@ _abi_test_clobber_d4:
.private_extern _abi_test_clobber_d5
.align 4
_abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr
ret
@ -454,7 +415,6 @@ _abi_test_clobber_d5:
.private_extern _abi_test_clobber_d6
.align 4
_abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr
ret
@ -463,7 +423,6 @@ _abi_test_clobber_d6:
.private_extern _abi_test_clobber_d7
.align 4
_abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr
ret
@ -472,7 +431,6 @@ _abi_test_clobber_d7:
.private_extern _abi_test_clobber_d8
.align 4
_abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr
ret
@ -481,7 +439,6 @@ _abi_test_clobber_d8:
.private_extern _abi_test_clobber_d9
.align 4
_abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr
ret
@ -490,7 +447,6 @@ _abi_test_clobber_d9:
.private_extern _abi_test_clobber_d10
.align 4
_abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr
ret
@ -499,7 +455,6 @@ _abi_test_clobber_d10:
.private_extern _abi_test_clobber_d11
.align 4
_abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr
ret
@ -508,7 +463,6 @@ _abi_test_clobber_d11:
.private_extern _abi_test_clobber_d12
.align 4
_abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr
ret
@ -517,7 +471,6 @@ _abi_test_clobber_d12:
.private_extern _abi_test_clobber_d13
.align 4
_abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr
ret
@ -526,7 +479,6 @@ _abi_test_clobber_d13:
.private_extern _abi_test_clobber_d14
.align 4
_abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr
ret
@ -535,7 +487,6 @@ _abi_test_clobber_d14:
.private_extern _abi_test_clobber_d15
.align 4
_abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr
ret
@ -544,7 +495,6 @@ _abi_test_clobber_d15:
.private_extern _abi_test_clobber_d16
.align 4
_abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr
ret
@ -553,7 +503,6 @@ _abi_test_clobber_d16:
.private_extern _abi_test_clobber_d17
.align 4
_abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr
ret
@ -562,7 +511,6 @@ _abi_test_clobber_d17:
.private_extern _abi_test_clobber_d18
.align 4
_abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr
ret
@ -571,7 +519,6 @@ _abi_test_clobber_d18:
.private_extern _abi_test_clobber_d19
.align 4
_abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr
ret
@ -580,7 +527,6 @@ _abi_test_clobber_d19:
.private_extern _abi_test_clobber_d20
.align 4
_abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr
ret
@ -589,7 +535,6 @@ _abi_test_clobber_d20:
.private_extern _abi_test_clobber_d21
.align 4
_abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr
ret
@ -598,7 +543,6 @@ _abi_test_clobber_d21:
.private_extern _abi_test_clobber_d22
.align 4
_abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr
ret
@ -607,7 +551,6 @@ _abi_test_clobber_d22:
.private_extern _abi_test_clobber_d23
.align 4
_abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr
ret
@ -616,7 +559,6 @@ _abi_test_clobber_d23:
.private_extern _abi_test_clobber_d24
.align 4
_abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr
ret
@ -625,7 +567,6 @@ _abi_test_clobber_d24:
.private_extern _abi_test_clobber_d25
.align 4
_abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr
ret
@ -634,7 +575,6 @@ _abi_test_clobber_d25:
.private_extern _abi_test_clobber_d26
.align 4
_abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr
ret
@ -643,7 +583,6 @@ _abi_test_clobber_d26:
.private_extern _abi_test_clobber_d27
.align 4
_abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr
ret
@ -652,7 +591,6 @@ _abi_test_clobber_d27:
.private_extern _abi_test_clobber_d28
.align 4
_abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr
ret
@ -661,7 +599,6 @@ _abi_test_clobber_d28:
.private_extern _abi_test_clobber_d29
.align 4
_abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr
ret
@ -670,7 +607,6 @@ _abi_test_clobber_d29:
.private_extern _abi_test_clobber_d30
.align 4
_abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr
ret
@ -679,7 +615,6 @@ _abi_test_clobber_d30:
.private_extern _abi_test_clobber_d31
.align 4
_abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr
ret
@ -688,7 +623,6 @@ _abi_test_clobber_d31:
.private_extern _abi_test_clobber_v8_upper
.align 4
_abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr
ret
@ -697,7 +631,6 @@ _abi_test_clobber_v8_upper:
.private_extern _abi_test_clobber_v9_upper
.align 4
_abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr
ret
@ -706,7 +639,6 @@ _abi_test_clobber_v9_upper:
.private_extern _abi_test_clobber_v10_upper
.align 4
_abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr
ret
@ -715,7 +647,6 @@ _abi_test_clobber_v10_upper:
.private_extern _abi_test_clobber_v11_upper
.align 4
_abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr
ret
@ -724,7 +655,6 @@ _abi_test_clobber_v11_upper:
.private_extern _abi_test_clobber_v12_upper
.align 4
_abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr
ret
@ -733,7 +663,6 @@ _abi_test_clobber_v12_upper:
.private_extern _abi_test_clobber_v13_upper
.align 4
_abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr
ret
@ -742,7 +671,6 @@ _abi_test_clobber_v13_upper:
.private_extern _abi_test_clobber_v14_upper
.align 4
_abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr
ret
@ -751,7 +679,6 @@ _abi_test_clobber_v14_upper:
.private_extern _abi_test_clobber_v15_upper
.align 4
_abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr
ret

View File

@ -0,0 +1,996 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section __TEXT,__const
// Lp434x2: constant subtracted (and conditionally added back) by sike_fpadd
// and sike_fpsub. Six quadwords are stored for a seven-limb value: those
// routines apply the second quadword (0xFFFFFFFFFFFFFFFF) to both limb 1 and
// limb 2.
# p434 x 2
Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
// Lp434p1: four quadwords loaded by sike_fprdc as its multiplier. sike_fprdc
// accumulates the products starting at word 3 (byte offset 0x18) of its input.
# p434 + 1
Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
.text
.globl _sike_mpmul
.private_extern _sike_mpmul
.align 4
// sike_mpmul: multi-precision multiplication.
//   x0 -> first operand,  seven 64-bit words (byte offsets 0..48 are read)
//   x1 -> second operand, seven 64-bit words (byte offsets 0..48 are read)
//   x2 -> result, fourteen 64-bit words (byte offsets 0..104 are written)
// Each operand is split into a low part of four words (AL, BL) and a high
// part of three words (AH, BH); the comments below follow that naming.
// The result buffer at x2 is also used as scratch before the inputs have been
// fully consumed, so it should not overlap either input.
// x0 and x1 are overwritten; x19-x28 are saved and restored on a 96-byte frame.
_sike_mpmul:
stp x29, x30, [sp,#-96]!
add x29, sp, #0
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
ldp x14, x15, [x1,#32]
ldr x16, [x1,#48]
// x3-x6 <- AH + AL, x7 <- carry
adds x3, x3, x7
adcs x4, x4, x8
adcs x5, x5, x9
adcs x6, x6, xzr
adc x7, xzr, xzr
// x10-x13 <- BH + BL, x8 <- carry
adds x10, x10, x14
adcs x11, x11, x15
adcs x12, x12, x16
adcs x13, x13, xzr
adc x8, xzr, xzr
// x9 <- combined carry
and x9, x7, x8
// x7-x8 <- mask (0 or all-ones, from the carries)
sub x7, xzr, x7
sub x8, xzr, x8
// x14-x17 <- masked (BH + BL)
and x14, x10, x7
and x15, x11, x7
and x16, x12, x7
and x17, x13, x7
// x20-x23 <- masked (AH + AL)
and x20, x3, x8
and x21, x4, x8
and x22, x5, x8
and x23, x6, x8
// x14-x17, x7 <- masked (AH+AL) + masked (BH+BL), step 1
adds x14, x14, x20
adcs x15, x15, x21
adcs x16, x16, x22
adcs x17, x17, x23
adc x7, x9, xzr
// x8-x9,x19-x24 <- (AH+AL) x (BH+BL), low part
// x3,x4 are spilled to the result buffer here and reloaded further down.
stp x3, x4, [x2,#0]
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x25, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x23, x10, x12
adcs x26, x11, x13
adc x24, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x19, xzr, x25
sub x20, xzr, x24
and x8, x23, x19
and x9, x26, x19
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x21, x3, x20
and x22, x4, x20
mul x19, x3, x23
mul x20, x3, x26
and x25, x25, x24
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x8, x21, x8
umulh x21, x3, x26
adcs x9, x22, x9
umulh x22, x3, x23
adc x25, x25, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x23
umulh x23, x4, x23
adds x20, x20, x22
adc x21, x21, xzr
mul x24, x4, x26
umulh x26, x4, x26
adds x20, x20, x3
adcs x21, x21, x23
adc x22, xzr, xzr
adds x21, x21, x24
adc x22, x22, x26
ldp x3, x4, [x2,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x21, x8, x21
umulh x24, x3, x10
umulh x26, x3, x11
adcs x22, x9, x22
mul x8, x3, x10
mul x9, x3, x11
adc x25, x25, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x9, x9, x24
adc x26, x26, xzr
mul x23, x4, x11
umulh x11, x4, x11
adds x9, x9, x3
adcs x26, x26, x10
adc x24, xzr, xzr
adds x26, x26, x23
adc x24, x24, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x19, x19, x8
sbcs x20, x20, x9
sbcs x21, x21, x26
mul x4, x5, x13
umulh x23, x5, x13
sbcs x22, x22, x24
sbc x25, x25, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x23, x23, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x23, x23, x12
adc x10, xzr, xzr
adds x23, x23, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x19, x19, x3
sbcs x20, x20, x4
sbcs x21, x21, x23
sbcs x22, x22, x10
sbc x25, x25, xzr
adds x19, x19, x26
adcs x20, x20, x24
adcs x21, x21, x3
adcs x22, x22, x4
adcs x23, x25, x23
adc x24, x10, xzr
// x14-x17, x7 <- (AH+AL) x (BH+BL), final step
adds x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adc x7, x7, xzr
// Load AL
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
// Load BL
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
// Temporarily store x8,x9 at [x2]
stp x8, x9, [x2,#0]
// x21-x28 <- AL x BL
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x8, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x27, x10, x12
adcs x9, x11, x13
adc x28, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x23, xzr, x8
sub x24, xzr, x28
and x21, x27, x23
and x22, x9, x23
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x25, x3, x24
and x26, x4, x24
mul x23, x3, x27
mul x24, x3, x9
and x8, x8, x28
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x21, x25, x21
umulh x25, x3, x9
adcs x22, x26, x22
umulh x26, x3, x27
adc x8, x8, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x27
umulh x27, x4, x27
adds x24, x24, x26
adc x25, x25, xzr
mul x28, x4, x9
umulh x9, x4, x9
adds x24, x24, x3
adcs x25, x25, x27
adc x26, xzr, xzr
adds x25, x25, x28
adc x26, x26, x9
ldp x3, x4, [x0,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x25, x21, x25
umulh x28, x3, x10
umulh x9, x3, x11
adcs x26, x22, x26
mul x21, x3, x10
mul x22, x3, x11
adc x8, x8, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x22, x22, x28
adc x9, x9, xzr
mul x27, x4, x11
umulh x11, x4, x11
adds x22, x22, x3
adcs x9, x9, x10
adc x28, xzr, xzr
adds x9, x9, x27
adc x28, x28, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x23, x23, x21
sbcs x24, x24, x22
sbcs x25, x25, x9
mul x4, x5, x13
umulh x27, x5, x13
sbcs x26, x26, x28
sbc x8, x8, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x27, x27, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x27, x27, x12
adc x10, xzr, xzr
adds x27, x27, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x23, x23, x3
sbcs x24, x24, x4
sbcs x25, x25, x27
sbcs x26, x26, x10
sbc x8, x8, xzr
adds x23, x23, x9
adcs x24, x24, x28
adcs x25, x25, x3
adcs x26, x26, x4
adcs x27, x8, x27
adc x28, x10, xzr
// Restore x8,x9
ldp x8, x9, [x2,#0]
// x8-x9,x19,x20,x14-x17,x7 <- (AH+AL) x (BH+BL) - ALxBL
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, x27
sbcs x17, x17, x28
sbc x7, x7, xzr
// Store ALxBL, low
stp x21, x22, [x2]
stp x23, x24, [x2,#16]
// Load AH
ldp x3, x4, [x0,#32]
ldr x5, [x0,#48]
// Load BH
ldp x10, x11, [x1,#32]
ldr x12, [x1,#48]
adds x8, x8, x25
adcs x9, x9, x26
adcs x19, x19, x27
adcs x20, x20, x28
// BH is already in registers, so x1 is reused to hold the carry of the
// addition above until the "adds x1, x1, #1" near the end.
adc x1, xzr, xzr
add x0, x0, #32
// Temporarily store x8,x9 at [x2,#32]
stp x8,x9, [x2,#32]
// x21-x28 <- AH x BH
// A0 * B0
mul x21, x3, x10 // C0
umulh x24, x3, x10
// A0 * B1
mul x22, x3, x11
umulh x23, x3, x11
// A1 * B0
mul x8, x4, x10
umulh x9, x4, x10
adds x22, x22, x24
adc x23, x23, xzr
// A0 * B2
mul x27, x3, x12
umulh x28, x3, x12
adds x22, x22, x8 // C1
adcs x23, x23, x9
adc x24, xzr, xzr
// A2 * B0
mul x8, x5, x10
umulh x25, x5, x10
adds x23, x23, x27
adcs x24, x24, x25
adc x25, xzr, xzr
// A1 * B1
mul x27, x4, x11
umulh x9, x4, x11
adds x23, x23, x8
adcs x24, x24, x28
adc x25, x25, xzr
// A1 * B2
mul x8, x4, x12
umulh x28, x4, x12
adds x23, x23, x27 // C2
adcs x24, x24, x9
adc x25, x25, xzr
// A2 * B1
mul x27, x5, x11
umulh x9, x5, x11
adds x24, x24, x8
adcs x25, x25, x28
adc x26, xzr, xzr
// A2 * B2
mul x8, x5, x12
umulh x28, x5, x12
adds x24, x24, x27 // C3
adcs x25, x25, x9
adc x26, x26, xzr
adds x25, x25, x8 // C4
adc x26, x26, x28 // C5
// Restore x8,x9
ldp x8,x9, [x2,#32]
// x1 <- -(saved carry): 0 or all-ones
neg x1, x1
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, xzr
sbcs x17, x17, xzr
sbc x7, x7, xzr
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
stp x8, x9, [x2,#32]
stp x19, x20, [x2,#48]
// Re-create the saved carry in the C flag: x1 is 0 or all-ones, so adding 1
// carries out exactly when the saved carry was 1.
adds x1, x1, #1
adcs x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adcs x25, x7, x25
adc x26, x26, xzr
stp x14, x15, [x2,#64]
stp x16, x17, [x2,#80]
stp x25, x26, [x2,#96]
// Restore callee-saved registers and return
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
.globl _sike_fprdc
.private_extern _sike_fprdc
.align 4
// sike_fprdc: reduces a fourteen-word value to seven words using the
// four-quadword constant Lp434p1 (presumably Montgomery reduction modulo
// p434 - confirm against the C caller).
//   x0 -> input, fourteen 64-bit words (byte offsets 0x00..0x68 are read)
//   x1 -> output, seven 64-bit words (byte offsets 0x00..0x30 are written)
// Each round multiplies one or two low words by Lp434p1 and adds the product
// into the running value three words higher.
// x0 is overwritten after its last load; x19-x28 are saved and restored on a
// 96-byte frame.
_sike_fprdc:
stp x29, x30, [sp, #-96]!
add x29, sp, xzr
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x2, x3, [x0,#0] // a[0-1]
// Load the prime constant
adrp x26, Lp434p1@PAGE
add x26, x26, Lp434p1@PAGEOFF
ldp x23, x24, [x26, #0x0]
// x26 is the base register here and is overwritten by this load
ldp x25, x26, [x26,#0x10]
// a[0-1] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x10, x3, x23
umulh x11, x3, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x10 // C1
adcs x6, x6, x11
adc x7, xzr, xzr
mul x10, x3, x24
umulh x11, x3, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x10 // C2
adcs x7, x7, x11
adc x8, x8, xzr
mul x10, x3, x25
umulh x11, x3, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x3, x26
umulh x28, x3, x26
adds x7, x7, x10 // C3
adcs x8, x8, x11
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
// Load input words 3..13 and add the product C0-C5 starting at word 3
ldp x10, x11, [x0, #0x18]
ldp x12, x13, [x0, #0x28]
ldp x14, x15, [x0, #0x38]
ldp x16, x17, [x0, #0x48]
ldp x19, x20, [x0, #0x58]
ldr x21, [x0, #0x68]
adds x10, x10, x4
adcs x11, x11, x5
adcs x12, x12, x6
adcs x13, x13, x7
adcs x14, x14, x8
adcs x15, x15, x9
adcs x22, x16, xzr
adcs x17, x17, xzr
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
ldr x2, [x0,#0x10] // a[2]
// a[2-3] * p434+1 (word 3 is the updated value in x10)
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x0, x10, x23
umulh x3, x10, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x0 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x0, x10, x24
umulh x3, x10, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x0 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x0, x10, x25
umulh x3, x10, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x10, x26
umulh x28, x10, x26
adds x7, x7, x0 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x12, x12, x4
adcs x13, x13, x5
adcs x14, x14, x6
adcs x15, x15, x7
adcs x16, x22, x8
adcs x17, x17, x9
adcs x22, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
// (x11, x12) * p434+1
mul x4, x11, x23 // C0
umulh x7, x11, x23
mul x5, x11, x24
umulh x6, x11, x24
mul x10, x12, x23
umulh x3, x12, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x11, x25
umulh x28, x11, x25
adds x5, x5, x10 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x10, x12, x24
umulh x3, x12, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x11, x26
umulh x28, x11, x26
adds x6, x6, x10 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x10, x12, x25
umulh x3, x12, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x12, x26
umulh x28, x12, x26
adds x7, x7, x10 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x14, x14, x4
adcs x15, x15, x5
adcs x16, x16, x6
adcs x17, x17, x7
adcs x19, x22, x8
adcs x20, x20, x9
adc x22, x21, xzr
stp x14, x15, [x1, #0x0] // C0, C1
// x13 * p434+1 (single word, so only five product words C0-C4)
mul x4, x13, x23 // C0
umulh x10, x13, x23
mul x5, x13, x24
umulh x27, x13, x24
adds x5, x5, x10 // C1
adc x10, xzr, xzr
mul x6, x13, x25
umulh x28, x13, x25
adds x27, x10, x27
adcs x6, x6, x27 // C2
adc x10, xzr, xzr
mul x7, x13, x26
umulh x8, x13, x26
adds x28, x10, x28
adcs x7, x7, x28 // C3
adc x8, x8, xzr // C4
adds x16, x16, x4
adcs x17, x17, x5
adcs x19, x19, x6
adcs x20, x20, x7
adc x21, x22, x8
// Store the remaining five output words
str x16, [x1, #0x10]
stp x17, x19, [x1, #0x18]
stp x20, x21, [x1, #0x28]
// Restore callee-saved registers and return
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
.globl _sike_fpadd
.private_extern _sike_fpadd
.align 4
// sike_fpadd: seven-word addition followed by a conditional correction.
//   x0 -> first operand  (seven 64-bit words)
//   x1 -> second operand (seven 64-bit words)
//   x2 -> result         (seven 64-bit words)
// Computes a + b - 2xp434, then adds 2xp434 back if that subtraction
// borrowed. The correction is applied through a mask rather than a branch.
// x0 is overwritten with the mask.
_sike_fpadd:
stp x29,x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
// Subtract 2xp434
adrp x17, Lp434x2@PAGE
add x17, x17, Lp434x2@PAGEOFF
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
// Lp434x2 stores six quadwords for seven limbs: x12 is used for both limb 1
// and limb 2 (the stored value is 0xFFFFFFFFFFFFFFFF).
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14
sbcs x8, x8, x15
sbcs x9, x9, x16
// x0 <- all-ones if the subtraction borrowed, otherwise 0
sbc x0, xzr, xzr // x0 can be reused now
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl _sike_fpsub
.private_extern _sike_fpsub
.align 4
// sike_fpsub: seven-word subtraction followed by a conditional correction.
//   x0 -> minuend    (seven 64-bit words)
//   x1 -> subtrahend (seven 64-bit words)
//   x2 -> result     (seven 64-bit words)
// Computes a - b, then adds 2xp434 if the subtraction borrowed. The
// correction is applied through a mask rather than a branch.
// x0 is overwritten with the mask.
_sike_fpsub:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
// x0 <- all-ones if the subtraction borrowed, otherwise 0
sbc x0, xzr, xzr
// Load 2xp434
adrp x17, Lp434x2@PAGE
add x17, x17, Lp434x2@PAGEOFF
// All six stored quadwords
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
// Add 2xp434 anded with the mask in x0.
// Lp434x2 stores six quadwords for seven limbs: x12 is used for both limb 1
// and limb 2 (the stored value is 0xFFFFFFFFFFFFFFFF).
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl _sike_mpadd_asm
.private_extern _sike_mpadd_asm
.align 4
// sike_mpadd_asm: plain seven-word addition with no reduction.
//   x0 -> first operand  (seven 64-bit words)
//   x1 -> second operand (seven 64-bit words)
//   x2 -> result         (seven 64-bit words)
// The carry out of the top word is discarded (the last add is a
// non-flag-setting adc and nothing reads the flags afterwards).
_sike_mpadd_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Ripple-carry add across all seven words
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
.globl _sike_mpsubx2_asm
.private_extern _sike_mpsubx2_asm
.align 4
// sike_mpsubx2_asm: fourteen-word subtraction, c = a - b.
//   x0 -> a (fourteen 64-bit words)
//   x1 -> b (fourteen 64-bit words)
//   x2 -> c (fourteen 64-bit words)
// On exit x0 holds 0 if the subtraction did not borrow and all-ones if it
// did. Loads and stores are interleaved with the sbcs chain; none of them
// modify the flags, so the borrow carries through all fourteen words.
_sike_mpsubx2_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
// Words 0-3
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
// Words 4-7
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x11, x12, [x1,#32]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbcs x9, x9, x13
sbcs x10, x10, x14
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
// Words 8-11
ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
// Words 12-13
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x7, x7, x11
sbcs x8, x8, x12
// x0 <- all-ones if the full subtraction borrowed, otherwise 0
sbc x0, xzr, xzr
stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
ldp x29, x30, [sp],#16
ret
.globl _sike_mpdblsubx2_asm
.private_extern _sike_mpdblsubx2_asm
.align 4
// sike_mpdblsubx2_asm: in-place double subtraction, c = c - a - b.
//   x0 -> a (fourteen 64-bit words, read)
//   x1 -> b (fourteen 64-bit words, read)
//   x2 -> c (fourteen 64-bit words, read and written)
// The value is processed in three chunks (words 0-5, 6-11, 12-13). Within a
// chunk, a and b are subtracted in two separate borrow chains. The number of
// chains that borrowed is tracked in x9 and subtracted from the lowest word
// of the next chunk.
_sike_mpdblsubx2_asm:
stp x29, x30, [sp, #-16]!
add x29, sp, #0
// Chunk 1: words 0-5
ldp x3, x4, [x2, #0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x11, x12, [x0, #0]
ldp x13, x14, [x0,#16]
ldp x15, x16, [x0,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
// x9 stores carry (C is 1 when the chain above did NOT borrow)
adc x9, xzr, xzr
ldp x11, x12, [x1, #0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
// x9 <- number of chains (0..2) that did not borrow
adc x9, x9, xzr
stp x3, x4, [x2, #0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
// Chunk 2: words 6-11
ldp x3, x4, [x2,#48]
ldp x5, x6, [x2,#64]
ldp x7, x8, [x2,#80]
ldp x11, x12, [x0,#48]
ldp x13, x14, [x0,#64]
ldp x15, x16, [x0,#80]
// x9 = 2 - x9 (number of borrows out of the previous chunk)
neg x9, x9
add x9, x9, #2
// NOTE(review): if "subs x3, x3, x9" itself borrows, the following sbcs
// takes that borrow from x3 again rather than from x4. This can only occur
// when x3 < x9 (x9 is at most 2) - confirm against the upstream generator
// whether this case is intended or unreachable.
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, xzr, xzr
ldp x11, x12, [x1,#48]
ldp x13, x14, [x1,#64]
ldp x15, x16, [x1,#80]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2,#48]
stp x5, x6, [x2,#64]
stp x7, x8, [x2,#80]
// Chunk 3: words 12-13
ldp x3, x4, [x2,#96]
ldp x11, x12, [x0,#96]
ldp x13, x14, [x1,#96]
// x9 = 2 - x9 (number of borrows out of the previous chunk)
neg x9, x9
add x9, x9, #2
// NOTE(review): same borrow pattern as in chunk 2 above - confirm.
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
subs x3, x3, x13
// Final borrow out of the top word is discarded
sbc x4, x4, x14
stp x3, x4, [x2,#96]
ldp x29, x30, [sp],#16
ret
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,348 @@
#else
.code 32
#endif
.align 5
@ rem_4bit: sixteen 16-bit constants (32 bytes), indexed by a 4-bit remainder.
@ _gcm_ghash_4bit copies this table to the stack and reads entries with
@ "ldrh ...,[sp,<2*rem>]".
rem_4bit:
.short 0x0000,0x1C20,0x3840,0x2460
.short 0x7080,0x6CA0,0x48C0,0x54E0
.short 0xE100,0xFD20,0xD940,0xC560
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
#ifdef __thumb2__
.thumb_func rem_4bit_get
#endif
@ rem_4bit_get: puts the address of rem_4bit in r2 and branches to
@ Lrem_4bit_got (defined outside this excerpt).
rem_4bit_get:
#if defined(__thumb2__)
adr r2,rem_4bit
#else
@ In ARM state pc reads as this instruction's address + 8, and the 32-byte
@ rem_4bit table sits immediately before this label, hence 8+32.
sub r2,pc,#8+32 @ &rem_4bit
#endif
b Lrem_4bit_got
@ Presumably padding so this routine occupies 16 bytes in ARM state, matching
@ the "#8+48" offset _gcm_ghash_4bit uses to reach rem_4bit - confirm.
nop
nop
@ _gcm_ghash_4bit: folds a run of 16-byte input blocks into the 16-byte
@ value at r0, using the nibble-indexed table at r1.
@   r0 - 16-byte running value; read byte by byte and rewritten after
@        every block
@   r1 - table base (Htbl); entries are 16 bytes each
@   r2 - input pointer, advanced by 16 after every block
@   r3 - input length in bytes; turned into an end pointer below
@ r4-r11 are saved and restored; r12 and r14 are scratch.
@ NOTE(review): the block loop is bottom-tested with cmp r2,r3 / bne, so
@ one block is always processed and the loop only ends when r2 hits the end
@ pointer exactly - presumably callers pass a non-zero multiple of 16;
@ confirm.
.globl _gcm_ghash_4bit
.private_extern _gcm_ghash_4bit
#ifdef __thumb2__
.thumb_func _gcm_ghash_4bit
#endif
.align 4
_gcm_ghash_4bit:
#if defined(__thumb2__)
adr r12,rem_4bit
#else
@ pc reads as this instruction plus 8; rem_4bit lies 48 bytes before this
@ label (32-byte table plus the 16-byte rem_4bit_get stub).
sub r12,pc,#8+48 @ &rem_4bit
#endif
add r3,r2,r3 @ r3 to point at the end
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
@ Stack from here on: [sp,#0..31] copy of rem_4bit, [sp,#32] saved end
@ pointer, [sp,#36...] saved r4-r11 and lr.
@ Prime the first block: r12 = last input byte, r14 = last byte at r0.
ldrb r12,[r2,#15]
ldrb r14,[r0,#15]
@ Louter: one iteration per 16-byte block. On entry the low byte of r12 is
@ input byte 15 and the low byte of r14 is byte 15 of the running value.
@ r7:r6:r5:r4 is handled as one 128-bit accumulator with r4 as the least
@ significant word; each step shifts it right by 4, XORs in a table entry
@ and XORs rem_4bit[bits shifted out] into the top 16 bits of r7.
Louter:
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
@ r3 is reused as the byte index for the inner loop; the end pointer is
@ re-loaded from the stack after the loop.
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
add r11,r1,r14
ldrb r12,[r2,#14]
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[sp,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
ldrb r14,[r0,#14]
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
eor r7,r7,r8,lsl#16
@ Linner: handles bytes 14 down to 0 of the current block. On entry r12 is
@ the low nibble and r14 the high nibble (still times 16) of the current
@ byte. The subs sets the flags that guard every pl-conditional instruction
@ below and the closing bpl; once r3 goes negative the loads of the next
@ byte are skipped and the loop falls through.
Linner:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[sp,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r2,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r8,[r0,r3]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r9,[sp,r14]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
#ifdef __thumb2__
it pl
#endif
eorpl r12,r12,r8
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
bpl Linner
ldr r3,[sp,#32] @ re-load r3/end
add r2,r2,#16
@ Keep the unswapped low word: its low byte is the new byte 15 of the
@ running value, which Louter expects in r14 on the next iteration.
mov r14,r4
@ Write r4,r5,r6,r7 back to offsets 12,8,4,0 of the running value in
@ big-endian byte order: byte-reverse then word store on little-endian
@ ARMv7+, plain word store on big-endian, byte stores otherwise.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
@ End-of-input test; the flags set here are consumed by the conditional
@ ldrneb and by the bne Louter at the bottom, and the stores in between do
@ not modify them.
cmp r2,r3
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#ifdef __thumb2__
it ne
#endif
ldrneb r12,[r2,#15]
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
bne Louter
@ Drop the 32-byte rem_4bit copy and the saved end pointer (4 bytes) so
@ that sp points at the saved r4.
add sp,sp,#36
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
@ Pre-ARMv5 return: if bit 0 of lr is clear, return with a plain move to
@ pc; otherwise fall through to the word below, which is the encoding of
@ bx lr.
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
@ _gcm_gmult_4bit: transforms the 16-byte value at r0 in place using the
@ nibble-indexed table at r1 (presumably the GHASH multiplication by H, as
@ the name suggests - confirm against the generator script).
@   r0 - 16-byte value; read from byte 15 down to byte 0, then overwritten
@   r1 - table base (Htbl); entries are 16 bytes each
@ r2 and r3 are scratch here: r2 receives the address of rem_4bit from the
@ rem_4bit_get stub and r3 is the byte counter. r4-r11 are saved and
@ restored; r12 and r14 are scratch.
@ Unlike _gcm_ghash_4bit, rem_4bit is read in place through r2 rather than
@ from a stack copy.
.globl _gcm_gmult_4bit
.private_extern _gcm_gmult_4bit
#ifdef __thumb2__
.thumb_func _gcm_gmult_4bit
#endif
_gcm_gmult_4bit:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldrb r12,[r0,#15]
@ Detour through the stub next to the table; it sets r2 = &rem_4bit and
@ branches back to Lrem_4bit_got just below.
b rem_4bit_get
Lrem_4bit_got:
and r14,r12,#0xf0
and r12,r12,#0x0f
@ r3 counts the remaining bytes, 14 down to 0.
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
ldrb r12,[r0,#14]
add r11,r1,r14
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
@ r7:r6:r5:r4 is handled as one 128-bit accumulator with r4 as the least
@ significant word; each step shifts it right by 4, XORs in a table entry
@ and XORs rem_4bit[bits shifted out] into the top 16 bits of r7.
eor r4,r8,r4,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
and r14,r12,#0xf0
eor r7,r7,r8,lsl#16
and r12,r12,#0x0f
@ Loop: handles bytes 14 down to 0. On entry r12 is the low nibble and r14
@ the high nibble (still times 16) of the current byte. The subs sets the
@ flags that guard the pl-conditional instructions and the closing bpl;
@ once r3 goes negative the load of the next byte is skipped and the loop
@ falls through.
Loop:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[r2,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r0,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
bpl Loop
@ Write r4,r5,r6,r7 back to offsets 12,8,4,0 of the value at r0 in
@ big-endian byte order: byte-reverse then word store on little-endian
@ ARMv7+, plain word store on big-endian, byte stores otherwise.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
@ Pre-ARMv5 return: if bit 0 of lr is clear, return with a plain move to
@ pc; otherwise fall through to the word below, which is the encoding of
@ bx lr.
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,7 @@
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
Labi_test_trampoline_begin:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

View File

@ -34,7 +34,6 @@
.type ChaCha20_ctr32,%function
.align 5
ChaCha20_ctr32:
AARCH64_VALID_CALL_TARGET
cbz x2,.Labort
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x5,:pg_hi21_nc:OPENSSL_armcap_P
@ -48,7 +47,6 @@ ChaCha20_ctr32:
b.ne ChaCha20_neon
.Lshort:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -261,7 +259,6 @@ ChaCha20_ctr32:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
.Labort:
ret
@ -318,14 +315,12 @@ ChaCha20_ctr32:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.size ChaCha20_ctr32,.-ChaCha20_ctr32
.type ChaCha20_neon,%function
.align 5
ChaCha20_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -706,7 +701,6 @@ ChaCha20_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.Ltail_neon:
@ -816,13 +810,11 @@ ChaCha20_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.size ChaCha20_neon,.-ChaCha20_neon
.type ChaCha20_512_neon,%function
.align 5
ChaCha20_512_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@ -1986,9 +1978,7 @@ ChaCha20_512_neon:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret
.size ChaCha20_512_neon,.-ChaCha20_512_neon
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -33,8 +33,6 @@
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
@ -203,7 +201,6 @@ aes_hw_set_encrypt_key:
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
@ -237,7 +234,6 @@ aes_hw_set_decrypt_key:
eor x0,x0,x0 // return value
.Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
@ -245,7 +241,6 @@ aes_hw_set_decrypt_key:
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@ -276,7 +271,6 @@ aes_hw_encrypt:
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@ -307,8 +301,6 @@ aes_hw_decrypt:
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
@ -600,8 +592,6 @@ aes_hw_cbc_encrypt:
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
@ -782,4 +772,3 @@ aes_hw_ctr32_encrypt_blocks:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,8 +13,6 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl bn_mul_mont
@ -22,7 +20,6 @@
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
AARCH64_SIGN_LINK_REGISTER
tst x5,#7
b.eq __bn_sqr8x_mont
tst x5,#3
@ -220,14 +217,11 @@ bn_mul_mont:
mov x0,#1
ldp x23,x24,[x29,#48]
ldr x29,[sp],#64
AARCH64_VALIDATE_LINK_REGISTER
ret
.size bn_mul_mont,.-bn_mul_mont
.type __bn_sqr8x_mont,%function
.align 5
__bn_sqr8x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp x1,x2
b.ne __bn_mul4x_mont
.Lsqr8x_mont:
@ -981,16 +975,11 @@ __bn_sqr8x_mont:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
.type __bn_mul4x_mont,%function
.align 5
__bn_mul4x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
// return address.
stp x29,x30,[sp,#-128]!
add x29,sp,#0
stp x19,x20,[sp,#16]
@ -1424,8 +1413,6 @@ __bn_mul4x_mont:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret
.size __bn_mul4x_mont,.-__bn_mul4x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@ -1433,4 +1420,3 @@ __bn_mul4x_mont:
.align 4
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,8 +13,6 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl gcm_init_neon
@ -22,7 +20,6 @@
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
@ -48,7 +45,6 @@ gcm_init_neon:
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
@ -68,7 +64,6 @@ gcm_gmult_neon:
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
@ -343,4 +338,3 @@ gcm_ghash_neon:
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -22,7 +22,6 @@
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
@ -74,7 +73,6 @@ gcm_init_v8:
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
@ -117,7 +115,6 @@ gcm_gmult_v8:
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
@ -249,4 +246,3 @@ gcm_ghash_v8:
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -23,8 +23,6 @@
.type sha1_block_data_order,%function
.align 6
sha1_block_data_order:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
#else
@ -1092,8 +1090,6 @@ sha1_block_data_order:
.type sha1_block_armv8,%function
.align 6
sha1_block_armv8:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
.Lv8_entry:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -1236,4 +1232,3 @@ sha1_block_armv8:
.hidden OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -64,7 +64,6 @@
.type sha256_block_data_order,%function
.align 6
sha256_block_data_order:
AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
@ -75,7 +74,6 @@ sha256_block_data_order:
tst w16,#ARMV8_SHA256
b.ne .Lv8_entry
#endif
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@ -1036,7 +1034,6 @@ sha256_block_data_order:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret
.size sha256_block_data_order,.-sha256_block_data_order
@ -1071,7 +1068,6 @@ sha256_block_data_order:
.align 6
sha256_block_armv8:
.Lv8_entry:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -1214,4 +1210,3 @@ sha256_block_armv8:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -64,7 +64,6 @@
.type sha512_block_data_order,%function
.align 6
sha512_block_data_order:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@ -1025,7 +1024,6 @@ sha512_block_data_order:
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret
.size sha512_block_data_order,.-sha512_block_data_order
@ -1084,4 +1082,3 @@ sha512_block_data_order:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,8 +13,6 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.section .rodata
.type _vpaes_consts,%object
@ -217,7 +215,6 @@ _vpaes_encrypt_core:
.type vpaes_encrypt,%function
.align 4
vpaes_encrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -227,7 +224,6 @@ vpaes_encrypt:
st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_encrypt,.-vpaes_encrypt
@ -456,7 +452,6 @@ _vpaes_decrypt_core:
.type vpaes_decrypt,%function
.align 4
vpaes_decrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@ -466,7 +461,6 @@ vpaes_decrypt:
st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_decrypt,.-vpaes_decrypt
@ -636,7 +630,6 @@ _vpaes_key_preheat:
.type _vpaes_schedule_core,%function
.align 4
_vpaes_schedule_core:
AARCH64_SIGN_LINK_REGISTER
stp x29, x30, [sp,#-16]!
add x29,sp,#0
@ -806,7 +799,6 @@ _vpaes_schedule_core:
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
ldp x29, x30, [sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size _vpaes_schedule_core,.-_vpaes_schedule_core
@ -1020,7 +1012,6 @@ _vpaes_schedule_mangle:
.type vpaes_set_encrypt_key,%function
.align 4
vpaes_set_encrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1036,7 +1027,6 @@ vpaes_set_encrypt_key:
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
@ -1045,7 +1035,6 @@ vpaes_set_encrypt_key:
.type vpaes_set_decrypt_key,%function
.align 4
vpaes_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1065,7 +1054,6 @@ vpaes_set_decrypt_key:
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
.globl vpaes_cbc_encrypt
@ -1073,7 +1061,6 @@ vpaes_set_decrypt_key:
.type vpaes_cbc_encrypt,%function
.align 4
vpaes_cbc_encrypt:
AARCH64_SIGN_LINK_REGISTER
cbz x2, .Lcbc_abort
cmp w5, #0 // check direction
b.eq vpaes_cbc_decrypt
@ -1100,7 +1087,6 @@ vpaes_cbc_encrypt:
st1 {v0.16b}, [x4] // write ivec
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
.Lcbc_abort:
ret
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
@ -1108,8 +1094,6 @@ vpaes_cbc_encrypt:
.type vpaes_cbc_decrypt,%function
.align 4
vpaes_cbc_decrypt:
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
// only from vpaes_cbc_encrypt which has already signed the return address.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1151,7 +1135,6 @@ vpaes_cbc_decrypt:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
.globl vpaes_ctr32_encrypt_blocks
@ -1159,7 +1142,6 @@ vpaes_cbc_decrypt:
.type vpaes_ctr32_encrypt_blocks,%function
.align 4
vpaes_ctr32_encrypt_blocks:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1227,9 +1209,7 @@ vpaes_ctr32_encrypt_blocks:
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,8 +13,6 @@
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ -29,7 +27,6 @@
.align 4
abi_test_trampoline:
.Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
@ -132,7 +129,6 @@ abi_test_trampoline:
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret
.size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_x0, %function
@ -140,7 +136,6 @@ abi_test_trampoline:
.hidden abi_test_clobber_x0
.align 4
abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr
ret
.size abi_test_clobber_x0,.-abi_test_clobber_x0
@ -149,7 +144,6 @@ abi_test_clobber_x0:
.hidden abi_test_clobber_x1
.align 4
abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr
ret
.size abi_test_clobber_x1,.-abi_test_clobber_x1
@ -158,7 +152,6 @@ abi_test_clobber_x1:
.hidden abi_test_clobber_x2
.align 4
abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr
ret
.size abi_test_clobber_x2,.-abi_test_clobber_x2
@ -167,7 +160,6 @@ abi_test_clobber_x2:
.hidden abi_test_clobber_x3
.align 4
abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr
ret
.size abi_test_clobber_x3,.-abi_test_clobber_x3
@ -176,7 +168,6 @@ abi_test_clobber_x3:
.hidden abi_test_clobber_x4
.align 4
abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr
ret
.size abi_test_clobber_x4,.-abi_test_clobber_x4
@ -185,7 +176,6 @@ abi_test_clobber_x4:
.hidden abi_test_clobber_x5
.align 4
abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr
ret
.size abi_test_clobber_x5,.-abi_test_clobber_x5
@ -194,7 +184,6 @@ abi_test_clobber_x5:
.hidden abi_test_clobber_x6
.align 4
abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr
ret
.size abi_test_clobber_x6,.-abi_test_clobber_x6
@ -203,7 +192,6 @@ abi_test_clobber_x6:
.hidden abi_test_clobber_x7
.align 4
abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr
ret
.size abi_test_clobber_x7,.-abi_test_clobber_x7
@ -212,7 +200,6 @@ abi_test_clobber_x7:
.hidden abi_test_clobber_x8
.align 4
abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr
ret
.size abi_test_clobber_x8,.-abi_test_clobber_x8
@ -221,7 +208,6 @@ abi_test_clobber_x8:
.hidden abi_test_clobber_x9
.align 4
abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr
ret
.size abi_test_clobber_x9,.-abi_test_clobber_x9
@ -230,7 +216,6 @@ abi_test_clobber_x9:
.hidden abi_test_clobber_x10
.align 4
abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr
ret
.size abi_test_clobber_x10,.-abi_test_clobber_x10
@ -239,7 +224,6 @@ abi_test_clobber_x10:
.hidden abi_test_clobber_x11
.align 4
abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr
ret
.size abi_test_clobber_x11,.-abi_test_clobber_x11
@ -248,7 +232,6 @@ abi_test_clobber_x11:
.hidden abi_test_clobber_x12
.align 4
abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr
ret
.size abi_test_clobber_x12,.-abi_test_clobber_x12
@ -257,7 +240,6 @@ abi_test_clobber_x12:
.hidden abi_test_clobber_x13
.align 4
abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr
ret
.size abi_test_clobber_x13,.-abi_test_clobber_x13
@ -266,7 +248,6 @@ abi_test_clobber_x13:
.hidden abi_test_clobber_x14
.align 4
abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr
ret
.size abi_test_clobber_x14,.-abi_test_clobber_x14
@ -275,7 +256,6 @@ abi_test_clobber_x14:
.hidden abi_test_clobber_x15
.align 4
abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr
ret
.size abi_test_clobber_x15,.-abi_test_clobber_x15
@ -284,7 +264,6 @@ abi_test_clobber_x15:
.hidden abi_test_clobber_x16
.align 4
abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr
ret
.size abi_test_clobber_x16,.-abi_test_clobber_x16
@ -293,7 +272,6 @@ abi_test_clobber_x16:
.hidden abi_test_clobber_x17
.align 4
abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr
ret
.size abi_test_clobber_x17,.-abi_test_clobber_x17
@ -302,7 +280,6 @@ abi_test_clobber_x17:
.hidden abi_test_clobber_x19
.align 4
abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr
ret
.size abi_test_clobber_x19,.-abi_test_clobber_x19
@ -311,7 +288,6 @@ abi_test_clobber_x19:
.hidden abi_test_clobber_x20
.align 4
abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr
ret
.size abi_test_clobber_x20,.-abi_test_clobber_x20
@ -320,7 +296,6 @@ abi_test_clobber_x20:
.hidden abi_test_clobber_x21
.align 4
abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr
ret
.size abi_test_clobber_x21,.-abi_test_clobber_x21
@ -329,7 +304,6 @@ abi_test_clobber_x21:
.hidden abi_test_clobber_x22
.align 4
abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr
ret
.size abi_test_clobber_x22,.-abi_test_clobber_x22
@ -338,7 +312,6 @@ abi_test_clobber_x22:
.hidden abi_test_clobber_x23
.align 4
abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr
ret
.size abi_test_clobber_x23,.-abi_test_clobber_x23
@ -347,7 +320,6 @@ abi_test_clobber_x23:
.hidden abi_test_clobber_x24
.align 4
abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr
ret
.size abi_test_clobber_x24,.-abi_test_clobber_x24
@ -356,7 +328,6 @@ abi_test_clobber_x24:
.hidden abi_test_clobber_x25
.align 4
abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr
ret
.size abi_test_clobber_x25,.-abi_test_clobber_x25
@ -365,7 +336,6 @@ abi_test_clobber_x25:
.hidden abi_test_clobber_x26
.align 4
abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr
ret
.size abi_test_clobber_x26,.-abi_test_clobber_x26
@ -374,7 +344,6 @@ abi_test_clobber_x26:
.hidden abi_test_clobber_x27
.align 4
abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr
ret
.size abi_test_clobber_x27,.-abi_test_clobber_x27
@ -383,7 +352,6 @@ abi_test_clobber_x27:
.hidden abi_test_clobber_x28
.align 4
abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr
ret
.size abi_test_clobber_x28,.-abi_test_clobber_x28
@ -392,7 +360,6 @@ abi_test_clobber_x28:
.hidden abi_test_clobber_x29
.align 4
abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr
ret
.size abi_test_clobber_x29,.-abi_test_clobber_x29
@ -401,7 +368,6 @@ abi_test_clobber_x29:
.hidden abi_test_clobber_d0
.align 4
abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr
ret
.size abi_test_clobber_d0,.-abi_test_clobber_d0
@ -410,7 +376,6 @@ abi_test_clobber_d0:
.hidden abi_test_clobber_d1
.align 4
abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr
ret
.size abi_test_clobber_d1,.-abi_test_clobber_d1
@ -419,7 +384,6 @@ abi_test_clobber_d1:
.hidden abi_test_clobber_d2
.align 4
abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr
ret
.size abi_test_clobber_d2,.-abi_test_clobber_d2
@ -428,7 +392,6 @@ abi_test_clobber_d2:
.hidden abi_test_clobber_d3
.align 4
abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr
ret
.size abi_test_clobber_d3,.-abi_test_clobber_d3
@ -437,7 +400,6 @@ abi_test_clobber_d3:
.hidden abi_test_clobber_d4
.align 4
abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr
ret
.size abi_test_clobber_d4,.-abi_test_clobber_d4
@ -446,7 +408,6 @@ abi_test_clobber_d4:
.hidden abi_test_clobber_d5
.align 4
abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr
ret
.size abi_test_clobber_d5,.-abi_test_clobber_d5
@ -455,7 +416,6 @@ abi_test_clobber_d5:
.hidden abi_test_clobber_d6
.align 4
abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr
ret
.size abi_test_clobber_d6,.-abi_test_clobber_d6
@ -464,7 +424,6 @@ abi_test_clobber_d6:
.hidden abi_test_clobber_d7
.align 4
abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr
ret
.size abi_test_clobber_d7,.-abi_test_clobber_d7
@ -473,7 +432,6 @@ abi_test_clobber_d7:
.hidden abi_test_clobber_d8
.align 4
abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr
ret
.size abi_test_clobber_d8,.-abi_test_clobber_d8
@ -482,7 +440,6 @@ abi_test_clobber_d8:
.hidden abi_test_clobber_d9
.align 4
abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr
ret
.size abi_test_clobber_d9,.-abi_test_clobber_d9
@ -491,7 +448,6 @@ abi_test_clobber_d9:
.hidden abi_test_clobber_d10
.align 4
abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr
ret
.size abi_test_clobber_d10,.-abi_test_clobber_d10
@ -500,7 +456,6 @@ abi_test_clobber_d10:
.hidden abi_test_clobber_d11
.align 4
abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr
ret
.size abi_test_clobber_d11,.-abi_test_clobber_d11
@ -509,7 +464,6 @@ abi_test_clobber_d11:
.hidden abi_test_clobber_d12
.align 4
abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr
ret
.size abi_test_clobber_d12,.-abi_test_clobber_d12
@ -518,7 +472,6 @@ abi_test_clobber_d12:
.hidden abi_test_clobber_d13
.align 4
abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr
ret
.size abi_test_clobber_d13,.-abi_test_clobber_d13
@ -527,7 +480,6 @@ abi_test_clobber_d13:
.hidden abi_test_clobber_d14
.align 4
abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr
ret
.size abi_test_clobber_d14,.-abi_test_clobber_d14
@ -536,7 +488,6 @@ abi_test_clobber_d14:
.hidden abi_test_clobber_d15
.align 4
abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr
ret
.size abi_test_clobber_d15,.-abi_test_clobber_d15
@ -545,7 +496,6 @@ abi_test_clobber_d15:
.hidden abi_test_clobber_d16
.align 4
abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr
ret
.size abi_test_clobber_d16,.-abi_test_clobber_d16
@ -554,7 +504,6 @@ abi_test_clobber_d16:
.hidden abi_test_clobber_d17
.align 4
abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr
ret
.size abi_test_clobber_d17,.-abi_test_clobber_d17
@ -563,7 +512,6 @@ abi_test_clobber_d17:
.hidden abi_test_clobber_d18
.align 4
abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr
ret
.size abi_test_clobber_d18,.-abi_test_clobber_d18
@ -572,7 +520,6 @@ abi_test_clobber_d18:
.hidden abi_test_clobber_d19
.align 4
abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr
ret
.size abi_test_clobber_d19,.-abi_test_clobber_d19
@ -581,7 +528,6 @@ abi_test_clobber_d19:
.hidden abi_test_clobber_d20
.align 4
abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr
ret
.size abi_test_clobber_d20,.-abi_test_clobber_d20
@ -590,7 +536,6 @@ abi_test_clobber_d20:
.hidden abi_test_clobber_d21
.align 4
abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr
ret
.size abi_test_clobber_d21,.-abi_test_clobber_d21
@ -599,7 +544,6 @@ abi_test_clobber_d21:
.hidden abi_test_clobber_d22
.align 4
abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr
ret
.size abi_test_clobber_d22,.-abi_test_clobber_d22
@ -608,7 +552,6 @@ abi_test_clobber_d22:
.hidden abi_test_clobber_d23
.align 4
abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr
ret
.size abi_test_clobber_d23,.-abi_test_clobber_d23
@ -617,7 +560,6 @@ abi_test_clobber_d23:
.hidden abi_test_clobber_d24
.align 4
abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr
ret
.size abi_test_clobber_d24,.-abi_test_clobber_d24
@ -626,7 +568,6 @@ abi_test_clobber_d24:
.hidden abi_test_clobber_d25
.align 4
abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr
ret
.size abi_test_clobber_d25,.-abi_test_clobber_d25
@ -635,7 +576,6 @@ abi_test_clobber_d25:
.hidden abi_test_clobber_d26
.align 4
abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr
ret
.size abi_test_clobber_d26,.-abi_test_clobber_d26
@ -644,7 +584,6 @@ abi_test_clobber_d26:
.hidden abi_test_clobber_d27
.align 4
abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr
ret
.size abi_test_clobber_d27,.-abi_test_clobber_d27
@ -653,7 +592,6 @@ abi_test_clobber_d27:
.hidden abi_test_clobber_d28
.align 4
abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr
ret
.size abi_test_clobber_d28,.-abi_test_clobber_d28
@ -662,7 +600,6 @@ abi_test_clobber_d28:
.hidden abi_test_clobber_d29
.align 4
abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr
ret
.size abi_test_clobber_d29,.-abi_test_clobber_d29
@ -671,7 +608,6 @@ abi_test_clobber_d29:
.hidden abi_test_clobber_d30
.align 4
abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr
ret
.size abi_test_clobber_d30,.-abi_test_clobber_d30
@ -680,7 +616,6 @@ abi_test_clobber_d30:
.hidden abi_test_clobber_d31
.align 4
abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr
ret
.size abi_test_clobber_d31,.-abi_test_clobber_d31
@ -689,7 +624,6 @@ abi_test_clobber_d31:
.hidden abi_test_clobber_v8_upper
.align 4
abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr
ret
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
@ -698,7 +632,6 @@ abi_test_clobber_v8_upper:
.hidden abi_test_clobber_v9_upper
.align 4
abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr
ret
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
@ -707,7 +640,6 @@ abi_test_clobber_v9_upper:
.hidden abi_test_clobber_v10_upper
.align 4
abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr
ret
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
@ -716,7 +648,6 @@ abi_test_clobber_v10_upper:
.hidden abi_test_clobber_v11_upper
.align 4
abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr
ret
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
@ -725,7 +656,6 @@ abi_test_clobber_v11_upper:
.hidden abi_test_clobber_v12_upper
.align 4
abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr
ret
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
@ -734,7 +664,6 @@ abi_test_clobber_v12_upper:
.hidden abi_test_clobber_v13_upper
.align 4
abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr
ret
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
@ -743,7 +672,6 @@ abi_test_clobber_v13_upper:
.hidden abi_test_clobber_v14_upper
.align 4
abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr
ret
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
@ -752,10 +680,8 @@ abi_test_clobber_v14_upper:
.hidden abi_test_clobber_v15_upper
.align 4
// Writes zero to 64-bit element 1 (the upper half) of v15 and returns; the
// lower 64 bits (d15) are not written. Reads no argument registers.
abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr
ret
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -0,0 +1,998 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section .rodata
// Read-only constants shared by the sike_* routines in this file.
//
// .Lp434x2 holds SIX quads although the routines that use it (sike_fpadd,
// sike_fpsub) operate on seven-word values: they load the table as three
// register pairs and apply the second quad to two consecutive result words.
// Presumably words 1 and 2 of 2*p434 are both 0xFFFFFFFFFFFFFFFF so the
// duplicate is omitted -- confirm against the generating Perl script.
# p434 x 2
.Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
// .Lp434p1 holds four quads; sike_fprdc loads them into x23-x26 and uses
// them as the multiplier in each of its multiply-accumulate rounds.
# p434 + 1
.Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
.text
// sike_mpmul: multi-precision multiplication.
//
// Inputs:  x0 -> A, seven 64-bit words (bytes 0..55 are read)
//          x1 -> B, seven 64-bit words (bytes 0..55 are read)
// Output:  x2 -> fourteen 64-bit words (bytes 0..111 are written)
//
// The product is assembled from the three partial products named in the
// comments below, (AH+AL) x (BH+BL), AL x BL and AH x BH, i.e. a
// Karatsuba-style split where AL/BL are words 0-3 and AH/BH are words 4-6.
//
// Notes for maintainers:
//  * [x2,#0] and [x2,#32] are used as scratch before the final stores, and
//    A and B are re-read from x0/x1 after the first scratch store, so the
//    output buffer presumably must not overlap the inputs -- TODO confirm
//    against the C caller.
//  * x0 and x1 are modified near the end (x0 += 32, x1 holds a carry).
//  * x19-x28 are saved to and restored from a 96-byte frame addressed by x29.
//  * Comments that use symbolic names (A0, A1, B0, C0-C7, T0, T1) appear to
//    be macro-argument names from the generating script rather than
//    physical register names.
.globl sike_mpmul
.hidden sike_mpmul
.align 4
sike_mpmul:
stp x29, x30, [sp,#-96]!
add x29, sp, #0
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
// Load A: x3-x6 = AL (words 0-3), x7-x9 = AH (words 4-6)
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
// Load B: x10-x13 = BL (words 0-3), x14-x16 = BH (words 4-6)
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
ldp x14, x15, [x1,#32]
ldr x16, [x1,#48]
// x3-x6 <- AH + AL, x7 <- carry
adds x3, x3, x7
adcs x4, x4, x8
adcs x5, x5, x9
adcs x6, x6, xzr
adc x7, xzr, xzr
// x10-x13 <- BH + BL, x8 <- carry
adds x10, x10, x14
adcs x11, x11, x15
adcs x12, x12, x16
adcs x13, x13, xzr
adc x8, xzr, xzr
// x9 <- combined carry
and x9, x7, x8
// x7-x8 <- mask (0 - carry: all-ones when the carry was 1, else zero)
sub x7, xzr, x7
sub x8, xzr, x8
// x14-x17 <- (BH + BL) masked with x7 (the carry mask of AH + AL)
and x14, x10, x7
and x15, x11, x7
and x16, x12, x7
and x17, x13, x7
// x20-x23 <- (AH + AL) masked with x8 (the carry mask of BH + BL)
and x20, x3, x8
and x21, x4, x8
and x22, x5, x8
and x23, x6, x8
// x14-x17, x7 <- masked (AH+AL) + masked (BH+BL), step 1
adds x14, x14, x20
adcs x15, x15, x21
adcs x16, x16, x22
adcs x17, x17, x23
adc x7, x9, xzr
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
// (x3,x4 are parked at [x2,#0] because the block below overwrites them)
stp x3, x4, [x2,#0]
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x25, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x23, x10, x12
adcs x26, x11, x13
adc x24, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x19, xzr, x25
sub x20, xzr, x24
and x8, x23, x19
and x9, x26, x19
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x21, x3, x20
and x22, x4, x20
mul x19, x3, x23
mul x20, x3, x26
and x25, x25, x24
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x8, x21, x8
umulh x21, x3, x26
adcs x9, x22, x9
umulh x22, x3, x23
adc x25, x25, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x23
umulh x23, x4, x23
adds x20, x20, x22
adc x21, x21, xzr
mul x24, x4, x26
umulh x26, x4, x26
adds x20, x20, x3
adcs x21, x21, x23
adc x22, xzr, xzr
adds x21, x21, x24
adc x22, x22, x26
// Reload the x3,x4 values parked at [x2,#0] above
ldp x3, x4, [x2,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x21, x8, x21
umulh x24, x3, x10
umulh x26, x3, x11
adcs x22, x9, x22
mul x8, x3, x10
mul x9, x3, x11
adc x25, x25, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x9, x9, x24
adc x26, x26, xzr
mul x23, x4, x11
umulh x11, x4, x11
adds x9, x9, x3
adcs x26, x26, x10
adc x24, xzr, xzr
adds x26, x26, x23
adc x24, x24, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x19, x19, x8
sbcs x20, x20, x9
sbcs x21, x21, x26
mul x4, x5, x13
umulh x23, x5, x13
sbcs x22, x22, x24
sbc x25, x25, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x23, x23, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x23, x23, x12
adc x10, xzr, xzr
adds x23, x23, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x19, x19, x3
sbcs x20, x20, x4
sbcs x21, x21, x23
sbcs x22, x22, x10
sbc x25, x25, xzr
adds x19, x19, x26
adcs x20, x20, x24
adcs x21, x21, x3
adcs x22, x22, x4
adcs x23, x25, x23
adc x24, x10, xzr
// x14-x17, x7 <- (AH+AL) x (BH+BL), final step
adds x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adc x7, x7, xzr
// Load AL
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
// Load BL
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
// Temporarily store x8,x9 at [x2,#0]
stp x8, x9, [x2,#0]
// x21-x28 <- AL x BL
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x8, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x27, x10, x12
adcs x9, x11, x13
adc x28, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x23, xzr, x8
sub x24, xzr, x28
and x21, x27, x23
and x22, x9, x23
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x25, x3, x24
and x26, x4, x24
mul x23, x3, x27
mul x24, x3, x9
and x8, x8, x28
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x21, x25, x21
umulh x25, x3, x9
adcs x22, x26, x22
umulh x26, x3, x27
adc x8, x8, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x27
umulh x27, x4, x27
adds x24, x24, x26
adc x25, x25, xzr
mul x28, x4, x9
umulh x9, x4, x9
adds x24, x24, x3
adcs x25, x25, x27
adc x26, xzr, xzr
adds x25, x25, x28
adc x26, x26, x9
// Reload words 0-1 of A (x3,x4 were overwritten above)
ldp x3, x4, [x0,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x25, x21, x25
umulh x28, x3, x10
umulh x9, x3, x11
adcs x26, x22, x26
mul x21, x3, x10
mul x22, x3, x11
adc x8, x8, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x22, x22, x28
adc x9, x9, xzr
mul x27, x4, x11
umulh x11, x4, x11
adds x22, x22, x3
adcs x9, x9, x10
adc x28, xzr, xzr
adds x9, x9, x27
adc x28, x28, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x23, x23, x21
sbcs x24, x24, x22
sbcs x25, x25, x9
mul x4, x5, x13
umulh x27, x5, x13
sbcs x26, x26, x28
sbc x8, x8, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x27, x27, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x27, x27, x12
adc x10, xzr, xzr
adds x27, x27, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x23, x23, x3
sbcs x24, x24, x4
sbcs x25, x25, x27
sbcs x26, x26, x10
sbc x8, x8, xzr
adds x23, x23, x9
adcs x24, x24, x28
adcs x25, x25, x3
adcs x26, x26, x4
adcs x27, x8, x27
adc x28, x10, xzr
// Restore x8,x9 from [x2,#0]
ldp x8, x9, [x2,#0]
// x8-x9,x19,x20,x14-x17,x7 <- (AH+AL) x (BH+BL) - ALxBL
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, x27
sbcs x17, x17, x28
sbc x7, x7, xzr
// Store ALxBL, low
stp x21, x22, [x2]
stp x23, x24, [x2,#16]
// Load AH
ldp x3, x4, [x0,#32]
ldr x5, [x0,#48]
// Load BH
ldp x10, x11, [x1,#32]
ldr x12, [x1,#48]
// Add ALxBL, high (x25-x28); x1 <- carry out (x1 is no longer a pointer)
adds x8, x8, x25
adcs x9, x9, x26
adcs x19, x19, x27
adcs x20, x20, x28
adc x1, xzr, xzr
add x0, x0, #32
// Temporarily store x8,x9 at [x2,#32]
stp x8,x9, [x2,#32]
// x21-x28 <- AH x BH
// A0 * B0
mul x21, x3, x10 // C0
umulh x24, x3, x10
// A0 * B1
mul x22, x3, x11
umulh x23, x3, x11
// A1 * B0
mul x8, x4, x10
umulh x9, x4, x10
adds x22, x22, x24
adc x23, x23, xzr
// A0 * B2
mul x27, x3, x12
umulh x28, x3, x12
adds x22, x22, x8 // C1
adcs x23, x23, x9
adc x24, xzr, xzr
// A2 * B0
mul x8, x5, x10
umulh x25, x5, x10
adds x23, x23, x27
adcs x24, x24, x25
adc x25, xzr, xzr
// A1 * B1
mul x27, x4, x11
umulh x9, x4, x11
adds x23, x23, x8
adcs x24, x24, x28
adc x25, x25, xzr
// A1 * B2
mul x8, x4, x12
umulh x28, x4, x12
adds x23, x23, x27 // C2
adcs x24, x24, x9
adc x25, x25, xzr
// A2 * B1
mul x27, x5, x11
umulh x9, x5, x11
adds x24, x24, x8
adcs x25, x25, x28
adc x26, xzr, xzr
// A2 * B2
mul x8, x5, x12
umulh x28, x5, x12
adds x24, x24, x27 // C3
adcs x25, x25, x9
adc x26, x26, xzr
adds x25, x25, x8 // C4
adc x26, x26, x28 // C5
// Restore x8,x9 from [x2,#32]
ldp x8,x9, [x2,#32]
// x1 <- 0 - carry saved above; the "adds x1, x1, #1" further down turns
// it back into the carry flag for the final addition chain.
neg x1, x1
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, xzr
sbcs x17, x17, xzr
sbc x7, x7, xzr
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
stp x8, x9, [x2,#32]
stp x19, x20, [x2,#48]
// Re-create the saved carry (x1 is 0 or all-ones; adding 1 sets C only
// in the all-ones case), then add AHxBH into the upper words.
adds x1, x1, #1
adcs x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adcs x25, x7, x25
adc x26, x26, xzr
stp x14, x15, [x2,#64]
stp x16, x17, [x2,#80]
stp x25, x26, [x2,#96]
// Restore callee-saved registers and return
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
// sike_fprdc: folds a fourteen-word value down to seven words.
//
// Inputs:  x0 -> fourteen 64-bit words (bytes 0..0x6f are read)
// Output:  x1 -> seven 64-bit words (bytes 0..0x37 are written)
//
// The routine runs four rounds. Each round multiplies one or two low words
// by the four-quad constant .Lp434p1 (held in x23-x26) and adds the product
// into the words three positions higher. The words stored to [x1] are what
// remains in the upper registers after the last round.
// Presumably this is a Montgomery-style reduction modulo p434 -- TODO
// confirm against the C caller / generating script.
//
// x0 is overwritten as a scratch register from the second round onward.
// x19-x28 are saved to and restored from a 96-byte frame addressed by x29.
.globl sike_fprdc
.hidden sike_fprdc
.align 4
sike_fprdc:
stp x29, x30, [sp, #-96]!
add x29, sp, xzr
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x2, x3, [x0,#0] // a[0-1]
// Load the prime constant
adrp x26, .Lp434p1
add x26, x26, :lo12:.Lp434p1
ldp x23, x24, [x26, #0x0]
// x26 is both the base and the second destination here; the address is
// consumed before x26 is overwritten with the fourth quad.
ldp x25, x26, [x26,#0x10]
// a[0-1] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x10, x3, x23
umulh x11, x3, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x10 // C1
adcs x6, x6, x11
adc x7, xzr, xzr
mul x10, x3, x24
umulh x11, x3, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x10 // C2
adcs x7, x7, x11
adc x8, x8, xzr
mul x10, x3, x25
umulh x11, x3, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x3, x26
umulh x28, x3, x26
adds x7, x7, x10 // C3
adcs x8, x8, x11
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
// Load a[3..13] and add the six-word product C0-C5 starting at a[3]
ldp x10, x11, [x0, #0x18]
ldp x12, x13, [x0, #0x28]
ldp x14, x15, [x0, #0x38]
ldp x16, x17, [x0, #0x48]
ldp x19, x20, [x0, #0x58]
ldr x21, [x0, #0x68]
adds x10, x10, x4
adcs x11, x11, x5
adcs x12, x12, x6
adcs x13, x13, x7
adcs x14, x14, x8
adcs x15, x15, x9
adcs x22, x16, xzr
adcs x17, x17, xzr
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
ldr x2, [x0,#0x10] // a[2]
// a[2-3] * p434+1
// (the second multiplicand is x10, i.e. a[3] after the addition above;
// x0 is used as scratch from here on)
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x0, x10, x23
umulh x3, x10, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x0 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x0, x10, x24
umulh x3, x10, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x0 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x0, x10, x25
umulh x3, x10, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x10, x26
umulh x28, x10, x26
adds x7, x7, x0 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
// Add the product starting at x12 (two words higher than the last round)
adds x12, x12, x4
adcs x13, x13, x5
adcs x14, x14, x6
adcs x15, x15, x7
adcs x16, x22, x8
adcs x17, x17, x9
adcs x22, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
// Third round: (x11, x12) * p434+1
mul x4, x11, x23 // C0
umulh x7, x11, x23
mul x5, x11, x24
umulh x6, x11, x24
mul x10, x12, x23
umulh x3, x12, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x11, x25
umulh x28, x11, x25
adds x5, x5, x10 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x10, x12, x24
umulh x3, x12, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x11, x26
umulh x28, x11, x26
adds x6, x6, x10 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x10, x12, x25
umulh x3, x12, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x12, x26
umulh x28, x12, x26
adds x7, x7, x10 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
// Add the product starting at x14
adds x14, x14, x4
adcs x15, x15, x5
adcs x16, x16, x6
adcs x17, x17, x7
adcs x19, x22, x8
adcs x20, x20, x9
adc x22, x21, xzr
// Output words 0 and 1 are final at this point
stp x14, x15, [x1, #0x0] // C0, C1
// Fourth round: single word x13 * p434+1
mul x4, x13, x23 // C0
umulh x10, x13, x23
mul x5, x13, x24
umulh x27, x13, x24
adds x5, x5, x10 // C1
adc x10, xzr, xzr
mul x6, x13, x25
umulh x28, x13, x25
adds x27, x10, x27
adcs x6, x6, x27 // C2
adc x10, xzr, xzr
mul x7, x13, x26
umulh x8, x13, x26
adds x28, x10, x28
adcs x7, x7, x28 // C3
adc x8, x8, xzr // C4
// Add the five-word product starting at x16 and store output words 2-6
adds x16, x16, x4
adcs x17, x17, x5
adcs x19, x19, x6
adcs x20, x20, x7
adc x21, x22, x8
str x16, [x1, #0x10]
stp x17, x19, [x1, #0x18]
stp x20, x21, [x1, #0x28]
// Restore callee-saved registers and return
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
// sike_fpadd: seven-word addition with a conditional correction.
//
// Inputs:  x0 -> a (seven 64-bit words), x1 -> b (seven 64-bit words)
// Output:  x2 -> seven 64-bit words
//
// Computes a + b, subtracts the .Lp434x2 constant, and if that subtraction
// borrowed adds the constant back. The add-back is done with a mask rather
// than a branch, so the instruction sequence is the same in both cases.
// x0 is overwritten with the borrow mask.
.globl sike_fpadd
.hidden sike_fpadd
.align 4
sike_fpadd:
stp x29,x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
// Subtract 2xp434
// The table holds six quads for a seven-word value: x12 is deliberately
// used for both word 1 and word 2 below (this is not a typo for x13).
adrp x17, .Lp434x2
add x17, x17, :lo12:.Lp434x2
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14
sbcs x8, x8, x15
sbcs x9, x9, x16
// x0 <- all-ones if the subtraction borrowed, zero otherwise
sbc x0, xzr, xzr // x0 can be reused now
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
// sike_fpsub: seven-word subtraction with a conditional correction.
//
// Inputs:  x0 -> a (seven 64-bit words), x1 -> b (seven 64-bit words)
// Output:  x2 -> seven 64-bit words
//
// Computes a - b and, if the subtraction borrowed, adds the .Lp434x2
// constant. The addition is done with a mask rather than a branch.
// x0 is overwritten with the borrow mask.
.globl sike_fpsub
.hidden sike_fpsub
.align 4
sike_fpsub:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
// x0 <- all-ones if the subtraction borrowed, zero otherwise
sbc x0, xzr, xzr
// Add 2xp434 anded with the mask in x0
adrp x17, .Lp434x2
add x17, x17, :lo12:.Lp434x2
// Load all six quads of the table
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
// The table holds six quads for a seven-word value: x12 is deliberately
// used for both word 1 and word 2 below (this is not a typo for x13).
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
// sike_mpadd_asm: plain seven-word addition.
//
// Inputs:  x0 -> a (seven 64-bit words), x1 -> b (seven 64-bit words)
// Output:  x2 -> seven 64-bit words holding a + b
//
// No correction step is applied, and the carry out of the top word is
// discarded (the last instruction of the chain is adc, not adcs).
.globl sike_mpadd_asm
.hidden sike_mpadd_asm
.align 4
sike_mpadd_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
// sike_mpsubx2_asm: fourteen-word subtraction.
//
// Inputs:  x0 -> a (fourteen 64-bit words), x1 -> b (fourteen 64-bit words)
// Output:  x2 -> fourteen 64-bit words holding a - b
//          x0 <- all-ones if the subtraction borrowed out of the top word,
//                zero otherwise (set immediately before the final stores)
//
// The borrow is carried in the flags across the whole chain; the loads and
// stores interleaved between the sbcs groups do not modify the flags.
.globl sike_mpsubx2_asm
.hidden sike_mpsubx2_asm
.align 4
sike_mpsubx2_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
// Words 0-3
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
// Words 4-7
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x11, x12, [x1,#32]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbcs x9, x9, x13
sbcs x10, x10, x14
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
// Words 8-11
ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
// Words 12-13
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x7, x7, x11
sbcs x8, x8, x12
// x0 <- borrow mask (x0 is no longer needed as a pointer)
sbc x0, xzr, xzr
stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
ldp x29, x30, [sp],#16
ret
// sike_mpdblsubx2_asm: in-place double subtraction over fourteen words.
//
// Inputs:  x0 -> a, x1 -> b, x2 -> c (each fourteen 64-bit words)
// Output:  [x2] <- c - a - b, written back over c
//
// The work is split into chunks of 6, 6 and 2 words. Within each chunk a
// is subtracted first and then b. After each of the two subtraction
// chains the carry flag (1 = no borrow) is added into x9, so x9 counts the
// chains that did NOT borrow and (2 - x9) is the number of borrows to
// apply to the next chunk.
.globl sike_mpdblsubx2_asm
.hidden sike_mpdblsubx2_asm
.align 4
sike_mpdblsubx2_asm:
stp x29, x30, [sp, #-16]!
add x29, sp, #0
// Chunk 1: words 0-5 of c, minus words 0-5 of a
ldp x3, x4, [x2, #0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x11, x12, [x0, #0]
ldp x13, x14, [x0,#16]
ldp x15, x16, [x0,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
// x9 stores carry
adc x9, xzr, xzr
// ... minus words 0-5 of b
ldp x11, x12, [x1, #0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2, #0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
// Chunk 2: words 6-11
ldp x3, x4, [x2,#48]
ldp x5, x6, [x2,#64]
ldp x7, x8, [x2,#80]
ldp x11, x12, [x0,#48]
ldp x13, x14, [x0,#64]
ldp x15, x16, [x0,#80]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
// NOTE(review): the flag produced by this subs is consumed by the sbcs on
// the SAME word (x3) on the next line, not by the next-higher word. If
// x3 < x9 (x3 is 0 or 1 with a pending borrow) the result looks off by
// one with the borrow not propagated -- confirm against the reference C
// implementation before relying on this for such inputs.
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, xzr, xzr
ldp x11, x12, [x1,#48]
ldp x13, x14, [x1,#64]
ldp x15, x16, [x1,#80]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2,#48]
stp x5, x6, [x2,#64]
stp x7, x8, [x2,#80]
// Chunk 3: words 12-13 (the borrow out of the top word is discarded)
ldp x3, x4, [x2,#96]
ldp x11, x12, [x0,#96]
ldp x13, x14, [x1,#96]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
// Same subs/sbcs pattern on x3 as in chunk 2; see the review note there.
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
subs x3, x3, x13
sbc x4, x4, x14
stp x3, x4, [x2,#96]
ldp x29, x30, [sp],#16
ret
#endif
#endif // !OPENSSL_NO_ASM

View File

@ -1490,4 +1490,3 @@ ChaCha20_neon:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

View File

@ -778,4 +778,3 @@ aes_hw_ctr32_encrypt_blocks:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -974,4 +974,3 @@ bn_mul8x_mont_neon:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1526,4 +1526,3 @@ bsaes_ctr32_encrypt_blocks:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -31,6 +31,342 @@
#else
.code 32
#endif
@ rem_4bit: table of sixteen 16-bit constants, 32 bytes in total and
@ 32-byte aligned. The GHASH routines below index it with twice a 4-bit
@ value (ldrh with a doubled "rem" index) and XOR the entry, shifted left
@ by 16, into the top word of the accumulator.
@ The code that follows locates this table by fixed pc-relative offsets,
@ so its size and its position directly before rem_4bit_get must not change.
.type rem_4bit,%object
.align 5
rem_4bit:
.short 0x0000,0x1C20,0x3840,0x2460
.short 0x7080,0x6CA0,0x48C0,0x54E0
.short 0xE100,0xFD20,0xD940,0xC560
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
.size rem_4bit,.-rem_4bit
@ rem_4bit_get: places the address of rem_4bit in r2 and branches to
@ .Lrem_4bit_got (inside gcm_gmult_4bit). It is entered with a plain "b",
@ not a call, and does not return through lr.
@ In the non-Thumb-2 path the address is computed as pc - (8 + 32): pc
@ reads as the current instruction + 8 and the 32-byte table sits
@ immediately before this code. The two nops pad this stub to four
@ instructions; gcm_ghash_4bit uses pc - (8 + 48), which relies on that
@ padding.
.type rem_4bit_get,%function
rem_4bit_get:
#if defined(__thumb2__)
adr r2,rem_4bit
#else
sub r2,pc,#8+32 @ &rem_4bit
#endif
b .Lrem_4bit_got
nop
nop
.size rem_4bit_get,.-rem_4bit_get
@ gcm_ghash_4bit: table-driven GHASH update over a run of 16-byte blocks.
@
@ Register use on entry, as read from the code:
@   r0 -> 16-byte accumulator; read at the start of every block and
@         rewritten at the end of every block
@   r1 -> table of 16-byte entries indexed by nibble (the existing comments
@         call it Htbl); entries are loaded four words at a time
@   r2 -> input data; advanced by 16 per outer iteration
@   r3  = input length in bytes; converted to an end pointer (r2 + r3)
@
@ The outer loop body runs before the end-pointer comparison, so at least
@ one block is always processed; callers presumably guarantee a non-zero
@ length that is a multiple of 16 -- TODO confirm.
@
@ Stack layout while running: a 32-byte copy of rem_4bit at [sp], the saved
@ end pointer at [sp,#32], then the saved r4-r11 and lr.
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,%function
.align 4
gcm_ghash_4bit:
#if defined(__thumb2__)
adr r12,rem_4bit
#else
@ pc - (8 + 48): 32 bytes of table plus the 16-byte rem_4bit_get stub lie
@ between rem_4bit and this instruction.
sub r12,pc,#8+48 @ &rem_4bit
#endif
add r3,r2,r3 @ r3 to point at the end
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
ldrb r12,[r2,#15]
ldrb r14,[r0,#15]
@ One outer iteration per 16-byte block. Byte 15 of input XOR accumulator
@ is handled here; bytes 14 down to 0 are handled in .Linner (r3 is reused
@ as the byte index, counting down from 14).
.Louter:
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
add r11,r1,r14
ldrb r12,[r2,#14]
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[sp,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
ldrb r14,[r0,#14]
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
eor r7,r7,r8,lsl#16
@ Each pass handles both nibbles of one byte. "subs" sets the flags used
@ by every "pl"-conditional instruction and by the closing bpl.
.Linner:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[sp,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r2,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r8,[r0,r3]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r9,[sp,r14]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
#ifdef __thumb2__
it pl
#endif
eorpl r12,r12,r8
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
bpl .Linner
ldr r3,[sp,#32] @ re-load r3/end
add r2,r2,#16
mov r14,r4
@ Write r4-r7 back to the accumulator with the most significant byte of
@ each word at the lowest address: rev + str on little-endian ARMv7+,
@ plain str on big-endian, byte-by-byte otherwise. The end-of-input
@ compare and the conditional load of the next input byte are interleaved
@ with the stores.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
cmp r2,r3
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#ifdef __thumb2__
it ne
#endif
ldrneb r12,[r2,#15]
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
bne .Louter
@ Drop the 32-byte rem_4bit copy plus the saved end pointer (4 bytes)
add sp,sp,#36
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size gcm_ghash_4bit,.-gcm_ghash_4bit
@ gcm_gmult_4bit: single-block variant of the routine above; it transforms
@ the 16-byte accumulator in place without mixing in any input data.
@
@ Register use on entry, as read from the code:
@   r0 -> 16-byte accumulator, read and then rewritten
@   r1 -> table of 16-byte entries indexed by nibble (Htbl in the comments)
@
@ rem_4bit is read directly through r2 (no stack copy). r2 is obtained by
@ branching to rem_4bit_get, which branches back to .Lrem_4bit_got.
.globl gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type gcm_gmult_4bit,%function
gcm_gmult_4bit:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldrb r12,[r0,#15]
b rem_4bit_get
.Lrem_4bit_got:
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
ldrb r12,[r0,#14]
add r11,r1,r14
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
and r14,r12,#0xf0
eor r7,r7,r8,lsl#16
and r12,r12,#0x0f
@ Each pass handles both nibbles of one accumulator byte; r3 is the byte
@ index, counting down from 14, and "subs" sets the flags for the
@ "pl"-conditional instructions and the closing bpl.
.Loop:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[r2,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r0,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
bpl .Loop
@ Write r4-r7 back to the accumulator with the most significant byte of
@ each word at the lowest address: rev + str on little-endian ARMv7+,
@ plain str on big-endian, byte-by-byte otherwise.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size gcm_gmult_4bit,.-gcm_gmult_4bit
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
@ -252,4 +588,3 @@ gcm_ghash_neon:
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -250,4 +250,3 @@ gcm_ghash_v8:
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1508,4 +1508,3 @@ sha1_block_data_order_armv8:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -2836,4 +2836,3 @@ sha256_block_data_order_armv8:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1891,4 +1891,3 @@ sha512_block_data_order_neon:
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -31,6 +31,7 @@
.hidden abi_test_trampoline
.align 4
abi_test_trampoline:
.Labi_test_trampoline_begin:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
@ -376,4 +377,3 @@ abi_test_clobber_d15:
.size abi_test_clobber_d15,.-abi_test_clobber_d15
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -3667,4 +3667,3 @@ _aesp8_xts_dec5x:
.long 0
.byte 0,12,0x14,0,0,0,0,0
#endif // !OPENSSL_NO_ASM && __powerpc64__
.section .note.GNU-stack,"",@progbits

View File

@ -584,4 +584,3 @@ gcm_ghash_p8:
.align 2
.align 2
#endif // !OPENSSL_NO_ASM && __powerpc64__
.section .note.GNU-stack,"",@progbits

View File

@ -972,4 +972,3 @@ ChaCha20_ssse3:
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#endif
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
@ -14,7 +14,7 @@
.align 16
aes_hw_encrypt:
.L_aes_hw_encrypt_begin:
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
pushl %ebx
pushl %edx
call .L000pic
@ -845,7 +845,7 @@ aes_hw_ctr32_encrypt_blocks:
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
pushl %ebx
pushl %edx
call .L038pic
@ -2440,7 +2440,7 @@ _aesni_set_encrypt_key:
.align 16
aes_hw_set_encrypt_key:
.L_aes_hw_set_encrypt_key_begin:
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
pushl %ebx
pushl %edx
call .L116pic
@ -2510,4 +2510,3 @@ aes_hw_set_decrypt_key:
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -993,5 +993,551 @@ bn_sub_words:
popl %ebp
ret
.size bn_sub_words,.-.L_bn_sub_words_begin
/*
 * bn_sub_part_words: word-array subtraction for operands of unequal length.
 *
 * Stack arguments as read after the four pushes (cdecl layout presumed):
 *   20(%esp) -> result array        (kept in %ebx)
 *   24(%esp) -> first operand, a    (kept in %esi)
 *   28(%esp) -> second operand, b   (kept in %edi)
 *   32(%esp)  = number of words present in both operands
 *   36(%esp)  = signed count of extra words: negative means b has the
 *               extra words, positive means a has them, zero means none
 *
 * Phase 1 computes r[i] = a[i] - b[i] - borrow over the common words.
 * Phase 2 handles the extra words:
 *   negative count: r[i] = 0 - b[i] - borrow
 *   positive count: r[i] = a[i] - borrow, falling through to a plain copy
 *                   of a[i] as soon as a subtraction produces no borrow
 * Both phases are unrolled eight words per loop pass plus a tail of up to
 * seven words. %eax carries the borrow (0 or 1) between words and holds it
 * on return.
 */
.globl bn_sub_part_words
.hidden bn_sub_part_words
.type bn_sub_part_words,@function
.align 16
bn_sub_part_words:
.L_bn_sub_part_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
/* Phase 1, main loop: common count rounded down to a multiple of 8 */
andl $4294967288,%ebp
jz .L029aw_finish
.L030aw_loop:
/* Per word: subtract the incoming borrow, capture that borrow in %eax,
   subtract b[i], then add the second borrow into %eax. */
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl 28(%esi),%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L030aw_loop
/* Phase 1, tail: remaining (count & 7) common words, one at a time.
   Unlike the loop, the pointers advance by 4 after every word here. */
.L029aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
/* Phase 2 dispatch on the signed extra-word count at 36(%esp) */
.L031aw_end:
cmpl $0,36(%esp)
je .L032pw_end
movl 36(%esp),%ebp
cmpl $0,%ebp
je .L032pw_end
jge .L033pw_pos
/* Negative count: %ebp <- -count, then r[i] = 0 - b[i] - borrow */
movl $0,%edx
subl %ebp,%edx
movl %edx,%ebp
andl $4294967288,%ebp
jz .L034pw_neg_finish
.L035pw_neg_loop:
movl $0,%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl $0,%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl $0,%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl $0,%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl $0,%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl $0,%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl $0,%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl $0,%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L035pw_neg_loop
/* Negative count, tail: (-count) & 7 words. The pointers are not
   advanced here; each word uses a fixed offset instead. */
.L034pw_neg_finish:
movl 36(%esp),%edx
movl $0,%ebp
subl %edx,%ebp
andl $7,%ebp
jz .L032pw_end
movl $0,%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
jmp .L032pw_end
/* Positive count: r[i] = a[i] - borrow. %eax is left unchanged while the
   borrow persists; the first word whose subtraction does not set the
   carry flag jumps into the copy-only path below (jnc tests the flag from
   subl, which the intervening movl does not modify). */
.L033pw_pos:
andl $4294967288,%ebp
jz .L036pw_pos_finish
.L037pw_pos_loop:
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
jnc .L038pw_nc0
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
jnc .L039pw_nc1
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
jnc .L040pw_nc2
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
jnc .L041pw_nc3
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
jnc .L042pw_nc4
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
jnc .L043pw_nc5
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
jnc .L044pw_nc6
movl 28(%esi),%ecx
subl %eax,%ecx
movl %ecx,28(%ebx)
jnc .L045pw_nc7
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
jnz .L037pw_pos_loop
/* Positive count, tail: count & 7 words, still with a pending borrow */
.L036pw_pos_finish:
movl 36(%esp),%ebp
andl $7,%ebp
jz .L032pw_end
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
jnc .L046pw_tail_nc0
decl %ebp
jz .L032pw_end
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
jnc .L047pw_tail_nc1
decl %ebp
jz .L032pw_end
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
jnc .L048pw_tail_nc2
decl %ebp
jz .L032pw_end
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
jnc .L049pw_tail_nc3
decl %ebp
jz .L032pw_end
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
jnc .L050pw_tail_nc4
decl %ebp
jz .L032pw_end
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
jnc .L051pw_tail_nc5
decl %ebp
jz .L032pw_end
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
jnc .L052pw_tail_nc6
/* The borrow survived the last extra word: report a borrow of 1 */
movl $1,%eax
jmp .L032pw_end
/* Copy-only path: no borrow is pending, so the remaining words of a are
   copied unchanged. The .L0xxpw_nc* labels are entry points from the
   subtracting loop above, one per word position. */
.L053pw_nc_loop:
movl (%esi),%ecx
movl %ecx,(%ebx)
.L038pw_nc0:
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
.L039pw_nc1:
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
.L040pw_nc2:
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
.L041pw_nc3:
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
.L042pw_nc4:
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
.L043pw_nc5:
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
.L044pw_nc6:
movl 28(%esi),%ecx
movl %ecx,28(%ebx)
.L045pw_nc7:
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
jnz .L053pw_nc_loop
/* Copy-only tail; the .L0xxpw_tail_nc* labels are entry points from the
   subtracting tail above. */
movl 36(%esp),%ebp
andl $7,%ebp
jz .L054pw_nc_end
movl (%esi),%ecx
movl %ecx,(%ebx)
.L046pw_tail_nc0:
decl %ebp
jz .L054pw_nc_end
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
.L047pw_tail_nc1:
decl %ebp
jz .L054pw_nc_end
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
.L048pw_tail_nc2:
decl %ebp
jz .L054pw_nc_end
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
.L049pw_tail_nc3:
decl %ebp
jz .L054pw_nc_end
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
.L050pw_tail_nc4:
decl %ebp
jz .L054pw_nc_end
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
.L051pw_tail_nc5:
decl %ebp
jz .L054pw_nc_end
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
.L052pw_tail_nc6:
/* The copy-only path always finishes with no borrow */
.L054pw_nc_end:
movl $0,%eax
/* Common exit: %eax holds the final borrow */
.L032pw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1263,4 +1263,3 @@ bn_sqr_comba4:
ret
.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -291,4 +291,3 @@ gcm_ghash_ssse3:
.Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -6,6 +6,711 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
/*
 * gcm_gmult_4bit_mmx -- 32-bit x86 routine using MMX registers, with its two
 * arguments passed on the stack.
 *
 *   arg0 (20(%esp) after the four pushes) -> %edi
 *        Pointer to a 16-byte block.  It is consumed one byte at a time from
 *        offset 15 down to offset 0 and overwritten with the 16-byte result.
 *   arg1 (24(%esp) after the four pushes) -> %esi
 *        Pointer to a table addressed as nibble*16, with an 8-byte half at
 *        offset 0 and another at offset 8 of every entry (16 entries).
 *
 * Presumably this is the GHASH "Xi = Xi * H" step with arg0 = Xi and
 * arg1 = Htable -- TODO confirm against the C prototype.
 *
 * Register roles inside the body:
 *   %mm1:%mm0  128-bit accumulator (%mm1 is stored to bytes 0..7, %mm0 to
 *              bytes 8..15, both byte-swapped)
 *   %mm2       bits carried from %mm1 into %mm0 on each 4-bit right shift
 *   %eax       address of .Lrem_4bit
 *   %ebx       low 4 bits of the accumulator before the shift; index into
 *              .Lrem_4bit
 *   %ecx/%edx  low nibble * 16 / high nibble * 16 of the current input byte
 *   %ebp       index of the next input byte (14 down to 0)
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
 * %mm0-%mm2 are clobbered.  emms is executed before returning.
 */
.globl gcm_gmult_4bit_mmx
.hidden gcm_gmult_4bit_mmx
.type gcm_gmult_4bit_mmx,@function
.align 16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
/* Position-independent address of .Lrem_4bit: call/pop yields the address of
   the local label, leal then adds the label-to-table distance. */
call .L000pic_point
.L000pic_point:
popl %eax
leal .Lrem_4bit-.L000pic_point(%eax),%eax
/* Start with the byte at offset 15: %ecx = low nibble * 16,
   %edx = high nibble * 16.  The accumulator is seeded from the table entry
   selected by the low nibble. */
movzbl 15(%edi),%ebx
xorl %ecx,%ecx
movl %ebx,%edx
movb %dl,%cl
movl $14,%ebp
shlb $4,%cl
andl $240,%edx
movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1
movd %mm0,%ebx
jmp .L001mmx_loop
.align 16
/* Each pass handles two nibbles.  First half: shift the accumulator right by
   4, fold in .Lrem_4bit[bits shifted out] and the table entry for the pending
   high nibble (%edx), and fetch the next input byte.  Second half: the same
   step for the low nibble (%ecx) of the byte just fetched. */
.L001mmx_loop:
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb (%edi,%ebp,1),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
decl %ebp
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
pxor %mm2,%mm0
/* The sign flag tested here still comes from the decl above; the MMX and mov
   instructions in between do not modify flags.  Taken once the byte at
   offset 0 has been fetched. */
js .L002mmx_break
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
jmp .L001mmx_loop
.align 16
/* Tail: the two nibbles of the byte at offset 0 (low nibble via %ecx, then
   high nibble via %edx). */
.L002mmx_break:
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
pxor %mm2,%mm0
/* Split the accumulator into four dwords, byte-swap each and write the
   result back over the input block.  %ebx already holds the low dword of
   %mm0; the pxor with %mm2 above only touches the top 4 bits. */
psrlq $32,%mm0
movd %mm1,%edx
psrlq $32,%mm1
movd %mm0,%ecx
movd %mm1,%ebp
bswap %ebx
bswap %edx
bswap %ecx
bswap %ebp
/* Leave MMX state before returning to code that may use x87. */
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
movl %ecx,8(%edi)
movl %ebp,(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
/*
 * gcm_ghash_4bit_mmx -- 32-bit x86 routine using MMX registers (plus the
 * pinsrw/pshufw extensions), with four arguments passed on the stack.
 *
 *   arg0 (20(%esp) after the four pushes) -> %eax
 *        Pointer to a 16-byte block that is read at entry and overwritten
 *        with the result at exit.
 *   arg1 (24(%esp)) -> %ebx
 *        Pointer to a table of 16 entries of 16 bytes each.
 *   arg2 (28(%esp)) -> %ecx   input pointer
 *   arg3 (32(%esp)) -> %edx   input length in bytes
 *
 * Presumably arg0 = Xi, arg1 = Htable, arg2 = inp, arg3 = len of the GHASH
 * update -- TODO confirm against the C prototype.
 *
 * The outer loop consumes 16 input bytes per pass and is a do-while that
 * exits only when the input pointer EQUALS arg2 + arg3.  It therefore always
 * runs at least once and relies on arg3 being a non-zero multiple of 16.
 *
 * Stack frame after realignment (offsets from %esp):
 *     0.. 15   one byte per table entry: (low byte of the entry's qword at
 *              offset 8) << 4
 *    16..143   copy of each entry's qword at offset 8
 *   144..271   copy of each entry's qword at offset 0
 *   272..399   entry >> 4 (128-bit shift), half belonging to offset 8
 *   400..527   entry >> 4 (128-bit shift), half belonging to offset 0
 *   528..535   bytes 0..7 of (block XOR input) for the current pass
 *   536        bytes 8..11 of (block XOR input) for the current pass
 *   544        saved arg0
 *   548        current input pointer
 *   552        end-of-input pointer (arg2 + arg3)
 *   556        %esp value to restore in the epilogue
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
 * %mm0-%mm7 are clobbered.  emms is executed before returning.
 */
.globl gcm_ghash_4bit_mmx
.hidden gcm_ghash_4bit_mmx
.type gcm_ghash_4bit_mmx,@function
.align 16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%ebx
movl 28(%esp),%ecx
movl 32(%esp),%edx
/* Remember the stack pointer; it is realigned below and restored from the
   frame in the epilogue. */
movl %esp,%ebp
/* Position-independent address of .Lrem_8bit (call/pop + label distance). */
call .L003pic_point
.L003pic_point:
popl %esi
leal .Lrem_8bit-.L003pic_point(%esi),%esi
/* Carve out the frame so that %esp+16 is 64-byte aligned. */
subl $544,%esp
andl $-64,%esp
subl $16,%esp
/* %edx = end-of-input pointer. */
addl %ecx,%edx
movl %eax,544(%esp)
movl %edx,552(%esp)
movl %ebp,556(%esp)
/* Bias the table pointer by 128 so all 256 bytes are reachable with 8-bit
   displacements (-128..+120). */
addl $128,%ebx
leal 144(%esp),%edi
leal 400(%esp),%ebp
/* Expand the 16-entry table into the five stack tables described in the
   header.  The work for consecutive entries is interleaved: while entry i is
   being stored, entry i+1 is already being loaded. */
movl -120(%ebx),%edx
movq -120(%ebx),%mm0
movq -128(%ebx),%mm3
shll $4,%edx
movb %dl,(%esp)
movl -104(%ebx),%edx
movq -104(%ebx),%mm2
movq -112(%ebx),%mm5
movq %mm0,-128(%edi)
psrlq $4,%mm0
movq %mm3,(%edi)
movq %mm3,%mm7
psrlq $4,%mm3
shll $4,%edx
movb %dl,1(%esp)
movl -88(%ebx),%edx
movq -88(%ebx),%mm1
psllq $60,%mm7
movq -96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-120(%edi)
psrlq $4,%mm2
movq %mm5,8(%edi)
movq %mm5,%mm6
movq %mm0,-128(%ebp)
psrlq $4,%mm5
movq %mm3,(%ebp)
shll $4,%edx
movb %dl,2(%esp)
movl -72(%ebx),%edx
movq -72(%ebx),%mm0
psllq $60,%mm6
movq -80(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-112(%edi)
psrlq $4,%mm1
movq %mm4,16(%edi)
movq %mm4,%mm7
movq %mm2,-120(%ebp)
psrlq $4,%mm4
movq %mm5,8(%ebp)
shll $4,%edx
movb %dl,3(%esp)
movl -56(%ebx),%edx
movq -56(%ebx),%mm2
psllq $60,%mm7
movq -64(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-104(%edi)
psrlq $4,%mm0
movq %mm3,24(%edi)
movq %mm3,%mm6
movq %mm1,-112(%ebp)
psrlq $4,%mm3
movq %mm4,16(%ebp)
shll $4,%edx
movb %dl,4(%esp)
movl -40(%ebx),%edx
movq -40(%ebx),%mm1
psllq $60,%mm6
movq -48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-96(%edi)
psrlq $4,%mm2
movq %mm5,32(%edi)
movq %mm5,%mm7
movq %mm0,-104(%ebp)
psrlq $4,%mm5
movq %mm3,24(%ebp)
shll $4,%edx
movb %dl,5(%esp)
movl -24(%ebx),%edx
movq -24(%ebx),%mm0
psllq $60,%mm7
movq -32(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-88(%edi)
psrlq $4,%mm1
movq %mm4,40(%edi)
movq %mm4,%mm6
movq %mm2,-96(%ebp)
psrlq $4,%mm4
movq %mm5,32(%ebp)
shll $4,%edx
movb %dl,6(%esp)
movl -8(%ebx),%edx
movq -8(%ebx),%mm2
psllq $60,%mm6
movq -16(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-80(%edi)
psrlq $4,%mm0
movq %mm3,48(%edi)
movq %mm3,%mm7
movq %mm1,-88(%ebp)
psrlq $4,%mm3
movq %mm4,40(%ebp)
shll $4,%edx
movb %dl,7(%esp)
movl 8(%ebx),%edx
movq 8(%ebx),%mm1
psllq $60,%mm7
movq (%ebx),%mm4
por %mm7,%mm0
movq %mm2,-72(%edi)
psrlq $4,%mm2
movq %mm5,56(%edi)
movq %mm5,%mm6
movq %mm0,-80(%ebp)
psrlq $4,%mm5
movq %mm3,48(%ebp)
shll $4,%edx
movb %dl,8(%esp)
movl 24(%ebx),%edx
movq 24(%ebx),%mm0
psllq $60,%mm6
movq 16(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-64(%edi)
psrlq $4,%mm1
movq %mm4,64(%edi)
movq %mm4,%mm7
movq %mm2,-72(%ebp)
psrlq $4,%mm4
movq %mm5,56(%ebp)
shll $4,%edx
movb %dl,9(%esp)
movl 40(%ebx),%edx
movq 40(%ebx),%mm2
psllq $60,%mm7
movq 32(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-56(%edi)
psrlq $4,%mm0
movq %mm3,72(%edi)
movq %mm3,%mm6
movq %mm1,-64(%ebp)
psrlq $4,%mm3
movq %mm4,64(%ebp)
shll $4,%edx
movb %dl,10(%esp)
movl 56(%ebx),%edx
movq 56(%ebx),%mm1
psllq $60,%mm6
movq 48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-48(%edi)
psrlq $4,%mm2
movq %mm5,80(%edi)
movq %mm5,%mm7
movq %mm0,-56(%ebp)
psrlq $4,%mm5
movq %mm3,72(%ebp)
shll $4,%edx
movb %dl,11(%esp)
movl 72(%ebx),%edx
movq 72(%ebx),%mm0
psllq $60,%mm7
movq 64(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-40(%edi)
psrlq $4,%mm1
movq %mm4,88(%edi)
movq %mm4,%mm6
movq %mm2,-48(%ebp)
psrlq $4,%mm4
movq %mm5,80(%ebp)
shll $4,%edx
movb %dl,12(%esp)
movl 88(%ebx),%edx
movq 88(%ebx),%mm2
psllq $60,%mm6
movq 80(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-32(%edi)
psrlq $4,%mm0
movq %mm3,96(%edi)
movq %mm3,%mm7
movq %mm1,-40(%ebp)
psrlq $4,%mm3
movq %mm4,88(%ebp)
shll $4,%edx
movb %dl,13(%esp)
movl 104(%ebx),%edx
movq 104(%ebx),%mm1
psllq $60,%mm7
movq 96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-24(%edi)
psrlq $4,%mm2
movq %mm5,104(%edi)
movq %mm5,%mm6
movq %mm0,-32(%ebp)
psrlq $4,%mm5
movq %mm3,96(%ebp)
shll $4,%edx
movb %dl,14(%esp)
movl 120(%ebx),%edx
movq 120(%ebx),%mm0
psllq $60,%mm6
movq 112(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-16(%edi)
psrlq $4,%mm1
movq %mm4,112(%edi)
movq %mm4,%mm7
movq %mm2,-24(%ebp)
psrlq $4,%mm4
movq %mm5,104(%ebp)
shll $4,%edx
movb %dl,15(%esp)
psllq $60,%mm7
por %mm7,%mm1
movq %mm0,-8(%edi)
psrlq $4,%mm0
movq %mm3,120(%edi)
movq %mm3,%mm6
movq %mm1,-16(%ebp)
psrlq $4,%mm3
movq %mm4,112(%ebp)
psllq $60,%mm6
por %mm6,%mm0
movq %mm0,-8(%ebp)
movq %mm3,120(%ebp)
/* Load the current 16-byte block: bytes 0..7 in %mm6, bytes 8..11 in %ebx,
   bytes 12..15 in %edx. */
movq (%eax),%mm6
movl 8(%eax),%ebx
movl 12(%eax),%edx
.align 16
/* One pass per 16 input bytes.  The block is XORed with the input, then
   consumed a byte at a time starting with the byte at offset 15
   ("roll $8,%edx" rotates the next byte into %dl).  For every byte the low
   nibble (%al) indexes the tables at 16/144(%esp), the high nibble
   (%ebp or %edi, alternating) indexes the tables at 272/400(%esp) and the
   byte table at (%esp), and a 16-bit .Lrem_8bit entry selected through
   %ebx/%ecx is inserted into %mm0/%mm1/%mm2 (rotating) and folded into
   %mm6.  %mm7:%mm6 is the running 128-bit value, shifted right by 8 bits
   per byte. */
.L004outer:
xorl 12(%ecx),%edx
xorl 8(%ecx),%ebx
pxor (%ecx),%mm6
leal 16(%ecx),%ecx
/* Spill the lower 12 bytes and the advanced input pointer; %ebx and %ecx are
   reused as scratch below. */
movl %ebx,536(%esp)
movq %mm6,528(%esp)
movl %ecx,548(%esp)
xorl %eax,%eax
roll $8,%edx
movb %dl,%al
movl %eax,%ebp
andb $15,%al
shrl $4,%ebp
pxor %mm0,%mm0
roll $8,%edx
pxor %mm1,%mm1
pxor %mm2,%mm2
movq 16(%esp,%eax,8),%mm7
movq 144(%esp,%eax,8),%mm6
movb %dl,%al
movd %mm7,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%edi
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
/* All four bytes of the top dword have been extracted; continue with
   bytes 8..11 saved at the top of the pass. */
movl 536(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
/* Next dword: bytes 4..7 (upper half of the qword saved at 528(%esp)). */
movl 532(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
/* Last dword: bytes 0..3 (lower half of the qword saved at 528(%esp)). */
movl 528(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
/* NOTE(review): the value loaded here is never read -- %edx is overwritten
   by "movd %mm7,%edx" further down before any use.  It looks like a leftover
   of the repeated per-dword pattern; the address lies inside the frame's
   400..527 table, so the load is harmless. */
movl 524(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
pxor 144(%esp,%eax,8),%mm6
xorb (%esp,%ebp,1),%bl
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
movzbl %bl,%ebx
/* Final nibble of the block: a 4-bit shift instead of the 8-bit step used
   above, followed by folding in the still-pending .Lrem_8bit terms. */
pxor %mm2,%mm2
psllq $4,%mm1
movd %mm7,%ecx
psrlq $4,%mm7
movq %mm6,%mm3
psrlq $4,%mm6
shll $4,%ecx
pxor 16(%esp,%edi,8),%mm7
psllq $60,%mm3
movzbl %cl,%ecx
pxor %mm3,%mm7
pxor 144(%esp,%edi,8),%mm6
pinsrw $2,(%esi,%ebx,2),%mm0
pxor %mm1,%mm6
movd %mm7,%edx
pinsrw $3,(%esi,%ecx,2),%mm2
psllq $12,%mm0
pxor %mm0,%mm6
psrlq $32,%mm7
pxor %mm2,%mm6
/* Reload the input pointer that was spilled at the top of the pass. */
movl 548(%esp),%ecx
movd %mm7,%ebx
/* Byte-reverse the result: swap the bytes inside each 16-bit word of %mm6,
   then reverse the word order with pshufw $27; bswap handles the two dwords
   held in %edx and %ebx. */
movq %mm6,%mm3
psllw $8,%mm6
psrlw $8,%mm3
por %mm3,%mm6
bswap %edx
pshufw $27,%mm6,%mm6
bswap %ebx
/* Repeat until the input pointer equals the end pointer. */
cmpl 552(%esp),%ecx
jne .L004outer
/* Write the final block back through the saved arg0 pointer. */
movl 544(%esp),%eax
movl %edx,12(%eax)
movl %ebx,8(%eax)
movq %mm6,(%eax)
/* Drop the realigned frame by restoring the stack pointer saved at entry. */
movl 556(%esp),%esp
emms
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
@ -14,10 +719,10 @@ gcm_init_clmul:
.L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L000pic
.L000pic:
call .L005pic
.L005pic:
popl %ecx
leal .Lbswap-.L000pic(%ecx),%ecx
leal .Lbswap-.L005pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
@ -84,10 +789,10 @@ gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L001pic
.L001pic:
call .L006pic
.L006pic:
popl %ecx
leal .Lbswap-.L001pic(%ecx),%ecx
leal .Lbswap-.L006pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
@ -144,16 +849,16 @@ gcm_ghash_clmul:
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call .L002pic
.L002pic:
call .L007pic
.L007pic:
popl %ecx
leal .Lbswap-.L002pic(%ecx),%ecx
leal .Lbswap-.L007pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz .L003odd_tail
jz .L008odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
@ -170,10 +875,10 @@ gcm_ghash_clmul:
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe .L004even_tail
jmp .L005mod_loop
jbe .L009even_tail
jmp .L010mod_loop
.align 32
.L005mod_loop:
.L010mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
@ -228,8 +933,8 @@ gcm_ghash_clmul:
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja .L005mod_loop
.L004even_tail:
ja .L010mod_loop
.L009even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
@ -268,9 +973,9 @@ gcm_ghash_clmul:
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L006done
jnz .L011done
movups (%edx),%xmm2
.L003odd_tail:
.L008odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
@ -309,7 +1014,7 @@ gcm_ghash_clmul:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.L006done:
.L011done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
@ -322,9 +1027,48 @@ gcm_ghash_clmul:
.Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
/*
 * .Lrem_8bit -- constant table of 256 16-bit values (32 rows of 8), aligned
 * to 64 bytes.  gcm_ghash_4bit_mmx takes its address into %esi and reads
 * entries with "pinsrw ...,(%esi,<byte index>,2)", i.e. one entry per
 * possible byte value.
 * Presumably the reduction remainders for an 8-bit shift in GHASH -- TODO
 * confirm against the perlasm generator.
 */
.align 64
.Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
/*
 * .Lrem_4bit -- constant table of 16 64-bit entries, aligned to 64 bytes.
 * Each entry is written as two .long values (low dword first); the low dword
 * is always 0 and the value sits in the high dword.
 * gcm_gmult_4bit_mmx takes the table address into %eax and reads entries
 * with "pxor (%eax,%ebx,8),%mm1" after masking %ebx to 0..15.
 * Presumably the reduction remainders for a 4-bit shift in GHASH -- TODO
 * confirm against the perlasm generator.
 */
.align 64
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -685,4 +685,3 @@ md5_block_asm_data_order:
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -3805,4 +3805,3 @@ _sha1_block_data_order_avx:
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -5564,4 +5564,3 @@ sha256_block_data_order:
ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -2834,4 +2834,3 @@ sha512_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -6,7 +6,7 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#endif
.align 64
.L_vpaes_consts:
@ -485,7 +485,7 @@ vpaes_set_encrypt_key:
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
pushl %ebx
pushl %edx
call .L016pic
@ -570,7 +570,7 @@ vpaes_encrypt:
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
pushl %ebx
pushl %edx
call .L019pic
@ -705,4 +705,3 @@ vpaes_cbc_encrypt:
ret
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -481,4 +481,3 @@ bn_mul_mont:
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -203,4 +203,3 @@ abi_test_clobber_xmm7:
ret
.size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1630,4 +1630,3 @@ ChaCha20_8x:
.cfi_endproc
.size ChaCha20_8x,.-ChaCha20_8x
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -3076,4 +3076,3 @@ aes256gcmsiv_kdf:
.cfi_endproc
.size aes256gcmsiv_kdf, .-aes256gcmsiv_kdf
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -3935,7 +3935,7 @@ do_length_block:
popq %rbp
.cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3
.cfi_adjust_cfa_offset (8 * 7) + 288 + 32
.cfi_adjust_cfa_offset (8 * 6) + 288 + 32
seal_sse_128:
movdqu .chacha20_consts(%rip),%xmm0
@ -8984,4 +8984,3 @@ seal_avx2_short_tail:
jmp seal_sse_tail_16
.cfi_endproc
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -556,10 +556,12 @@ _aesni_ctr32_6x:
.align 32
aesni_gcm_encrypt:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+2(%rip)
#endif
#endif
xorq %r10,%r10
@ -849,4 +851,3 @@ aesni_gcm_encrypt:
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -20,10 +20,12 @@
.align 16
aes_hw_encrypt:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+1(%rip)
#endif
#endif
movups (%rdi),%xmm2
movl 240(%rdx),%eax
@ -885,8 +887,10 @@ aes_hw_ecb_encrypt:
.align 16
aes_hw_ctr32_encrypt_blocks:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
movb $1,BORINGSSL_function_hit(%rip)
#endif
#endif
cmpq $1,%rdx
jne .Lctr32_bulk
@ -2107,9 +2111,11 @@ aes_hw_set_decrypt_key:
aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
movb $1,BORINGSSL_function_hit+3(%rip)
#endif
#endif
.byte 0x48,0x83,0xEC,0x08
.cfi_adjust_cfa_offset 8
movq $-1,%rax
@ -2503,4 +2509,3 @@ __aesni_set_encrypt_key:
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -424,4 +424,3 @@ gcm_ghash_ssse3:
.Llow4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -14,6 +14,709 @@
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
/*
 * gcm_gmult_4bit -- x86_64 integer-register routine.
 *
 *   %rdi  Pointer to a 16-byte block.  It is consumed one byte at a time
 *         from offset 15 down to offset 0 and overwritten with the 16-byte
 *         result.
 *   %rsi  Pointer to a table addressed as nibble*16, with an 8-byte half at
 *         offset 0 and another at offset 8 of every entry (16 entries).
 *
 * Presumably this is the GHASH "Xi = Xi * H" step with %rdi = Xi and
 * %rsi = Htable -- TODO confirm against the C prototype.
 *
 * Register roles inside the body:
 *   %r9:%r8   128-bit accumulator (%r9 is stored to bytes 0..7, %r8 to
 *             bytes 8..15, both byte-swapped)
 *   %r10      bits carried from %r9 into %r8 on each 4-bit right shift
 *   %r11      address of .Lrem_4bit
 *   %rdx      low 4 bits of the accumulator before the shift; index into
 *             .Lrem_4bit
 *   %rax/%rbx low nibble * 16 / high nibble * 16 of the current input byte
 *   %rcx      index of the next input byte (14 down to 0)
 *
 * The prologue pushes six callee-saved registers and reserves 280 bytes of
 * stack, but the body writes only %rbx among them and nothing in it touches
 * the reserved area, so the epilogue reloads %rbx alone before discarding
 * the frame.
 */
.globl gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:
/* Start with the byte at offset 15: %rax = low nibble * 16,
   %rbx = high nibble * 16.  The accumulator is seeded from the table entry
   selected by the low nibble. */
movzbq 15(%rdi),%r8
leaq .Lrem_4bit(%rip),%r11
xorq %rax,%rax
xorq %rbx,%rbx
movb %r8b,%al
movb %r8b,%bl
shlb $4,%al
movq $14,%rcx
movq 8(%rsi,%rax,1),%r8
movq (%rsi,%rax,1),%r9
andb $0xf0,%bl
movq %r8,%rdx
jmp .Loop1
.align 16
/* Each pass handles two nibbles.  First half: shift the accumulator right by
   4, fold in .Lrem_4bit[bits shifted out] and the table entry for the pending
   high nibble (%rbx), and fetch the next input byte.  Second half: the same
   step for the low nibble (%rax) of the byte just fetched. */
.Loop1:
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
movb (%rdi,%rcx,1),%al
shrq $4,%r9
xorq 8(%rsi,%rbx,1),%r8
shlq $60,%r10
xorq (%rsi,%rbx,1),%r9
movb %al,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
shlb $4,%al
xorq %r10,%r8
decq %rcx
/* Taken once the byte at offset 0 has been fetched (index went negative). */
js .Lbreak1
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rax,1),%r8
shlq $60,%r10
xorq (%rsi,%rax,1),%r9
andb $0xf0,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
xorq %r10,%r8
jmp .Loop1
.align 16
/* Tail: the two nibbles of the byte at offset 0 (low nibble via %rax, then
   high nibble via %rbx). */
.Lbreak1:
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rax,1),%r8
shlq $60,%r10
xorq (%rsi,%rax,1),%r9
andb $0xf0,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
xorq %r10,%r8
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rbx,1),%r8
shlq $60,%r10
xorq (%rsi,%rbx,1),%r9
xorq %r10,%r8
xorq (%r11,%rdx,8),%r9
/* Byte-swap both halves and write the result back over the input block. */
bswapq %r8
bswapq %r9
movq %r8,8(%rdi)
movq %r9,(%rdi)
/* %rsi = address just above the six saved registers (280 bytes of locals +
   6*8 bytes of pushes); -8(%rsi) is the slot of the first push, %rbx. */
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
/*
 * gcm_ghash_4bit -- x86_64 integer-register routine.
 *
 *   %rdi  Pointer to a 16-byte block that is read at entry, used as scratch
 *         during every pass and overwritten with the result at exit.
 *   %rsi  Pointer to a table of 16 entries of 16 bytes each.
 *   %rdx  Input pointer (copied to %r14).
 *   %rcx  Input length in bytes (copied to %r15, then turned into the
 *         end-of-input pointer).
 *
 * Presumably %rdi = Xi, %rsi = Htable, %rdx = inp, %rcx = len of the GHASH
 * update -- TODO confirm against the C prototype.
 *
 * The outer loop consumes 16 input bytes per pass and is a do-while that
 * repeats while the input pointer is below the end pointer, so it always
 * runs at least once.
 *
 * Stack frame (offsets from %rsp, 280 bytes reserved):
 *     0.. 15   one byte per table entry: (low byte of the entry's qword at
 *              offset 8) << 4
 *    16..143   entry >> 4 (128-bit shift), half belonging to offset 8
 *   144..271   entry >> 4 (128-bit shift), half belonging to offset 0
 *              (%rbp points here; the other table is reached as -128(%rbp))
 *
 * All six callee-saved registers pushed in the prologue are restored in the
 * epilogue.
 */
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
movq %rdx,%r14
movq %rcx,%r15
/* Bias the table pointer by +128 (undone after the expansion below) so every
   entry is addressed with an explicit "-128" in the displacement. */
subq $-128,%rsi
leaq 16+128(%rsp),%rbp
xorl %edx,%edx
/* Expand the 16-entry table into the three stack tables described in the
   header.  The work for consecutive entries is interleaved, alternating
   between the register pairs %r8/%rax and %r9/%rbx. */
movq 0+0-128(%rsi),%r8
movq 0+8-128(%rsi),%rax
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq 16+0-128(%rsi),%r9
shlb $4,%dl
movq 16+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,0(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,0(%rbp)
movq 32+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,0-128(%rbp)
movq 32+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,1(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,8(%rbp)
movq 48+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,8-128(%rbp)
movq 48+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,2(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,16(%rbp)
movq 64+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,16-128(%rbp)
movq 64+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,3(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,24(%rbp)
movq 80+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,24-128(%rbp)
movq 80+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,4(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,32(%rbp)
movq 96+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,32-128(%rbp)
movq 96+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,5(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,40(%rbp)
movq 112+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,40-128(%rbp)
movq 112+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,6(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,48(%rbp)
movq 128+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,48-128(%rbp)
movq 128+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,7(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,56(%rbp)
movq 144+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,56-128(%rbp)
movq 144+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,8(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,64(%rbp)
movq 160+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,64-128(%rbp)
movq 160+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,9(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,72(%rbp)
movq 176+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,72-128(%rbp)
movq 176+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,10(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,80(%rbp)
movq 192+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,80-128(%rbp)
movq 192+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,11(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,88(%rbp)
movq 208+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,88-128(%rbp)
movq 208+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,12(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,96(%rbp)
movq 224+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,96-128(%rbp)
movq 224+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,13(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,104(%rbp)
movq 240+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,104-128(%rbp)
movq 240+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,14(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,112(%rbp)
shlb $4,%dl
movq %rax,112-128(%rbp)
shlq $60,%r10
movb %dl,15(%rsp)
orq %r10,%rbx
movq %r9,120(%rbp)
movq %rbx,120-128(%rbp)
/* Undo the +128 bias: %rsi points at the original table again. */
addq $-128,%rsi
/* Load the current block: bytes 8..15 in %r8, bytes 0..7 in %r9. */
movq 8(%rdi),%r8
movq 0(%rdi),%r9
/* %r15 = end-of-input pointer. */
addq %r14,%r15
leaq .Lrem_8bit(%rip),%r11
jmp .Louter_loop
.align 16
/* One pass per 16 input bytes.  The block is XORed with the input and
   spilled to (%rdi), then consumed a byte at a time starting with the byte
   at offset 15 ("roll $8,%edx" rotates the next byte into %dl).  For every
   byte the low nibble * 16 (%al) indexes the caller's table through %rsi,
   the high nibble (%ebx or %ecx, alternating) indexes the shifted stack
   tables through %rbp and the byte table at (%rsp), and a 16-bit .Lrem_8bit
   entry selected through %r12/%r13 is shifted left by 48 and folded into
   %r9.  %r9:%r8 is the running 128-bit value, shifted right by 8 bits per
   byte. */
.Louter_loop:
xorq (%r14),%r9
movq 8(%r14),%rdx
leaq 16(%r14),%r14
xorq %r8,%rdx
movq %r9,(%rdi)
movq %rdx,8(%rdi)
/* Begin with the top dword (bytes 12..15). */
shrq $32,%rdx
xorq %rax,%rax
roll $8,%edx
movb %dl,%al
movzbl %dl,%ebx
shlb $4,%al
shrl $4,%ebx
roll $8,%edx
movq 8(%rsi,%rax,1),%r8
movq (%rsi,%rax,1),%r9
movb %dl,%al
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
xorq %r8,%r12
movq %r9,%r10
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
/* All four bytes of the top dword have been extracted; continue with
   bytes 8..11 spilled at the top of the pass. */
movl 8(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
/* Next dword: bytes 4..7. */
movl 4(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
/* Last dword: bytes 0..3. */
movl 0(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
/* For the final byte the high nibble is kept as nibble * 16 (mask instead of
   shift) because it indexes the caller's table through %rsi in the closing
   4-bit step below. */
andl $240,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
/* NOTE(review): the value loaded here is never read -- %edx is not used
   again before it is reloaded at the top of the next pass or the function
   returns.  It looks like a leftover of the repeated per-dword pattern, but
   it does read the 4 bytes immediately BELOW the 16-byte block at %rdi, so
   that memory must be readable.  Generated code: fix in the generator, not
   here. */
movl -4(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
movzwq (%r11,%r12,2),%r12
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
shlq $48,%r12
xorq %r10,%r8
xorq %r12,%r9
/* Closing 4-bit step for the last high nibble. */
movzbq %r8b,%r13
shrq $4,%r8
movq %r9,%r10
shlb $4,%r13b
shrq $4,%r9
xorq 8(%rsi,%rcx,1),%r8
movzwq (%r11,%r13,2),%r13
shlq $60,%r10
xorq (%rsi,%rcx,1),%r9
xorq %r10,%r8
shlq $48,%r13
bswapq %r8
xorq %r13,%r9
bswapq %r9
/* Repeat while the input pointer is below the end pointer. */
cmpq %r15,%r14
jb .Louter_loop
/* Write the final block back. */
movq %r8,8(%rdi)
movq %r9,(%rdi)
/* %rsi = address just above the six saved registers; reload them in reverse
   push order and drop the frame. */
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
/* 0xf3,0xc3 encodes "rep ret". */
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
@ -1119,9 +1822,50 @@ gcm_ghash_avx:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
.align 64
.type .Lrem_4bit,@object
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type .Lrem_8bit,@object
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -699,4 +699,3 @@ md5_block_asm_data_order:
.cfi_endproc
.size md5_block_asm_data_order,.-md5_block_asm_data_order
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -4540,4 +4540,3 @@ ecp_nistz256_point_add_affinex:
.cfi_endproc
.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -340,4 +340,3 @@ beeu_mod_inverse_vartime:
.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -60,4 +60,3 @@ CRYPTO_rdrand_multiple8_buf:
.cfi_endproc
.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1746,4 +1746,3 @@ rsaz_1024_gather5_avx2:
.long 4,4,4,4, 4,4,4,4
.align 64
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -3970,4 +3970,3 @@ sha256_block_data_order_avx:
.cfi_endproc
.size sha256_block_data_order_avx,.-sha256_block_data_order_avx
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -2989,4 +2989,3 @@ sha512_block_data_order_avx:
.cfi_endproc
.size sha512_block_data_order_avx,.-sha512_block_data_order_avx
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -812,10 +812,12 @@ _vpaes_schedule_mangle:
.align 16
vpaes_set_encrypt_key:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+5(%rip)
#endif
#endif
movl %esi,%eax
@ -861,10 +863,12 @@ vpaes_set_decrypt_key:
.align 16
vpaes_encrypt:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+4(%rip)
#endif
#endif
movdqu (%rdi),%xmm0
call _vpaes_preheat
@ -1130,4 +1134,3 @@ _vpaes_consts:
.align 64
.size _vpaes_consts,.-_vpaes_consts
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1257,4 +1257,3 @@ bn_mulx4x_mont:
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -3787,4 +3787,3 @@ bn_gather5:
.long 2,2, 2,2
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -515,4 +515,3 @@ abi_test_set_direction_flag:
.byte 0xf3,0xc3
.size abi_test_set_direction_flag,.-abi_test_set_direction_flag
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -6,14 +6,14 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#endif
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
.align 4
_aes_hw_encrypt:
L_aes_hw_encrypt_begin:
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
pushl %ebx
pushl %edx
call L000pic
@ -818,7 +818,7 @@ L_aes_hw_ctr32_encrypt_blocks_begin:
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
pushl %ebx
pushl %edx
call L038pic
@ -2403,7 +2403,7 @@ L097bad_keybits:
.align 4
_aes_hw_set_encrypt_key:
L_aes_hw_set_encrypt_key_begin:
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
pushl %ebx
pushl %edx
call L116pic

View File

@ -981,6 +981,551 @@ L028aw_end:
popl %ebx
popl %ebp
ret
# _bn_sub_part_words: word-by-word subtraction with borrow over 32-bit
# words, for two operands whose lengths may differ.
#
# Stack arguments (offsets are as seen after the four pushes below):
#   20(%esp) -> %ebx  destination word pointer
#   24(%esp) -> %esi  first source pointer (minuend)
#   28(%esp) -> %edi  second source pointer (subtrahend)
#   32(%esp) -> %ebp  number of words that both sources supply
#   36(%esp)          signed count of extra words ("dl" in the comments
#                     below); negative: extra words are read only from
#                     the second source, positive: only from the first
# Result: %eax holds the final borrow (0 or 1).
#
# Phases: (1) common words, eight per loop pass plus up to seven tail
# words; (2) extra words selected by the sign of dl.
.globl _bn_sub_part_words
.private_extern _bn_sub_part_words
.align 4
_bn_sub_part_words:
L_bn_sub_part_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
# %eax carries the borrow between words; start with no borrow.
xorl %eax,%eax
# Keep only the multiple-of-8 part of the common word count.
andl $4294967288,%ebp
jz L029aw_finish
L030aw_loop:
# Round 0
# Pattern used by every round of phase 1: subtract the incoming borrow,
# capture CF into %eax (movl leaves the flags untouched, so adcl still
# sees the borrow of the preceding subl), subtract the second operand,
# then add that borrow into %eax as well.
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
# Round 1
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
# Round 2
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
# Round 3
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
# Round 4
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
# Round 5
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
# Round 6
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
# Round 7
movl 28(%esi),%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
# Advance all three pointers past the eight words just processed.
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz L030aw_loop
L029aw_finish:
# Remaining common words: count mod 8, handled one word per tail round.
# Each tail round advances the pointers by one word so that phase 2
# continues from the correct position.
movl 32(%esp),%ebp
andl $7,%ebp
jz L031aw_end
# Tail Round 0
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 1
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 2
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 3
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 4
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 5
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz L031aw_end
# Tail Round 6
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
L031aw_end:
# Phase 2: dispatch on dl. Zero means nothing more to do; the zero test
# is performed twice (once on memory, once on the register copy).
cmpl $0,36(%esp)
je L032pw_end
movl 36(%esp),%ebp
cmpl $0,%ebp
je L032pw_end
jge L033pw_pos
# pw_neg
# dl is negative: negate it to get the extra word count, then compute
# 0 - second_source[i] - borrow for each extra word.
movl $0,%edx
subl %ebp,%edx
movl %edx,%ebp
andl $4294967288,%ebp
jz L034pw_neg_finish
L035pw_neg_loop:
# dl<0 Round 0
movl $0,%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
# dl<0 Round 1
movl $0,%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
# dl<0 Round 2
movl $0,%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
# dl<0 Round 3
movl $0,%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
# dl<0 Round 4
movl $0,%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
# dl<0 Round 5
movl $0,%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
# dl<0 Round 6
movl $0,%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
# dl<0 Round 7
movl $0,%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz L035pw_neg_loop
L034pw_neg_finish:
# Remaining extra words: (-dl) mod 8. These tail rounds do not advance
# the pointers; they use growing displacements instead. decl is placed
# before the store because movl does not change ZF, so the jz after the
# store still tests the decremented counter.
movl 36(%esp),%edx
movl $0,%ebp
subl %edx,%ebp
andl $7,%ebp
jz L032pw_end
# dl<0 Tail Round 0
movl $0,%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz L032pw_end
# dl<0 Tail Round 1
movl $0,%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz L032pw_end
# dl<0 Tail Round 2
movl $0,%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz L032pw_end
# dl<0 Tail Round 3
movl $0,%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz L032pw_end
# dl<0 Tail Round 4
movl $0,%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz L032pw_end
# dl<0 Tail Round 5
movl $0,%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz L032pw_end
# dl<0 Tail Round 6
movl $0,%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
jmp L032pw_end
L033pw_pos:
# dl is positive: each extra word is first_source[i] - borrow. %eax is
# left untouched while the borrow keeps propagating; as soon as a
# subtraction produces no borrow (jnc, tested after a flag-neutral
# store) control jumps into the plain copy code further down, entering
# at the word that follows the one just written.
andl $4294967288,%ebp
jz L036pw_pos_finish
L037pw_pos_loop:
# dl>0 Round 0
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
jnc L038pw_nc0
# dl>0 Round 1
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
jnc L039pw_nc1
# dl>0 Round 2
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
jnc L040pw_nc2
# dl>0 Round 3
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
jnc L041pw_nc3
# dl>0 Round 4
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
jnc L042pw_nc4
# dl>0 Round 5
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
jnc L043pw_nc5
# dl>0 Round 6
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
jnc L044pw_nc6
# dl>0 Round 7
movl 28(%esi),%ecx
subl %eax,%ecx
movl %ecx,28(%ebx)
jnc L045pw_nc7
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
jnz L037pw_pos_loop
L036pw_pos_finish:
# Remaining extra words: dl mod 8, still with a possible borrow.
movl 36(%esp),%ebp
andl $7,%ebp
jz L032pw_end
# dl>0 Tail Round 0
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
jnc L046pw_tail_nc0
decl %ebp
jz L032pw_end
# dl>0 Tail Round 1
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
jnc L047pw_tail_nc1
decl %ebp
jz L032pw_end
# dl>0 Tail Round 2
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
jnc L048pw_tail_nc2
decl %ebp
jz L032pw_end
# dl>0 Tail Round 3
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
jnc L049pw_tail_nc3
decl %ebp
jz L032pw_end
# dl>0 Tail Round 4
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
jnc L050pw_tail_nc4
decl %ebp
jz L032pw_end
# dl>0 Tail Round 5
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
jnc L051pw_tail_nc5
decl %ebp
jz L032pw_end
# dl>0 Tail Round 6
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
jnc L052pw_tail_nc6
# Last extra word still borrowed: report a borrow of 1.
movl $1,%eax
jmp L032pw_end
# No-borrow path for dl>0: the remaining extra words are copied from
# the first source to the destination unchanged. The L0xxpw_ncN labels
# are entry points used by the jnc instructions above.
L053pw_nc_loop:
movl (%esi),%ecx
movl %ecx,(%ebx)
L038pw_nc0:
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
L039pw_nc1:
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
L040pw_nc2:
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
L041pw_nc3:
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
L042pw_nc4:
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
L043pw_nc5:
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
L044pw_nc6:
movl 28(%esi),%ecx
movl %ecx,28(%ebx)
L045pw_nc7:
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
jnz L053pw_nc_loop
# Copy the dl mod 8 leftover words.
movl 36(%esp),%ebp
andl $7,%ebp
jz L054pw_nc_end
movl (%esi),%ecx
movl %ecx,(%ebx)
L046pw_tail_nc0:
decl %ebp
jz L054pw_nc_end
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
L047pw_tail_nc1:
decl %ebp
jz L054pw_nc_end
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
L048pw_tail_nc2:
decl %ebp
jz L054pw_nc_end
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
L049pw_tail_nc3:
decl %ebp
jz L054pw_nc_end
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
L050pw_tail_nc4:
decl %ebp
jz L054pw_nc_end
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
L051pw_tail_nc5:
decl %ebp
jz L054pw_nc_end
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
L052pw_tail_nc6:
L054pw_nc_end:
# The no-borrow path always finishes with a borrow of 0.
movl $0,%eax
L032pw_end:
# Common exit: restore the saved registers; %eax holds the borrow.
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P

View File

@ -6,6 +6,707 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
# _gcm_gmult_4bit_mmx: MMX routine that transforms the 16-byte block at
# the first argument in place, consuming it one 4-bit nibble at a time
# from byte 15 down to byte 0.
#
# Stack arguments (offsets are as seen after the four pushes below):
#   20(%esp) -> %edi  16-byte block, read and then overwritten
#   24(%esp) -> %esi  table of 16-byte entries, indexed by nibble*16
# No value is returned in %eax (it is used for the Lrem_4bit address).
#
# NOTE(review): by name this is presumably the GHASH multiplication of
# the block by the hash key using a precomputed 4-bit table - confirm
# against the C caller.
.globl _gcm_gmult_4bit_mmx
.private_extern _gcm_gmult_4bit_mmx
.align 4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
# Position-independent address of Lrem_4bit: call/pop yields the current
# address, then the link-time constant offset is added.
call L000pic_point
L000pic_point:
popl %eax
leal Lrem_4bit-L000pic_point(%eax),%eax
# Split the last byte: %ecx = low nibble * 16, %edx = high nibble * 16.
movzbl 15(%edi),%ebx
xorl %ecx,%ecx
movl %ebx,%edx
movb %dl,%cl
# %ebp indexes the next byte to fetch (14 down to 0).
movl $14,%ebp
shlb $4,%cl
andl $240,%edx
# Seed the 128-bit running value (%mm1 = first qword of the entry,
# %mm0 = second qword) from the entry selected by the low nibble.
movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1
movd %mm0,%ebx
jmp L001mmx_loop
.align 4,0x90
L001mmx_loop:
# One pass handles two nibbles. Each nibble step shifts %mm1:%mm0 right
# by four bits (%mm2 carries the four bits crossing from %mm1 into
# %mm0), folds in the Lrem_4bit entry selected by the four bits that
# were shifted out (%ebx & 15), and xors in the table entry for the
# nibble. The fetch of the next byte is interleaved with the first step.
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb (%edi,%ebp,1),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
decl %ebp
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
pxor %mm2,%mm0
# The counter went negative: the byte just fetched was byte 0, so its
# two nibbles are finished outside the loop.
js L002mmx_break
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
jmp L001mmx_loop
.align 4,0x90
L002mmx_break:
# Low nibble of byte 0.
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
# High nibble of byte 0.
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
pxor %mm2,%mm0
# Unpack %mm1:%mm0 into four 32-bit words, byte-swap each one, and
# store them back over the block.
psrlq $32,%mm0
movd %mm1,%edx
psrlq $32,%mm1
movd %mm0,%ecx
movd %mm1,%ebp
bswap %ebx
bswap %edx
bswap %ecx
bswap %ebp
# Leave MMX state so later x87 code sees an empty register stack.
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
movl %ecx,8(%edi)
movl %ebp,(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
# _gcm_ghash_4bit_mmx: MMX routine that folds a run of 16-byte input
# blocks into a 16-byte accumulator, one input byte per unrolled step.
#
# Stack arguments (offsets are as seen after the four pushes below):
#   20(%esp) -> %eax  16-byte accumulator, loaded before the outer loop
#                     and stored back after it
#   24(%esp) -> %ebx  table of sixteen 16-byte entries
#   28(%esp) -> %ecx  input pointer, advanced 16 bytes per outer pass
#   32(%esp) -> %edx  input length in bytes, added to %ecx to form the
#                     end pointer
# The outer loop runs at least once and exits only when the input
# pointer equals the end pointer, so the length is assumed to be a
# nonzero multiple of 16 - TODO confirm against callers.
#
# Scratch frame (offsets from the realigned %esp):
#     0..15   per entry: low byte of the entry, shifted left by four
#    16..143  per entry: second qword of the entry
#   144..271  per entry: first qword of the entry
#   272..399  per entry: second qword >> 4, topped up with the low four
#             bits of the first qword
#   400..527  per entry: first qword >> 4
#   528..535  accumulator bytes 0..7 after xor with the input block
#   536       accumulator bytes 8..11 after xor with the input block
#   544       accumulator pointer
#   548       input pointer for the next block
#   552       end-of-input pointer
#   556       caller stack pointer
#
# NOTE(review): by name this is presumably the GHASH update over a
# precomputed 4-bit key table - confirm against the C caller.
.globl _gcm_ghash_4bit_mmx
.private_extern _gcm_ghash_4bit_mmx
.align 4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%ebx
movl 28(%esp),%ecx
movl 32(%esp),%edx
# Remember the incoming stack pointer; it is restored from 556(%esp).
movl %esp,%ebp
# Position-independent address of Lrem_8bit.
call L003pic_point
L003pic_point:
popl %esi
leal Lrem_8bit-L003pic_point(%esi),%esi
# Carve out the scratch frame and align it to 64 bytes.
subl $544,%esp
andl $-64,%esp
subl $16,%esp
addl %ecx,%edx
movl %eax,544(%esp)
movl %edx,552(%esp)
movl %ebp,556(%esp)
# Bias the table pointer by 128 so all sixteen entries are reachable
# with one-byte displacements (-128..120); %edi and %ebp are biased the
# same way over the scratch tables.
addl $128,%ebx
leal 144(%esp),%edi
leal 400(%esp),%ebp
# Table expansion, software-pipelined across entries: for each of the
# sixteen entries store the byte, the two plain qwords and the two
# qwords of the entry shifted right by four bits (see frame layout).
movl -120(%ebx),%edx
movq -120(%ebx),%mm0
movq -128(%ebx),%mm3
shll $4,%edx
movb %dl,(%esp)
movl -104(%ebx),%edx
movq -104(%ebx),%mm2
movq -112(%ebx),%mm5
movq %mm0,-128(%edi)
psrlq $4,%mm0
movq %mm3,(%edi)
movq %mm3,%mm7
psrlq $4,%mm3
shll $4,%edx
movb %dl,1(%esp)
movl -88(%ebx),%edx
movq -88(%ebx),%mm1
psllq $60,%mm7
movq -96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-120(%edi)
psrlq $4,%mm2
movq %mm5,8(%edi)
movq %mm5,%mm6
movq %mm0,-128(%ebp)
psrlq $4,%mm5
movq %mm3,(%ebp)
shll $4,%edx
movb %dl,2(%esp)
movl -72(%ebx),%edx
movq -72(%ebx),%mm0
psllq $60,%mm6
movq -80(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-112(%edi)
psrlq $4,%mm1
movq %mm4,16(%edi)
movq %mm4,%mm7
movq %mm2,-120(%ebp)
psrlq $4,%mm4
movq %mm5,8(%ebp)
shll $4,%edx
movb %dl,3(%esp)
movl -56(%ebx),%edx
movq -56(%ebx),%mm2
psllq $60,%mm7
movq -64(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-104(%edi)
psrlq $4,%mm0
movq %mm3,24(%edi)
movq %mm3,%mm6
movq %mm1,-112(%ebp)
psrlq $4,%mm3
movq %mm4,16(%ebp)
shll $4,%edx
movb %dl,4(%esp)
movl -40(%ebx),%edx
movq -40(%ebx),%mm1
psllq $60,%mm6
movq -48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-96(%edi)
psrlq $4,%mm2
movq %mm5,32(%edi)
movq %mm5,%mm7
movq %mm0,-104(%ebp)
psrlq $4,%mm5
movq %mm3,24(%ebp)
shll $4,%edx
movb %dl,5(%esp)
movl -24(%ebx),%edx
movq -24(%ebx),%mm0
psllq $60,%mm7
movq -32(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-88(%edi)
psrlq $4,%mm1
movq %mm4,40(%edi)
movq %mm4,%mm6
movq %mm2,-96(%ebp)
psrlq $4,%mm4
movq %mm5,32(%ebp)
shll $4,%edx
movb %dl,6(%esp)
movl -8(%ebx),%edx
movq -8(%ebx),%mm2
psllq $60,%mm6
movq -16(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-80(%edi)
psrlq $4,%mm0
movq %mm3,48(%edi)
movq %mm3,%mm7
movq %mm1,-88(%ebp)
psrlq $4,%mm3
movq %mm4,40(%ebp)
shll $4,%edx
movb %dl,7(%esp)
movl 8(%ebx),%edx
movq 8(%ebx),%mm1
psllq $60,%mm7
movq (%ebx),%mm4
por %mm7,%mm0
movq %mm2,-72(%edi)
psrlq $4,%mm2
movq %mm5,56(%edi)
movq %mm5,%mm6
movq %mm0,-80(%ebp)
psrlq $4,%mm5
movq %mm3,48(%ebp)
shll $4,%edx
movb %dl,8(%esp)
movl 24(%ebx),%edx
movq 24(%ebx),%mm0
psllq $60,%mm6
movq 16(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-64(%edi)
psrlq $4,%mm1
movq %mm4,64(%edi)
movq %mm4,%mm7
movq %mm2,-72(%ebp)
psrlq $4,%mm4
movq %mm5,56(%ebp)
shll $4,%edx
movb %dl,9(%esp)
movl 40(%ebx),%edx
movq 40(%ebx),%mm2
psllq $60,%mm7
movq 32(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-56(%edi)
psrlq $4,%mm0
movq %mm3,72(%edi)
movq %mm3,%mm6
movq %mm1,-64(%ebp)
psrlq $4,%mm3
movq %mm4,64(%ebp)
shll $4,%edx
movb %dl,10(%esp)
movl 56(%ebx),%edx
movq 56(%ebx),%mm1
psllq $60,%mm6
movq 48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-48(%edi)
psrlq $4,%mm2
movq %mm5,80(%edi)
movq %mm5,%mm7
movq %mm0,-56(%ebp)
psrlq $4,%mm5
movq %mm3,72(%ebp)
shll $4,%edx
movb %dl,11(%esp)
movl 72(%ebx),%edx
movq 72(%ebx),%mm0
psllq $60,%mm7
movq 64(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-40(%edi)
psrlq $4,%mm1
movq %mm4,88(%edi)
movq %mm4,%mm6
movq %mm2,-48(%ebp)
psrlq $4,%mm4
movq %mm5,80(%ebp)
shll $4,%edx
movb %dl,12(%esp)
movl 88(%ebx),%edx
movq 88(%ebx),%mm2
psllq $60,%mm6
movq 80(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-32(%edi)
psrlq $4,%mm0
movq %mm3,96(%edi)
movq %mm3,%mm7
movq %mm1,-40(%ebp)
psrlq $4,%mm3
movq %mm4,88(%ebp)
shll $4,%edx
movb %dl,13(%esp)
movl 104(%ebx),%edx
movq 104(%ebx),%mm1
psllq $60,%mm7
movq 96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-24(%edi)
psrlq $4,%mm2
movq %mm5,104(%edi)
movq %mm5,%mm6
movq %mm0,-32(%ebp)
psrlq $4,%mm5
movq %mm3,96(%ebp)
shll $4,%edx
movb %dl,14(%esp)
movl 120(%ebx),%edx
movq 120(%ebx),%mm0
psllq $60,%mm6
movq 112(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-16(%edi)
psrlq $4,%mm1
movq %mm4,112(%edi)
movq %mm4,%mm7
movq %mm2,-24(%ebp)
psrlq $4,%mm4
movq %mm5,104(%ebp)
shll $4,%edx
movb %dl,15(%esp)
# Pipeline drain: finish the stores for the last two entries.
psllq $60,%mm7
por %mm7,%mm1
movq %mm0,-8(%edi)
psrlq $4,%mm0
movq %mm3,120(%edi)
movq %mm3,%mm6
movq %mm1,-16(%ebp)
psrlq $4,%mm3
movq %mm4,112(%ebp)
psllq $60,%mm6
por %mm6,%mm0
movq %mm0,-8(%ebp)
movq %mm3,120(%ebp)
# Load the accumulator: bytes 0..7 in %mm6, 8..11 in %ebx, 12..15 in
# %edx.
movq (%eax),%mm6
movl 8(%eax),%ebx
movl 12(%eax),%edx
.align 4,0x90
L004outer:
# Xor the next 16 input bytes into the accumulator, advance the input
# pointer, and park the parts that are consumed later in the frame.
xorl 12(%ecx),%edx
xorl 8(%ecx),%ebx
pxor (%ecx),%mm6
leal 16(%ecx),%ecx
movl %ebx,536(%esp)
movq %mm6,528(%esp)
movl %ecx,548(%esp)
# Unrolled byte steps. roll $8 brings the next byte of the current
# 32-bit word into %dl; its low nibble (%eax) selects entries in the
# plain tables at 16/144(%esp) and its high nibble (%ebp or %edi,
# alternating) selects entries in the shifted tables at 272/400(%esp).
# The running value %mm6:%mm7 is shifted right by eight bits per step;
# the byte that falls out is combined with the byte table at (%esp) and
# used to fetch a 16-bit entry from Lrem_8bit (%esi), which pinsrw
# places into %mm0/%mm1/%mm2 in rotation and which is xored into %mm6
# one step later.
xorl %eax,%eax
roll $8,%edx
movb %dl,%al
movl %eax,%ebp
andb $15,%al
shrl $4,%ebp
pxor %mm0,%mm0
roll $8,%edx
pxor %mm1,%mm1
pxor %mm2,%mm2
movq 16(%esp,%eax,8),%mm7
movq 144(%esp,%eax,8),%mm6
movb %dl,%al
movd %mm7,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%edi
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
# Bytes 12..15 are consumed; continue with the saved bytes 8..11.
movl 536(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
# Continue with the saved bytes 4..7 (upper half of the qword at 528).
movl 532(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
# Continue with the saved bytes 0..3 (lower half of the qword at 528).
movl 528(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
# NOTE(review): the value loaded here is not read before %edx is
# overwritten by the movd further down; it looks like a leftover of the
# unrolling pattern - confirm against the generator script.
movl 524(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
pxor 144(%esp,%eax,8),%mm6
xorb (%esp,%ebp,1),%bl
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
movzbl %bl,%ebx
pxor %mm2,%mm2
psllq $4,%mm1
# Final half step: shift by four bits only and fold in the plain table
# entry selected by the last high nibble (%edi).
movd %mm7,%ecx
psrlq $4,%mm7
movq %mm6,%mm3
psrlq $4,%mm6
shll $4,%ecx
pxor 16(%esp,%edi,8),%mm7
psllq $60,%mm3
movzbl %cl,%ecx
pxor %mm3,%mm7
pxor 144(%esp,%edi,8),%mm6
pinsrw $2,(%esi,%ebx,2),%mm0
pxor %mm1,%mm6
movd %mm7,%edx
pinsrw $3,(%esi,%ecx,2),%mm2
psllq $12,%mm0
pxor %mm0,%mm6
psrlq $32,%mm7
pxor %mm2,%mm6
# Reload the input pointer saved at the top of this pass.
movl 548(%esp),%ecx
movd %mm7,%ebx
# Reverse the byte order of the result: swap the bytes inside each
# 16-bit lane of %mm6, then reverse the four lanes with pshufw; the two
# 32-bit halves of %mm7 are reversed with bswap.
movq %mm6,%mm3
psllw $8,%mm6
psrlw $8,%mm3
por %mm3,%mm6
bswap %edx
pshufw $27,%mm6,%mm6
bswap %ebx
cmpl 552(%esp),%ecx
jne L004outer
# Store the accumulator back, restore the caller stack pointer, and
# leave MMX state before returning.
movl 544(%esp),%eax
movl %edx,12(%eax)
movl %ebx,8(%eax)
movq %mm6,(%eax)
movl 556(%esp),%esp
emms
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
.align 4
@ -13,10 +714,10 @@ _gcm_init_clmul:
L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call L000pic
L000pic:
call L005pic
L005pic:
popl %ecx
leal Lbswap-L000pic(%ecx),%ecx
leal Lbswap-L005pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
@ -81,10 +782,10 @@ _gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call L001pic
L001pic:
call L006pic
L006pic:
popl %ecx
leal Lbswap-L001pic(%ecx),%ecx
leal Lbswap-L006pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
@ -139,16 +840,16 @@ L_gcm_ghash_clmul_begin:
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call L002pic
L002pic:
call L007pic
L007pic:
popl %ecx
leal Lbswap-L002pic(%ecx),%ecx
leal Lbswap-L007pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz L003odd_tail
jz L008odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
@ -165,10 +866,10 @@ L002pic:
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe L004even_tail
jmp L005mod_loop
jbe L009even_tail
jmp L010mod_loop
.align 5,0x90
L005mod_loop:
L010mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
@ -223,8 +924,8 @@ L005mod_loop:
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja L005mod_loop
L004even_tail:
ja L010mod_loop
L009even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
@ -263,9 +964,9 @@ L004even_tail:
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz L006done
jnz L011done
movups (%edx),%xmm2
L003odd_tail:
L008odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
@ -304,7 +1005,7 @@ L003odd_tail:
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
L006done:
L011done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
@ -316,6 +1017,46 @@ L006done:
Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 6,0x90
Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
.align 6,0x90
Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62

View File

@ -6,7 +6,7 @@
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#endif
.align 6,0x90
L_vpaes_consts:
@ -468,7 +468,7 @@ L_vpaes_set_encrypt_key_begin:
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
pushl %ebx
pushl %edx
call L016pic
@ -549,7 +549,7 @@ L_vpaes_encrypt_begin:
pushl %ebx
pushl %esi
pushl %edi
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
pushl %ebx
pushl %edx
call L019pic

File diff suppressed because it is too large Load Diff

View File

@ -556,9 +556,11 @@ L$handle_ctr32_2:
.p2align 5
_aesni_gcm_encrypt:
#ifdef BORINGSSL_DISPATCH_TEST
#ifndef NDEBUG
#ifndef BORINGSSL_FIPS
movb $1,_BORINGSSL_function_hit+2(%rip)
#endif
#endif
xorq %r10,%r10

Some files were not shown because too many files have changed in this diff Show More