Merge branch 'master' into dt64_timeslots

This commit is contained in:
Andrey Zvonov 2022-07-12 10:26:38 +03:00 committed by GitHub
commit 7bfe155f09
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1747 changed files with 54493 additions and 46119 deletions

View File

@ -1,6 +1,6 @@
--- ---
name: Bug report name: Bug report
about: Wrong behaviour (visible to users) in official ClickHouse release. about: Wrong behavior (visible to users) in the official ClickHouse release.
title: '' title: ''
labels: 'potential bug' labels: 'potential bug'
assignees: '' assignees: ''

View File

@ -143,6 +143,8 @@ jobs:
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code - name: Check out repository code
uses: actions/checkout@v2 uses: actions/checkout@v2
with:
fetch-depth: 0 # For a proper version and performance artifacts
- name: Build - name: Build
run: | run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive git -C "$GITHUB_WORKSPACE" submodule sync --recursive
@ -188,6 +190,8 @@ jobs:
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code - name: Check out repository code
uses: actions/checkout@v2 uses: actions/checkout@v2
with:
fetch-depth: 0 # For a proper version and performance artifacts
- name: Build - name: Build
run: | run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive git -C "$GITHUB_WORKSPACE" submodule sync --recursive
@ -346,6 +350,36 @@ jobs:
docker rm -f $(docker ps -a -q) ||: docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################ ############################################################################################
##################################### Docker images #######################################
############################################################################################
# CI job that checks the server and keeper Docker images still build.
# Both docker_server.py invocations pass --no-push, so nothing is published from this job.
# It waits for the two deb builder jobs listed in `needs`
# (presumably because the images are assembled from their packages; verify against docker_server.py).
DockerServerImages:
needs:
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
# Start from an empty workspace so leftovers from earlier runs on this self-hosted runner cannot leak in.
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
# First call builds clickhouse/clickhouse-server; the second builds clickhouse/clickhouse-keeper
# from docker/keeper with --no-ubuntu.
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push
python3 docker_server.py --release-type head --no-push --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
# Runs even when earlier steps fail (`if: always()`); `||:` keeps the step green when no containers exist.
# NOTE(review): TEMP_PATH is not set anywhere in the visible part of this job - confirm it is
# defined at workflow level, otherwise the final rm is a no-op.
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################
##################################### BUILD REPORTER ####################################### ##################################### BUILD REPORTER #######################################
############################################################################################ ############################################################################################
BuilderReport: BuilderReport:
@ -556,6 +590,7 @@ jobs:
FinishCheck: FinishCheck:
needs: needs:
- DockerHubPush - DockerHubPush
- DockerServerImages
- BuilderReport - BuilderReport
- FunctionalStatelessTestAsan - FunctionalStatelessTestAsan
- FunctionalStatefulTestDebug - FunctionalStatefulTestDebug

View File

@ -643,7 +643,7 @@ jobs:
# shellcheck disable=SC2046 # shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||: docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinTidy: BuilderBinClangTidy:
needs: [DockerHubPush] needs: [DockerHubPush]
runs-on: [self-hosted, builder] runs-on: [self-hosted, builder]
steps: steps:
@ -1011,7 +1011,7 @@ jobs:
- BuilderBinFreeBSD - BuilderBinFreeBSD
# - BuilderBinGCC # - BuilderBinGCC
- BuilderBinPPC64 - BuilderBinPPC64
- BuilderBinTidy - BuilderBinClangTidy
- BuilderDebSplitted - BuilderDebSplitted
runs-on: [self-hosted, style-checker] runs-on: [self-hosted, style-checker]
steps: steps:

View File

@ -707,7 +707,7 @@ jobs:
# shellcheck disable=SC2046 # shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||: docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinTidy: BuilderBinClangTidy:
needs: [DockerHubPush, FastTest] needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder] runs-on: [self-hosted, builder]
steps: steps:
@ -1065,7 +1065,7 @@ jobs:
- BuilderBinFreeBSD - BuilderBinFreeBSD
# - BuilderBinGCC # - BuilderBinGCC
- BuilderBinPPC64 - BuilderBinPPC64
- BuilderBinTidy - BuilderBinClangTidy
- BuilderDebSplitted - BuilderDebSplitted
runs-on: [self-hosted, style-checker] runs-on: [self-hosted, style-checker]
if: ${{ success() || failure() }} if: ${{ success() || failure() }}

View File

@ -21,6 +21,9 @@ jobs:
EOF EOF
- name: Check out repository code - name: Check out repository code
uses: actions/checkout@v2 uses: actions/checkout@v2
with:
# Always use the most recent script version
ref: master
- name: Download packages and push to Artifactory - name: Download packages and push to Artifactory
run: | run: |
rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH" rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH"

View File

@ -427,6 +427,36 @@ jobs:
docker rm -f $(docker ps -a -q) ||: docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################ ############################################################################################
##################################### Docker images #######################################
############################################################################################
# CI job that checks the server and keeper Docker images still build.
# Both docker_server.py invocations pass --no-push, so nothing is published from this job.
# It waits for the two deb builder jobs listed in `needs`
# (presumably because the images are assembled from their packages; verify against docker_server.py).
DockerServerImages:
needs:
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
# Start from an empty workspace so leftovers from earlier runs on this self-hosted runner cannot leak in.
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
# First call builds clickhouse/clickhouse-server; the second builds clickhouse/clickhouse-keeper
# from docker/keeper with --no-ubuntu.
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push
python3 docker_server.py --release-type head --no-push --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
# Runs even when earlier steps fail (`if: always()`); `||:` keeps the step green when no containers exist.
# NOTE(review): TEMP_PATH is not set anywhere in the visible part of this job - confirm it is
# defined at workflow level, otherwise the final rm is a no-op.
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################
##################################### BUILD REPORTER ####################################### ##################################### BUILD REPORTER #######################################
############################################################################################ ############################################################################################
BuilderReport: BuilderReport:
@ -1815,6 +1845,7 @@ jobs:
FinishCheck: FinishCheck:
needs: needs:
- DockerHubPush - DockerHubPush
- DockerServerImages
- BuilderReport - BuilderReport
- FunctionalStatelessTestDebug0 - FunctionalStatelessTestDebug0
- FunctionalStatelessTestDebug1 - FunctionalStatelessTestDebug1

15
.gitmodules vendored
View File

@ -86,9 +86,6 @@
[submodule "contrib/h3"] [submodule "contrib/h3"]
path = contrib/h3 path = contrib/h3
url = https://github.com/ClickHouse/h3 url = https://github.com/ClickHouse/h3
[submodule "contrib/hyperscan"]
path = contrib/hyperscan
url = https://github.com/ClickHouse/hyperscan.git
[submodule "contrib/libunwind"] [submodule "contrib/libunwind"]
path = contrib/libunwind path = contrib/libunwind
url = https://github.com/ClickHouse/libunwind.git url = https://github.com/ClickHouse/libunwind.git
@ -268,3 +265,15 @@
[submodule "contrib/hashidsxx"] [submodule "contrib/hashidsxx"]
path = contrib/hashidsxx path = contrib/hashidsxx
url = https://github.com/schoentoon/hashidsxx.git url = https://github.com/schoentoon/hashidsxx.git
[submodule "contrib/nats-io"]
path = contrib/nats-io
url = https://github.com/ClickHouse/nats.c.git
[submodule "contrib/vectorscan"]
path = contrib/vectorscan
url = https://github.com/VectorCamp/vectorscan.git
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing.git
[submodule "contrib/base-x"]
path = contrib/base-x
url = https://github.com/ClickHouse/base-x.git

View File

@ -34,7 +34,6 @@
* Add two new settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines` to allow skipping specified number of lines in the beginning of the file in CSV/TSV formats. [#37537](https://github.com/ClickHouse/ClickHouse/pull/37537) ([Kruglov Pavel](https://github.com/Avogar)). * Add two new settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines` to allow skipping specified number of lines in the beginning of the file in CSV/TSV formats. [#37537](https://github.com/ClickHouse/ClickHouse/pull/37537) ([Kruglov Pavel](https://github.com/Avogar)).
* `showCertificate` function shows current server's SSL certificate. [#37540](https://github.com/ClickHouse/ClickHouse/pull/37540) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * `showCertificate` function shows current server's SSL certificate. [#37540](https://github.com/ClickHouse/ClickHouse/pull/37540) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* HTTP source for Data Dictionaries in Named Collections is supported. [#37581](https://github.com/ClickHouse/ClickHouse/pull/37581) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * HTTP source for Data Dictionaries in Named Collections is supported. [#37581](https://github.com/ClickHouse/ClickHouse/pull/37581) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Added a new window function `nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL x SECOND])`. [#37628](https://github.com/ClickHouse/ClickHouse/pull/37628) ([Andrey Zvonov](https://github.com/zvonand)).
* Implemented changing the comment for `ReplicatedMergeTree` tables. [#37416](https://github.com/ClickHouse/ClickHouse/pull/37416) ([Vasily Nemkov](https://github.com/Enmk)). * Implemented changing the comment for `ReplicatedMergeTree` tables. [#37416](https://github.com/ClickHouse/ClickHouse/pull/37416) ([Vasily Nemkov](https://github.com/Enmk)).
* Added `SYSTEM UNFREEZE` query that deletes the whole backup regardless if the corresponding table is deleted or not. [#36424](https://github.com/ClickHouse/ClickHouse/pull/36424) ([Vadim Volodin](https://github.com/PolyProgrammist)). * Added `SYSTEM UNFREEZE` query that deletes the whole backup regardless if the corresponding table is deleted or not. [#36424](https://github.com/ClickHouse/ClickHouse/pull/36424) ([Vadim Volodin](https://github.com/PolyProgrammist)).

View File

@ -223,11 +223,25 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
endif () endif ()
endif() endif()
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT ON)
else()
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT OFF)
endif()
# Provides faster linking and lower binary size.
# Tradeoff is the inability to debug some source files with e.g. gdb
# (empty stack frames and no local variables).
option(OMIT_HEAVY_DEBUG_SYMBOLS
"Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)"
${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT})
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
set(USE_DEBUG_HELPERS ON) set(USE_DEBUG_HELPERS ON)
endif() endif()
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS}) option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF) option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF)
if (NOT BUILD_STANDALONE_KEEPER) if (NOT BUILD_STANDALONE_KEEPER)
option(CREATE_KEEPER_SYMLINK "Create symlink for clickhouse-keeper to main server binary" ON) option(CREATE_KEEPER_SYMLINK "Create symlink for clickhouse-keeper to main server binary" ON)
@ -252,10 +266,10 @@ else ()
endif () endif ()
# Optionally split binaries and debug symbols. # Optionally split binaries and debug symbols.
option(INSTALL_STRIPPED_BINARIES "Split binaries and debug symbols" OFF) option(SPLIT_DEBUG_SYMBOLS "Split binaries and debug symbols" OFF)
if (INSTALL_STRIPPED_BINARIES) if (SPLIT_DEBUG_SYMBOLS)
message(STATUS "Will split binaries and debug symbols") message(STATUS "Will split binaries and debug symbols")
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information") set(SPLITTED_DEBUG_SYMBOLS_DIR "stripped" CACHE STRING "A separate directory for stripped information")
endif() endif()
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd

View File

@ -62,9 +62,10 @@ execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH)
# By default, prefer clang on Linux # By default, prefer clang on Linux
# But note, that you still may change the compiler with -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER. # But note, that you still may change the compiler with -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER.
if (OS MATCHES "Linux" if (OS MATCHES "Linux"
# some build systems may use CC/CXX env variables
AND "$ENV{CC}" STREQUAL "" AND "$ENV{CC}" STREQUAL ""
AND "$ENV{CXX}" STREQUAL "") AND "$ENV{CXX}" STREQUAL ""
AND NOT DEFINED CMAKE_C_COMPILER
AND NOT DEFINED CMAKE_CXX_COMPILER)
find_program(CLANG_PATH clang) find_program(CLANG_PATH clang)
if (CLANG_PATH) if (CLANG_PATH)
set(CMAKE_C_COMPILER "clang" CACHE INTERNAL "") set(CMAKE_C_COMPILER "clang" CACHE INTERNAL "")
@ -87,8 +88,7 @@ if (OS MATCHES "Linux"
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-aarch64.cmake" CACHE INTERNAL "") set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-aarch64.cmake" CACHE INTERNAL "")
elseif (ARCH MATCHES "^(ppc64le.*|PPC64LE.*)") elseif (ARCH MATCHES "^(ppc64le.*|PPC64LE.*)")
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-ppc64le.cmake" CACHE INTERNAL "") set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-ppc64le.cmake" CACHE INTERNAL "")
else () else ()
message (FATAL_ERROR "Unsupported architecture: ${ARCH}") message (FATAL_ERROR "Unsupported architecture: ${ARCH}")
endif () endif ()
endif() endif()

View File

@ -15,4 +15,8 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any. * [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any.
## Upcoming events ## Upcoming events
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/286304312/) Please join us for an evening of talks (in English), food and discussion. Featuring talks of ClickHouse in production and at least one on the deep internals of ClickHouse itself. * [v22.7 Release Webinar](https://clickhouse.com/company/events/v22-7-release-webinar/) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
* [ClickHouse Meetup at the Cloudflare office in London](https://www.meetup.com/clickhouse-london-user-group/events/286891586/) ClickHouse meetup at the Cloudflare office space in central London
* [ClickHouse Meetup at the Metoda office in Munich](https://www.meetup.com/clickhouse-meetup-munich/events/286891667/) ClickHouse meetup at the Metoda office in Munich

View File

@ -37,7 +37,7 @@ The following versions of ClickHouse server are currently being supported with s
We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers. We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers.
To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement.
### When Should I Report a Vulnerability? ### When Should I Report a Vulnerability?

View File

@ -89,7 +89,7 @@ public:
inline void returnObject(T && object_to_return) inline void returnObject(T && object_to_return)
{ {
{ {
std::lock_guard<std::mutex> lock(objects_mutex); std::lock_guard lock(objects_mutex);
objects.emplace_back(std::move(object_to_return)); objects.emplace_back(std::move(object_to_return));
--borrowed_objects_size; --borrowed_objects_size;
@ -107,14 +107,14 @@ public:
/// Allocated objects size by the pool. If allocatedObjectsSize == maxSize then pool is full. /// Allocated objects size by the pool. If allocatedObjectsSize == maxSize then pool is full.
inline size_t allocatedObjectsSize() const inline size_t allocatedObjectsSize() const
{ {
std::unique_lock<std::mutex> lock(objects_mutex); std::lock_guard lock(objects_mutex);
return allocated_objects_size; return allocated_objects_size;
} }
/// Returns allocatedObjectsSize == maxSize /// Returns allocatedObjectsSize == maxSize
inline bool isFull() const inline bool isFull() const
{ {
std::unique_lock<std::mutex> lock(objects_mutex); std::lock_guard lock(objects_mutex);
return allocated_objects_size == max_size; return allocated_objects_size == max_size;
} }
@ -122,7 +122,7 @@ public:
/// Then client will wait during borrowObject function call. /// Then client will wait during borrowObject function call.
inline size_t borrowedObjectsSize() const inline size_t borrowedObjectsSize() const
{ {
std::unique_lock<std::mutex> lock(objects_mutex); std::lock_guard lock(objects_mutex);
return borrowed_objects_size; return borrowed_objects_size;
} }

View File

@ -4,7 +4,7 @@
namespace Poco::Util namespace Poco::Util
{ {
class LayeredConfiguration; class LayeredConfiguration; // NOLINT(cppcoreguidelines-virtual-class-destructor)
} }
/// Import extra command line arguments to configuration. These are command line arguments after --. /// Import extra command line arguments to configuration. These are command line arguments after --.

View File

@ -93,6 +93,7 @@
# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) # define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) # define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined"))) # define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
# define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation))
#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. #else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
# define NO_SANITIZE_UNDEFINED # define NO_SANITIZE_UNDEFINED
# define NO_SANITIZE_ADDRESS # define NO_SANITIZE_ADDRESS
@ -124,21 +125,37 @@
#endif #endif
#endif #endif
// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers. /// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader /// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader
#if defined(__clang__) #if defined(__clang__)
# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) // data is protected by given capability # define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability
# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) // pointed-to data is protected by the given capability # define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability
# define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) // thread needs exclusive possession of given capability # define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability
# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) // thread needs shared possession of given capability # define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability
# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) // annotated lock must be locked after given lock # define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock
# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) // disable TSA for a function # define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function
/// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function)
/// Consider adding a comment before using these macros.
# define TSA_SUPPRESS_WARNING_FOR_READ(x) [&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) [&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()
/// This macro is useful when only one thread writes to a member
/// and you want to read this member from the same thread without locking a mutex.
/// It's safe (because no concurrent writes are possible), but TSA generates a warning.
/// (Seems like there's no way to verify it, but it makes sense to distinguish it from TSA_SUPPRESS_WARNING_FOR_READ for readability)
# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x)
#else #else
# define TSA_GUARDED_BY(...) # define TSA_GUARDED_BY(...)
# define TSA_PT_GUARDED_BY(...) # define TSA_PT_GUARDED_BY(...)
# define TSA_REQUIRES(...) # define TSA_REQUIRES(...)
# define TSA_REQUIRES_SHARED(...) # define TSA_REQUIRES_SHARED(...)
# define TSA_NO_THREAD_SAFETY_ANALYSIS # define TSA_NO_THREAD_SAFETY_ANALYSIS
/// Fallbacks for compilers without Thread Safety Analysis.
/// The attribute-like macros expand to nothing, but the expression wrappers below must still
/// evaluate to their argument: an empty expansion would silently drop the wrapped read/write
/// (e.g. `auto v = TSA_SUPPRESS_WARNING_FOR_READ(member);` would not compile).
# define TSA_ACQUIRED_AFTER(...)
# define TSA_SUPPRESS_WARNING_FOR_READ(x) (x)
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x)
# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x)
#endif #endif
/// A template function for suppressing warnings about unused variables or function results. /// A template function for suppressing warnings about unused variables or function results.

View File

@ -27,6 +27,6 @@ struct FreeingDeleter
} }
}; };
typedef std::unique_ptr<char, FreeingDeleter> DemangleResult; using DemangleResult = std::unique_ptr<char, FreeingDeleter>;
DemangleResult tryDemangle(const char * name); DemangleResult tryDemangle(const char * name);

View File

@ -23,10 +23,10 @@ public:
constexpr StrongTypedef(): t() {} constexpr StrongTypedef(): t() {}
constexpr StrongTypedef(const Self &) = default; constexpr StrongTypedef(const Self &) = default;
constexpr StrongTypedef(Self &&) = default; constexpr StrongTypedef(Self &&) noexcept(std::is_nothrow_move_constructible_v<T>) = default;
Self & operator=(const Self &) = default; Self & operator=(const Self &) = default;
Self & operator=(Self &&) = default; Self & operator=(Self &&) noexcept(std::is_nothrow_move_assignable_v<T>)= default;
template <class Enable = typename std::is_copy_assignable<T>::type> template <class Enable = typename std::is_copy_assignable<T>::type>
Self & operator=(const T & rhs) { t = rhs; return *this;} Self & operator=(const T & rhs) { t = rhs; return *this;}

View File

@ -1,6 +1,6 @@
#pragma once #pragma once
#include <time.h> #include <ctime>
#if defined (OS_DARWIN) || defined (OS_SUNOS) #if defined (OS_DARWIN) || defined (OS_SUNOS)
# define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC # define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC

View File

@ -1,6 +1,6 @@
#pragma once #pragma once
#include <string.h> #include <cstring>
#include <type_traits> #include <type_traits>

View File

@ -27,6 +27,8 @@
#include <type_traits> #include <type_traits>
#include <initializer_list> #include <initializer_list>
// NOLINTBEGIN(*)
namespace wide namespace wide
{ {
template <size_t Bits, typename Signed> template <size_t Bits, typename Signed>
@ -257,4 +259,7 @@ struct hash<wide::integer<Bits, Signed>>;
} }
// NOLINTEND(*)
#include "wide_integer_impl.h" #include "wide_integer_impl.h"

View File

@ -15,6 +15,8 @@
#include <boost/multiprecision/cpp_bin_float.hpp> #include <boost/multiprecision/cpp_bin_float.hpp>
#include <boost/math/special_functions/fpclassify.hpp> #include <boost/math/special_functions/fpclassify.hpp>
// NOLINTBEGIN(*)
/// Use same extended double for all platforms /// Use same extended double for all platforms
#if (LDBL_MANT_DIG == 64) #if (LDBL_MANT_DIG == 64)
#define CONSTEXPR_FROM_DOUBLE constexpr #define CONSTEXPR_FROM_DOUBLE constexpr
@ -1478,3 +1480,5 @@ struct hash<wide::integer<Bits, Signed>>
}; };
} }
// NOLINTEND(*)

View File

@ -90,6 +90,7 @@
#define PCG_EMULATED_128BIT_MATH 1 #define PCG_EMULATED_128BIT_MATH 1
#endif #endif
// NOLINTBEGIN(*)
namespace pcg_extras { namespace pcg_extras {
@ -552,4 +553,6 @@ std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
} // namespace pcg_extras } // namespace pcg_extras
// NOLINTEND(*)
#endif // PCG_EXTRAS_HPP_INCLUDED #endif // PCG_EXTRAS_HPP_INCLUDED

View File

@ -113,6 +113,8 @@
#include "pcg_extras.hpp" #include "pcg_extras.hpp"
// NOLINTBEGIN(*)
namespace DB namespace DB
{ {
struct PcgSerializer; struct PcgSerializer;
@ -1777,4 +1779,6 @@ typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14,32,true> pcg32_k16384_fast;
#pragma warning(default:4146) #pragma warning(default:4146)
#endif #endif
// NOLINTEND(*)
#endif // PCG_RAND_HPP_INCLUDED #endif // PCG_RAND_HPP_INCLUDED

View File

@ -16,6 +16,8 @@
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
// NOLINTBEGIN(*)
/* Special width values */ /* Special width values */
enum { enum {
widechar_nonprint = -1, // The character is not printable. widechar_nonprint = -1, // The character is not printable.
@ -518,4 +520,6 @@ inline int widechar_wcwidth(wchar_t c) {
return 1; return 1;
} }
// NOLINTEND(*)
#endif // WIDECHAR_WIDTH_H #endif // WIDECHAR_WIDTH_H

View File

@ -1,180 +0,0 @@
#!/usr/bin/env bash
# script to run query to databases
# Print the command-line help to stdout.
# Every option accepted by the getopts loops below is listed here, including -r.
function usage()
{
cat <<EOF
usage: $0 options
This script runs a benchmark for a database
OPTIONS:
-c config file where some script variables are defined
-n table name
-h Show this message
-t how many times execute each query. default is '3'
-q query file
-e expect file
-s /etc/init.d/service
-p table name pattern to be replaced to name. default is 'hits_10m'
-r restart the server before each query (only when the -s service file exists)
EOF
}
# Defaults; may be overridden by the config file (-c) or by explicit options.
TIMES=3
table_name_pattern=hits_10m

# Option string shared by both passes. It uses plain ASCII quotes (typographic quotes are
# literal characters to bash) and includes "p:" so that -p <pattern> is actually accepted.
optstring="c:ht:n:q:e:s:p:r"

# First pass: only source the config file, so that explicit command-line options
# parsed in the second pass take precedence over values it defines.
while getopts "$optstring" OPTION
do
    case $OPTION in
        c)
            source "$OPTARG"
            ;;
        ?)
            ;;
    esac
done

# Rewind getopts and parse the real options.
OPTIND=1
while getopts "$optstring" OPTION
do
    case $OPTION in
        h)
            usage
            exit 0
            ;;
        t)
            TIMES=$OPTARG
            ;;
        n)
            table_name=$OPTARG
            ;;
        q)
            test_file=$OPTARG
            ;;
        e)
            expect_file=$OPTARG
            ;;
        s)
            etc_init_d_service=$OPTARG
            ;;
        p)
            table_name_pattern=$OPTARG
            ;;
        c)
            # already handled in the first pass
            ;;
        r)
            restart_server_each_query=1
            ;;
        ?)
            # unknown option or missing argument
            usage
            exit 0
            ;;
    esac
done
# The expect driver and the query file are mandatory: abort if either is missing.
if ! [[ -f $expect_file ]]; then
    echo "Not found: expect file"
    exit 1
fi

if ! [[ -f $test_file ]]; then
    echo "Not found: test file"
    exit 1
fi

# The init script is optional; without it the server is never restarted or probed.
if [[ -f $etc_init_d_service ]]; then
    use_service=1
else
    echo "Not found: /etc/init.d/service with path=$etc_init_d_service"
    use_service=0
fi

# Both sides of the table-name substitution must be non-empty.
if [[ -z "$table_name_pattern" ]]; then
    echo "Empty table_name_pattern"
    exit 1
fi

if [[ -z "$table_name" ]]; then
    echo "Empty table_name"
    exit 1
fi
# Run every query passed as an argument through the expect script, TIMES times each.
# Arguments: the list of query lines (one query per argument).
# Reads the globals TIMES, expect_file, etc_init_d_service, use_service and
# restart_server_each_query that are set by the option parsing earlier in the script.
function execute()
{
queries=("${@}")
queries_count=${#queries[@]}
# Fall back to a single run if TIMES was emptied (e.g. by the config file).
if [ -z $TIMES ]; then
TIMES=1
fi
index=0
while [ "$index" -lt "$queries_count" ]; do
query=${queries[$index]}
# Skip blank lines.
if [[ $query == "" ]]; then
let "index = $index + 1"
continue
fi
# The regex is unanchored, so any line containing "--" is treated as a comment:
# it is echoed and not executed.
comment_re='--.*'
if [[ $query =~ $comment_re ]]; then
echo "$query"
echo
else
# Flush dirty pages and drop the kernel page cache before the runs of this query.
sync
sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"
if [[ "$restart_server_each_query" == "1" && "$use_service" == "1" ]]; then
echo "restart server: $etc_init_d_service restart"
sudo $etc_init_d_service restart
fi
for i in $(seq $TIMES)
do
if [[ -f $etc_init_d_service && "$use_service" == "1" ]]; then
# Health check: the service status plus an expect run with an empty query.
# server_status must be captured immediately, because the next command overwrites $?.
sudo $etc_init_d_service status
server_status=$?
expect -f $expect_file ""
if [[ "$?" != "0" || $server_status != "0" ]]; then
echo "restart server: $etc_init_d_service restart"
sudo $etc_init_d_service restart
fi
# Wait until the server accepts connections: probe once per second and
# restart the service again after more than restart_limit failed probes.
restart_timer=0
restart_limit=60
expect -f $expect_file "" &> /dev/null
# The loop condition tests the exit status of the expect probe that ran just before it
# (the one above on entry, the one at the end of the body afterwards).
while [ "$?" != "0" ]; do
echo "waiting"
sleep 1
let "restart_timer = $restart_timer + 1"
if (( $restart_limit < $restart_timer )); then
sudo $etc_init_d_service restart
restart_timer=0
fi
expect -f $expect_file "" &> /dev/null
done
fi
echo
echo "times: $i"
echo "query:" "$query"
expect -f $expect_file "$query"
done
fi
let "index = $index + 1"
done
}
# Replace the placeholder table name in every query, load the result one query per line,
# and run the whole set under `time`.
# All expansions are quoted so that file names or table names containing spaces or glob
# characters are not word-split; sed reads the query file directly instead of via `cat`.
temp_test_file="temp_queries_$table_name"
sed "s/$table_name_pattern/$table_name/g" "$test_file" > "$temp_test_file"
mapfile -t test_queries < "$temp_test_file"
echo "start time: $(date)"
time execute "${test_queries[@]}"
echo "stop time: $(date)"

View File

@ -1,3 +0,0 @@
#!/usr/bin/env bash
# Dumps the hits_10m table in random row order (ORDER BY rand()) to
# /opt/dumps/hits_10m_corrected.tsv and reports the wall time of the pipeline.
#   * WatchID, UserID, FUniqID, RefererHash and URLHash are cast with toInt64().
#   * EventTime, EventDate, ClientEventTime and LocalEventTime are replaced by a
#     value at the start of 1971 when they are earlier than that.
#   * --max_bytes_before_external_sort caps in-memory sorting at 30000000000
#     bytes before the sort spills to external storage.
#   * The client output is filtered through corrector_utf8, an external tool not
#     shown here (presumably repairs invalid UTF-8 -- TODO confirm).
table=hits_10m; time clickhouse-client --max_bytes_before_external_sort=30000000000 --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, (EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID FROM $table ORDER BY rand()" | corrector_utf8 > /opt/dumps/${table}_corrected.tsv

View File

@ -1,107 +1,25 @@
#!/bin/bash -e #!/bin/bash -e
if [[ -n $1 ]]; then
SCALE=$1
else
SCALE=100
fi
TABLE="hits_${SCALE}m_obfuscated"
DATASET="${TABLE}_v1.tar.xz"
QUERIES_FILE="queries.sql" QUERIES_FILE="queries.sql"
TRIES=3 TRIES=3
# Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'. mkdir -p clickhouse-benchmark
pushd clickhouse-benchmark
FASTER_DOWNLOAD=wget # Download the binary
if command -v axel >/dev/null; then if [[ ! -x clickhouse ]]; then
FASTER_DOWNLOAD=axel curl https://clickhouse.com/ | sh
else
echo "It's recommended to install 'axel' for faster downloads."
fi fi
if command -v pixz >/dev/null; then
TAR_PARAMS='-Ipixz'
else
echo "It's recommended to install 'pixz' for faster decompression of the dataset."
fi
mkdir -p clickhouse-benchmark-$SCALE
pushd clickhouse-benchmark-$SCALE
OS=$(uname -s)
ARCH=$(uname -m)
DIR=
if [ "${OS}" = "Linux" ]
then
if [ "${ARCH}" = "x86_64" ]
then
DIR="amd64"
elif [ "${ARCH}" = "aarch64" ]
then
DIR="aarch64"
elif [ "${ARCH}" = "powerpc64le" ]
then
DIR="powerpc64le"
fi
elif [ "${OS}" = "FreeBSD" ]
then
if [ "${ARCH}" = "x86_64" ]
then
DIR="freebsd"
elif [ "${ARCH}" = "aarch64" ]
then
DIR="freebsd-aarch64"
elif [ "${ARCH}" = "powerpc64le" ]
then
DIR="freebsd-powerpc64le"
fi
elif [ "${OS}" = "Darwin" ]
then
if [ "${ARCH}" = "x86_64" ]
then
DIR="macos"
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
then
DIR="macos-aarch64"
fi
fi
if [ -z "${DIR}" ]
then
echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported."
exit 1
fi
URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo
echo "Will download ${URL}"
echo
curl -O "${URL}" && chmod a+x clickhouse || exit 1
echo
echo "Successfully downloaded the ClickHouse binary"
chmod a+x clickhouse
if [[ ! -f $QUERIES_FILE ]]; then if [[ ! -f $QUERIES_FILE ]]; then
wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/$QUERIES_FILE" wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/$QUERIES_FILE"
fi fi
if [[ ! -d data ]]; then
if [[ ! -f $DATASET ]]; then
$FASTER_DOWNLOAD "https://datasets.clickhouse.com/hits/partitions/$DATASET"
fi
tar $TAR_PARAMS --strip-components=1 --directory=. -x -v -f $DATASET
fi
uptime uptime
echo "Starting clickhouse-server" echo "Starting clickhouse-server"
./clickhouse server > server.log 2>&1 & ./clickhouse server >/dev/null 2>&1 &
PID=$! PID=$!
function finish { function finish {
@ -114,18 +32,45 @@ echo "Waiting for clickhouse-server to start"
for i in {1..30}; do for i in {1..30}; do
sleep 1 sleep 1
./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM $TABLE" 2>/dev/null && break || echo '.' ./clickhouse client --query "SELECT 'Ok.'" 2>/dev/null && break || echo -n '.'
if [[ $i == 30 ]]; then exit 1; fi if [[ $i == 30 ]]; then exit 1; fi
done done
if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse client --query "SELECT count() FROM hits") == '100000000' ]]; then
echo "Dataset already downloaded"
else
echo "Will download the dataset"
if [ "`uname`" = "Darwin" ]
then
./clickhouse client --receive_timeout 1000 --max_insert_threads $(sysctl -n hw.ncpu) --progress --query "
CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime)
AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')"
else
./clickhouse client --receive_timeout 1000 --max_insert_threads $(nproc || 4) --progress --query "
CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime)
AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')"
fi
./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM hits"
fi
if [[ $(./clickhouse client --query "SELECT count() FROM system.parts WHERE table = 'hits' AND database = 'default' AND active") == '1' ]]; then
echo "Dataset already prepared"
else
echo "Will prepare the dataset"
./clickhouse client --receive_timeout 1000 --query "OPTIMIZE TABLE hits FINAL"
fi
echo echo
echo "Will perform benchmark. Results:" echo "Will perform benchmark. Results:"
echo echo
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do >result.csv
QUERY_NUM=1
cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do
sync sync
if [ "${OS}" = "Darwin" ] if [ "`uname`" = "Darwin" ]
then then
sudo purge > /dev/null sudo purge > /dev/null
else else
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
@ -133,11 +78,15 @@ cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
echo -n "[" echo -n "["
for i in $(seq 1 $TRIES); do for i in $(seq 1 $TRIES); do
RES=$(./clickhouse client --max_memory_usage 100G --time --format=Null --query="$query" 2>&1 ||:) RES=$(./clickhouse client --time --format=Null --query="$query" 2>&1 ||:)
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", " [[ "$i" != $TRIES ]] && echo -n ", "
echo "${QUERY_NUM},${i},${RES}" >> result.csv
done done
echo "]," echo "],"
QUERY_NUM=$((QUERY_NUM + 1))
done done
@ -145,22 +94,23 @@ echo
echo "Benchmark complete. System info:" echo "Benchmark complete. System info:"
echo echo
if [ "${OS}" = "Darwin" ] touch {cpu_model,cpu,df,memory,memory_total,blk,mdstat,instance}.txt
then
if [ "`uname`" = "Darwin" ]
then
echo '----Version, build id-----------' echo '----Version, build id-----------'
./clickhouse local --query "SELECT format('Version: {}', version())" ./clickhouse local --query "SELECT format('Version: {}', version())"
sw_vers | grep BuildVersion
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
echo '----CPU-------------------------' echo '----CPU-------------------------'
sysctl hw.model sysctl hw.model | tee cpu_model.txt
sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' | tee cpu.txt
echo '----Disk Free and Total--------' echo '----Disk Free and Total--------'
df -h . df -h . | tee df.txt
echo '----Memory Free and Total-------' echo '----Memory Free and Total-------'
vm_stat vm_stat | tee memory.txt
echo '----Physical Memory Amount------' echo '----Physical Memory Amount------'
ls -l /var/vm ls -l /var/vm | tee memory_total.txt
echo '--------------------------------' echo '--------------------------------'
else else
echo '----Version, build id-----------' echo '----Version, build id-----------'
@ -168,22 +118,130 @@ else
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
echo '----CPU-------------------------' echo '----CPU-------------------------'
cat /proc/cpuinfo | grep -i -F 'model name' | uniq cat /proc/cpuinfo | grep -i -F 'model name' | uniq | tee cpu_model.txt
lscpu lscpu | tee cpu.txt
echo '----Block Devices---------------' echo '----Block Devices---------------'
lsblk lsblk | tee blk.txt
echo '----Disk Free and Total--------' echo '----Disk Free and Total--------'
df -h . df -h . | tee df.txt
echo '----Memory Free and Total-------' echo '----Memory Free and Total-------'
free -h free -h | tee memory.txt
echo '----Physical Memory Amount------' echo '----Physical Memory Amount------'
cat /proc/meminfo | grep MemTotal cat /proc/meminfo | grep MemTotal | tee memory_total.txt
echo '----RAID Info-------------------' echo '----RAID Info-------------------'
cat /proc/mdstat cat /proc/mdstat| tee mdstat.txt
#echo '----PCI-------------------------'
#lspci
#echo '----All Hardware Info-----------'
#lshw
echo '--------------------------------' echo '--------------------------------'
fi fi
echo echo
echo "Instance type from IMDS (if available):"
curl -s --connect-timeout 1 'http://169.254.169.254/latest/meta-data/instance-type' | tee instance.txt
echo
echo "Uploading the results (if possible)"
UUID=$(./clickhouse local --query "SELECT generateUUIDv4()")
./clickhouse local --query "
SELECT
'${UUID}' AS run_id,
version() AS version,
now() AS test_time,
(SELECT value FROM system.settings WHERE name = 'max_threads') AS threads,
filesystemCapacity() AS fs_capacity,
filesystemAvailable() AS fs_available,
file('cpu_model.txt') AS cpu_model,
file('cpu.txt') AS cpu,
file('df.txt') AS df,
file('memory.txt') AS memory,
file('memory_total.txt') AS memory_total,
file('blk.txt') AS blk,
file('mdstat.txt') AS mdstat,
file('instance.txt') AS instance
" | tee meta.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query "
INSERT INTO benchmark_runs
(run_id, version, test_time, threads, fs_capacity, fs_available, cpu_model, cpu, df, memory, memory_total, blk, mdstat, instance)
FORMAT TSV" || echo "Cannot upload results."
./clickhouse local --query "
SELECT
'${UUID}' AS run_id,
c1 AS query_num,
c2 AS try_num,
c3 AS time
FROM file('result.csv')
" | tee results.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query "
INSERT INTO benchmark_results
(run_id, query_num, try_num, time)
FORMAT TSV" || echo "Cannot upload results. Please send the output to feedback@clickhouse.com"
<<////
Server Setup:
CREATE TABLE benchmark_runs
(
run_id UUID,
version String,
test_time DateTime,
threads String,
fs_capacity UInt64,
fs_available UInt64,
cpu_model String,
cpu String,
df String,
memory String,
memory_total String,
blk String,
mdstat String,
instance String
) ENGINE = ReplicatedMergeTree ORDER BY run_id;
CREATE TABLE benchmark_results
(
run_id UUID,
query_num UInt8,
try_num UInt8,
time Decimal32(3)
) ENGINE = ReplicatedMergeTree ORDER BY (run_id, query_num, try_num);
CREATE USER benchmark IDENTIFIED WITH no_password SETTINGS max_rows_to_read = 1, max_result_rows = 1, max_execution_time = 1;
CREATE QUOTA benchmark
KEYED BY ip_address
FOR RANDOMIZED INTERVAL 1 MINUTE MAX query_inserts = 4, written_bytes = 100000,
FOR RANDOMIZED INTERVAL 1 HOUR MAX query_inserts = 10, written_bytes = 500000,
FOR RANDOMIZED INTERVAL 1 DAY MAX query_inserts = 50, written_bytes = 2000000
TO benchmark;
GRANT INSERT ON benchmark_runs TO benchmark;
GRANT INSERT ON benchmark_results TO benchmark;
Example query:
SELECT
cpu_model,
threads,
instance,
k
FROM
(
SELECT
run_id,
exp(avg(log(adjusted_time / best_time))) AS k
FROM
(
WITH greatest(time, 0.01) AS adjusted_time
SELECT
run_id,
adjusted_time,
min(adjusted_time) OVER (PARTITION BY query_num, try_num) AS best_time
FROM benchmark_results
WHERE try_num > 1
)
GROUP BY run_id
ORDER BY k ASC
) AS t
INNER JOIN benchmark_runs USING (run_id)
////

View File

@ -1,4 +1,4 @@
macro(clickhouse_strip_binary) macro(clickhouse_split_debug_symbols)
set(oneValueArgs TARGET DESTINATION_DIR BINARY_PATH) set(oneValueArgs TARGET DESTINATION_DIR BINARY_PATH)
cmake_parse_arguments(STRIP "" "${oneValueArgs}" "" ${ARGN}) cmake_parse_arguments(STRIP "" "${oneValueArgs}" "" ${ARGN})

View File

@ -37,6 +37,8 @@ function(add_contrib cmake_folder)
file(GLOB contrib_files "${base_folder}/*") file(GLOB contrib_files "${base_folder}/*")
if (NOT contrib_files) if (NOT contrib_files)
# Checking out *all* submodules takes > 5 min. Therefore, the smoke build ("FastTest") in CI initializes only the set of
# submodules minimally needed for a build and we cannot assume here that all submodules are populated.
message(STATUS "submodule ${base_folder} is missing or empty. to fix try run:") message(STATUS "submodule ${base_folder} is missing or empty. to fix try run:")
message(STATUS " git submodule update --init") message(STATUS " git submodule update --init")
return() return()
@ -56,7 +58,7 @@ add_contrib (boost-cmake boost)
add_contrib (cctz-cmake cctz) add_contrib (cctz-cmake cctz)
add_contrib (consistent-hashing) add_contrib (consistent-hashing)
add_contrib (dragonbox-cmake dragonbox) add_contrib (dragonbox-cmake dragonbox)
add_contrib (hyperscan-cmake hyperscan) add_contrib (vectorscan-cmake vectorscan)
add_contrib (jemalloc-cmake jemalloc) add_contrib (jemalloc-cmake jemalloc)
add_contrib (libcpuid-cmake libcpuid) add_contrib (libcpuid-cmake libcpuid)
add_contrib (libdivide) add_contrib (libdivide)
@ -132,6 +134,7 @@ add_contrib (krb5-cmake krb5)
add_contrib (cyrus-sasl-cmake cyrus-sasl) # for krb5 add_contrib (cyrus-sasl-cmake cyrus-sasl) # for krb5
add_contrib (libgsasl-cmake libgsasl) # requires krb5 add_contrib (libgsasl-cmake libgsasl) # requires krb5
add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl
add_contrib (nats-io-cmake nats-io)
add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5 add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5
add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3 add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3
add_contrib (cppkafka-cmake cppkafka) add_contrib (cppkafka-cmake cppkafka)
@ -153,6 +156,7 @@ endif()
add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (sqlite-cmake sqlite-amalgamation)
add_contrib (s2geometry-cmake s2geometry) add_contrib (s2geometry-cmake s2geometry)
add_contrib (base-x-cmake base-x)
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs. # Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear

View File

@ -462,5 +462,7 @@ foreach (TOOL ${PARQUET_TOOLS})
endforeach () endforeach ()
# The library is large - avoid bloat. # The library is large - avoid bloat.
target_compile_options (_arrow PRIVATE -g0) if (OMIT_HEAVY_DEBUG_SYMBOLS)
target_compile_options (_parquet PRIVATE -g0) target_compile_options (_arrow PRIVATE -g0)
target_compile_options (_parquet PRIVATE -g0)
endif()

View File

@ -114,7 +114,9 @@ endif()
target_link_libraries(_aws_s3 PRIVATE _aws_s3_checksums) target_link_libraries(_aws_s3 PRIVATE _aws_s3_checksums)
# The library is large - avoid bloat. # The library is large - avoid bloat.
target_compile_options (_aws_s3 PRIVATE -g0) if (OMIT_HEAVY_DEBUG_SYMBOLS)
target_compile_options (_aws_s3_checksums PRIVATE -g0) target_compile_options (_aws_s3 PRIVATE -g0)
target_compile_options (_aws_s3_checksums PRIVATE -g0)
endif()
add_library(ch_contrib::aws_s3 ALIAS _aws_s3) add_library(ch_contrib::aws_s3 ALIAS _aws_s3)

1
contrib/base-x vendored Submodule

@ -0,0 +1 @@
Subproject commit a85f98fb4ed52c2f4029a4b6ac1ef0bafdfc56f5

View File

@ -0,0 +1,28 @@
# Build description for the base-x contrib library.
# The library is exposed as an INTERFACE target (_base-x, alias
# ch_contrib::base-x) that only contributes an include directory; nothing is
# compiled for it except the Xcode placeholder below.
option (ENABLE_BASEX "Enable base-x" ${ENABLE_LIBRARIES})

if (NOT ENABLE_BASEX)
    message(STATUS "Not using base-x")
    return()
endif()

# Single definition of the library location, reused for the include path.
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base-x")

add_library(_base-x INTERFACE)
target_include_directories(_base-x SYSTEM BEFORE INTERFACE "${LIBRARY_DIR}")

if (XCODE OR XCODE_VERSION)
    # https://gitlab.kitware.com/cmake/cmake/issues/17457
    # Some native build systems may not like targets that have only object files, so consider adding at least one real source file
    # This applies to Xcode.
    if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
        file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "")
    endif ()
    target_sources(_base-x PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
endif ()

add_library(ch_contrib::base-x ALIAS _base-x)

2
contrib/boringssl vendored

@ -1 +1 @@
Subproject commit 9c0715ce459de443e7b08f270a518c1702f1a380 Subproject commit c1e01a441d6db234f4f12e63a7657d1f9e6db9c1

View File

@ -159,14 +159,12 @@ set(
ios-aarch64/crypto/fipsmodule/sha512-armv8.S ios-aarch64/crypto/fipsmodule/sha512-armv8.S
ios-aarch64/crypto/fipsmodule/vpaes-armv8.S ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
ios-aarch64/crypto/test/trampoline-armv8.S ios-aarch64/crypto/test/trampoline-armv8.S
ios-aarch64/crypto/third_party/sike/asm/fp-armv8.S
) )
set( set(
CRYPTO_ios_arm_SOURCES CRYPTO_ios_arm_SOURCES
ios-arm/crypto/chacha/chacha-armv4.S ios-arm/crypto/chacha/chacha-armv4.S
ios-arm/crypto/fipsmodule/aes-armv4.S
ios-arm/crypto/fipsmodule/aesv8-armx32.S ios-arm/crypto/fipsmodule/aesv8-armx32.S
ios-arm/crypto/fipsmodule/armv4-mont.S ios-arm/crypto/fipsmodule/armv4-mont.S
ios-arm/crypto/fipsmodule/bsaes-armv7.S ios-arm/crypto/fipsmodule/bsaes-armv7.S
@ -192,14 +190,12 @@ set(
linux-aarch64/crypto/fipsmodule/sha512-armv8.S linux-aarch64/crypto/fipsmodule/sha512-armv8.S
linux-aarch64/crypto/fipsmodule/vpaes-armv8.S linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
linux-aarch64/crypto/test/trampoline-armv8.S linux-aarch64/crypto/test/trampoline-armv8.S
linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S
) )
set( set(
CRYPTO_linux_arm_SOURCES CRYPTO_linux_arm_SOURCES
linux-arm/crypto/chacha/chacha-armv4.S linux-arm/crypto/chacha/chacha-armv4.S
linux-arm/crypto/fipsmodule/aes-armv4.S
linux-arm/crypto/fipsmodule/aesv8-armx32.S linux-arm/crypto/fipsmodule/aesv8-armx32.S
linux-arm/crypto/fipsmodule/armv4-mont.S linux-arm/crypto/fipsmodule/armv4-mont.S
linux-arm/crypto/fipsmodule/bsaes-armv7.S linux-arm/crypto/fipsmodule/bsaes-armv7.S
@ -219,13 +215,13 @@ set(
linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
linux-ppc64le/crypto/test/trampoline-ppc.S
) )
set( set(
CRYPTO_linux_x86_SOURCES CRYPTO_linux_x86_SOURCES
linux-x86/crypto/chacha/chacha-x86.S linux-x86/crypto/chacha/chacha-x86.S
linux-x86/crypto/fipsmodule/aes-586.S
linux-x86/crypto/fipsmodule/aesni-x86.S linux-x86/crypto/fipsmodule/aesni-x86.S
linux-x86/crypto/fipsmodule/bn-586.S linux-x86/crypto/fipsmodule/bn-586.S
linux-x86/crypto/fipsmodule/co-586.S linux-x86/crypto/fipsmodule/co-586.S
@ -246,7 +242,6 @@ set(
linux-x86_64/crypto/chacha/chacha-x86_64.S linux-x86_64/crypto/chacha/chacha-x86_64.S
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
linux-x86_64/crypto/fipsmodule/aes-x86_64.S
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
@ -263,7 +258,6 @@ set(
linux-x86_64/crypto/fipsmodule/x86_64-mont.S linux-x86_64/crypto/fipsmodule/x86_64-mont.S
linux-x86_64/crypto/fipsmodule/x86_64-mont5.S linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
linux-x86_64/crypto/test/trampoline-x86_64.S linux-x86_64/crypto/test/trampoline-x86_64.S
linux-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S" "${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S"
) )
@ -271,7 +265,6 @@ set(
CRYPTO_mac_x86_SOURCES CRYPTO_mac_x86_SOURCES
mac-x86/crypto/chacha/chacha-x86.S mac-x86/crypto/chacha/chacha-x86.S
mac-x86/crypto/fipsmodule/aes-586.S
mac-x86/crypto/fipsmodule/aesni-x86.S mac-x86/crypto/fipsmodule/aesni-x86.S
mac-x86/crypto/fipsmodule/bn-586.S mac-x86/crypto/fipsmodule/bn-586.S
mac-x86/crypto/fipsmodule/co-586.S mac-x86/crypto/fipsmodule/co-586.S
@ -292,7 +285,6 @@ set(
mac-x86_64/crypto/chacha/chacha-x86_64.S mac-x86_64/crypto/chacha/chacha-x86_64.S
mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
mac-x86_64/crypto/fipsmodule/aes-x86_64.S
mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
mac-x86_64/crypto/fipsmodule/aesni-x86_64.S mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
@ -309,7 +301,6 @@ set(
mac-x86_64/crypto/fipsmodule/x86_64-mont.S mac-x86_64/crypto/fipsmodule/x86_64-mont.S
mac-x86_64/crypto/fipsmodule/x86_64-mont5.S mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
mac-x86_64/crypto/test/trampoline-x86_64.S mac-x86_64/crypto/test/trampoline-x86_64.S
mac-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
) )
set( set(
@ -331,7 +322,6 @@ set(
CRYPTO_win_x86_SOURCES CRYPTO_win_x86_SOURCES
win-x86/crypto/chacha/chacha-x86.asm win-x86/crypto/chacha/chacha-x86.asm
win-x86/crypto/fipsmodule/aes-586.asm
win-x86/crypto/fipsmodule/aesni-x86.asm win-x86/crypto/fipsmodule/aesni-x86.asm
win-x86/crypto/fipsmodule/bn-586.asm win-x86/crypto/fipsmodule/bn-586.asm
win-x86/crypto/fipsmodule/co-586.asm win-x86/crypto/fipsmodule/co-586.asm
@ -352,7 +342,6 @@ set(
win-x86_64/crypto/chacha/chacha-x86_64.asm win-x86_64/crypto/chacha/chacha-x86_64.asm
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
win-x86_64/crypto/fipsmodule/aes-x86_64.asm
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
win-x86_64/crypto/fipsmodule/aesni-x86_64.asm win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
@ -369,7 +358,6 @@ set(
win-x86_64/crypto/fipsmodule/x86_64-mont.asm win-x86_64/crypto/fipsmodule/x86_64-mont.asm
win-x86_64/crypto/fipsmodule/x86_64-mont5.asm win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
win-x86_64/crypto/test/trampoline-x86_64.asm win-x86_64/crypto/test/trampoline-x86_64.asm
win-x86_64/crypto/third_party/sike/asm/fp-x86_64.asm
) )
if(APPLE AND ARCH STREQUAL "aarch64") if(APPLE AND ARCH STREQUAL "aarch64")
@ -401,6 +389,7 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c"
@ -430,6 +419,7 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c" "${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c" "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c" "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c"
"${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c" "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c" "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c"
"${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c" "${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c"
@ -454,20 +444,22 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c" "${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c"
"${BORINGSSL_SOURCE_DIR}/crypto/crypto.c" "${BORINGSSL_SOURCE_DIR}/crypto/crypto.c"
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c"
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c" "${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/check.c" "${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/dh.c" "${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/dh_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh/params.c"
"${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c" "${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c" "${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c" "${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c" "${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c" "${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c" "${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c" "${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c"
@ -492,8 +484,8 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c" "${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c" "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c" "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c" "${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c" "${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c"
"${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c" "${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c"
"${BORINGSSL_SOURCE_DIR}/crypto/mem.c" "${BORINGSSL_SOURCE_DIR}/crypto/mem.c"
@ -519,6 +511,7 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c" "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c" "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c" "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c" "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c" "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c" "${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c"
@ -532,15 +525,18 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c" "${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c"
"${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c" "${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c"
"${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c" "${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c"
@ -606,19 +602,11 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pku.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_sxnet.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c"
"${BORINGSSL_SOURCE_DIR}/third_party/fiat/curve25519.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/asm/fp_generic.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/curve_params.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/fpx.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/isogeny.c"
"${BORINGSSL_SOURCE_DIR}/third_party/sike/sike.c"
) )
add_library( add_library(
@ -631,6 +619,8 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc" "${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc" "${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc" "${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc"
@ -653,7 +643,6 @@ add_library(
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc" "${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc" "${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc" "${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc"
@ -674,7 +663,9 @@ add_executable(
"${BORINGSSL_SOURCE_DIR}/tool/client.cc" "${BORINGSSL_SOURCE_DIR}/tool/client.cc"
"${BORINGSSL_SOURCE_DIR}/tool/const.cc" "${BORINGSSL_SOURCE_DIR}/tool/const.cc"
"${BORINGSSL_SOURCE_DIR}/tool/digest.cc" "${BORINGSSL_SOURCE_DIR}/tool/digest.cc"
"${BORINGSSL_SOURCE_DIR}/tool/fd.cc"
"${BORINGSSL_SOURCE_DIR}/tool/file.cc" "${BORINGSSL_SOURCE_DIR}/tool/file.cc"
"${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc"
"${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc" "${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc"
"${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc" "${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc"
"${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc" "${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc"

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -33,6 +33,7 @@ Lone:
.align 5 .align 5
_ChaCha20_ctr32: _ChaCha20_ctr32:
AARCH64_VALID_CALL_TARGET
cbz x2,Labort cbz x2,Labort
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x5,:pg_hi21_nc:_OPENSSL_armcap_P adrp x5,:pg_hi21_nc:_OPENSSL_armcap_P
@ -46,6 +47,7 @@ _ChaCha20_ctr32:
b.ne ChaCha20_neon b.ne ChaCha20_neon
Lshort: Lshort:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]! stp x29,x30,[sp,#-96]!
add x29,sp,#0 add x29,sp,#0
@ -258,6 +260,7 @@ Loop:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96 ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
Labort: Labort:
ret ret
@ -314,12 +317,14 @@ Loop_tail:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96 ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.align 5 .align 5
ChaCha20_neon: ChaCha20_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]! stp x29,x30,[sp,#-96]!
add x29,sp,#0 add x29,sp,#0
@ -700,6 +705,7 @@ Loop_neon:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96 ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret ret
Ltail_neon: Ltail_neon:
@ -809,11 +815,13 @@ Ldone_neon:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96 ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.align 5 .align 5
ChaCha20_512_neon: ChaCha20_512_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]! stp x29,x30,[sp,#-96]!
add x29,sp,#0 add x29,sp,#0
@ -1977,6 +1985,7 @@ Ldone_512_neon:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96 ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret ret
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM

View File

@ -32,6 +32,8 @@ Lrcon:
.align 5 .align 5
_aes_hw_set_encrypt_key: _aes_hw_set_encrypt_key:
Lenc_key: Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
mov x3,#-1 mov x3,#-1
@ -200,6 +202,7 @@ Lenc_key_abort:
.align 5 .align 5
_aes_hw_set_decrypt_key: _aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
bl Lenc_key bl Lenc_key
@ -233,6 +236,7 @@ Loop_imc:
eor x0,x0,x0 // return value eor x0,x0,x0 // return value
Ldec_key_abort: Ldec_key_abort:
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.globl _aes_hw_encrypt .globl _aes_hw_encrypt
@ -240,6 +244,7 @@ Ldec_key_abort:
.align 5 .align 5
_aes_hw_encrypt: _aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240] ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16 ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0] ld1 {v2.16b},[x0]
@ -270,6 +275,7 @@ Loop_enc:
.align 5 .align 5
_aes_hw_decrypt: _aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240] ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16 ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0] ld1 {v2.16b},[x0]
@ -300,6 +306,8 @@ Loop_dec:
.align 5 .align 5
_aes_hw_cbc_encrypt: _aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
subs x2,x2,#16 subs x2,x2,#16
@ -591,6 +599,8 @@ Lcbc_abort:
.align 5 .align 5
_aes_hw_ctr32_encrypt_blocks: _aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
ldr w5,[x3,#240] ldr w5,[x3,#240]

View File

@ -12,6 +12,8 @@
#if defined(BORINGSSL_PREFIX) #if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
#include <openssl/arm_arch.h>
.text .text
.globl _bn_mul_mont .globl _bn_mul_mont
@ -19,6 +21,7 @@
.align 5 .align 5
_bn_mul_mont: _bn_mul_mont:
AARCH64_SIGN_LINK_REGISTER
tst x5,#7 tst x5,#7
b.eq __bn_sqr8x_mont b.eq __bn_sqr8x_mont
tst x5,#3 tst x5,#3
@ -216,11 +219,14 @@ Lcond_copy:
mov x0,#1 mov x0,#1
ldp x23,x24,[x29,#48] ldp x23,x24,[x29,#48]
ldr x29,[sp],#64 ldr x29,[sp],#64
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.align 5 .align 5
__bn_sqr8x_mont: __bn_sqr8x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp x1,x2 cmp x1,x2
b.ne __bn_mul4x_mont b.ne __bn_mul4x_mont
Lsqr8x_mont: Lsqr8x_mont:
@ -974,11 +980,16 @@ Lsqr8x_done:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldr x29,[sp],#128 ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.align 5 .align 5
__bn_mul4x_mont: __bn_mul4x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
// return address.
stp x29,x30,[sp,#-128]! stp x29,x30,[sp,#-128]!
add x29,sp,#0 add x29,sp,#0
stp x19,x20,[sp,#16] stp x19,x20,[sp,#16]
@ -1412,6 +1423,8 @@ Lmul4x_done:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldr x29,[sp],#128 ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0

View File

@ -12,6 +12,8 @@
#if defined(BORINGSSL_PREFIX) #if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
#include <openssl/arm_arch.h>
.text .text
.globl _gcm_init_neon .globl _gcm_init_neon
@ -19,6 +21,7 @@
.align 4 .align 4
_gcm_init_neon: _gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3. // This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1 movi v19.16b, #0xe1
@ -44,6 +47,7 @@ _gcm_init_neon:
.align 4 .align 4
_gcm_gmult_neon: _gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1] ld1 {v6.1d}, [x1]
@ -63,6 +67,7 @@ _gcm_gmult_neon:
.align 4 .align 4
_gcm_ghash_neon: _gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1] ld1 {v6.1d}, [x1]

View File

@ -21,6 +21,7 @@
.align 4 .align 4
_gcm_init_v8: _gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1 movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0 shl v19.2d,v19.2d,#57 //0xc2.0
@ -72,6 +73,7 @@ _gcm_init_v8:
.align 4 .align 4
_gcm_gmult_v8: _gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1 movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ... ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
@ -114,6 +116,7 @@ _gcm_gmult_v8:
.align 4 .align 4
_gcm_ghash_v8: _gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v0.2d},[x0] //load [rotated] Xi ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that //"[rotated]" means that
//loaded value would have //loaded value would have

View File

@ -22,6 +22,8 @@
.align 6 .align 6
_sha1_block_data_order: _sha1_block_data_order:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
#else #else
@ -1089,6 +1091,8 @@ Loop:
.align 6 .align 6
sha1_block_armv8: sha1_block_armv8:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
Lv8_entry: Lv8_entry:
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0

View File

@ -63,6 +63,7 @@
.align 6 .align 6
_sha256_block_data_order: _sha256_block_data_order:
AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__ #ifndef __KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
@ -73,6 +74,7 @@ _sha256_block_data_order:
tst w16,#ARMV8_SHA256 tst w16,#ARMV8_SHA256
b.ne Lv8_entry b.ne Lv8_entry
#endif #endif
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]! stp x29,x30,[sp,#-128]!
add x29,sp,#0 add x29,sp,#0
@ -1033,6 +1035,7 @@ Loop_16_xx:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128 ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret ret
@ -1067,6 +1070,7 @@ LK256:
.align 6 .align 6
sha256_block_armv8: sha256_block_armv8:
Lv8_entry: Lv8_entry:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0

View File

@ -63,6 +63,7 @@
.align 6 .align 6
_sha512_block_data_order: _sha512_block_data_order:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]! stp x29,x30,[sp,#-128]!
add x29,sp,#0 add x29,sp,#0
@ -1023,6 +1024,7 @@ Loop_16_xx:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128 ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret ret

View File

@ -12,6 +12,8 @@
#if defined(BORINGSSL_PREFIX) #if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
#include <openssl/arm_arch.h>
.section __TEXT,__const .section __TEXT,__const
@ -214,6 +216,7 @@ Lenc_entry:
.align 4 .align 4
_vpaes_encrypt: _vpaes_encrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
@ -223,6 +226,7 @@ _vpaes_encrypt:
st1 {v0.16b}, [x1] st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
@ -451,6 +455,7 @@ Ldec_entry:
.align 4 .align 4
_vpaes_decrypt: _vpaes_decrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
@ -460,6 +465,7 @@ _vpaes_decrypt:
st1 {v0.16b}, [x1] st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
@ -629,6 +635,7 @@ _vpaes_key_preheat:
.align 4 .align 4
_vpaes_schedule_core: _vpaes_schedule_core:
AARCH64_SIGN_LINK_REGISTER
stp x29, x30, [sp,#-16]! stp x29, x30, [sp,#-16]!
add x29,sp,#0 add x29,sp,#0
@ -798,6 +805,7 @@ Lschedule_mangle_last_dec:
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6 eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7 eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
ldp x29, x30, [sp],#16 ldp x29, x30, [sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
@ -1011,6 +1019,7 @@ Lschedule_mangle_both:
.align 4 .align 4
_vpaes_set_encrypt_key: _vpaes_set_encrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1026,6 +1035,7 @@ _vpaes_set_encrypt_key:
ldp d8,d9,[sp],#16 ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
@ -1034,6 +1044,7 @@ _vpaes_set_encrypt_key:
.align 4 .align 4
_vpaes_set_decrypt_key: _vpaes_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1053,6 +1064,7 @@ _vpaes_set_decrypt_key:
ldp d8,d9,[sp],#16 ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.globl _vpaes_cbc_encrypt .globl _vpaes_cbc_encrypt
@ -1060,6 +1072,7 @@ _vpaes_set_decrypt_key:
.align 4 .align 4
_vpaes_cbc_encrypt: _vpaes_cbc_encrypt:
AARCH64_SIGN_LINK_REGISTER
cbz x2, Lcbc_abort cbz x2, Lcbc_abort
cmp w5, #0 // check direction cmp w5, #0 // check direction
b.eq vpaes_cbc_decrypt b.eq vpaes_cbc_decrypt
@ -1086,6 +1099,7 @@ Lcbc_enc_loop:
st1 {v0.16b}, [x4] // write ivec st1 {v0.16b}, [x4] // write ivec
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
Lcbc_abort: Lcbc_abort:
ret ret
@ -1093,6 +1107,8 @@ Lcbc_abort:
.align 4 .align 4
vpaes_cbc_decrypt: vpaes_cbc_decrypt:
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
// only from vpaes_cbc_encrypt which has already signed the return address.
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1134,6 +1150,7 @@ Lcbc_dec_done:
ldp d10,d11,[sp],#16 ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16 ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.globl _vpaes_ctr32_encrypt_blocks .globl _vpaes_ctr32_encrypt_blocks
@ -1141,6 +1158,7 @@ Lcbc_dec_done:
.align 4 .align 4
_vpaes_ctr32_encrypt_blocks: _vpaes_ctr32_encrypt_blocks:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1208,6 +1226,7 @@ Lctr32_done:
ldp d10,d11,[sp],#16 ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16 ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM

View File

@ -12,6 +12,8 @@
#if defined(BORINGSSL_PREFIX) #if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
#include <openssl/arm_arch.h>
.text .text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func| // abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ -26,6 +28,7 @@
.align 4 .align 4
_abi_test_trampoline: _abi_test_trampoline:
Labi_test_trampoline_begin: Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses) // Stack layout (low to high addresses)
// x29,x30 (16 bytes) // x29,x30 (16 bytes)
// d8-d15 (64 bytes) // d8-d15 (64 bytes)
@ -128,6 +131,7 @@ Lx29_ok:
ldp x27, x28, [sp, #144] ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176 ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret ret
@ -135,6 +139,7 @@ Lx29_ok:
.private_extern _abi_test_clobber_x0 .private_extern _abi_test_clobber_x0
.align 4 .align 4
_abi_test_clobber_x0: _abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr mov x0, xzr
ret ret
@ -143,6 +148,7 @@ _abi_test_clobber_x0:
.private_extern _abi_test_clobber_x1 .private_extern _abi_test_clobber_x1
.align 4 .align 4
_abi_test_clobber_x1: _abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr mov x1, xzr
ret ret
@ -151,6 +157,7 @@ _abi_test_clobber_x1:
.private_extern _abi_test_clobber_x2 .private_extern _abi_test_clobber_x2
.align 4 .align 4
_abi_test_clobber_x2: _abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr mov x2, xzr
ret ret
@ -159,6 +166,7 @@ _abi_test_clobber_x2:
.private_extern _abi_test_clobber_x3 .private_extern _abi_test_clobber_x3
.align 4 .align 4
_abi_test_clobber_x3: _abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr mov x3, xzr
ret ret
@ -167,6 +175,7 @@ _abi_test_clobber_x3:
.private_extern _abi_test_clobber_x4 .private_extern _abi_test_clobber_x4
.align 4 .align 4
_abi_test_clobber_x4: _abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr mov x4, xzr
ret ret
@ -175,6 +184,7 @@ _abi_test_clobber_x4:
.private_extern _abi_test_clobber_x5 .private_extern _abi_test_clobber_x5
.align 4 .align 4
_abi_test_clobber_x5: _abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr mov x5, xzr
ret ret
@ -183,6 +193,7 @@ _abi_test_clobber_x5:
.private_extern _abi_test_clobber_x6 .private_extern _abi_test_clobber_x6
.align 4 .align 4
_abi_test_clobber_x6: _abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr mov x6, xzr
ret ret
@ -191,6 +202,7 @@ _abi_test_clobber_x6:
.private_extern _abi_test_clobber_x7 .private_extern _abi_test_clobber_x7
.align 4 .align 4
_abi_test_clobber_x7: _abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr mov x7, xzr
ret ret
@ -199,6 +211,7 @@ _abi_test_clobber_x7:
.private_extern _abi_test_clobber_x8 .private_extern _abi_test_clobber_x8
.align 4 .align 4
_abi_test_clobber_x8: _abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr mov x8, xzr
ret ret
@ -207,6 +220,7 @@ _abi_test_clobber_x8:
.private_extern _abi_test_clobber_x9 .private_extern _abi_test_clobber_x9
.align 4 .align 4
_abi_test_clobber_x9: _abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr mov x9, xzr
ret ret
@ -215,6 +229,7 @@ _abi_test_clobber_x9:
.private_extern _abi_test_clobber_x10 .private_extern _abi_test_clobber_x10
.align 4 .align 4
_abi_test_clobber_x10: _abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr mov x10, xzr
ret ret
@ -223,6 +238,7 @@ _abi_test_clobber_x10:
.private_extern _abi_test_clobber_x11 .private_extern _abi_test_clobber_x11
.align 4 .align 4
_abi_test_clobber_x11: _abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr mov x11, xzr
ret ret
@ -231,6 +247,7 @@ _abi_test_clobber_x11:
.private_extern _abi_test_clobber_x12 .private_extern _abi_test_clobber_x12
.align 4 .align 4
_abi_test_clobber_x12: _abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr mov x12, xzr
ret ret
@ -239,6 +256,7 @@ _abi_test_clobber_x12:
.private_extern _abi_test_clobber_x13 .private_extern _abi_test_clobber_x13
.align 4 .align 4
_abi_test_clobber_x13: _abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr mov x13, xzr
ret ret
@ -247,6 +265,7 @@ _abi_test_clobber_x13:
.private_extern _abi_test_clobber_x14 .private_extern _abi_test_clobber_x14
.align 4 .align 4
_abi_test_clobber_x14: _abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr mov x14, xzr
ret ret
@ -255,6 +274,7 @@ _abi_test_clobber_x14:
.private_extern _abi_test_clobber_x15 .private_extern _abi_test_clobber_x15
.align 4 .align 4
_abi_test_clobber_x15: _abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr mov x15, xzr
ret ret
@ -263,6 +283,7 @@ _abi_test_clobber_x15:
.private_extern _abi_test_clobber_x16 .private_extern _abi_test_clobber_x16
.align 4 .align 4
_abi_test_clobber_x16: _abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr mov x16, xzr
ret ret
@ -271,6 +292,7 @@ _abi_test_clobber_x16:
.private_extern _abi_test_clobber_x17 .private_extern _abi_test_clobber_x17
.align 4 .align 4
_abi_test_clobber_x17: _abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr mov x17, xzr
ret ret
@ -279,6 +301,7 @@ _abi_test_clobber_x17:
.private_extern _abi_test_clobber_x19 .private_extern _abi_test_clobber_x19
.align 4 .align 4
_abi_test_clobber_x19: _abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr mov x19, xzr
ret ret
@ -287,6 +310,7 @@ _abi_test_clobber_x19:
.private_extern _abi_test_clobber_x20 .private_extern _abi_test_clobber_x20
.align 4 .align 4
_abi_test_clobber_x20: _abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr mov x20, xzr
ret ret
@ -295,6 +319,7 @@ _abi_test_clobber_x20:
.private_extern _abi_test_clobber_x21 .private_extern _abi_test_clobber_x21
.align 4 .align 4
_abi_test_clobber_x21: _abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr mov x21, xzr
ret ret
@ -303,6 +328,7 @@ _abi_test_clobber_x21:
.private_extern _abi_test_clobber_x22 .private_extern _abi_test_clobber_x22
.align 4 .align 4
_abi_test_clobber_x22: _abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr mov x22, xzr
ret ret
@ -311,6 +337,7 @@ _abi_test_clobber_x22:
.private_extern _abi_test_clobber_x23 .private_extern _abi_test_clobber_x23
.align 4 .align 4
_abi_test_clobber_x23: _abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr mov x23, xzr
ret ret
@ -319,6 +346,7 @@ _abi_test_clobber_x23:
.private_extern _abi_test_clobber_x24 .private_extern _abi_test_clobber_x24
.align 4 .align 4
_abi_test_clobber_x24: _abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr mov x24, xzr
ret ret
@ -327,6 +355,7 @@ _abi_test_clobber_x24:
.private_extern _abi_test_clobber_x25 .private_extern _abi_test_clobber_x25
.align 4 .align 4
_abi_test_clobber_x25: _abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr mov x25, xzr
ret ret
@ -335,6 +364,7 @@ _abi_test_clobber_x25:
.private_extern _abi_test_clobber_x26 .private_extern _abi_test_clobber_x26
.align 4 .align 4
_abi_test_clobber_x26: _abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr mov x26, xzr
ret ret
@ -343,6 +373,7 @@ _abi_test_clobber_x26:
.private_extern _abi_test_clobber_x27 .private_extern _abi_test_clobber_x27
.align 4 .align 4
_abi_test_clobber_x27: _abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr mov x27, xzr
ret ret
@ -351,6 +382,7 @@ _abi_test_clobber_x27:
.private_extern _abi_test_clobber_x28 .private_extern _abi_test_clobber_x28
.align 4 .align 4
_abi_test_clobber_x28: _abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr mov x28, xzr
ret ret
@ -359,6 +391,7 @@ _abi_test_clobber_x28:
.private_extern _abi_test_clobber_x29 .private_extern _abi_test_clobber_x29
.align 4 .align 4
_abi_test_clobber_x29: _abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr mov x29, xzr
ret ret
@ -367,6 +400,7 @@ _abi_test_clobber_x29:
.private_extern _abi_test_clobber_d0 .private_extern _abi_test_clobber_d0
.align 4 .align 4
_abi_test_clobber_d0: _abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr fmov d0, xzr
ret ret
@ -375,6 +409,7 @@ _abi_test_clobber_d0:
.private_extern _abi_test_clobber_d1 .private_extern _abi_test_clobber_d1
.align 4 .align 4
_abi_test_clobber_d1: _abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr fmov d1, xzr
ret ret
@ -383,6 +418,7 @@ _abi_test_clobber_d1:
.private_extern _abi_test_clobber_d2 .private_extern _abi_test_clobber_d2
.align 4 .align 4
_abi_test_clobber_d2: _abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr fmov d2, xzr
ret ret
@ -391,6 +427,7 @@ _abi_test_clobber_d2:
.private_extern _abi_test_clobber_d3 .private_extern _abi_test_clobber_d3
.align 4 .align 4
_abi_test_clobber_d3: _abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr fmov d3, xzr
ret ret
@ -399,6 +436,7 @@ _abi_test_clobber_d3:
.private_extern _abi_test_clobber_d4 .private_extern _abi_test_clobber_d4
.align 4 .align 4
_abi_test_clobber_d4: _abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr fmov d4, xzr
ret ret
@ -407,6 +445,7 @@ _abi_test_clobber_d4:
.private_extern _abi_test_clobber_d5 .private_extern _abi_test_clobber_d5
.align 4 .align 4
_abi_test_clobber_d5: _abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr fmov d5, xzr
ret ret
@ -415,6 +454,7 @@ _abi_test_clobber_d5:
.private_extern _abi_test_clobber_d6 .private_extern _abi_test_clobber_d6
.align 4 .align 4
_abi_test_clobber_d6: _abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr fmov d6, xzr
ret ret
@ -423,6 +463,7 @@ _abi_test_clobber_d6:
.private_extern _abi_test_clobber_d7 .private_extern _abi_test_clobber_d7
.align 4 .align 4
_abi_test_clobber_d7: _abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr fmov d7, xzr
ret ret
@ -431,6 +472,7 @@ _abi_test_clobber_d7:
.private_extern _abi_test_clobber_d8 .private_extern _abi_test_clobber_d8
.align 4 .align 4
_abi_test_clobber_d8: _abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr fmov d8, xzr
ret ret
@ -439,6 +481,7 @@ _abi_test_clobber_d8:
.private_extern _abi_test_clobber_d9 .private_extern _abi_test_clobber_d9
.align 4 .align 4
_abi_test_clobber_d9: _abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr fmov d9, xzr
ret ret
@ -447,6 +490,7 @@ _abi_test_clobber_d9:
.private_extern _abi_test_clobber_d10 .private_extern _abi_test_clobber_d10
.align 4 .align 4
_abi_test_clobber_d10: _abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr fmov d10, xzr
ret ret
@ -455,6 +499,7 @@ _abi_test_clobber_d10:
.private_extern _abi_test_clobber_d11 .private_extern _abi_test_clobber_d11
.align 4 .align 4
_abi_test_clobber_d11: _abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr fmov d11, xzr
ret ret
@ -463,6 +508,7 @@ _abi_test_clobber_d11:
.private_extern _abi_test_clobber_d12 .private_extern _abi_test_clobber_d12
.align 4 .align 4
_abi_test_clobber_d12: _abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr fmov d12, xzr
ret ret
@ -471,6 +517,7 @@ _abi_test_clobber_d12:
.private_extern _abi_test_clobber_d13 .private_extern _abi_test_clobber_d13
.align 4 .align 4
_abi_test_clobber_d13: _abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr fmov d13, xzr
ret ret
@ -479,6 +526,7 @@ _abi_test_clobber_d13:
.private_extern _abi_test_clobber_d14 .private_extern _abi_test_clobber_d14
.align 4 .align 4
_abi_test_clobber_d14: _abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr fmov d14, xzr
ret ret
@ -487,6 +535,7 @@ _abi_test_clobber_d14:
.private_extern _abi_test_clobber_d15 .private_extern _abi_test_clobber_d15
.align 4 .align 4
_abi_test_clobber_d15: _abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr fmov d15, xzr
ret ret
@ -495,6 +544,7 @@ _abi_test_clobber_d15:
.private_extern _abi_test_clobber_d16 .private_extern _abi_test_clobber_d16
.align 4 .align 4
_abi_test_clobber_d16: _abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr fmov d16, xzr
ret ret
@ -503,6 +553,7 @@ _abi_test_clobber_d16:
.private_extern _abi_test_clobber_d17 .private_extern _abi_test_clobber_d17
.align 4 .align 4
_abi_test_clobber_d17: _abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr fmov d17, xzr
ret ret
@ -511,6 +562,7 @@ _abi_test_clobber_d17:
.private_extern _abi_test_clobber_d18 .private_extern _abi_test_clobber_d18
.align 4 .align 4
_abi_test_clobber_d18: _abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr fmov d18, xzr
ret ret
@ -519,6 +571,7 @@ _abi_test_clobber_d18:
.private_extern _abi_test_clobber_d19 .private_extern _abi_test_clobber_d19
.align 4 .align 4
_abi_test_clobber_d19: _abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr fmov d19, xzr
ret ret
@ -527,6 +580,7 @@ _abi_test_clobber_d19:
.private_extern _abi_test_clobber_d20 .private_extern _abi_test_clobber_d20
.align 4 .align 4
_abi_test_clobber_d20: _abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr fmov d20, xzr
ret ret
@ -535,6 +589,7 @@ _abi_test_clobber_d20:
.private_extern _abi_test_clobber_d21 .private_extern _abi_test_clobber_d21
.align 4 .align 4
_abi_test_clobber_d21: _abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr fmov d21, xzr
ret ret
@ -543,6 +598,7 @@ _abi_test_clobber_d21:
.private_extern _abi_test_clobber_d22 .private_extern _abi_test_clobber_d22
.align 4 .align 4
_abi_test_clobber_d22: _abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr fmov d22, xzr
ret ret
@ -551,6 +607,7 @@ _abi_test_clobber_d22:
.private_extern _abi_test_clobber_d23 .private_extern _abi_test_clobber_d23
.align 4 .align 4
_abi_test_clobber_d23: _abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr fmov d23, xzr
ret ret
@ -559,6 +616,7 @@ _abi_test_clobber_d23:
.private_extern _abi_test_clobber_d24 .private_extern _abi_test_clobber_d24
.align 4 .align 4
_abi_test_clobber_d24: _abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr fmov d24, xzr
ret ret
@ -567,6 +625,7 @@ _abi_test_clobber_d24:
.private_extern _abi_test_clobber_d25 .private_extern _abi_test_clobber_d25
.align 4 .align 4
_abi_test_clobber_d25: _abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr fmov d25, xzr
ret ret
@ -575,6 +634,7 @@ _abi_test_clobber_d25:
.private_extern _abi_test_clobber_d26 .private_extern _abi_test_clobber_d26
.align 4 .align 4
_abi_test_clobber_d26: _abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr fmov d26, xzr
ret ret
@ -583,6 +643,7 @@ _abi_test_clobber_d26:
.private_extern _abi_test_clobber_d27 .private_extern _abi_test_clobber_d27
.align 4 .align 4
_abi_test_clobber_d27: _abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr fmov d27, xzr
ret ret
@ -591,6 +652,7 @@ _abi_test_clobber_d27:
.private_extern _abi_test_clobber_d28 .private_extern _abi_test_clobber_d28
.align 4 .align 4
_abi_test_clobber_d28: _abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr fmov d28, xzr
ret ret
@ -599,6 +661,7 @@ _abi_test_clobber_d28:
.private_extern _abi_test_clobber_d29 .private_extern _abi_test_clobber_d29
.align 4 .align 4
_abi_test_clobber_d29: _abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr fmov d29, xzr
ret ret
@ -607,6 +670,7 @@ _abi_test_clobber_d29:
.private_extern _abi_test_clobber_d30 .private_extern _abi_test_clobber_d30
.align 4 .align 4
_abi_test_clobber_d30: _abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr fmov d30, xzr
ret ret
@ -615,6 +679,7 @@ _abi_test_clobber_d30:
.private_extern _abi_test_clobber_d31 .private_extern _abi_test_clobber_d31
.align 4 .align 4
_abi_test_clobber_d31: _abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr fmov d31, xzr
ret ret
@ -623,6 +688,7 @@ _abi_test_clobber_d31:
.private_extern _abi_test_clobber_v8_upper .private_extern _abi_test_clobber_v8_upper
.align 4 .align 4
_abi_test_clobber_v8_upper: _abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr fmov v8.d[1], xzr
ret ret
@ -631,6 +697,7 @@ _abi_test_clobber_v8_upper:
.private_extern _abi_test_clobber_v9_upper .private_extern _abi_test_clobber_v9_upper
.align 4 .align 4
_abi_test_clobber_v9_upper: _abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr fmov v9.d[1], xzr
ret ret
@ -639,6 +706,7 @@ _abi_test_clobber_v9_upper:
.private_extern _abi_test_clobber_v10_upper .private_extern _abi_test_clobber_v10_upper
.align 4 .align 4
_abi_test_clobber_v10_upper: _abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr fmov v10.d[1], xzr
ret ret
@ -647,6 +715,7 @@ _abi_test_clobber_v10_upper:
.private_extern _abi_test_clobber_v11_upper .private_extern _abi_test_clobber_v11_upper
.align 4 .align 4
_abi_test_clobber_v11_upper: _abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr fmov v11.d[1], xzr
ret ret
@ -655,6 +724,7 @@ _abi_test_clobber_v11_upper:
.private_extern _abi_test_clobber_v12_upper .private_extern _abi_test_clobber_v12_upper
.align 4 .align 4
_abi_test_clobber_v12_upper: _abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr fmov v12.d[1], xzr
ret ret
@ -663,6 +733,7 @@ _abi_test_clobber_v12_upper:
.private_extern _abi_test_clobber_v13_upper .private_extern _abi_test_clobber_v13_upper
.align 4 .align 4
_abi_test_clobber_v13_upper: _abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr fmov v13.d[1], xzr
ret ret
@ -671,6 +742,7 @@ _abi_test_clobber_v13_upper:
.private_extern _abi_test_clobber_v14_upper .private_extern _abi_test_clobber_v14_upper
.align 4 .align 4
_abi_test_clobber_v14_upper: _abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr fmov v14.d[1], xzr
ret ret
@ -679,6 +751,7 @@ _abi_test_clobber_v14_upper:
.private_extern _abi_test_clobber_v15_upper .private_extern _abi_test_clobber_v15_upper
.align 4 .align 4
_abi_test_clobber_v15_upper: _abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr fmov v15.d[1], xzr
ret ret

View File

@ -1,996 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.section __TEXT,__const
// Field constants used by the routines below. Words are 64 bits wide and are
// stored least-significant word first.
//
// Lp434x2 holds only six words although the field routines work on seven-word
// values: sike_fpadd and sike_fpsub apply the second word loaded from this
// table to two consecutive limbs, so the repeated 0xFFFFFFFFFFFFFFFF word is
// stored once.
# p434 x 2
Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
// Lp434p1 holds four words; sike_fprdc accumulates the products it forms with
// them starting three words above the limb being reduced (presumably because
// the three low words of p434 + 1 are zero and are therefore omitted).
# p434 + 1
Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
.text
// sike_mpmul: multi-precision multiplication c = a * b.
//   x0: pointer to a, 7 x 64-bit words (read)
//   x1: pointer to b, 7 x 64-bit words (read)
//   x2: pointer to c, 14 x 64-bit words (written; also used as scratch space
//       while the product is being assembled)
// Each operand is split into a 4-word low half (AL, BL) and a 3-word high half
// (AH, BH). The product is built Karatsuba-style from (AH+AL) x (BH+BL),
// AL x BL and AH x BH. x0 and x1 are overwritten once the operands are loaded.
.globl _sike_mpmul
.private_extern _sike_mpmul
.align 4
_sike_mpmul:
// Prologue: 96-byte frame holding x29/x30 and the callee-saved x19-x28.
stp x29, x30, [sp,#-96]!
add x29, sp, #0
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
// x3-x6 <- AL, x7-x9 <- AH
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
// x10-x13 <- BL, x14-x16 <- BH
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
ldp x14, x15, [x1,#32]
ldr x16, [x1,#48]
// x3-x6 <- AH + AL, x7 <- carry
adds x3, x3, x7
adcs x4, x4, x8
adcs x5, x5, x9
adcs x6, x6, xzr
adc x7, xzr, xzr
// x10-x13 <- BH + BL, x8 <- carry
adds x10, x10, x14
adcs x11, x11, x15
adcs x12, x12, x16
adcs x13, x13, xzr
adc x8, xzr, xzr
// x9 <- combined carry
and x9, x7, x8
// x7-x8 <- mask
sub x7, xzr, x7
sub x8, xzr, x8
// x14-x17 <- masked (BH + BL)
and x14, x10, x7
and x15, x11, x7
and x16, x12, x7
and x17, x13, x7
// x20-x23 <- masked (AH + AL)
and x20, x3, x8
and x21, x4, x8
and x22, x5, x8
and x23, x6, x8
// x14-x17, x7 <- masked (AH+AL) + masked (BH+BL), step 1
adds x14, x14, x20
adcs x15, x15, x21
adcs x16, x16, x22
adcs x17, x17, x23
adc x7, x9, xzr
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
// x3,x4 are spilled to the output buffer and reloaded further down.
stp x3, x4, [x2,#0]
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x25, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x23, x10, x12
adcs x26, x11, x13
adc x24, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x19, xzr, x25
sub x20, xzr, x24
and x8, x23, x19
and x9, x26, x19
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x21, x3, x20
and x22, x4, x20
mul x19, x3, x23
mul x20, x3, x26
and x25, x25, x24
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x8, x21, x8
umulh x21, x3, x26
adcs x9, x22, x9
umulh x22, x3, x23
adc x25, x25, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x23
umulh x23, x4, x23
adds x20, x20, x22
adc x21, x21, xzr
mul x24, x4, x26
umulh x26, x4, x26
adds x20, x20, x3
adcs x21, x21, x23
adc x22, xzr, xzr
adds x21, x21, x24
adc x22, x22, x26
// Reload the x3,x4 values spilled above.
ldp x3, x4, [x2,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x21, x8, x21
umulh x24, x3, x10
umulh x26, x3, x11
adcs x22, x9, x22
mul x8, x3, x10
mul x9, x3, x11
adc x25, x25, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x9, x9, x24
adc x26, x26, xzr
mul x23, x4, x11
umulh x11, x4, x11
adds x9, x9, x3
adcs x26, x26, x10
adc x24, xzr, xzr
adds x26, x26, x23
adc x24, x24, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x19, x19, x8
sbcs x20, x20, x9
sbcs x21, x21, x26
mul x4, x5, x13
umulh x23, x5, x13
sbcs x22, x22, x24
sbc x25, x25, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x23, x23, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x23, x23, x12
adc x10, xzr, xzr
adds x23, x23, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x19, x19, x3
sbcs x20, x20, x4
sbcs x21, x21, x23
sbcs x22, x22, x10
sbc x25, x25, xzr
adds x19, x19, x26
adcs x20, x20, x24
adcs x21, x21, x3
adcs x22, x22, x4
adcs x23, x25, x23
adc x24, x10, xzr
// x14-x17, x7 <- (AH+AL) x (BH+BL), final step
adds x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adc x7, x7, xzr
// Load AL
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
// Load BL
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
// Temporarily store x8,x9 at [x2]
stp x8, x9, [x2,#0]
// x21-x28 <- AL x BL
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x8, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x27, x10, x12
adcs x9, x11, x13
adc x28, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x23, xzr, x8
sub x24, xzr, x28
and x21, x27, x23
and x22, x9, x23
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x25, x3, x24
and x26, x4, x24
mul x23, x3, x27
mul x24, x3, x9
and x8, x8, x28
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x21, x25, x21
umulh x25, x3, x9
adcs x22, x26, x22
umulh x26, x3, x27
adc x8, x8, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x27
umulh x27, x4, x27
adds x24, x24, x26
adc x25, x25, xzr
mul x28, x4, x9
umulh x9, x4, x9
adds x24, x24, x3
adcs x25, x25, x27
adc x26, xzr, xzr
adds x25, x25, x28
adc x26, x26, x9
// Reload the two lowest words of a (x3,x4 were overwritten above).
ldp x3, x4, [x0,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x25, x21, x25
umulh x28, x3, x10
umulh x9, x3, x11
adcs x26, x22, x26
mul x21, x3, x10
mul x22, x3, x11
adc x8, x8, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x22, x22, x28
adc x9, x9, xzr
mul x27, x4, x11
umulh x11, x4, x11
adds x22, x22, x3
adcs x9, x9, x10
adc x28, xzr, xzr
adds x9, x9, x27
adc x28, x28, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x23, x23, x21
sbcs x24, x24, x22
sbcs x25, x25, x9
mul x4, x5, x13
umulh x27, x5, x13
sbcs x26, x26, x28
sbc x8, x8, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x27, x27, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x27, x27, x12
adc x10, xzr, xzr
adds x27, x27, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x23, x23, x3
sbcs x24, x24, x4
sbcs x25, x25, x27
sbcs x26, x26, x10
sbc x8, x8, xzr
adds x23, x23, x9
adcs x24, x24, x28
adcs x25, x25, x3
adcs x26, x26, x4
adcs x27, x8, x27
adc x28, x10, xzr
// Restore x8,x9
ldp x8, x9, [x2,#0]
// x8-x9,x19,x20,x14-x17,x7 <- (AH+AL) x (BH+BL) - ALxBL
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, x27
sbcs x17, x17, x28
sbc x7, x7, xzr
// Store ALxBL, low
stp x21, x22, [x2]
stp x23, x24, [x2,#16]
// Load AH
ldp x3, x4, [x0,#32]
ldr x5, [x0,#48]
// Load BH
ldp x10, x11, [x1,#32]
ldr x12, [x1,#48]
// Add the high half of ALxBL (x25-x28) into the middle term. The carry out
// is kept in x1, which is no longer needed as a pointer.
adds x8, x8, x25
adcs x9, x9, x26
adcs x19, x19, x27
adcs x20, x20, x28
adc x1, xzr, xzr
// x0 is not read again after this point.
add x0, x0, #32
// Temporarily store x8,x9 in x2
stp x8,x9, [x2,#32]
// x21-x26 <- AH x BH
// A0 * B0
mul x21, x3, x10 // C0
umulh x24, x3, x10
// A0 * B1
mul x22, x3, x11
umulh x23, x3, x11
// A1 * B0
mul x8, x4, x10
umulh x9, x4, x10
adds x22, x22, x24
adc x23, x23, xzr
// A0 * B2
mul x27, x3, x12
umulh x28, x3, x12
adds x22, x22, x8 // C1
adcs x23, x23, x9
adc x24, xzr, xzr
// A2 * B0
mul x8, x5, x10
umulh x25, x5, x10
adds x23, x23, x27
adcs x24, x24, x25
adc x25, xzr, xzr
// A1 * B1
mul x27, x4, x11
umulh x9, x4, x11
adds x23, x23, x8
adcs x24, x24, x28
adc x25, x25, xzr
// A1 * B2
mul x8, x4, x12
umulh x28, x4, x12
adds x23, x23, x27 // C2
adcs x24, x24, x9
adc x25, x25, xzr
// A2 * B1
mul x27, x5, x11
umulh x9, x5, x11
adds x24, x24, x8
adcs x25, x25, x28
adc x26, xzr, xzr
// A2 * B2
mul x8, x5, x12
umulh x28, x5, x12
adds x24, x24, x27 // C3
adcs x25, x25, x9
adc x26, x26, xzr
adds x25, x25, x8 // C4
adc x26, x26, x28 // C5
// Restore x8,x9
ldp x8,x9, [x2,#32]
// x1 <- 0 or all-ones, depending on the carry saved above.
neg x1, x1
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, xzr
sbcs x17, x17, xzr
sbc x7, x7, xzr
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
stp x8, x9, [x2,#32]
stp x19, x20, [x2,#48]
// Adding 1 to x1 (0 or all-ones) turns the saved carry back into the carry
// flag, which then feeds the addition of AHxBH into the upper result words.
adds x1, x1, #1
adcs x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adcs x25, x7, x25
adc x26, x26, xzr
stp x14, x15, [x2,#64]
stp x16, x17, [x2,#80]
stp x25, x26, [x2,#96]
// Epilogue: restore callee-saved registers (x29 still equals the frame base).
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
// sike_fprdc: reduces a 14-word value to 7 words.
//   x0: pointer to the input, 14 x 64-bit words (read)
//   x1: pointer to the output, 7 x 64-bit words (written)
// The routine multiplies low limbs of the running value by the four words of
// Lp434p1 and adds each product back in three words higher, in four rounds
// (two limbs, two limbs, two limbs, one limb). The seven words left at the top
// are the result. NOTE(review): this looks like a Montgomery-style reduction
// modulo p434 -- confirm against the C caller.
// x0 is reused as a temporary once the input has been loaded.
.globl _sike_fprdc
.private_extern _sike_fprdc
.align 4
_sike_fprdc:
// Prologue: 96-byte frame holding x29/x30 and the callee-saved x19-x28.
stp x29, x30, [sp, #-96]!
add x29, sp, xzr
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x2, x3, [x0,#0] // a[0-1]
// Load the prime constant
// x23-x26 <- the four words of Lp434p1. x26 first holds the table address and
// is overwritten with the last word by the second load.
adrp x26, Lp434p1@PAGE
add x26, x26, Lp434p1@PAGEOFF
ldp x23, x24, [x26, #0x0]
ldp x25, x26, [x26,#0x10]
// a[0-1] * p434+1
// x4-x9 <- six-word product (C0..C5)
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x10, x3, x23
umulh x11, x3, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x10 // C1
adcs x6, x6, x11
adc x7, xzr, xzr
mul x10, x3, x24
umulh x11, x3, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x10 // C2
adcs x7, x7, x11
adc x8, x8, xzr
mul x10, x3, x25
umulh x11, x3, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x3, x26
umulh x28, x3, x26
adds x7, x7, x10 // C3
adcs x8, x8, x11
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
// Load input words 3..13 into x10-x17, x19-x21.
ldp x10, x11, [x0, #0x18]
ldp x12, x13, [x0, #0x28]
ldp x14, x15, [x0, #0x38]
ldp x16, x17, [x0, #0x48]
ldp x19, x20, [x0, #0x58]
ldr x21, [x0, #0x68]
// Add the product in starting at word 3 and propagate the carry to the top.
// The updated word 9 is kept in x22 for now.
adds x10, x10, x4
adcs x11, x11, x5
adcs x12, x12, x6
adcs x13, x13, x7
adcs x14, x14, x8
adcs x15, x15, x9
adcs x22, x16, xzr
adcs x17, x17, xzr
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
ldr x2, [x0,#0x10] // a[2]
// a[2-3] * p434+1
// Word 3 is the updated value in x10. x0 and x3 serve as temporaries from
// here on; the input pointer is no longer needed.
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x0, x10, x23
umulh x3, x10, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x0 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x0, x10, x24
umulh x3, x10, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x0 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x0, x10, x25
umulh x3, x10, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x10, x26
umulh x28, x10, x26
adds x7, x7, x0 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
// Add the product in starting at word 5 (x12).
adds x12, x12, x4
adcs x13, x13, x5
adcs x14, x14, x6
adcs x15, x15, x7
adcs x16, x22, x8
adcs x17, x17, x9
adcs x22, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
// Words 4-5 (x11, x12) * p434+1
mul x4, x11, x23 // C0
umulh x7, x11, x23
mul x5, x11, x24
umulh x6, x11, x24
mul x10, x12, x23
umulh x3, x12, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x11, x25
umulh x28, x11, x25
adds x5, x5, x10 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x10, x12, x24
umulh x3, x12, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x11, x26
umulh x28, x11, x26
adds x6, x6, x10 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x10, x12, x25
umulh x3, x12, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x12, x26
umulh x28, x12, x26
adds x7, x7, x10 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
// Add the product in starting at word 7 (x14).
adds x14, x14, x4
adcs x15, x15, x5
adcs x16, x16, x6
adcs x17, x17, x7
adcs x19, x22, x8
adcs x20, x20, x9
adc x22, x21, xzr
// Words 7 and 8 are final: they become output words 0 and 1.
stp x14, x15, [x1, #0x0] // C0, C1
// Word 6 (x13) * p434+1 -> x4-x8
mul x4, x13, x23 // C0
umulh x10, x13, x23
mul x5, x13, x24
umulh x27, x13, x24
adds x5, x5, x10 // C1
adc x10, xzr, xzr
mul x6, x13, x25
umulh x28, x13, x25
adds x27, x10, x27
adcs x6, x6, x27 // C2
adc x10, xzr, xzr
mul x7, x13, x26
umulh x8, x13, x26
adds x28, x10, x28
adcs x7, x7, x28 // C3
adc x8, x8, xzr // C4
// Add the product in starting at word 9 (x16) and store output words 2..6.
adds x16, x16, x4
adcs x17, x17, x5
adcs x19, x19, x6
adcs x20, x20, x7
adc x21, x22, x8
str x16, [x1, #0x10]
stp x17, x19, [x1, #0x18]
stp x20, x21, [x1, #0x28]
// Epilogue: restore callee-saved registers.
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
// sike_fpadd: field addition on 7-word values.
//   x0: pointer to a (read), x1: pointer to b (read), x2: pointer to c (written)
// Computes a + b - 2*p434 and, if that subtraction borrowed, adds 2*p434 back.
// The correction is applied through a mask rather than a branch.
.globl _sike_fpadd
.private_extern _sike_fpadd
.align 4
_sike_fpadd:
stp x29,x30, [sp,#-16]!
add x29, sp, #0
// x3-x9 <- a, x11-x17 <- b
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
// Subtract 2xp434
// x11-x16 <- the six stored words of Lp434x2. x12 is applied to both limb 1
// and limb 2 below, which covers the seven-word value.
adrp x17, Lp434x2@PAGE
add x17, x17, Lp434x2@PAGEOFF
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14
sbcs x8, x8, x15
sbcs x9, x9, x16
// x0 <- all-ones if the subtraction borrowed, zero otherwise.
sbc x0, xzr, xzr // x0 can be reused now
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
// Store the seven result words to c.
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
// sike_fpsub: field subtraction on 7-word values.
//   x0: pointer to a (read), x1: pointer to b (read), x2: pointer to c (written)
// Computes a - b and, if the subtraction borrowed, adds 2*p434. The correction
// is applied through a mask rather than a branch.
.globl _sike_fpsub
.private_extern _sike_fpsub
.align 4
_sike_fpsub:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
// x3-x9 <- a, x11-x17 <- b
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
// x0 <- all-ones if the subtraction borrowed, zero otherwise.
sbc x0, xzr, xzr
// Load 2xp434; it is masked with x0 and added below
adrp x17, Lp434x2@PAGE
add x17, x17, Lp434x2@PAGEOFF
// x11-x16 <- the six stored words of Lp434x2 (x12 serves limbs 1 and 2)
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
// Store the seven result words to c.
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
// sike_mpadd_asm: plain multi-precision addition c = a + b on 7-word values.
//   x0: pointer to a (read), x1: pointer to b (read), x2: pointer to c (written)
// No modular correction is applied, and the carry out of the top word is
// discarded.
.globl _sike_mpadd_asm
.private_extern _sike_mpadd_asm
.align 4
_sike_mpadd_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
// x3-x9 <- a, x11-x17 <- b
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Seven-limb add with carry propagation.
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
// sike_mpsubx2_asm: multi-precision subtraction c = a - b on 14-word values.
//   x0: pointer to a (read), x1: pointer to b (read), x2: pointer to c (written)
// On exit x0 is all-ones if the subtraction borrowed out of the top word and
// zero otherwise.
// The load instructions between the sbcs groups do not touch the flags, so a
// single borrow chain runs through all 14 limbs.
.globl _sike_mpsubx2_asm
.private_extern _sike_mpsubx2_asm
.align 4
_sike_mpsubx2_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
// Limbs 0-3
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
// Limbs 4-7
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x11, x12, [x1,#32]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbcs x9, x9, x13
sbcs x10, x10, x14
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
// Limbs 8-11
ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
// Limbs 12-13
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x7, x7, x11
sbcs x8, x8, x12
// x0 <- borrow mask (0 or all-ones); the pointer in x0 is no longer needed.
sbc x0, xzr, xzr
stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
ldp x29, x30, [sp],#16
ret
// sike_mpdblsubx2_asm: in-place double subtraction c = c - a - b on 14-word
// values.
//   x0: pointer to a (read), x1: pointer to b (read)
//   x2: pointer to c (read and written)
// The value is processed in chunks of 6, 6 and 2 words. Within a chunk, a and
// then b are subtracted in two separate borrow chains. x9 counts how many of
// the two chains finished WITHOUT a borrow (carry flag set), so 2 - x9 is the
// number of borrows to take from the next chunk. The borrow out of the last
// chunk is discarded.
.globl _sike_mpdblsubx2_asm
.private_extern _sike_mpdblsubx2_asm
.align 4
_sike_mpdblsubx2_asm:
stp x29, x30, [sp, #-16]!
add x29, sp, #0
// Chunk 0 (words 0-5): x3-x8 <- c, x11-x16 <- a
ldp x3, x4, [x2, #0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x11, x12, [x0, #0]
ldp x13, x14, [x0,#16]
ldp x15, x16, [x0,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
// x9 stores carry
adc x9, xzr, xzr
// x11-x16 <- b, words 0-5
ldp x11, x12, [x1, #0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2, #0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
// Chunk 1 (words 6-11): x3-x8 <- c, x11-x16 <- a
ldp x3, x4, [x2,#48]
ldp x5, x6, [x2,#64]
ldp x7, x8, [x2,#80]
ldp x11, x12, [x0,#48]
ldp x13, x14, [x0,#64]
ldp x15, x16, [x0,#80]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
// Take the pending borrows from the lowest word first; the flags from this
// subs feed the sbcs chain that follows.
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, xzr, xzr
// x11-x16 <- b, words 6-11
ldp x11, x12, [x1,#48]
ldp x13, x14, [x1,#64]
ldp x15, x16, [x1,#80]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2,#48]
stp x5, x6, [x2,#64]
stp x7, x8, [x2,#80]
// Chunk 2 (words 12-13): x3-x4 <- c, x11-x12 <- a, x13-x14 <- b
ldp x3, x4, [x2,#96]
ldp x11, x12, [x0,#96]
ldp x13, x14, [x1,#96]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
subs x3, x3, x13
sbc x4, x4, x14
stp x3, x4, [x2,#96]
ldp x29, x30, [sp],#16
ret
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

View File

@ -30,348 +30,6 @@
#else #else
.code 32 .code 32
#endif #endif
@ rem_4bit: 16-entry table of 16-bit values, indexed by a 4-bit remainder
@ (the GHASH code below loads entries with ldrh at offset 2*rem).
.align 5
rem_4bit:
.short 0x0000,0x1C20,0x3840,0x2460
.short 0x7080,0x6CA0,0x48C0,0x54E0
.short 0xE100,0xFD20,0xD940,0xC560
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
@ rem_4bit_get: helper used by _gcm_gmult_4bit. It puts the address of
@ rem_4bit into r2 and branches back to Lrem_4bit_got; it is entered with a
@ plain branch and does not return through lr.
#ifdef __thumb2__
.thumb_func rem_4bit_get
#endif
rem_4bit_get:
#if defined(__thumb2__)
adr r2,rem_4bit
#else
sub r2,pc,#8+32 @ &rem_4bit
#endif
b Lrem_4bit_got
@ NOTE(review): the two nops presumably pad this helper to 16 bytes so that
@ the pc-relative "sub r12,pc,#8+48" at the start of _gcm_ghash_4bit still
@ lands on rem_4bit -- confirm before changing the layout.
nop
nop
@ gcm_ghash_4bit: table-driven (4 bits at a time) GHASH over a buffer.
@   r0: pointer to the 16-byte hash state; bytes 0-15 are read and written
@   r1: pointer to Htbl, a table of 16-byte entries indexed by a nibble
@   r2: pointer to the input data
@   r3: input length in bytes (converted to an end pointer below)
@ Louter handles one 16-byte block per iteration and stops when r2 reaches the
@ end pointer. NOTE(review): this assumes the length is a non-zero multiple
@ of 16 -- confirm against the caller.
@ rem_4bit is copied onto the stack so the loops can index it relative to sp.
.globl _gcm_ghash_4bit
.private_extern _gcm_ghash_4bit
#ifdef __thumb2__
.thumb_func _gcm_ghash_4bit
#endif
.align 4
_gcm_ghash_4bit:
#if defined(__thumb2__)
adr r12,rem_4bit
#else
sub r12,pc,#8+48 @ &rem_4bit
#endif
add r3,r2,r3 @ r3 to point at the end
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
@ Start with byte 15 of the input block and of the state.
ldrb r12,[r2,#15]
ldrb r14,[r0,#15]
Louter:
@ r12 <- low nibble, r14 <- high nibble (already scaled by 16) of the
@ xor of the input byte and the state byte.
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
@ r3 is reused as the byte index for Linner; the end pointer is re-loaded
@ from the stack after the loop.
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
add r11,r1,r14
ldrb r12,[r2,#14]
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
@ Shift the 128-bit value in r4-r7 right by four bits while xoring in
@ Htbl[nhi] (r8-r11).
eor r4,r8,r4,lsr#4
ldrh r8,[sp,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
ldrb r14,[r0,#14]
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
eor r7,r7,r8,lsl#16
Linner:
@ One iteration per remaining byte; r3 counts down from 14 and the loop
@ runs while the result of the subs is non-negative.
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[sp,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r2,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r8,[r0,r3]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r9,[sp,r14]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
#ifdef __thumb2__
it pl
#endif
eorpl r12,r12,r8
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
bpl Linner
ldr r3,[sp,#32] @ re-load r3/end
add r2,r2,#16
mov r14,r4
@ Write r4-r7 back to the state at offsets 12, 8, 4 and 0, each in
@ big-endian byte order: byte-reversed on little-endian ARMv7+, stored
@ directly on big-endian, byte by byte otherwise.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
@ Flags from this compare are consumed by the conditional load and the
@ "bne Louter" below; none of the stores in between change them.
cmp r2,r3
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#ifdef __thumb2__
it ne
#endif
ldrneb r12,[r2,#15]
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
bne Louter
@ Drop the 32-byte stack copy of rem_4bit and the saved r3/end word.
add sp,sp,#36
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
@ gcm_gmult_4bit: table-driven (4 bits at a time) update of the 16-byte state
@ in place, with no input buffer.
@   r0: pointer to the 16-byte hash state; bytes 0-15 are read and written
@   r1: pointer to Htbl, a table of 16-byte entries indexed by a nibble
@ rem_4bit is read in place through r2, which rem_4bit_get sets up before
@ branching back to Lrem_4bit_got.
.globl _gcm_gmult_4bit
.private_extern _gcm_gmult_4bit
#ifdef __thumb2__
.thumb_func _gcm_gmult_4bit
#endif
_gcm_gmult_4bit:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldrb r12,[r0,#15]
b rem_4bit_get
Lrem_4bit_got:
@ r12 <- low nibble, r14 <- high nibble (already scaled by 16) of byte 15.
and r14,r12,#0xf0
and r12,r12,#0x0f
@ r3 <- index of the next state byte, counted down in Loop.
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
ldrb r12,[r0,#14]
add r11,r1,r14
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
@ Shift the 128-bit value in r4-r7 right by four bits while xoring in
@ Htbl[nhi] (r8-r11).
eor r4,r8,r4,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
and r14,r12,#0xf0
eor r7,r7,r8,lsl#16
and r12,r12,#0x0f
Loop:
@ One iteration per remaining byte; runs while the result of the subs is
@ non-negative.
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[r2,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r0,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
bpl Loop
@ Write r4-r7 back to the state at offsets 12, 8, 4 and 0, each in
@ big-endian byte order: byte-reversed on little-endian ARMv7+, stored
@ directly on big-endian, byte by byte otherwise.
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7 #if __ARM_MAX_ARCH__>=7

File diff suppressed because it is too large Load Diff

View File

@ -30,7 +30,6 @@
.private_extern _abi_test_trampoline .private_extern _abi_test_trampoline
.align 4 .align 4
_abi_test_trampoline: _abi_test_trampoline:
Labi_test_trampoline_begin:
@ Save parameters and all callee-saved registers. For convenience, we @ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile. @ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

View File

@ -34,6 +34,7 @@
.type ChaCha20_ctr32,%function .type ChaCha20_ctr32,%function
.align 5 .align 5
ChaCha20_ctr32: ChaCha20_ctr32:
AARCH64_VALID_CALL_TARGET
cbz x2,.Labort cbz x2,.Labort
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x5,:pg_hi21_nc:OPENSSL_armcap_P adrp x5,:pg_hi21_nc:OPENSSL_armcap_P
@ -47,6 +48,7 @@ ChaCha20_ctr32:
b.ne ChaCha20_neon b.ne ChaCha20_neon
.Lshort: .Lshort:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]! stp x29,x30,[sp,#-96]!
add x29,sp,#0 add x29,sp,#0
@ -259,6 +261,7 @@ ChaCha20_ctr32:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96 ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
.Labort: .Labort:
ret ret
@ -315,12 +318,14 @@ ChaCha20_ctr32:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96 ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size ChaCha20_ctr32,.-ChaCha20_ctr32 .size ChaCha20_ctr32,.-ChaCha20_ctr32
.type ChaCha20_neon,%function .type ChaCha20_neon,%function
.align 5 .align 5
ChaCha20_neon: ChaCha20_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]! stp x29,x30,[sp,#-96]!
add x29,sp,#0 add x29,sp,#0
@ -701,6 +706,7 @@ ChaCha20_neon:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96 ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.Ltail_neon: .Ltail_neon:
@ -810,11 +816,13 @@ ChaCha20_neon:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96 ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size ChaCha20_neon,.-ChaCha20_neon .size ChaCha20_neon,.-ChaCha20_neon
.type ChaCha20_512_neon,%function .type ChaCha20_512_neon,%function
.align 5 .align 5
ChaCha20_512_neon: ChaCha20_512_neon:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-96]! stp x29,x30,[sp,#-96]!
add x29,sp,#0 add x29,sp,#0
@ -1978,7 +1986,9 @@ ChaCha20_512_neon:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96 ldp x29,x30,[sp],#96
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size ChaCha20_512_neon,.-ChaCha20_512_neon .size ChaCha20_512_neon,.-ChaCha20_512_neon
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -33,6 +33,8 @@
.align 5 .align 5
aes_hw_set_encrypt_key: aes_hw_set_encrypt_key:
.Lenc_key: .Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
mov x3,#-1 mov x3,#-1
@ -201,6 +203,7 @@ aes_hw_set_encrypt_key:
.type aes_hw_set_decrypt_key,%function .type aes_hw_set_decrypt_key,%function
.align 5 .align 5
aes_hw_set_decrypt_key: aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
bl .Lenc_key bl .Lenc_key
@ -234,6 +237,7 @@ aes_hw_set_decrypt_key:
eor x0,x0,x0 // return value eor x0,x0,x0 // return value
.Ldec_key_abort: .Ldec_key_abort:
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key .size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt .globl aes_hw_encrypt
@ -241,6 +245,7 @@ aes_hw_set_decrypt_key:
.type aes_hw_encrypt,%function .type aes_hw_encrypt,%function
.align 5 .align 5
aes_hw_encrypt: aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240] ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16 ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0] ld1 {v2.16b},[x0]
@ -271,6 +276,7 @@ aes_hw_encrypt:
.type aes_hw_decrypt,%function .type aes_hw_decrypt,%function
.align 5 .align 5
aes_hw_decrypt: aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240] ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16 ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0] ld1 {v2.16b},[x0]
@ -301,6 +307,8 @@ aes_hw_decrypt:
.type aes_hw_cbc_encrypt,%function .type aes_hw_cbc_encrypt,%function
.align 5 .align 5
aes_hw_cbc_encrypt: aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
subs x2,x2,#16 subs x2,x2,#16
@ -592,6 +600,8 @@ aes_hw_cbc_encrypt:
.type aes_hw_ctr32_encrypt_blocks,%function .type aes_hw_ctr32_encrypt_blocks,%function
.align 5 .align 5
aes_hw_ctr32_encrypt_blocks: aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
ldr w5,[x3,#240] ldr w5,[x3,#240]
@ -772,3 +782,4 @@ aes_hw_ctr32_encrypt_blocks:
#endif #endif
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,6 +13,8 @@
#if defined(BORINGSSL_PREFIX) #if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
#include <openssl/arm_arch.h>
.text .text
.globl bn_mul_mont .globl bn_mul_mont
@ -20,6 +22,7 @@
.type bn_mul_mont,%function .type bn_mul_mont,%function
.align 5 .align 5
bn_mul_mont: bn_mul_mont:
AARCH64_SIGN_LINK_REGISTER
tst x5,#7 tst x5,#7
b.eq __bn_sqr8x_mont b.eq __bn_sqr8x_mont
tst x5,#3 tst x5,#3
@ -217,11 +220,14 @@ bn_mul_mont:
mov x0,#1 mov x0,#1
ldp x23,x24,[x29,#48] ldp x23,x24,[x29,#48]
ldr x29,[sp],#64 ldr x29,[sp],#64
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size bn_mul_mont,.-bn_mul_mont .size bn_mul_mont,.-bn_mul_mont
.type __bn_sqr8x_mont,%function .type __bn_sqr8x_mont,%function
.align 5 .align 5
__bn_sqr8x_mont: __bn_sqr8x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
// only from bn_mul_mont which has already signed the return address.
cmp x1,x2 cmp x1,x2
b.ne __bn_mul4x_mont b.ne __bn_mul4x_mont
.Lsqr8x_mont: .Lsqr8x_mont:
@ -975,11 +981,16 @@ __bn_sqr8x_mont:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldr x29,[sp],#128 ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont .size __bn_sqr8x_mont,.-__bn_sqr8x_mont
.type __bn_mul4x_mont,%function .type __bn_mul4x_mont,%function
.align 5 .align 5
__bn_mul4x_mont: __bn_mul4x_mont:
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
// return address.
stp x29,x30,[sp,#-128]! stp x29,x30,[sp,#-128]!
add x29,sp,#0 add x29,sp,#0
stp x19,x20,[sp,#16] stp x19,x20,[sp,#16]
@ -1413,6 +1424,8 @@ __bn_mul4x_mont:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldr x29,[sp],#128 ldr x29,[sp],#128
// x30 is popped earlier
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size __bn_mul4x_mont,.-__bn_mul4x_mont .size __bn_mul4x_mont,.-__bn_mul4x_mont
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@ -1420,3 +1433,4 @@ __bn_mul4x_mont:
.align 4 .align 4
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,6 +13,8 @@
#if defined(BORINGSSL_PREFIX) #if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
#include <openssl/arm_arch.h>
.text .text
.globl gcm_init_neon .globl gcm_init_neon
@ -20,6 +22,7 @@
.type gcm_init_neon,%function .type gcm_init_neon,%function
.align 4 .align 4
gcm_init_neon: gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3. // This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1 movi v19.16b, #0xe1
@ -45,6 +48,7 @@ gcm_init_neon:
.type gcm_gmult_neon,%function .type gcm_gmult_neon,%function
.align 4 .align 4
gcm_gmult_neon: gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1] ld1 {v6.1d}, [x1]
@ -64,6 +68,7 @@ gcm_gmult_neon:
.type gcm_ghash_neon,%function .type gcm_ghash_neon,%function
.align 4 .align 4
gcm_ghash_neon: gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1] ld1 {v6.1d}, [x1]
@ -338,3 +343,4 @@ gcm_ghash_neon:
.align 2 .align 2
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -22,6 +22,7 @@
.type gcm_init_v8,%function .type gcm_init_v8,%function
.align 4 .align 4
gcm_init_v8: gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1 movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0 shl v19.2d,v19.2d,#57 //0xc2.0
@ -73,6 +74,7 @@ gcm_init_v8:
.type gcm_gmult_v8,%function .type gcm_gmult_v8,%function
.align 4 .align 4
gcm_gmult_v8: gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1 movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ... ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
@ -115,6 +117,7 @@ gcm_gmult_v8:
.type gcm_ghash_v8,%function .type gcm_ghash_v8,%function
.align 4 .align 4
gcm_ghash_v8: gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v0.2d},[x0] //load [rotated] Xi ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that //"[rotated]" means that
//loaded value would have //loaded value would have
@ -246,3 +249,4 @@ gcm_ghash_v8:
.align 2 .align 2
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -23,6 +23,8 @@
.type sha1_block_data_order,%function .type sha1_block_data_order,%function
.align 6 .align 6
sha1_block_data_order: sha1_block_data_order:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
#else #else
@ -1090,6 +1092,8 @@ sha1_block_data_order:
.type sha1_block_armv8,%function .type sha1_block_armv8,%function
.align 6 .align 6
sha1_block_armv8: sha1_block_armv8:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
.Lv8_entry: .Lv8_entry:
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
@ -1232,3 +1236,4 @@ sha1_block_armv8:
.hidden OPENSSL_armcap_P .hidden OPENSSL_armcap_P
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -64,6 +64,7 @@
.type sha256_block_data_order,%function .type sha256_block_data_order,%function
.align 6 .align 6
sha256_block_data_order: sha256_block_data_order:
AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__ #ifndef __KERNEL__
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
@ -74,6 +75,7 @@ sha256_block_data_order:
tst w16,#ARMV8_SHA256 tst w16,#ARMV8_SHA256
b.ne .Lv8_entry b.ne .Lv8_entry
#endif #endif
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]! stp x29,x30,[sp,#-128]!
add x29,sp,#0 add x29,sp,#0
@ -1034,6 +1036,7 @@ sha256_block_data_order:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128 ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size sha256_block_data_order,.-sha256_block_data_order .size sha256_block_data_order,.-sha256_block_data_order
@ -1068,6 +1071,7 @@ sha256_block_data_order:
.align 6 .align 6
sha256_block_armv8: sha256_block_armv8:
.Lv8_entry: .Lv8_entry:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
@ -1210,3 +1214,4 @@ sha256_block_armv8:
#endif #endif
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -64,6 +64,7 @@
.type sha512_block_data_order,%function .type sha512_block_data_order,%function
.align 6 .align 6
sha512_block_data_order: sha512_block_data_order:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-128]! stp x29,x30,[sp,#-128]!
add x29,sp,#0 add x29,sp,#0
@ -1024,6 +1025,7 @@ sha512_block_data_order:
ldp x25,x26,[x29,#64] ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80] ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#128 ldp x29,x30,[sp],#128
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size sha512_block_data_order,.-sha512_block_data_order .size sha512_block_data_order,.-sha512_block_data_order
@ -1082,3 +1084,4 @@ sha512_block_data_order:
#endif #endif
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,6 +13,8 @@
#if defined(BORINGSSL_PREFIX) #if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
#include <openssl/arm_arch.h>
.section .rodata .section .rodata
.type _vpaes_consts,%object .type _vpaes_consts,%object
@ -215,6 +217,7 @@ _vpaes_encrypt_core:
.type vpaes_encrypt,%function .type vpaes_encrypt,%function
.align 4 .align 4
vpaes_encrypt: vpaes_encrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
@ -224,6 +227,7 @@ vpaes_encrypt:
st1 {v0.16b}, [x1] st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size vpaes_encrypt,.-vpaes_encrypt .size vpaes_encrypt,.-vpaes_encrypt
@ -452,6 +456,7 @@ _vpaes_decrypt_core:
.type vpaes_decrypt,%function .type vpaes_decrypt,%function
.align 4 .align 4
vpaes_decrypt: vpaes_decrypt:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
@ -461,6 +466,7 @@ vpaes_decrypt:
st1 {v0.16b}, [x1] st1 {v0.16b}, [x1]
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size vpaes_decrypt,.-vpaes_decrypt .size vpaes_decrypt,.-vpaes_decrypt
@ -630,6 +636,7 @@ _vpaes_key_preheat:
.type _vpaes_schedule_core,%function .type _vpaes_schedule_core,%function
.align 4 .align 4
_vpaes_schedule_core: _vpaes_schedule_core:
AARCH64_SIGN_LINK_REGISTER
stp x29, x30, [sp,#-16]! stp x29, x30, [sp,#-16]!
add x29,sp,#0 add x29,sp,#0
@ -799,6 +806,7 @@ _vpaes_schedule_core:
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6 eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7 eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
ldp x29, x30, [sp],#16 ldp x29, x30, [sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size _vpaes_schedule_core,.-_vpaes_schedule_core .size _vpaes_schedule_core,.-_vpaes_schedule_core
@ -1012,6 +1020,7 @@ _vpaes_schedule_mangle:
.type vpaes_set_encrypt_key,%function .type vpaes_set_encrypt_key,%function
.align 4 .align 4
vpaes_set_encrypt_key: vpaes_set_encrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1027,6 +1036,7 @@ vpaes_set_encrypt_key:
ldp d8,d9,[sp],#16 ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key .size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
@ -1035,6 +1045,7 @@ vpaes_set_encrypt_key:
.type vpaes_set_decrypt_key,%function .type vpaes_set_decrypt_key,%function
.align 4 .align 4
vpaes_set_decrypt_key: vpaes_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1054,6 +1065,7 @@ vpaes_set_decrypt_key:
ldp d8,d9,[sp],#16 ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key .size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
.globl vpaes_cbc_encrypt .globl vpaes_cbc_encrypt
@ -1061,6 +1073,7 @@ vpaes_set_decrypt_key:
.type vpaes_cbc_encrypt,%function .type vpaes_cbc_encrypt,%function
.align 4 .align 4
vpaes_cbc_encrypt: vpaes_cbc_encrypt:
AARCH64_SIGN_LINK_REGISTER
cbz x2, .Lcbc_abort cbz x2, .Lcbc_abort
cmp w5, #0 // check direction cmp w5, #0 // check direction
b.eq vpaes_cbc_decrypt b.eq vpaes_cbc_decrypt
@ -1087,6 +1100,7 @@ vpaes_cbc_encrypt:
st1 {v0.16b}, [x4] // write ivec st1 {v0.16b}, [x4] // write ivec
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
.Lcbc_abort: .Lcbc_abort:
ret ret
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt .size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
@ -1094,6 +1108,8 @@ vpaes_cbc_encrypt:
.type vpaes_cbc_decrypt,%function .type vpaes_cbc_decrypt,%function
.align 4 .align 4
vpaes_cbc_decrypt: vpaes_cbc_decrypt:
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
// only from vpaes_cbc_encrypt which has already signed the return address.
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1135,6 +1151,7 @@ vpaes_cbc_decrypt:
ldp d10,d11,[sp],#16 ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16 ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt .size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
.globl vpaes_ctr32_encrypt_blocks .globl vpaes_ctr32_encrypt_blocks
@ -1142,6 +1159,7 @@ vpaes_cbc_decrypt:
.type vpaes_ctr32_encrypt_blocks,%function .type vpaes_ctr32_encrypt_blocks,%function
.align 4 .align 4
vpaes_ctr32_encrypt_blocks: vpaes_ctr32_encrypt_blocks:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
add x29,sp,#0 add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so stp d8,d9,[sp,#-16]! // ABI spec says so
@ -1209,7 +1227,9 @@ vpaes_ctr32_encrypt_blocks:
ldp d10,d11,[sp],#16 ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16 ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16 ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks .size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -13,6 +13,8 @@
#if defined(BORINGSSL_PREFIX) #if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
#include <openssl/arm_arch.h>
.text .text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func| // abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ -27,6 +29,7 @@
.align 4 .align 4
abi_test_trampoline: abi_test_trampoline:
.Labi_test_trampoline_begin: .Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses) // Stack layout (low to high addresses)
// x29,x30 (16 bytes) // x29,x30 (16 bytes)
// d8-d15 (64 bytes) // d8-d15 (64 bytes)
@ -129,6 +132,7 @@ abi_test_trampoline:
ldp x27, x28, [sp, #144] ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176 ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret ret
.size abi_test_trampoline,.-abi_test_trampoline .size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_x0, %function .type abi_test_clobber_x0, %function
@ -136,6 +140,7 @@ abi_test_trampoline:
.hidden abi_test_clobber_x0 .hidden abi_test_clobber_x0
.align 4 .align 4
abi_test_clobber_x0: abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr mov x0, xzr
ret ret
.size abi_test_clobber_x0,.-abi_test_clobber_x0 .size abi_test_clobber_x0,.-abi_test_clobber_x0
@ -144,6 +149,7 @@ abi_test_clobber_x0:
.hidden abi_test_clobber_x1 .hidden abi_test_clobber_x1
.align 4 .align 4
abi_test_clobber_x1: abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr mov x1, xzr
ret ret
.size abi_test_clobber_x1,.-abi_test_clobber_x1 .size abi_test_clobber_x1,.-abi_test_clobber_x1
@ -152,6 +158,7 @@ abi_test_clobber_x1:
.hidden abi_test_clobber_x2 .hidden abi_test_clobber_x2
.align 4 .align 4
abi_test_clobber_x2: abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr mov x2, xzr
ret ret
.size abi_test_clobber_x2,.-abi_test_clobber_x2 .size abi_test_clobber_x2,.-abi_test_clobber_x2
@ -160,6 +167,7 @@ abi_test_clobber_x2:
.hidden abi_test_clobber_x3 .hidden abi_test_clobber_x3
.align 4 .align 4
abi_test_clobber_x3: abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr mov x3, xzr
ret ret
.size abi_test_clobber_x3,.-abi_test_clobber_x3 .size abi_test_clobber_x3,.-abi_test_clobber_x3
@ -168,6 +176,7 @@ abi_test_clobber_x3:
.hidden abi_test_clobber_x4 .hidden abi_test_clobber_x4
.align 4 .align 4
abi_test_clobber_x4: abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr mov x4, xzr
ret ret
.size abi_test_clobber_x4,.-abi_test_clobber_x4 .size abi_test_clobber_x4,.-abi_test_clobber_x4
@ -176,6 +185,7 @@ abi_test_clobber_x4:
.hidden abi_test_clobber_x5 .hidden abi_test_clobber_x5
.align 4 .align 4
abi_test_clobber_x5: abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr mov x5, xzr
ret ret
.size abi_test_clobber_x5,.-abi_test_clobber_x5 .size abi_test_clobber_x5,.-abi_test_clobber_x5
@ -184,6 +194,7 @@ abi_test_clobber_x5:
.hidden abi_test_clobber_x6 .hidden abi_test_clobber_x6
.align 4 .align 4
abi_test_clobber_x6: abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr mov x6, xzr
ret ret
.size abi_test_clobber_x6,.-abi_test_clobber_x6 .size abi_test_clobber_x6,.-abi_test_clobber_x6
@ -192,6 +203,7 @@ abi_test_clobber_x6:
.hidden abi_test_clobber_x7 .hidden abi_test_clobber_x7
.align 4 .align 4
abi_test_clobber_x7: abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr mov x7, xzr
ret ret
.size abi_test_clobber_x7,.-abi_test_clobber_x7 .size abi_test_clobber_x7,.-abi_test_clobber_x7
@ -200,6 +212,7 @@ abi_test_clobber_x7:
.hidden abi_test_clobber_x8 .hidden abi_test_clobber_x8
.align 4 .align 4
abi_test_clobber_x8: abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr mov x8, xzr
ret ret
.size abi_test_clobber_x8,.-abi_test_clobber_x8 .size abi_test_clobber_x8,.-abi_test_clobber_x8
@ -208,6 +221,7 @@ abi_test_clobber_x8:
.hidden abi_test_clobber_x9 .hidden abi_test_clobber_x9
.align 4 .align 4
abi_test_clobber_x9: abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr mov x9, xzr
ret ret
.size abi_test_clobber_x9,.-abi_test_clobber_x9 .size abi_test_clobber_x9,.-abi_test_clobber_x9
@ -216,6 +230,7 @@ abi_test_clobber_x9:
.hidden abi_test_clobber_x10 .hidden abi_test_clobber_x10
.align 4 .align 4
abi_test_clobber_x10: abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr mov x10, xzr
ret ret
.size abi_test_clobber_x10,.-abi_test_clobber_x10 .size abi_test_clobber_x10,.-abi_test_clobber_x10
@ -224,6 +239,7 @@ abi_test_clobber_x10:
.hidden abi_test_clobber_x11 .hidden abi_test_clobber_x11
.align 4 .align 4
abi_test_clobber_x11: abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr mov x11, xzr
ret ret
.size abi_test_clobber_x11,.-abi_test_clobber_x11 .size abi_test_clobber_x11,.-abi_test_clobber_x11
@ -232,6 +248,7 @@ abi_test_clobber_x11:
.hidden abi_test_clobber_x12 .hidden abi_test_clobber_x12
.align 4 .align 4
abi_test_clobber_x12: abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr mov x12, xzr
ret ret
.size abi_test_clobber_x12,.-abi_test_clobber_x12 .size abi_test_clobber_x12,.-abi_test_clobber_x12
@ -240,6 +257,7 @@ abi_test_clobber_x12:
.hidden abi_test_clobber_x13 .hidden abi_test_clobber_x13
.align 4 .align 4
abi_test_clobber_x13: abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr mov x13, xzr
ret ret
.size abi_test_clobber_x13,.-abi_test_clobber_x13 .size abi_test_clobber_x13,.-abi_test_clobber_x13
@ -248,6 +266,7 @@ abi_test_clobber_x13:
.hidden abi_test_clobber_x14 .hidden abi_test_clobber_x14
.align 4 .align 4
abi_test_clobber_x14: abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr mov x14, xzr
ret ret
.size abi_test_clobber_x14,.-abi_test_clobber_x14 .size abi_test_clobber_x14,.-abi_test_clobber_x14
@ -256,6 +275,7 @@ abi_test_clobber_x14:
.hidden abi_test_clobber_x15 .hidden abi_test_clobber_x15
.align 4 .align 4
abi_test_clobber_x15: abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr mov x15, xzr
ret ret
.size abi_test_clobber_x15,.-abi_test_clobber_x15 .size abi_test_clobber_x15,.-abi_test_clobber_x15
@ -264,6 +284,7 @@ abi_test_clobber_x15:
.hidden abi_test_clobber_x16 .hidden abi_test_clobber_x16
.align 4 .align 4
abi_test_clobber_x16: abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr mov x16, xzr
ret ret
.size abi_test_clobber_x16,.-abi_test_clobber_x16 .size abi_test_clobber_x16,.-abi_test_clobber_x16
@ -272,6 +293,7 @@ abi_test_clobber_x16:
.hidden abi_test_clobber_x17 .hidden abi_test_clobber_x17
.align 4 .align 4
abi_test_clobber_x17: abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr mov x17, xzr
ret ret
.size abi_test_clobber_x17,.-abi_test_clobber_x17 .size abi_test_clobber_x17,.-abi_test_clobber_x17
@ -280,6 +302,7 @@ abi_test_clobber_x17:
.hidden abi_test_clobber_x19 .hidden abi_test_clobber_x19
.align 4 .align 4
abi_test_clobber_x19: abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr mov x19, xzr
ret ret
.size abi_test_clobber_x19,.-abi_test_clobber_x19 .size abi_test_clobber_x19,.-abi_test_clobber_x19
@ -288,6 +311,7 @@ abi_test_clobber_x19:
.hidden abi_test_clobber_x20 .hidden abi_test_clobber_x20
.align 4 .align 4
abi_test_clobber_x20: abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr mov x20, xzr
ret ret
.size abi_test_clobber_x20,.-abi_test_clobber_x20 .size abi_test_clobber_x20,.-abi_test_clobber_x20
@ -296,6 +320,7 @@ abi_test_clobber_x20:
.hidden abi_test_clobber_x21 .hidden abi_test_clobber_x21
.align 4 .align 4
abi_test_clobber_x21: abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr mov x21, xzr
ret ret
.size abi_test_clobber_x21,.-abi_test_clobber_x21 .size abi_test_clobber_x21,.-abi_test_clobber_x21
@ -304,6 +329,7 @@ abi_test_clobber_x21:
.hidden abi_test_clobber_x22 .hidden abi_test_clobber_x22
.align 4 .align 4
abi_test_clobber_x22: abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr mov x22, xzr
ret ret
.size abi_test_clobber_x22,.-abi_test_clobber_x22 .size abi_test_clobber_x22,.-abi_test_clobber_x22
@ -312,6 +338,7 @@ abi_test_clobber_x22:
.hidden abi_test_clobber_x23 .hidden abi_test_clobber_x23
.align 4 .align 4
abi_test_clobber_x23: abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr mov x23, xzr
ret ret
.size abi_test_clobber_x23,.-abi_test_clobber_x23 .size abi_test_clobber_x23,.-abi_test_clobber_x23
@ -320,6 +347,7 @@ abi_test_clobber_x23:
.hidden abi_test_clobber_x24 .hidden abi_test_clobber_x24
.align 4 .align 4
abi_test_clobber_x24: abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr mov x24, xzr
ret ret
.size abi_test_clobber_x24,.-abi_test_clobber_x24 .size abi_test_clobber_x24,.-abi_test_clobber_x24
@ -328,6 +356,7 @@ abi_test_clobber_x24:
.hidden abi_test_clobber_x25 .hidden abi_test_clobber_x25
.align 4 .align 4
abi_test_clobber_x25: abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr mov x25, xzr
ret ret
.size abi_test_clobber_x25,.-abi_test_clobber_x25 .size abi_test_clobber_x25,.-abi_test_clobber_x25
@ -336,6 +365,7 @@ abi_test_clobber_x25:
.hidden abi_test_clobber_x26 .hidden abi_test_clobber_x26
.align 4 .align 4
abi_test_clobber_x26: abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr mov x26, xzr
ret ret
.size abi_test_clobber_x26,.-abi_test_clobber_x26 .size abi_test_clobber_x26,.-abi_test_clobber_x26
@ -344,6 +374,7 @@ abi_test_clobber_x26:
.hidden abi_test_clobber_x27 .hidden abi_test_clobber_x27
.align 4 .align 4
abi_test_clobber_x27: abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr mov x27, xzr
ret ret
.size abi_test_clobber_x27,.-abi_test_clobber_x27 .size abi_test_clobber_x27,.-abi_test_clobber_x27
@ -352,6 +383,7 @@ abi_test_clobber_x27:
.hidden abi_test_clobber_x28 .hidden abi_test_clobber_x28
.align 4 .align 4
abi_test_clobber_x28: abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr mov x28, xzr
ret ret
.size abi_test_clobber_x28,.-abi_test_clobber_x28 .size abi_test_clobber_x28,.-abi_test_clobber_x28
@ -360,6 +392,7 @@ abi_test_clobber_x28:
.hidden abi_test_clobber_x29 .hidden abi_test_clobber_x29
.align 4 .align 4
abi_test_clobber_x29: abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr mov x29, xzr
ret ret
.size abi_test_clobber_x29,.-abi_test_clobber_x29 .size abi_test_clobber_x29,.-abi_test_clobber_x29
@ -368,6 +401,7 @@ abi_test_clobber_x29:
.hidden abi_test_clobber_d0 .hidden abi_test_clobber_d0
.align 4 .align 4
abi_test_clobber_d0: abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr fmov d0, xzr
ret ret
.size abi_test_clobber_d0,.-abi_test_clobber_d0 .size abi_test_clobber_d0,.-abi_test_clobber_d0
@ -376,6 +410,7 @@ abi_test_clobber_d0:
.hidden abi_test_clobber_d1 .hidden abi_test_clobber_d1
.align 4 .align 4
abi_test_clobber_d1: abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr fmov d1, xzr
ret ret
.size abi_test_clobber_d1,.-abi_test_clobber_d1 .size abi_test_clobber_d1,.-abi_test_clobber_d1
@ -384,6 +419,7 @@ abi_test_clobber_d1:
.hidden abi_test_clobber_d2 .hidden abi_test_clobber_d2
.align 4 .align 4
abi_test_clobber_d2: abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr fmov d2, xzr
ret ret
.size abi_test_clobber_d2,.-abi_test_clobber_d2 .size abi_test_clobber_d2,.-abi_test_clobber_d2
@ -392,6 +428,7 @@ abi_test_clobber_d2:
.hidden abi_test_clobber_d3 .hidden abi_test_clobber_d3
.align 4 .align 4
abi_test_clobber_d3: abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr fmov d3, xzr
ret ret
.size abi_test_clobber_d3,.-abi_test_clobber_d3 .size abi_test_clobber_d3,.-abi_test_clobber_d3
@ -400,6 +437,7 @@ abi_test_clobber_d3:
.hidden abi_test_clobber_d4 .hidden abi_test_clobber_d4
.align 4 .align 4
abi_test_clobber_d4: abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr fmov d4, xzr
ret ret
.size abi_test_clobber_d4,.-abi_test_clobber_d4 .size abi_test_clobber_d4,.-abi_test_clobber_d4
@ -408,6 +446,7 @@ abi_test_clobber_d4:
.hidden abi_test_clobber_d5 .hidden abi_test_clobber_d5
.align 4 .align 4
abi_test_clobber_d5: abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr fmov d5, xzr
ret ret
.size abi_test_clobber_d5,.-abi_test_clobber_d5 .size abi_test_clobber_d5,.-abi_test_clobber_d5
@ -416,6 +455,7 @@ abi_test_clobber_d5:
.hidden abi_test_clobber_d6 .hidden abi_test_clobber_d6
.align 4 .align 4
abi_test_clobber_d6: abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr fmov d6, xzr
ret ret
.size abi_test_clobber_d6,.-abi_test_clobber_d6 .size abi_test_clobber_d6,.-abi_test_clobber_d6
@ -424,6 +464,7 @@ abi_test_clobber_d6:
.hidden abi_test_clobber_d7 .hidden abi_test_clobber_d7
.align 4 .align 4
abi_test_clobber_d7: abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr fmov d7, xzr
ret ret
.size abi_test_clobber_d7,.-abi_test_clobber_d7 .size abi_test_clobber_d7,.-abi_test_clobber_d7
@ -432,6 +473,7 @@ abi_test_clobber_d7:
.hidden abi_test_clobber_d8 .hidden abi_test_clobber_d8
.align 4 .align 4
abi_test_clobber_d8: abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr fmov d8, xzr
ret ret
.size abi_test_clobber_d8,.-abi_test_clobber_d8 .size abi_test_clobber_d8,.-abi_test_clobber_d8
@ -440,6 +482,7 @@ abi_test_clobber_d8:
.hidden abi_test_clobber_d9 .hidden abi_test_clobber_d9
.align 4 .align 4
abi_test_clobber_d9: abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr fmov d9, xzr
ret ret
.size abi_test_clobber_d9,.-abi_test_clobber_d9 .size abi_test_clobber_d9,.-abi_test_clobber_d9
@ -448,6 +491,7 @@ abi_test_clobber_d9:
.hidden abi_test_clobber_d10 .hidden abi_test_clobber_d10
.align 4 .align 4
abi_test_clobber_d10: abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr fmov d10, xzr
ret ret
.size abi_test_clobber_d10,.-abi_test_clobber_d10 .size abi_test_clobber_d10,.-abi_test_clobber_d10
@ -456,6 +500,7 @@ abi_test_clobber_d10:
.hidden abi_test_clobber_d11 .hidden abi_test_clobber_d11
.align 4 .align 4
abi_test_clobber_d11: abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr fmov d11, xzr
ret ret
.size abi_test_clobber_d11,.-abi_test_clobber_d11 .size abi_test_clobber_d11,.-abi_test_clobber_d11
@ -464,6 +509,7 @@ abi_test_clobber_d11:
.hidden abi_test_clobber_d12 .hidden abi_test_clobber_d12
.align 4 .align 4
abi_test_clobber_d12: abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr fmov d12, xzr
ret ret
.size abi_test_clobber_d12,.-abi_test_clobber_d12 .size abi_test_clobber_d12,.-abi_test_clobber_d12
@ -472,6 +518,7 @@ abi_test_clobber_d12:
.hidden abi_test_clobber_d13 .hidden abi_test_clobber_d13
.align 4 .align 4
abi_test_clobber_d13: abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr fmov d13, xzr
ret ret
.size abi_test_clobber_d13,.-abi_test_clobber_d13 .size abi_test_clobber_d13,.-abi_test_clobber_d13
@ -480,6 +527,7 @@ abi_test_clobber_d13:
.hidden abi_test_clobber_d14 .hidden abi_test_clobber_d14
.align 4 .align 4
abi_test_clobber_d14: abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr fmov d14, xzr
ret ret
.size abi_test_clobber_d14,.-abi_test_clobber_d14 .size abi_test_clobber_d14,.-abi_test_clobber_d14
@ -488,6 +536,7 @@ abi_test_clobber_d14:
.hidden abi_test_clobber_d15 .hidden abi_test_clobber_d15
.align 4 .align 4
abi_test_clobber_d15: abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr fmov d15, xzr
ret ret
.size abi_test_clobber_d15,.-abi_test_clobber_d15 .size abi_test_clobber_d15,.-abi_test_clobber_d15
@ -496,6 +545,7 @@ abi_test_clobber_d15:
.hidden abi_test_clobber_d16 .hidden abi_test_clobber_d16
.align 4 .align 4
abi_test_clobber_d16: abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr fmov d16, xzr
ret ret
.size abi_test_clobber_d16,.-abi_test_clobber_d16 .size abi_test_clobber_d16,.-abi_test_clobber_d16
@ -504,6 +554,7 @@ abi_test_clobber_d16:
.hidden abi_test_clobber_d17 .hidden abi_test_clobber_d17
.align 4 .align 4
abi_test_clobber_d17: abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr fmov d17, xzr
ret ret
.size abi_test_clobber_d17,.-abi_test_clobber_d17 .size abi_test_clobber_d17,.-abi_test_clobber_d17
@ -512,6 +563,7 @@ abi_test_clobber_d17:
.hidden abi_test_clobber_d18 .hidden abi_test_clobber_d18
.align 4 .align 4
abi_test_clobber_d18: abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr fmov d18, xzr
ret ret
.size abi_test_clobber_d18,.-abi_test_clobber_d18 .size abi_test_clobber_d18,.-abi_test_clobber_d18
@ -520,6 +572,7 @@ abi_test_clobber_d18:
.hidden abi_test_clobber_d19 .hidden abi_test_clobber_d19
.align 4 .align 4
abi_test_clobber_d19: abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr fmov d19, xzr
ret ret
.size abi_test_clobber_d19,.-abi_test_clobber_d19 .size abi_test_clobber_d19,.-abi_test_clobber_d19
@ -528,6 +581,7 @@ abi_test_clobber_d19:
.hidden abi_test_clobber_d20 .hidden abi_test_clobber_d20
.align 4 .align 4
abi_test_clobber_d20: abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr fmov d20, xzr
ret ret
.size abi_test_clobber_d20,.-abi_test_clobber_d20 .size abi_test_clobber_d20,.-abi_test_clobber_d20
@ -536,6 +590,7 @@ abi_test_clobber_d20:
.hidden abi_test_clobber_d21 .hidden abi_test_clobber_d21
.align 4 .align 4
abi_test_clobber_d21: abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr fmov d21, xzr
ret ret
.size abi_test_clobber_d21,.-abi_test_clobber_d21 .size abi_test_clobber_d21,.-abi_test_clobber_d21
@ -544,6 +599,7 @@ abi_test_clobber_d21:
.hidden abi_test_clobber_d22 .hidden abi_test_clobber_d22
.align 4 .align 4
abi_test_clobber_d22: abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr fmov d22, xzr
ret ret
.size abi_test_clobber_d22,.-abi_test_clobber_d22 .size abi_test_clobber_d22,.-abi_test_clobber_d22
@ -552,6 +608,7 @@ abi_test_clobber_d22:
.hidden abi_test_clobber_d23 .hidden abi_test_clobber_d23
.align 4 .align 4
abi_test_clobber_d23: abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr fmov d23, xzr
ret ret
.size abi_test_clobber_d23,.-abi_test_clobber_d23 .size abi_test_clobber_d23,.-abi_test_clobber_d23
@ -560,6 +617,7 @@ abi_test_clobber_d23:
.hidden abi_test_clobber_d24 .hidden abi_test_clobber_d24
.align 4 .align 4
abi_test_clobber_d24: abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr fmov d24, xzr
ret ret
.size abi_test_clobber_d24,.-abi_test_clobber_d24 .size abi_test_clobber_d24,.-abi_test_clobber_d24
@ -568,6 +626,7 @@ abi_test_clobber_d24:
.hidden abi_test_clobber_d25 .hidden abi_test_clobber_d25
.align 4 .align 4
abi_test_clobber_d25: abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr fmov d25, xzr
ret ret
.size abi_test_clobber_d25,.-abi_test_clobber_d25 .size abi_test_clobber_d25,.-abi_test_clobber_d25
@ -576,6 +635,7 @@ abi_test_clobber_d25:
.hidden abi_test_clobber_d26 .hidden abi_test_clobber_d26
.align 4 .align 4
abi_test_clobber_d26: abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr fmov d26, xzr
ret ret
.size abi_test_clobber_d26,.-abi_test_clobber_d26 .size abi_test_clobber_d26,.-abi_test_clobber_d26
@ -584,6 +644,7 @@ abi_test_clobber_d26:
.hidden abi_test_clobber_d27 .hidden abi_test_clobber_d27
.align 4 .align 4
abi_test_clobber_d27: abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr fmov d27, xzr
ret ret
.size abi_test_clobber_d27,.-abi_test_clobber_d27 .size abi_test_clobber_d27,.-abi_test_clobber_d27
@ -592,6 +653,7 @@ abi_test_clobber_d27:
.hidden abi_test_clobber_d28 .hidden abi_test_clobber_d28
.align 4 .align 4
abi_test_clobber_d28: abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr fmov d28, xzr
ret ret
.size abi_test_clobber_d28,.-abi_test_clobber_d28 .size abi_test_clobber_d28,.-abi_test_clobber_d28
@ -600,6 +662,7 @@ abi_test_clobber_d28:
.hidden abi_test_clobber_d29 .hidden abi_test_clobber_d29
.align 4 .align 4
abi_test_clobber_d29: abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr fmov d29, xzr
ret ret
.size abi_test_clobber_d29,.-abi_test_clobber_d29 .size abi_test_clobber_d29,.-abi_test_clobber_d29
@ -608,6 +671,7 @@ abi_test_clobber_d29:
.hidden abi_test_clobber_d30 .hidden abi_test_clobber_d30
.align 4 .align 4
abi_test_clobber_d30: abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr fmov d30, xzr
ret ret
.size abi_test_clobber_d30,.-abi_test_clobber_d30 .size abi_test_clobber_d30,.-abi_test_clobber_d30
@ -616,6 +680,7 @@ abi_test_clobber_d30:
.hidden abi_test_clobber_d31 .hidden abi_test_clobber_d31
.align 4 .align 4
abi_test_clobber_d31: abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr fmov d31, xzr
ret ret
.size abi_test_clobber_d31,.-abi_test_clobber_d31 .size abi_test_clobber_d31,.-abi_test_clobber_d31
@ -624,6 +689,7 @@ abi_test_clobber_d31:
.hidden abi_test_clobber_v8_upper .hidden abi_test_clobber_v8_upper
.align 4 .align 4
abi_test_clobber_v8_upper: abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr fmov v8.d[1], xzr
ret ret
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper .size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
@ -632,6 +698,7 @@ abi_test_clobber_v8_upper:
.hidden abi_test_clobber_v9_upper .hidden abi_test_clobber_v9_upper
.align 4 .align 4
abi_test_clobber_v9_upper: abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr fmov v9.d[1], xzr
ret ret
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper .size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
@ -640,6 +707,7 @@ abi_test_clobber_v9_upper:
.hidden abi_test_clobber_v10_upper .hidden abi_test_clobber_v10_upper
.align 4 .align 4
abi_test_clobber_v10_upper: abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr fmov v10.d[1], xzr
ret ret
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper .size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
@ -648,6 +716,7 @@ abi_test_clobber_v10_upper:
.hidden abi_test_clobber_v11_upper .hidden abi_test_clobber_v11_upper
.align 4 .align 4
abi_test_clobber_v11_upper: abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr fmov v11.d[1], xzr
ret ret
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper .size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
@ -656,6 +725,7 @@ abi_test_clobber_v11_upper:
.hidden abi_test_clobber_v12_upper .hidden abi_test_clobber_v12_upper
.align 4 .align 4
abi_test_clobber_v12_upper: abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr fmov v12.d[1], xzr
ret ret
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper .size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
@ -664,6 +734,7 @@ abi_test_clobber_v12_upper:
.hidden abi_test_clobber_v13_upper .hidden abi_test_clobber_v13_upper
.align 4 .align 4
abi_test_clobber_v13_upper: abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr fmov v13.d[1], xzr
ret ret
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper .size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
@ -672,6 +743,7 @@ abi_test_clobber_v13_upper:
.hidden abi_test_clobber_v14_upper .hidden abi_test_clobber_v14_upper
.align 4 .align 4
abi_test_clobber_v14_upper: abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr fmov v14.d[1], xzr
ret ret
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper .size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
@ -680,8 +752,10 @@ abi_test_clobber_v14_upper:
.hidden abi_test_clobber_v15_upper .hidden abi_test_clobber_v15_upper
.align 4 .align 4
abi_test_clobber_v15_upper: abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr fmov v15.d[1], xzr
ret ret
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper .size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1,998 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
// Read-only constants used by the field routines in this file.
.section .rodata
# p434 x 2
// Six quadwords are stored for this constant. sike_fpadd and sike_fpsub load
// them as three pairs (x11..x16) and apply the second word (x12) to two
// consecutive limbs, so the table stands for a seven-limb value.
.Lp434x2:
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
# p434 + 1
// Four quadwords; sike_fprdc loads them into x23..x26 and uses them as the
// multiplier. NOTE(review): presumably these are the non-zero upper limbs of
// p434 + 1 (fprdc accumulates the products starting at byte offset 0x18) --
// confirm against the generating Perl script.
.Lp434p1:
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
.text
// sike_mpmul: multi-precision multiplication.
//   x0: first operand, 7 x 64-bit limbs are read (offsets 0..48)
//   x1: second operand, 7 x 64-bit limbs are read (offsets 0..48)
//   x2: result buffer, 14 x 64-bit limbs are written (offsets 0..104)
// Each operand is split into a low part of four limbs (AL, BL) and a high
// part of three limbs (AH, BH). The routine computes (AH+AL) x (BH+BL),
// AL x BL and AH x BH and combines them as described by the step comments
// below. x19-x28 are saved on a 96-byte frame and restored before returning.
// The result buffer doubles as scratch space while the routine runs, and both
// x0 and x1 are overwritten as temporaries near the end.
.globl sike_mpmul
.hidden sike_mpmul
.align 4
sike_mpmul:
stp x29, x30, [sp,#-96]!
add x29, sp, #0
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
// x3-x9 <- A (7 limbs), x10-x16 <- B (7 limbs)
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
ldp x14, x15, [x1,#32]
ldr x16, [x1,#48]
// x3-x6 <- AH + AL, x7 <- carry
adds x3, x3, x7
adcs x4, x4, x8
adcs x5, x5, x9
adcs x6, x6, xzr
adc x7, xzr, xzr
// x10-x13 <- BH + BL, x8 <- carry
adds x10, x10, x14
adcs x11, x11, x15
adcs x12, x12, x16
adcs x13, x13, xzr
adc x8, xzr, xzr
// x9 <- combined carry
and x9, x7, x8
// x7-x8 <- mask (0 or all-ones, negated carries)
sub x7, xzr, x7
sub x8, xzr, x8
// x14-x17 <- masked (BH + BL)
and x14, x10, x7
and x15, x11, x7
and x16, x12, x7
and x17, x13, x7
// x20-x23 <- masked (AH + AL)
and x20, x3, x8
and x21, x4, x8
and x22, x5, x8
and x23, x6, x8
// x14-x17, x7 <- masked (AH+AL) + masked (BH+BL), step 1
adds x14, x14, x20
adcs x15, x15, x21
adcs x16, x16, x22
adcs x17, x17, x23
adc x7, x9, xzr
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
// The two low limbs of (AH+AL) are parked in the result buffer because
// x3/x4 are overwritten below and reloaded from [x2] later.
stp x3, x4, [x2,#0]
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x25, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x23, x10, x12
adcs x26, x11, x13
adc x24, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x19, xzr, x25
sub x20, xzr, x24
and x8, x23, x19
and x9, x26, x19
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x21, x3, x20
and x22, x4, x20
mul x19, x3, x23
mul x20, x3, x26
and x25, x25, x24
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x8, x21, x8
umulh x21, x3, x26
adcs x9, x22, x9
umulh x22, x3, x23
adc x25, x25, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x23
umulh x23, x4, x23
adds x20, x20, x22
adc x21, x21, xzr
mul x24, x4, x26
umulh x26, x4, x26
adds x20, x20, x3
adcs x21, x21, x23
adc x22, xzr, xzr
adds x21, x21, x24
adc x22, x22, x26
// Reload the two limbs parked at [x2] above.
ldp x3, x4, [x2,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x21, x8, x21
umulh x24, x3, x10
umulh x26, x3, x11
adcs x22, x9, x22
mul x8, x3, x10
mul x9, x3, x11
adc x25, x25, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x9, x9, x24
adc x26, x26, xzr
mul x23, x4, x11
umulh x11, x4, x11
adds x9, x9, x3
adcs x26, x26, x10
adc x24, xzr, xzr
adds x26, x26, x23
adc x24, x24, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x19, x19, x8
sbcs x20, x20, x9
sbcs x21, x21, x26
mul x4, x5, x13
umulh x23, x5, x13
sbcs x22, x22, x24
sbc x25, x25, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x23, x23, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x23, x23, x12
adc x10, xzr, xzr
adds x23, x23, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x19, x19, x3
sbcs x20, x20, x4
sbcs x21, x21, x23
sbcs x22, x22, x10
sbc x25, x25, xzr
adds x19, x19, x26
adcs x20, x20, x24
adcs x21, x21, x3
adcs x22, x22, x4
adcs x23, x25, x23
adc x24, x10, xzr
// x14-x17, x7 <- (AH+AL) x (BH+BL), final step
adds x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adc x7, x7, xzr
// Load AL
ldp x3, x4, [x0]
ldp x5, x6, [x0,#16]
// Load BL
ldp x10, x11, [x1,#0]
ldp x12, x13, [x1,#16]
// Temporarily store x8,x9 at [x2]
stp x8, x9, [x2,#0]
// x21-x28 <- AL x BL
// A0-A1 <- AH + AL, T0 <- mask
adds x3, x3, x5
adcs x4, x4, x6
adc x8, xzr, xzr
// C6, T1 <- BH + BL, C7 <- mask
adds x27, x10, x12
adcs x9, x11, x13
adc x28, xzr, xzr
// C0-C1 <- masked (BH + BL)
sub x23, xzr, x8
sub x24, xzr, x28
and x21, x27, x23
and x22, x9, x23
// C4-C5 <- masked (AH + AL), T0 <- combined carry
and x25, x3, x24
and x26, x4, x24
mul x23, x3, x27
mul x24, x3, x9
and x8, x8, x28
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
adds x21, x25, x21
umulh x25, x3, x9
adcs x22, x26, x22
umulh x26, x3, x27
adc x8, x8, xzr
// C2-C5 <- (AH+AL) x (BH+BL), low part
mul x3, x4, x27
umulh x27, x4, x27
adds x24, x24, x26
adc x25, x25, xzr
mul x28, x4, x9
umulh x9, x4, x9
adds x24, x24, x3
adcs x25, x25, x27
adc x26, xzr, xzr
adds x25, x25, x28
adc x26, x26, x9
ldp x3, x4, [x0,#0]
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
adds x25, x21, x25
umulh x28, x3, x10
umulh x9, x3, x11
adcs x26, x22, x26
mul x21, x3, x10
mul x22, x3, x11
adc x8, x8, xzr
// C0-C1, T1, C7 <- AL x BL
mul x3, x4, x10
umulh x10, x4, x10
adds x22, x22, x28
adc x9, x9, xzr
mul x27, x4, x11
umulh x11, x4, x11
adds x22, x22, x3
adcs x9, x9, x10
adc x28, xzr, xzr
adds x9, x9, x27
adc x28, x28, x11
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
mul x3, x5, x12
umulh x10, x5, x12
subs x23, x23, x21
sbcs x24, x24, x22
sbcs x25, x25, x9
mul x4, x5, x13
umulh x27, x5, x13
sbcs x26, x26, x28
sbc x8, x8, xzr
// A0, A1, C6, B0 <- AH x BH
mul x5, x6, x12
umulh x12, x6, x12
adds x4, x4, x10
adc x27, x27, xzr
mul x11, x6, x13
umulh x13, x6, x13
adds x4, x4, x5
adcs x27, x27, x12
adc x10, xzr, xzr
adds x27, x27, x11
adc x10, x10, x13
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x23, x23, x3
sbcs x24, x24, x4
sbcs x25, x25, x27
sbcs x26, x26, x10
sbc x8, x8, xzr
adds x23, x23, x9
adcs x24, x24, x28
adcs x25, x25, x3
adcs x26, x26, x4
adcs x27, x8, x27
adc x28, x10, xzr
// Restore x8,x9
ldp x8, x9, [x2,#0]
// x8-x9,x19,x20,x14-x17,x7 <- masked (AH+AL) x (BH+BL) - ALxBL
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, x27
sbcs x17, x17, x28
sbc x7, x7, xzr
// Store ALxBL, low
stp x21, x22, [x2]
stp x23, x24, [x2,#16]
// Load AH
ldp x3, x4, [x0,#32]
ldr x5, [x0,#48]
// Load BH
ldp x10, x11, [x1,#32]
ldr x12, [x1,#48]
// Add the high four limbs of ALxBL (x25-x28) into the middle term.
// The carry out is kept in x1, which is free now that B has been loaded.
adds x8, x8, x25
adcs x9, x9, x26
adcs x19, x19, x27
adcs x20, x20, x28
adc x1, xzr, xzr
// x0 is not read again in this function after this add.
add x0, x0, #32
// Temporarily store x8,x9 in x2
stp x8,x9, [x2,#32]
// x21-x28 <- AH x BH
// A0 * B0
mul x21, x3, x10 // C0
umulh x24, x3, x10
// A0 * B1
mul x22, x3, x11
umulh x23, x3, x11
// A1 * B0
mul x8, x4, x10
umulh x9, x4, x10
adds x22, x22, x24
adc x23, x23, xzr
// A0 * B2
mul x27, x3, x12
umulh x28, x3, x12
adds x22, x22, x8 // C1
adcs x23, x23, x9
adc x24, xzr, xzr
// A2 * B0
mul x8, x5, x10
umulh x25, x5, x10
adds x23, x23, x27
adcs x24, x24, x25
adc x25, xzr, xzr
// A1 * B1
mul x27, x4, x11
umulh x9, x4, x11
adds x23, x23, x8
adcs x24, x24, x28
adc x25, x25, xzr
// A1 * B2
mul x8, x4, x12
umulh x28, x4, x12
adds x23, x23, x27 // C2
adcs x24, x24, x9
adc x25, x25, xzr
// A2 * B1
mul x27, x5, x11
umulh x9, x5, x11
adds x24, x24, x8
adcs x25, x25, x28
adc x26, xzr, xzr
// A2 * B2
mul x8, x5, x12
umulh x28, x5, x12
adds x24, x24, x27 // C3
adcs x25, x25, x9
adc x26, x26, xzr
adds x25, x25, x8 // C4
adc x26, x26, x28 // C5
// Restore x8,x9
ldp x8,x9, [x2,#32]
// x1 <- 0 or all-ones (negated saved carry); turned back into the carry flag
// by the "adds x1, x1, #1" below.
neg x1, x1
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
subs x8, x8, x21
sbcs x9, x9, x22
sbcs x19, x19, x23
sbcs x20, x20, x24
sbcs x14, x14, x25
sbcs x15, x15, x26
sbcs x16, x16, xzr
sbcs x17, x17, xzr
sbc x7, x7, xzr
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
stp x8, x9, [x2,#32]
stp x19, x20, [x2,#48]
// Recreate the saved carry in the C flag: all-ones + 1 carries out, 0 + 1
// does not. The adcs chain below then adds AHxBH plus that carry.
adds x1, x1, #1
adcs x14, x14, x21
adcs x15, x15, x22
adcs x16, x16, x23
adcs x17, x17, x24
adcs x25, x7, x25
adc x26, x26, xzr
stp x14, x15, [x2,#64]
stp x16, x17, [x2,#80]
stp x25, x26, [x2,#96]
// Restore callee-saved registers and return.
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
// sike_fprdc: reduction of a double-width value.
//   x0: input, 14 x 64-bit limbs are read (offsets 0x00..0x68)
//   x1: output, 7 x 64-bit limbs are written (offsets 0x00..0x30)
// The routine multiplies successive low limbs by the four-word constant at
// .Lp434p1 and accumulates each product into the limbs above them; the top
// seven accumulated limbs are stored to x1.
// NOTE(review): presumably this is a Montgomery reduction modulo p434 --
// confirm against the C caller.
// x19-x28 are saved on a 96-byte frame and restored before returning. x0 is
// overwritten as a scratch register once all input limbs have been loaded.
.globl sike_fprdc
.hidden sike_fprdc
.align 4
sike_fprdc:
stp x29, x30, [sp, #-96]!
add x29, sp, xzr
stp x19, x20, [sp,#16]
stp x21, x22, [sp,#32]
stp x23, x24, [sp,#48]
stp x25, x26, [sp,#64]
stp x27, x28, [sp,#80]
ldp x2, x3, [x0,#0] // a[0-1]
// Load the prime constant
adrp x26, .Lp434p1
add x26, x26, :lo12:.Lp434p1
ldp x23, x24, [x26, #0x0]
// x26 is both the base and a destination here, so the table address is
// replaced by the fourth constant word.
ldp x25, x26, [x26,#0x10]
// a[0-1] * p434+1
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x10, x3, x23
umulh x11, x3, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x10 // C1
adcs x6, x6, x11
adc x7, xzr, xzr
mul x10, x3, x24
umulh x11, x3, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x10 // C2
adcs x7, x7, x11
adc x8, x8, xzr
mul x10, x3, x25
umulh x11, x3, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x3, x26
umulh x28, x3, x26
adds x7, x7, x10 // C3
adcs x8, x8, x11
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
// Load input limbs 3..13 and add the six-limb product C0-C5 into them,
// propagating the carry through the remaining limbs.
ldp x10, x11, [x0, #0x18]
ldp x12, x13, [x0, #0x28]
ldp x14, x15, [x0, #0x38]
ldp x16, x17, [x0, #0x48]
ldp x19, x20, [x0, #0x58]
ldr x21, [x0, #0x68]
adds x10, x10, x4
adcs x11, x11, x5
adcs x12, x12, x6
adcs x13, x13, x7
adcs x14, x14, x8
adcs x15, x15, x9
adcs x22, x16, xzr
adcs x17, x17, xzr
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
ldr x2, [x0,#0x10] // a[2]
// a[2-3] * p434+1
// The second multiplicand limb is x10 (the limb updated above); x0 and x3
// serve as scratch from here on.
mul x4, x2, x23 // C0
umulh x7, x2, x23
mul x5, x2, x24
umulh x6, x2, x24
mul x0, x10, x23
umulh x3, x10, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x2, x25
umulh x28, x2, x25
adds x5, x5, x0 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x0, x10, x24
umulh x3, x10, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x2, x26
umulh x28, x2, x26
adds x6, x6, x0 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x0, x10, x25
umulh x3, x10, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x10, x26
umulh x28, x10, x26
adds x7, x7, x0 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x12, x12, x4
adcs x13, x13, x5
adcs x14, x14, x6
adcs x15, x15, x7
adcs x16, x22, x8
adcs x17, x17, x9
adcs x22, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
// Third round: multiply x11 and x12 by the constant and accumulate.
mul x4, x11, x23 // C0
umulh x7, x11, x23
mul x5, x11, x24
umulh x6, x11, x24
mul x10, x12, x23
umulh x3, x12, x23
adds x5, x5, x7
adc x6, x6, xzr
mul x27, x11, x25
umulh x28, x11, x25
adds x5, x5, x10 // C1
adcs x6, x6, x3
adc x7, xzr, xzr
mul x10, x12, x24
umulh x3, x12, x24
adds x6, x6, x27
adcs x7, x7, x28
adc x8, xzr, xzr
mul x27, x11, x26
umulh x28, x11, x26
adds x6, x6, x10 // C2
adcs x7, x7, x3
adc x8, x8, xzr
mul x10, x12, x25
umulh x3, x12, x25
adds x7, x7, x27
adcs x8, x8, x28
adc x9, xzr, xzr
mul x27, x12, x26
umulh x28, x12, x26
adds x7, x7, x10 // C3
adcs x8, x8, x3
adc x9, x9, xzr
adds x8, x8, x27 // C4
adc x9, x9, x28 // C5
adds x14, x14, x4
adcs x15, x15, x5
adcs x16, x16, x6
adcs x17, x17, x7
adcs x19, x22, x8
adcs x20, x20, x9
adc x22, x21, xzr
stp x14, x15, [x1, #0x0] // C0, C1
// Final round: a single limb (x13) times the constant.
mul x4, x13, x23 // C0
umulh x10, x13, x23
mul x5, x13, x24
umulh x27, x13, x24
adds x5, x5, x10 // C1
adc x10, xzr, xzr
mul x6, x13, x25
umulh x28, x13, x25
adds x27, x10, x27
adcs x6, x6, x27 // C2
adc x10, xzr, xzr
mul x7, x13, x26
umulh x8, x13, x26
adds x28, x10, x28
adcs x7, x7, x28 // C3
adc x8, x8, xzr // C4
adds x16, x16, x4
adcs x17, x17, x5
adcs x19, x19, x6
adcs x20, x20, x7
adc x21, x22, x8
str x16, [x1, #0x10]
stp x17, x19, [x1, #0x18]
stp x20, x21, [x1, #0x28]
// Restore callee-saved registers and return.
ldp x19, x20, [x29,#16]
ldp x21, x22, [x29,#32]
ldp x23, x24, [x29,#48]
ldp x25, x26, [x29,#64]
ldp x27, x28, [x29,#80]
ldp x29, x30, [sp],#96
ret
// sike_fpadd: field addition with a branch-free correction step.
//   x0: first operand, 7 x 64-bit limbs
//   x1: second operand, 7 x 64-bit limbs
//   x2: result, 7 x 64-bit limbs
// Computes a + b, subtracts the constant at .Lp434x2, and adds that constant
// back (through an all-ones/zero mask) when the subtraction borrowed. No
// branch depends on the operand values. Only x3-x17 and x0 are modified;
// x29/x30 are saved and restored around the body.
.globl sike_fpadd
.hidden sike_fpadd
.align 4
sike_fpadd:
stp x29,x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
// Subtract 2xp434
adrp x17, .Lp434x2
add x17, x17, :lo12:.Lp434x2
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
// The table holds six words for seven limbs: x12 is deliberately used for
// both the second and the third limb.
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14
sbcs x8, x8, x15
sbcs x9, x9, x16
// x0 <- all-ones if the subtraction borrowed, otherwise 0
sbc x0, xzr, xzr // x0 can be reused now
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
// sike_fpsub: field subtraction with a branch-free correction step.
//   x0: minuend, 7 x 64-bit limbs
//   x1: subtrahend, 7 x 64-bit limbs
//   x2: result, 7 x 64-bit limbs
// Computes a - b and, when that subtraction borrowed, adds the constant at
// .Lp434x2 back in through an all-ones/zero mask. No branch depends on the
// operand values.
.globl sike_fpsub
.hidden sike_fpsub
.align 4
sike_fpsub:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
// x0 <- all-ones if the subtraction borrowed, otherwise 0
sbc x0, xzr, xzr
// Load 2xp434 (six stored words; see the use of x12 below)
adrp x17, .Lp434x2
add x17, x17, :lo12:.Lp434x2
// First half
ldp x11, x12, [x17, #0]
ldp x13, x14, [x17, #16]
ldp x15, x16, [x17, #32]
// Add 2xp434 anded with the mask in x0
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
// x12 is deliberately used for both the second and the third limb.
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adc x9, x9, x16
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
// sike_mpadd_asm: plain multi-precision addition, no reduction.
//   x0: first operand, 7 x 64-bit limbs
//   x1: second operand, 7 x 64-bit limbs
//   x2: result, 7 x 64-bit limbs
// The carry out of the top limb is discarded (the last add is a non-flag-
// setting adc and nothing is returned).
.globl sike_mpadd_asm
.hidden sike_mpadd_asm
.align 4
sike_mpadd_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
ldr x9, [x0,#48]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
ldr x17, [x1,#48]
// Carry-chained add across all seven limbs
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, x16
adc x9, x9, x17
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
str x9, [x2,#48]
ldp x29, x30, [sp],#16
ret
// sike_mpsubx2_asm: double-width multi-precision subtraction.
//   x0: minuend, 14 x 64-bit limbs
//   x1: subtrahend, 14 x 64-bit limbs
//   x2: result, 14 x 64-bit limbs
// Returns in x0: all-ones if the subtraction borrowed out of the top limb,
// otherwise 0.
// The borrow chain runs through all 14 limbs; the loads and stores between
// the sbcs groups do not touch the flags, so the chain is not interrupted.
.globl sike_mpsubx2_asm
.hidden sike_mpsubx2_asm
.align 4
sike_mpsubx2_asm:
stp x29, x30, [sp,#-16]!
add x29, sp, #0
// Limbs 0-3
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
// Limbs 4-7
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x11, x12, [x1,#32]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
sbcs x9, x9, x13
sbcs x10, x10, x14
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
// Limbs 8-11
ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
// Limbs 12-13
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x7, x7, x11
sbcs x8, x8, x12
// x0 <- 0 - 0 - borrow: the return value (also ends use of x0 as a pointer)
sbc x0, xzr, xzr
stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
ldp x29, x30, [sp],#16
ret
// sike_mpdblsubx2_asm: in-place double subtraction on a 14-limb value.
//   x0: first subtrahend, 14 x 64-bit limbs
//   x1: second subtrahend, 14 x 64-bit limbs
//   x2: value that is read, reduced by both subtrahends, and written back
// The work is done in three chunks of 6, 6 and 2 limbs. Within a chunk the
// two subtractions are separate borrow chains; x9 counts how many of them
// ended WITHOUT a borrow (adc of the C flag), and "2 - x9" is then the number
// of borrows carried into the next chunk.
.globl sike_mpdblsubx2_asm
.hidden sike_mpdblsubx2_asm
.align 4
sike_mpdblsubx2_asm:
stp x29, x30, [sp, #-16]!
add x29, sp, #0
// Chunk 1: limbs 0-5 of [x2] minus limbs 0-5 of [x0]
ldp x3, x4, [x2, #0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x11, x12, [x0, #0]
ldp x13, x14, [x0,#16]
ldp x15, x16, [x0,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
// x9 stores carry
adc x9, xzr, xzr
// ... minus limbs 0-5 of [x1]
ldp x11, x12, [x1, #0]
ldp x13, x14, [x1,#16]
ldp x15, x16, [x1,#32]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2, #0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
// Chunk 2: limbs 6-11
ldp x3, x4, [x2,#48]
ldp x5, x6, [x2,#64]
ldp x7, x8, [x2,#80]
ldp x11, x12, [x0,#48]
ldp x13, x14, [x0,#64]
ldp x15, x16, [x0,#80]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
// NOTE(review): a borrow produced by this subs is consumed by the sbcs on
// the same register (x3) right after it, not by the next limb (x4) --
// confirm that this is the intended propagation.
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, xzr, xzr
ldp x11, x12, [x1,#48]
ldp x13, x14, [x1,#64]
ldp x15, x16, [x1,#80]
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
sbcs x8, x8, x16
adc x9, x9, xzr
stp x3, x4, [x2,#48]
stp x5, x6, [x2,#64]
stp x7, x8, [x2,#80]
// Chunk 3: limbs 12-13; the final borrow is discarded
ldp x3, x4, [x2,#96]
ldp x11, x12, [x0,#96]
ldp x13, x14, [x1,#96]
// x9 = 2 - x9
neg x9, x9
add x9, x9, #2
// NOTE(review): same subs/sbcs-on-one-limb pattern as in chunk 2.
subs x3, x3, x9
sbcs x3, x3, x11
sbcs x4, x4, x12
subs x3, x3, x13
sbc x4, x4, x14
stp x3, x4, [x2,#96]
ldp x29, x30, [sp],#16
ret
#endif
#endif // !OPENSSL_NO_ASM

View File

@ -1490,3 +1490,4 @@ ChaCha20_neon:
#endif #endif
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

View File

@ -778,3 +778,4 @@ aes_hw_ctr32_encrypt_blocks:
#endif #endif
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -974,3 +974,4 @@ bn_mul8x_mont_neon:
#endif #endif
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1526,3 +1526,4 @@ bsaes_ctr32_encrypt_blocks:
#endif #endif
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -31,342 +31,6 @@
#else #else
.code 32 .code 32
#endif #endif
@ rem_4bit: table of sixteen 16-bit constants (32 bytes, 32-byte aligned).
@ The 4-bit GHASH routines below index it with a nibble value times two and
@ XOR the selected entry, shifted left by 16, into the top word of the
@ accumulator. rem_4bit_get and gcm_ghash_4bit locate this table with
@ PC-relative arithmetic in ARM mode, so its size and its position directly
@ before rem_4bit_get must not change.
.type rem_4bit,%object
.align 5
rem_4bit:
.short 0x0000,0x1C20,0x3840,0x2460
.short 0x7080,0x6CA0,0x48C0,0x54E0
.short 0xE100,0xFD20,0xD940,0xC560
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
.size rem_4bit,.-rem_4bit
@ rem_4bit_get: helper entered by a branch from gcm_gmult_4bit. It puts the
@ address of rem_4bit in r2 and branches back to .Lrem_4bit_got; it is not a
@ callable function (no return through lr).
.type rem_4bit_get,%function
rem_4bit_get:
#if defined(__thumb2__)
adr r2,rem_4bit
#else
@ In ARM mode pc reads as this instruction's address + 8; the extra 32 is
@ the size of the rem_4bit table placed immediately before this label.
sub r2,pc,#8+32 @ &rem_4bit
#endif
b .Lrem_4bit_got
@ The two nops pad this helper to four instructions (16 bytes in ARM mode);
@ gcm_ghash_4bit's "#8+48" offset counts these 16 bytes plus the 32-byte table.
nop
nop
.size rem_4bit_get,.-rem_4bit_get
@ gcm_ghash_4bit: 4-bit table-driven GHASH over a buffer.
@   r0: 16-byte accumulator; read byte-wise and updated after every block
@   r1: table of 16-byte entries, indexed by nibble (Htbl in the comments)
@   r2: input pointer, advanced by 16 per outer iteration
@   r3: input length in bytes; converted to an end pointer on entry
@ The rem_4bit table is copied to the stack (32 bytes) so the loops can index
@ it relative to sp; the saved end pointer sits just above it at [sp,#32].
@ The accumulator words are written back most-significant byte first on every
@ configuration (rev+str, plain str on big-endian, or four strb).
@ NOTE(review): the outer loop tests for the end only after processing a
@ block, so a zero length is presumably never passed -- confirm with callers.
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,%function
.align 4
gcm_ghash_4bit:
#if defined(__thumb2__)
adr r12,rem_4bit
#else
@ pc reads as this instruction + 8; 48 = 32-byte rem_4bit table + 16 bytes
@ of rem_4bit_get that sit between the table and this label.
sub r12,pc,#8+48 @ &rem_4bit
#endif
add r3,r2,r3 @ r3 to point at the end
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
ldrb r12,[r2,#15]
ldrb r14,[r0,#15]
@ Outer loop: one 16-byte input block per iteration.
.Louter:
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
add r11,r1,r14
ldrb r12,[r2,#14]
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[sp,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
ldrb r14,[r0,#14]
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
eor r12,r12,r14
and r14,r12,#0xf0
and r12,r12,#0x0f
eor r7,r7,r8,lsl#16
@ Inner loop: r3 counts the byte index down from 14; the "pl" conditionals
@ skip the next-byte loads once r3 has gone negative.
.Linner:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[sp,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r2,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r8,[r0,r3]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r9,[sp,r14]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
#ifdef __thumb2__
it pl
#endif
eorpl r12,r12,r8
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
bpl .Linner
ldr r3,[sp,#32] @ re-load r3/end
add r2,r2,#16
mov r14,r4
@ Write the four accumulator words back to [r0].
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
@ Flags set here are consumed by the ldrneb and "bne .Louter" below; none of
@ the instructions in between modify them.
cmp r2,r3
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#ifdef __thumb2__
it ne
#endif
ldrneb r12,[r2,#15]
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
bne .Louter
@ Drop the 32-byte rem_4bit copy plus the saved end pointer (4 bytes).
add sp,sp,#36
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size gcm_ghash_4bit,.-gcm_ghash_4bit
@ gcm_gmult_4bit: 4-bit table-driven multiplication of the accumulator.
@   r0: 16-byte accumulator; read byte-wise from offset 15 down to 0 and
@       overwritten with the result
@   r1: table of 16-byte entries, indexed by nibble (Htbl in the comments)
@ r2 and r3 are not inputs: r2 receives the address of rem_4bit (set by the
@ rem_4bit_get helper, which branches back to .Lrem_4bit_got) and r3 is the
@ byte counter. Unlike gcm_ghash_4bit, the rem_4bit table is read in place
@ through r2 rather than from a stack copy.
.globl gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type gcm_gmult_4bit,%function
gcm_gmult_4bit:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
ldrb r12,[r0,#15]
@ Detour through rem_4bit_get to obtain &rem_4bit in r2.
b rem_4bit_get
.Lrem_4bit_got:
and r14,r12,#0xf0
and r12,r12,#0x0f
mov r3,#14
add r7,r1,r12,lsl#4
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
ldrb r12,[r0,#14]
add r11,r1,r14
and r14,r4,#0xf @ rem
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
add r14,r14,r14
eor r4,r8,r4,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
and r14,r12,#0xf0
eor r7,r7,r8,lsl#16
and r12,r12,#0x0f
@ Main loop: r3 counts the byte index down from 14; the "pl" conditionals
@ skip the next-byte load once r3 has gone negative.
.Loop:
add r11,r1,r12,lsl#4
and r12,r4,#0xf @ rem
subs r3,r3,#1
add r12,r12,r12
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
eor r5,r5,r6,lsl#28
ldrh r8,[r2,r12] @ rem_4bit[rem]
eor r6,r10,r6,lsr#4
#ifdef __thumb2__
it pl
#endif
ldrplb r12,[r0,r3]
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
add r11,r1,r14
and r14,r4,#0xf @ rem
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
add r14,r14,r14
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
eor r4,r8,r4,lsr#4
eor r4,r4,r5,lsl#28
eor r5,r9,r5,lsr#4
ldrh r8,[r2,r14] @ rem_4bit[rem]
eor r5,r5,r6,lsl#28
eor r6,r10,r6,lsr#4
eor r6,r6,r7,lsl#28
eor r7,r11,r7,lsr#4
#ifdef __thumb2__
itt pl
#endif
andpl r14,r12,#0xf0
andpl r12,r12,#0x0f
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
bpl .Loop
@ Write the four result words back to [r0], most-significant byte first on
@ every configuration (rev+str, plain str on big-endian, or four strb).
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r4,r4
str r4,[r0,#12]
#elif defined(__ARMEB__)
str r4,[r0,#12]
#else
mov r9,r4,lsr#8
strb r4,[r0,#12+3]
mov r10,r4,lsr#16
strb r9,[r0,#12+2]
mov r11,r4,lsr#24
strb r10,[r0,#12+1]
strb r11,[r0,#12]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r5,r5
str r5,[r0,#8]
#elif defined(__ARMEB__)
str r5,[r0,#8]
#else
mov r9,r5,lsr#8
strb r5,[r0,#8+3]
mov r10,r5,lsr#16
strb r9,[r0,#8+2]
mov r11,r5,lsr#24
strb r10,[r0,#8+1]
strb r11,[r0,#8]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r6,r6
str r6,[r0,#4]
#elif defined(__ARMEB__)
str r6,[r0,#4]
#else
mov r9,r6,lsr#8
strb r6,[r0,#4+3]
mov r10,r6,lsr#16
strb r9,[r0,#4+2]
mov r11,r6,lsr#24
strb r10,[r0,#4+1]
strb r11,[r0,#4]
#endif
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
rev r7,r7
str r7,[r0,#0]
#elif defined(__ARMEB__)
str r7,[r0,#0]
#else
mov r9,r7,lsr#8
strb r7,[r0,#0+3]
mov r10,r7,lsr#16
strb r9,[r0,#0+2]
mov r11,r7,lsr#24
strb r10,[r0,#0+1]
strb r11,[r0,#0]
#endif
#if __ARM_ARCH__>=5
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size gcm_gmult_4bit,.-gcm_gmult_4bit
#if __ARM_MAX_ARCH__>=7 #if __ARM_MAX_ARCH__>=7
.arch armv7-a .arch armv7-a
.fpu neon .fpu neon
@ -588,3 +252,4 @@ gcm_ghash_neon:
.align 2 .align 2
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -250,3 +250,4 @@ gcm_ghash_v8:
.align 2 .align 2
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1508,3 +1508,4 @@ sha1_block_data_order_armv8:
#endif #endif
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -2836,3 +2836,4 @@ sha256_block_data_order_armv8:
#endif #endif
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1891,3 +1891,4 @@ sha512_block_data_order_neon:
#endif #endif
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

View File

@ -31,7 +31,6 @@
.hidden abi_test_trampoline .hidden abi_test_trampoline
.align 4 .align 4
abi_test_trampoline: abi_test_trampoline:
.Labi_test_trampoline_begin:
@ Save parameters and all callee-saved registers. For convenience, we @ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile. @ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
@ -377,3 +376,4 @@ abi_test_clobber_d15:
.size abi_test_clobber_d15,.-abi_test_clobber_d15 .size abi_test_clobber_d15,.-abi_test_clobber_d15
#endif #endif
#endif // !OPENSSL_NO_ASM #endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -3667,3 +3667,4 @@ _aesp8_xts_dec5x:
.long 0 .long 0
.byte 0,12,0x14,0,0,0,0,0 .byte 0,12,0x14,0,0,0,0,0
#endif // !OPENSSL_NO_ASM && __powerpc64__ #endif // !OPENSSL_NO_ASM && __powerpc64__
.section .note.GNU-stack,"",@progbits

View File

@ -584,3 +584,4 @@ gcm_ghash_p8:
.align 2 .align 2
.align 2 .align 2
#endif // !OPENSSL_NO_ASM && __powerpc64__ #endif // !OPENSSL_NO_ASM && __powerpc64__
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -972,3 +972,4 @@ ChaCha20_ssse3:
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0 .byte 114,103,62,0
#endif #endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
.text .text
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
#endif #endif
.globl aes_hw_encrypt .globl aes_hw_encrypt
.hidden aes_hw_encrypt .hidden aes_hw_encrypt
@ -14,7 +14,7 @@
.align 16 .align 16
aes_hw_encrypt: aes_hw_encrypt:
.L_aes_hw_encrypt_begin: .L_aes_hw_encrypt_begin:
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx pushl %ebx
pushl %edx pushl %edx
call .L000pic call .L000pic
@ -845,7 +845,7 @@ aes_hw_ctr32_encrypt_blocks:
pushl %ebx pushl %ebx
pushl %esi pushl %esi
pushl %edi pushl %edi
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx pushl %ebx
pushl %edx pushl %edx
call .L038pic call .L038pic
@ -2440,7 +2440,7 @@ _aesni_set_encrypt_key:
.align 16 .align 16
aes_hw_set_encrypt_key: aes_hw_set_encrypt_key:
.L_aes_hw_set_encrypt_key_begin: .L_aes_hw_set_encrypt_key_begin:
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx pushl %ebx
pushl %edx pushl %edx
call .L116pic call .L116pic
@ -2510,3 +2510,4 @@ aes_hw_set_decrypt_key:
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0 .byte 115,108,46,111,114,103,62,0
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -993,551 +993,5 @@ bn_sub_words:
popl %ebp popl %ebp
ret ret
.size bn_sub_words,.-.L_bn_sub_words_begin .size bn_sub_words,.-.L_bn_sub_words_begin
.globl bn_sub_part_words
.hidden bn_sub_part_words
.type bn_sub_part_words,@function
.align 16
bn_sub_part_words:
.L_bn_sub_part_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz .L029aw_finish
.L030aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl 28(%esi),%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L030aw_loop
.L029aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
decl %ebp
jz .L031aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
addl $4,%esi
addl $4,%edi
addl $4,%ebx
.L031aw_end:
cmpl $0,36(%esp)
je .L032pw_end
movl 36(%esp),%ebp
cmpl $0,%ebp
je .L032pw_end
jge .L033pw_pos
movl $0,%edx
subl %ebp,%edx
movl %edx,%ebp
andl $4294967288,%ebp
jz .L034pw_neg_finish
.L035pw_neg_loop:
movl $0,%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl $0,%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl $0,%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl $0,%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl $0,%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl $0,%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl $0,%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl $0,%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L035pw_neg_loop
.L034pw_neg_finish:
movl 36(%esp),%edx
movl $0,%ebp
subl %edx,%ebp
andl $7,%ebp
jz .L032pw_end
movl $0,%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz .L032pw_end
movl $0,%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
jmp .L032pw_end
.L033pw_pos:
andl $4294967288,%ebp
jz .L036pw_pos_finish
.L037pw_pos_loop:
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
jnc .L038pw_nc0
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
jnc .L039pw_nc1
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
jnc .L040pw_nc2
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
jnc .L041pw_nc3
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
jnc .L042pw_nc4
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
jnc .L043pw_nc5
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
jnc .L044pw_nc6
movl 28(%esi),%ecx
subl %eax,%ecx
movl %ecx,28(%ebx)
jnc .L045pw_nc7
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
jnz .L037pw_pos_loop
.L036pw_pos_finish:
movl 36(%esp),%ebp
andl $7,%ebp
jz .L032pw_end
movl (%esi),%ecx
subl %eax,%ecx
movl %ecx,(%ebx)
jnc .L046pw_tail_nc0
decl %ebp
jz .L032pw_end
movl 4(%esi),%ecx
subl %eax,%ecx
movl %ecx,4(%ebx)
jnc .L047pw_tail_nc1
decl %ebp
jz .L032pw_end
movl 8(%esi),%ecx
subl %eax,%ecx
movl %ecx,8(%ebx)
jnc .L048pw_tail_nc2
decl %ebp
jz .L032pw_end
movl 12(%esi),%ecx
subl %eax,%ecx
movl %ecx,12(%ebx)
jnc .L049pw_tail_nc3
decl %ebp
jz .L032pw_end
movl 16(%esi),%ecx
subl %eax,%ecx
movl %ecx,16(%ebx)
jnc .L050pw_tail_nc4
decl %ebp
jz .L032pw_end
movl 20(%esi),%ecx
subl %eax,%ecx
movl %ecx,20(%ebx)
jnc .L051pw_tail_nc5
decl %ebp
jz .L032pw_end
movl 24(%esi),%ecx
subl %eax,%ecx
movl %ecx,24(%ebx)
jnc .L052pw_tail_nc6
movl $1,%eax
jmp .L032pw_end
.L053pw_nc_loop:
movl (%esi),%ecx
movl %ecx,(%ebx)
.L038pw_nc0:
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
.L039pw_nc1:
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
.L040pw_nc2:
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
.L041pw_nc3:
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
.L042pw_nc4:
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
.L043pw_nc5:
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
.L044pw_nc6:
movl 28(%esi),%ecx
movl %ecx,28(%ebx)
.L045pw_nc7:
addl $32,%esi
addl $32,%ebx
subl $8,%ebp
jnz .L053pw_nc_loop
movl 36(%esp),%ebp
andl $7,%ebp
jz .L054pw_nc_end
movl (%esi),%ecx
movl %ecx,(%ebx)
.L046pw_tail_nc0:
decl %ebp
jz .L054pw_nc_end
movl 4(%esi),%ecx
movl %ecx,4(%ebx)
.L047pw_tail_nc1:
decl %ebp
jz .L054pw_nc_end
movl 8(%esi),%ecx
movl %ecx,8(%ebx)
.L048pw_tail_nc2:
decl %ebp
jz .L054pw_nc_end
movl 12(%esi),%ecx
movl %ecx,12(%ebx)
.L049pw_tail_nc3:
decl %ebp
jz .L054pw_nc_end
movl 16(%esi),%ecx
movl %ecx,16(%ebx)
.L050pw_tail_nc4:
decl %ebp
jz .L054pw_nc_end
movl 20(%esi),%ecx
movl %ecx,20(%ebx)
.L051pw_tail_nc5:
decl %ebp
jz .L054pw_nc_end
movl 24(%esi),%ecx
movl %ecx,24(%ebx)
.L052pw_tail_nc6:
.L054pw_nc_end:
movl $0,%eax
.L032pw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -1263,3 +1263,4 @@ bn_sqr_comba4:
ret ret
.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin .size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -291,3 +291,4 @@ gcm_ghash_ssse3:
.Llow4_mask: .Llow4_mask:
.long 252645135,252645135,252645135,252645135 .long 252645135,252645135,252645135,252645135
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -6,711 +6,6 @@
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
.text .text
.globl gcm_gmult_4bit_mmx
.hidden gcm_gmult_4bit_mmx
.type gcm_gmult_4bit_mmx,@function
.align 16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
call .L000pic_point
.L000pic_point:
popl %eax
leal .Lrem_4bit-.L000pic_point(%eax),%eax
movzbl 15(%edi),%ebx
xorl %ecx,%ecx
movl %ebx,%edx
movb %dl,%cl
movl $14,%ebp
shlb $4,%cl
andl $240,%edx
movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1
movd %mm0,%ebx
jmp .L001mmx_loop
.align 16
.L001mmx_loop:
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb (%edi,%ebp,1),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
decl %ebp
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
pxor %mm2,%mm0
js .L002mmx_break
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
jmp .L001mmx_loop
.align 16
.L002mmx_break:
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
pxor %mm2,%mm0
psrlq $32,%mm0
movd %mm1,%edx
psrlq $32,%mm1
movd %mm0,%ecx
movd %mm1,%ebp
bswap %ebx
bswap %edx
bswap %ecx
bswap %ebp
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
movl %ecx,8(%edi)
movl %ebp,(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
.globl gcm_ghash_4bit_mmx
.hidden gcm_ghash_4bit_mmx
.type gcm_ghash_4bit_mmx,@function
.align 16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%ebx
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl %esp,%ebp
call .L003pic_point
.L003pic_point:
popl %esi
leal .Lrem_8bit-.L003pic_point(%esi),%esi
subl $544,%esp
andl $-64,%esp
subl $16,%esp
addl %ecx,%edx
movl %eax,544(%esp)
movl %edx,552(%esp)
movl %ebp,556(%esp)
addl $128,%ebx
leal 144(%esp),%edi
leal 400(%esp),%ebp
movl -120(%ebx),%edx
movq -120(%ebx),%mm0
movq -128(%ebx),%mm3
shll $4,%edx
movb %dl,(%esp)
movl -104(%ebx),%edx
movq -104(%ebx),%mm2
movq -112(%ebx),%mm5
movq %mm0,-128(%edi)
psrlq $4,%mm0
movq %mm3,(%edi)
movq %mm3,%mm7
psrlq $4,%mm3
shll $4,%edx
movb %dl,1(%esp)
movl -88(%ebx),%edx
movq -88(%ebx),%mm1
psllq $60,%mm7
movq -96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-120(%edi)
psrlq $4,%mm2
movq %mm5,8(%edi)
movq %mm5,%mm6
movq %mm0,-128(%ebp)
psrlq $4,%mm5
movq %mm3,(%ebp)
shll $4,%edx
movb %dl,2(%esp)
movl -72(%ebx),%edx
movq -72(%ebx),%mm0
psllq $60,%mm6
movq -80(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-112(%edi)
psrlq $4,%mm1
movq %mm4,16(%edi)
movq %mm4,%mm7
movq %mm2,-120(%ebp)
psrlq $4,%mm4
movq %mm5,8(%ebp)
shll $4,%edx
movb %dl,3(%esp)
movl -56(%ebx),%edx
movq -56(%ebx),%mm2
psllq $60,%mm7
movq -64(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-104(%edi)
psrlq $4,%mm0
movq %mm3,24(%edi)
movq %mm3,%mm6
movq %mm1,-112(%ebp)
psrlq $4,%mm3
movq %mm4,16(%ebp)
shll $4,%edx
movb %dl,4(%esp)
movl -40(%ebx),%edx
movq -40(%ebx),%mm1
psllq $60,%mm6
movq -48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-96(%edi)
psrlq $4,%mm2
movq %mm5,32(%edi)
movq %mm5,%mm7
movq %mm0,-104(%ebp)
psrlq $4,%mm5
movq %mm3,24(%ebp)
shll $4,%edx
movb %dl,5(%esp)
movl -24(%ebx),%edx
movq -24(%ebx),%mm0
psllq $60,%mm7
movq -32(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-88(%edi)
psrlq $4,%mm1
movq %mm4,40(%edi)
movq %mm4,%mm6
movq %mm2,-96(%ebp)
psrlq $4,%mm4
movq %mm5,32(%ebp)
shll $4,%edx
movb %dl,6(%esp)
movl -8(%ebx),%edx
movq -8(%ebx),%mm2
psllq $60,%mm6
movq -16(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-80(%edi)
psrlq $4,%mm0
movq %mm3,48(%edi)
movq %mm3,%mm7
movq %mm1,-88(%ebp)
psrlq $4,%mm3
movq %mm4,40(%ebp)
shll $4,%edx
movb %dl,7(%esp)
movl 8(%ebx),%edx
movq 8(%ebx),%mm1
psllq $60,%mm7
movq (%ebx),%mm4
por %mm7,%mm0
movq %mm2,-72(%edi)
psrlq $4,%mm2
movq %mm5,56(%edi)
movq %mm5,%mm6
movq %mm0,-80(%ebp)
psrlq $4,%mm5
movq %mm3,48(%ebp)
shll $4,%edx
movb %dl,8(%esp)
movl 24(%ebx),%edx
movq 24(%ebx),%mm0
psllq $60,%mm6
movq 16(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-64(%edi)
psrlq $4,%mm1
movq %mm4,64(%edi)
movq %mm4,%mm7
movq %mm2,-72(%ebp)
psrlq $4,%mm4
movq %mm5,56(%ebp)
shll $4,%edx
movb %dl,9(%esp)
movl 40(%ebx),%edx
movq 40(%ebx),%mm2
psllq $60,%mm7
movq 32(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-56(%edi)
psrlq $4,%mm0
movq %mm3,72(%edi)
movq %mm3,%mm6
movq %mm1,-64(%ebp)
psrlq $4,%mm3
movq %mm4,64(%ebp)
shll $4,%edx
movb %dl,10(%esp)
movl 56(%ebx),%edx
movq 56(%ebx),%mm1
psllq $60,%mm6
movq 48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-48(%edi)
psrlq $4,%mm2
movq %mm5,80(%edi)
movq %mm5,%mm7
movq %mm0,-56(%ebp)
psrlq $4,%mm5
movq %mm3,72(%ebp)
shll $4,%edx
movb %dl,11(%esp)
movl 72(%ebx),%edx
movq 72(%ebx),%mm0
psllq $60,%mm7
movq 64(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-40(%edi)
psrlq $4,%mm1
movq %mm4,88(%edi)
movq %mm4,%mm6
movq %mm2,-48(%ebp)
psrlq $4,%mm4
movq %mm5,80(%ebp)
shll $4,%edx
movb %dl,12(%esp)
movl 88(%ebx),%edx
movq 88(%ebx),%mm2
psllq $60,%mm6
movq 80(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-32(%edi)
psrlq $4,%mm0
movq %mm3,96(%edi)
movq %mm3,%mm7
movq %mm1,-40(%ebp)
psrlq $4,%mm3
movq %mm4,88(%ebp)
shll $4,%edx
movb %dl,13(%esp)
movl 104(%ebx),%edx
movq 104(%ebx),%mm1
psllq $60,%mm7
movq 96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-24(%edi)
psrlq $4,%mm2
movq %mm5,104(%edi)
movq %mm5,%mm6
movq %mm0,-32(%ebp)
psrlq $4,%mm5
movq %mm3,96(%ebp)
shll $4,%edx
movb %dl,14(%esp)
movl 120(%ebx),%edx
movq 120(%ebx),%mm0
psllq $60,%mm6
movq 112(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-16(%edi)
psrlq $4,%mm1
movq %mm4,112(%edi)
movq %mm4,%mm7
movq %mm2,-24(%ebp)
psrlq $4,%mm4
movq %mm5,104(%ebp)
shll $4,%edx
movb %dl,15(%esp)
psllq $60,%mm7
por %mm7,%mm1
movq %mm0,-8(%edi)
psrlq $4,%mm0
movq %mm3,120(%edi)
movq %mm3,%mm6
movq %mm1,-16(%ebp)
psrlq $4,%mm3
movq %mm4,112(%ebp)
psllq $60,%mm6
por %mm6,%mm0
movq %mm0,-8(%ebp)
movq %mm3,120(%ebp)
movq (%eax),%mm6
movl 8(%eax),%ebx
movl 12(%eax),%edx
.align 16
.L004outer:
xorl 12(%ecx),%edx
xorl 8(%ecx),%ebx
pxor (%ecx),%mm6
leal 16(%ecx),%ecx
movl %ebx,536(%esp)
movq %mm6,528(%esp)
movl %ecx,548(%esp)
xorl %eax,%eax
roll $8,%edx
movb %dl,%al
movl %eax,%ebp
andb $15,%al
shrl $4,%ebp
pxor %mm0,%mm0
roll $8,%edx
pxor %mm1,%mm1
pxor %mm2,%mm2
movq 16(%esp,%eax,8),%mm7
movq 144(%esp,%eax,8),%mm6
movb %dl,%al
movd %mm7,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%edi
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 536(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 532(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 528(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 524(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
pxor 144(%esp,%eax,8),%mm6
xorb (%esp,%ebp,1),%bl
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
movzbl %bl,%ebx
pxor %mm2,%mm2
psllq $4,%mm1
movd %mm7,%ecx
psrlq $4,%mm7
movq %mm6,%mm3
psrlq $4,%mm6
shll $4,%ecx
pxor 16(%esp,%edi,8),%mm7
psllq $60,%mm3
movzbl %cl,%ecx
pxor %mm3,%mm7
pxor 144(%esp,%edi,8),%mm6
pinsrw $2,(%esi,%ebx,2),%mm0
pxor %mm1,%mm6
movd %mm7,%edx
pinsrw $3,(%esi,%ecx,2),%mm2
psllq $12,%mm0
pxor %mm0,%mm6
psrlq $32,%mm7
pxor %mm2,%mm6
movl 548(%esp),%ecx
movd %mm7,%ebx
movq %mm6,%mm3
psllw $8,%mm6
psrlw $8,%mm3
por %mm3,%mm6
bswap %edx
pshufw $27,%mm6,%mm6
bswap %ebx
cmpl 552(%esp),%ecx
jne .L004outer
movl 544(%esp),%eax
movl %edx,12(%eax)
movl %ebx,8(%eax)
movq %mm6,(%eax)
movl 556(%esp),%esp
emms
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
.globl gcm_init_clmul .globl gcm_init_clmul
.hidden gcm_init_clmul .hidden gcm_init_clmul
.type gcm_init_clmul,@function .type gcm_init_clmul,@function
@ -719,10 +14,10 @@ gcm_init_clmul:
.L_gcm_init_clmul_begin: .L_gcm_init_clmul_begin:
movl 4(%esp),%edx movl 4(%esp),%edx
movl 8(%esp),%eax movl 8(%esp),%eax
call .L005pic call .L000pic
.L005pic: .L000pic:
popl %ecx popl %ecx
leal .Lbswap-.L005pic(%ecx),%ecx leal .Lbswap-.L000pic(%ecx),%ecx
movdqu (%eax),%xmm2 movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2 pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4 pshufd $255,%xmm2,%xmm4
@ -789,10 +84,10 @@ gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin: .L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax movl 4(%esp),%eax
movl 8(%esp),%edx movl 8(%esp),%edx
call .L006pic call .L001pic
.L006pic: .L001pic:
popl %ecx popl %ecx
leal .Lbswap-.L006pic(%ecx),%ecx leal .Lbswap-.L001pic(%ecx),%ecx
movdqu (%eax),%xmm0 movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5 movdqa (%ecx),%xmm5
movups (%edx),%xmm2 movups (%edx),%xmm2
@ -849,16 +144,16 @@ gcm_ghash_clmul:
movl 24(%esp),%edx movl 24(%esp),%edx
movl 28(%esp),%esi movl 28(%esp),%esi
movl 32(%esp),%ebx movl 32(%esp),%ebx
call .L007pic call .L002pic
.L007pic: .L002pic:
popl %ecx popl %ecx
leal .Lbswap-.L007pic(%ecx),%ecx leal .Lbswap-.L002pic(%ecx),%ecx
movdqu (%eax),%xmm0 movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5 movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2 movdqu (%edx),%xmm2
.byte 102,15,56,0,197 .byte 102,15,56,0,197
subl $16,%ebx subl $16,%ebx
jz .L008odd_tail jz .L003odd_tail
movdqu (%esi),%xmm3 movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6 movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221 .byte 102,15,56,0,221
@ -875,10 +170,10 @@ gcm_ghash_clmul:
movups 16(%edx),%xmm2 movups 16(%edx),%xmm2
nop nop
subl $32,%ebx subl $32,%ebx
jbe .L009even_tail jbe .L004even_tail
jmp .L010mod_loop jmp .L005mod_loop
.align 32 .align 32
.L010mod_loop: .L005mod_loop:
pshufd $78,%xmm0,%xmm4 pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1 movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4 pxor %xmm0,%xmm4
@ -933,8 +228,8 @@ gcm_ghash_clmul:
.byte 102,15,58,68,221,0 .byte 102,15,58,68,221,0
leal 32(%esi),%esi leal 32(%esi),%esi
subl $32,%ebx subl $32,%ebx
ja .L010mod_loop ja .L005mod_loop
.L009even_tail: .L004even_tail:
pshufd $78,%xmm0,%xmm4 pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1 movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4 pxor %xmm0,%xmm4
@ -973,9 +268,9 @@ gcm_ghash_clmul:
psrlq $1,%xmm0 psrlq $1,%xmm0
pxor %xmm1,%xmm0 pxor %xmm1,%xmm0
testl %ebx,%ebx testl %ebx,%ebx
jnz .L011done jnz .L006done
movups (%edx),%xmm2 movups (%edx),%xmm2
.L008odd_tail: .L003odd_tail:
movdqu (%esi),%xmm3 movdqu (%esi),%xmm3
.byte 102,15,56,0,221 .byte 102,15,56,0,221
pxor %xmm3,%xmm0 pxor %xmm3,%xmm0
@ -1014,7 +309,7 @@ gcm_ghash_clmul:
pxor %xmm4,%xmm0 pxor %xmm4,%xmm0
psrlq $1,%xmm0 psrlq $1,%xmm0
pxor %xmm1,%xmm0 pxor %xmm1,%xmm0
.L011done: .L006done:
.byte 102,15,56,0,197 .byte 102,15,56,0,197
movdqu %xmm0,(%eax) movdqu %xmm0,(%eax)
popl %edi popl %edi
@ -1027,48 +322,9 @@ gcm_ghash_clmul:
.Lbswap: .Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 64
.Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
.align 64
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0 .byte 0
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -685,3 +685,4 @@ md5_block_asm_data_order:
ret ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin .size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -3805,3 +3805,4 @@ _sha1_block_data_order_avx:
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -5564,3 +5564,4 @@ sha256_block_data_order:
ret ret
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin .size sha256_block_data_order,.-.L_sha256_block_data_order_begin
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -2834,3 +2834,4 @@ sha512_block_data_order:
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0 .byte 62,0
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -6,7 +6,7 @@
#include <boringssl_prefix_symbols_asm.h> #include <boringssl_prefix_symbols_asm.h>
#endif #endif
.text .text
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
#endif #endif
.align 64 .align 64
.L_vpaes_consts: .L_vpaes_consts:
@ -485,7 +485,7 @@ vpaes_set_encrypt_key:
pushl %ebx pushl %ebx
pushl %esi pushl %esi
pushl %edi pushl %edi
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx pushl %ebx
pushl %edx pushl %edx
call .L016pic call .L016pic
@ -570,7 +570,7 @@ vpaes_encrypt:
pushl %ebx pushl %ebx
pushl %esi pushl %esi
pushl %edi pushl %edi
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
pushl %ebx pushl %ebx
pushl %edx pushl %edx
call .L019pic call .L019pic
@ -705,3 +705,4 @@ vpaes_cbc_encrypt:
ret ret
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin .size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -481,3 +481,4 @@ bn_mul_mont:
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0 .byte 111,114,103,62,0
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -203,3 +203,4 @@ abi_test_clobber_xmm7:
ret ret
.size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin .size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -1630,3 +1630,4 @@ ChaCha20_8x:
.cfi_endproc .cfi_endproc
.size ChaCha20_8x,.-ChaCha20_8x .size ChaCha20_8x,.-ChaCha20_8x
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -3076,3 +3076,4 @@ aes256gcmsiv_kdf:
.cfi_endproc .cfi_endproc
.size aes256gcmsiv_kdf, .-aes256gcmsiv_kdf .size aes256gcmsiv_kdf, .-aes256gcmsiv_kdf
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -3935,7 +3935,7 @@ do_length_block:
popq %rbp popq %rbp
.cfi_adjust_cfa_offset -8 .cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3 .byte 0xf3,0xc3
.cfi_adjust_cfa_offset (8 * 6) + 288 + 32 .cfi_adjust_cfa_offset (8 * 7) + 288 + 32
seal_sse_128: seal_sse_128:
movdqu .chacha20_consts(%rip),%xmm0 movdqu .chacha20_consts(%rip),%xmm0
@ -8984,3 +8984,4 @@ seal_avx2_short_tail:
jmp seal_sse_tail_16 jmp seal_sse_tail_16
.cfi_endproc .cfi_endproc
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -556,12 +556,10 @@ _aesni_ctr32_6x:
.align 32 .align 32
aesni_gcm_encrypt: aesni_gcm_encrypt:
.cfi_startproc .cfi_startproc
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
#ifndef BORINGSSL_FIPS
.extern BORINGSSL_function_hit .extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit .hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+2(%rip) movb $1,BORINGSSL_function_hit+2(%rip)
#endif
#endif #endif
xorq %r10,%r10 xorq %r10,%r10
@ -851,3 +849,4 @@ aesni_gcm_encrypt:
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64 .align 64
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -20,12 +20,10 @@
.align 16 .align 16
aes_hw_encrypt: aes_hw_encrypt:
.cfi_startproc .cfi_startproc
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
#ifndef BORINGSSL_FIPS
.extern BORINGSSL_function_hit .extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit .hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+1(%rip) movb $1,BORINGSSL_function_hit+1(%rip)
#endif
#endif #endif
movups (%rdi),%xmm2 movups (%rdi),%xmm2
movl 240(%rdx),%eax movl 240(%rdx),%eax
@ -887,10 +885,8 @@ aes_hw_ecb_encrypt:
.align 16 .align 16
aes_hw_ctr32_encrypt_blocks: aes_hw_ctr32_encrypt_blocks:
.cfi_startproc .cfi_startproc
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
#ifndef BORINGSSL_FIPS
movb $1,BORINGSSL_function_hit(%rip) movb $1,BORINGSSL_function_hit(%rip)
#endif
#endif #endif
cmpq $1,%rdx cmpq $1,%rdx
jne .Lctr32_bulk jne .Lctr32_bulk
@ -2111,11 +2107,9 @@ aes_hw_set_decrypt_key:
aes_hw_set_encrypt_key: aes_hw_set_encrypt_key:
__aesni_set_encrypt_key: __aesni_set_encrypt_key:
.cfi_startproc .cfi_startproc
#ifndef NDEBUG #ifdef BORINGSSL_DISPATCH_TEST
#ifndef BORINGSSL_FIPS
movb $1,BORINGSSL_function_hit+3(%rip) movb $1,BORINGSSL_function_hit+3(%rip)
#endif #endif
#endif
.byte 0x48,0x83,0xEC,0x08 .byte 0x48,0x83,0xEC,0x08
.cfi_adjust_cfa_offset 8 .cfi_adjust_cfa_offset 8
movq $-1,%rax movq $-1,%rax
@ -2509,3 +2503,4 @@ __aesni_set_encrypt_key:
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64 .align 64
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -424,3 +424,4 @@ gcm_ghash_ssse3:
.Llow4_mask: .Llow4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
#endif #endif
.section .note.GNU-stack,"",@progbits

View File

@ -14,709 +14,6 @@
.text .text
.extern OPENSSL_ia32cap_P .extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P .hidden OPENSSL_ia32cap_P
.globl gcm_gmult_4bit
.hidden gcm_gmult_4bit
.type gcm_gmult_4bit,@function
.align 16
gcm_gmult_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lgmult_prologue:
movzbq 15(%rdi),%r8
leaq .Lrem_4bit(%rip),%r11
xorq %rax,%rax
xorq %rbx,%rbx
movb %r8b,%al
movb %r8b,%bl
shlb $4,%al
movq $14,%rcx
movq 8(%rsi,%rax,1),%r8
movq (%rsi,%rax,1),%r9
andb $0xf0,%bl
movq %r8,%rdx
jmp .Loop1
.align 16
.Loop1:
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
movb (%rdi,%rcx,1),%al
shrq $4,%r9
xorq 8(%rsi,%rbx,1),%r8
shlq $60,%r10
xorq (%rsi,%rbx,1),%r9
movb %al,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
shlb $4,%al
xorq %r10,%r8
decq %rcx
js .Lbreak1
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rax,1),%r8
shlq $60,%r10
xorq (%rsi,%rax,1),%r9
andb $0xf0,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
xorq %r10,%r8
jmp .Loop1
.align 16
.Lbreak1:
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rax,1),%r8
shlq $60,%r10
xorq (%rsi,%rax,1),%r9
andb $0xf0,%bl
xorq (%r11,%rdx,8),%r9
movq %r8,%rdx
xorq %r10,%r8
shrq $4,%r8
andq $0xf,%rdx
movq %r9,%r10
shrq $4,%r9
xorq 8(%rsi,%rbx,1),%r8
shlq $60,%r10
xorq (%rsi,%rbx,1),%r9
xorq %r10,%r8
xorq (%r11,%rdx,8),%r9
bswapq %r8
bswapq %r9
movq %r8,8(%rdi)
movq %r9,(%rdi)
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lgmult_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_gmult_4bit,.-gcm_gmult_4bit
.globl gcm_ghash_4bit
.hidden gcm_ghash_4bit
.type gcm_ghash_4bit,@function
.align 16
gcm_ghash_4bit:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $280,%rsp
.cfi_adjust_cfa_offset 280
.Lghash_prologue:
movq %rdx,%r14
movq %rcx,%r15
subq $-128,%rsi
leaq 16+128(%rsp),%rbp
xorl %edx,%edx
movq 0+0-128(%rsi),%r8
movq 0+8-128(%rsi),%rax
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq 16+0-128(%rsi),%r9
shlb $4,%dl
movq 16+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,0(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,0(%rbp)
movq 32+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,0-128(%rbp)
movq 32+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,1(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,8(%rbp)
movq 48+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,8-128(%rbp)
movq 48+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,2(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,16(%rbp)
movq 64+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,16-128(%rbp)
movq 64+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,3(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,24(%rbp)
movq 80+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,24-128(%rbp)
movq 80+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,4(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,32(%rbp)
movq 96+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,32-128(%rbp)
movq 96+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,5(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,40(%rbp)
movq 112+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,40-128(%rbp)
movq 112+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,6(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,48(%rbp)
movq 128+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,48-128(%rbp)
movq 128+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,7(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,56(%rbp)
movq 144+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,56-128(%rbp)
movq 144+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,8(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,64(%rbp)
movq 160+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,64-128(%rbp)
movq 160+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,9(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,72(%rbp)
movq 176+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,72-128(%rbp)
movq 176+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,10(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,80(%rbp)
movq 192+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,80-128(%rbp)
movq 192+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,11(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,88(%rbp)
movq 208+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,88-128(%rbp)
movq 208+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,12(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,96(%rbp)
movq 224+0-128(%rsi),%r8
shlb $4,%dl
movq %rax,96-128(%rbp)
movq 224+8-128(%rsi),%rax
shlq $60,%r10
movb %dl,13(%rsp)
orq %r10,%rbx
movb %al,%dl
shrq $4,%rax
movq %r8,%r10
shrq $4,%r8
movq %r9,104(%rbp)
movq 240+0-128(%rsi),%r9
shlb $4,%dl
movq %rbx,104-128(%rbp)
movq 240+8-128(%rsi),%rbx
shlq $60,%r10
movb %dl,14(%rsp)
orq %r10,%rax
movb %bl,%dl
shrq $4,%rbx
movq %r9,%r10
shrq $4,%r9
movq %r8,112(%rbp)
shlb $4,%dl
movq %rax,112-128(%rbp)
shlq $60,%r10
movb %dl,15(%rsp)
orq %r10,%rbx
movq %r9,120(%rbp)
movq %rbx,120-128(%rbp)
addq $-128,%rsi
movq 8(%rdi),%r8
movq 0(%rdi),%r9
addq %r14,%r15
leaq .Lrem_8bit(%rip),%r11
jmp .Louter_loop
.align 16
.Louter_loop:
xorq (%r14),%r9
movq 8(%r14),%rdx
leaq 16(%r14),%r14
xorq %r8,%rdx
movq %r9,(%rdi)
movq %rdx,8(%rdi)
shrq $32,%rdx
xorq %rax,%rax
roll $8,%edx
movb %dl,%al
movzbl %dl,%ebx
shlb $4,%al
shrl $4,%ebx
roll $8,%edx
movq 8(%rsi,%rax,1),%r8
movq (%rsi,%rax,1),%r9
movb %dl,%al
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
xorq %r8,%r12
movq %r9,%r10
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 8(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 4(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl 0(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
shrl $4,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r12,2),%r12
movzbl %dl,%ebx
shlb $4,%al
movzbq (%rsp,%rcx,1),%r13
shrl $4,%ebx
shlq $48,%r12
xorq %r8,%r13
movq %r9,%r10
xorq %r12,%r9
shrq $8,%r8
movzbq %r13b,%r13
shrq $8,%r9
xorq -128(%rbp,%rcx,8),%r8
shlq $56,%r10
xorq (%rbp,%rcx,8),%r9
roll $8,%edx
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
movb %dl,%al
xorq %r10,%r8
movzwq (%r11,%r13,2),%r13
movzbl %dl,%ecx
shlb $4,%al
movzbq (%rsp,%rbx,1),%r12
andl $240,%ecx
shlq $48,%r13
xorq %r8,%r12
movq %r9,%r10
xorq %r13,%r9
shrq $8,%r8
movzbq %r12b,%r12
movl -4(%rdi),%edx
shrq $8,%r9
xorq -128(%rbp,%rbx,8),%r8
shlq $56,%r10
xorq (%rbp,%rbx,8),%r9
movzwq (%r11,%r12,2),%r12
xorq 8(%rsi,%rax,1),%r8
xorq (%rsi,%rax,1),%r9
shlq $48,%r12
xorq %r10,%r8
xorq %r12,%r9
movzbq %r8b,%r13
shrq $4,%r8
movq %r9,%r10
shlb $4,%r13b
shrq $4,%r9
xorq 8(%rsi,%rcx,1),%r8
movzwq (%r11,%r13,2),%r13
shlq $60,%r10
xorq (%rsi,%rcx,1),%r9
xorq %r10,%r8
shlq $48,%r13
bswapq %r8
xorq %r13,%r9
bswapq %r9
cmpq %r15,%r14
jb .Louter_loop
movq %r8,8(%rdi)
movq %r9,(%rdi)
leaq 280+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq 0(%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lghash_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_4bit,.-gcm_ghash_4bit
.globl gcm_init_clmul .globl gcm_init_clmul
.hidden gcm_init_clmul .hidden gcm_init_clmul
.type gcm_init_clmul,@function .type gcm_init_clmul,@function
@ -1822,50 +1119,9 @@ gcm_ghash_avx:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask: .L7_mask:
.long 7,0,7,0 .long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
.align 64 .align 64
.type .Lrem_4bit,@object
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type .Lrem_8bit,@object
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64 .align 64
#endif #endif
.section .note.GNU-stack,"",@progbits

Some files were not shown because too many files have changed in this diff Show More