mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge master
This commit is contained in:
commit
cfff7c4c28
4
.github/workflows/backport_branches.yml
vendored
4
.github/workflows/backport_branches.yml
vendored
@ -143,6 +143,8 @@ jobs:
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # For a proper version and performance artifacts
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
@ -188,6 +190,8 @@ jobs:
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # For a proper version and performance artifacts
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
|
4
.github/workflows/master.yml
vendored
4
.github/workflows/master.yml
vendored
@ -643,7 +643,7 @@ jobs:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinTidy:
|
||||
BuilderBinClangTidy:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
@ -1011,7 +1011,7 @@ jobs:
|
||||
- BuilderBinFreeBSD
|
||||
# - BuilderBinGCC
|
||||
- BuilderBinPPC64
|
||||
- BuilderBinTidy
|
||||
- BuilderBinClangTidy
|
||||
- BuilderDebSplitted
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
|
4
.github/workflows/pull_request.yml
vendored
4
.github/workflows/pull_request.yml
vendored
@ -707,7 +707,7 @@ jobs:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinTidy:
|
||||
BuilderBinClangTidy:
|
||||
needs: [DockerHubPush, FastTest]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
@ -1065,7 +1065,7 @@ jobs:
|
||||
- BuilderBinFreeBSD
|
||||
# - BuilderBinGCC
|
||||
- BuilderBinPPC64
|
||||
- BuilderBinTidy
|
||||
- BuilderBinClangTidy
|
||||
- BuilderDebSplitted
|
||||
runs-on: [self-hosted, style-checker]
|
||||
if: ${{ success() || failure() }}
|
||||
|
3
.github/workflows/release.yml
vendored
3
.github/workflows/release.yml
vendored
@ -21,6 +21,9 @@ jobs:
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
# Always use the most recent script version
|
||||
ref: master
|
||||
- name: Download packages and push to Artifactory
|
||||
run: |
|
||||
rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH"
|
||||
|
12
.gitmodules
vendored
12
.gitmodules
vendored
@ -86,9 +86,6 @@
|
||||
[submodule "contrib/h3"]
|
||||
path = contrib/h3
|
||||
url = https://github.com/ClickHouse/h3
|
||||
[submodule "contrib/hyperscan"]
|
||||
path = contrib/hyperscan
|
||||
url = https://github.com/ClickHouse/hyperscan.git
|
||||
[submodule "contrib/libunwind"]
|
||||
path = contrib/libunwind
|
||||
url = https://github.com/ClickHouse/libunwind.git
|
||||
@ -268,3 +265,12 @@
|
||||
[submodule "contrib/hashidsxx"]
|
||||
path = contrib/hashidsxx
|
||||
url = https://github.com/schoentoon/hashidsxx.git
|
||||
[submodule "contrib/vectorscan"]
|
||||
path = contrib/vectorscan
|
||||
url = https://github.com/VectorCamp/vectorscan.git
|
||||
[submodule "contrib/liburing"]
|
||||
path = contrib/liburing
|
||||
url = https://github.com/axboe/liburing.git
|
||||
[submodule "contrib/base-x"]
|
||||
path = contrib/base-x
|
||||
url = https://github.com/ClickHouse/base-x.git
|
||||
|
@ -34,7 +34,6 @@
|
||||
* Add two new settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines` to allow skipping specified number of lines in the beginning of the file in CSV/TSV formats. [#37537](https://github.com/ClickHouse/ClickHouse/pull/37537) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* `showCertificate` function shows current server's SSL certificate. [#37540](https://github.com/ClickHouse/ClickHouse/pull/37540) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* HTTP source for Data Dictionaries in Named Collections is supported. [#37581](https://github.com/ClickHouse/ClickHouse/pull/37581) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Added a new window function `nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL x SECOND])`. [#37628](https://github.com/ClickHouse/ClickHouse/pull/37628) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* Implemented changing the comment for `ReplicatedMergeTree` tables. [#37416](https://github.com/ClickHouse/ClickHouse/pull/37416) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Added `SYSTEM UNFREEZE` query that deletes the whole backup regardless if the corresponding table is deleted or not. [#36424](https://github.com/ClickHouse/ClickHouse/pull/36424) ([Vadim Volodin](https://github.com/PolyProgrammist)).
|
||||
|
||||
|
@ -252,10 +252,10 @@ else ()
|
||||
endif ()
|
||||
|
||||
# Optionally split binaries and debug symbols.
|
||||
option(INSTALL_STRIPPED_BINARIES "Split binaries and debug symbols" OFF)
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
option(SPLIT_DEBUG_SYMBOLS "Split binaries and debug symbols" OFF)
|
||||
if (SPLIT_DEBUG_SYMBOLS)
|
||||
message(STATUS "Will split binaries and debug symbols")
|
||||
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
|
||||
set(SPLITTED_DEBUG_SYMBOLS_DIR "stripped" CACHE STRING "A separate directory for stripped information")
|
||||
endif()
|
||||
|
||||
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
|
||||
|
@ -15,4 +15,8 @@ ClickHouse® is an open-source column-oriented database management system that a
|
||||
* [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any.
|
||||
|
||||
## Upcoming events
|
||||
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/286304312/) Please join us for an evening of talks (in English), food and discussion. Featuring talks of ClickHouse in production and at least one on the deep internals of ClickHouse itself.
|
||||
* [v22.7 Release Webinar](https://clickhouse.com/company/events/v22-7-release-webinar/) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
|
||||
* [ClickHouse Meetup at the Cloudflare office in London](https://www.meetup.com/clickhouse-london-user-group/events/286891586/) ClickHouse meetup at the Cloudflare office space in central London
|
||||
* [ClickHouse Meetup at the Metoda office in Munich](https://www.meetup.com/clickhouse-meetup-munich/events/286891667/) ClickHouse meetup at the Metoda office in Munich
|
||||
|
||||
|
||||
|
@ -89,7 +89,7 @@ public:
|
||||
inline void returnObject(T && object_to_return)
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
|
||||
objects.emplace_back(std::move(object_to_return));
|
||||
--borrowed_objects_size;
|
||||
@ -107,14 +107,14 @@ public:
|
||||
/// Allocated objects size by the pool. If allocatedObjectsSize == maxSize then pool is full.
|
||||
inline size_t allocatedObjectsSize() const
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
return allocated_objects_size;
|
||||
}
|
||||
|
||||
/// Returns allocatedObjectsSize == maxSize
|
||||
inline bool isFull() const
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
return allocated_objects_size == max_size;
|
||||
}
|
||||
|
||||
@ -122,7 +122,7 @@ public:
|
||||
/// Then client will wait during borrowObject function call.
|
||||
inline size_t borrowedObjectsSize() const
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
return borrowed_objects_size;
|
||||
}
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
namespace Poco::Util
|
||||
{
|
||||
class LayeredConfiguration;
|
||||
class LayeredConfiguration; // NOLINT(cppcoreguidelines-virtual-class-destructor)
|
||||
}
|
||||
|
||||
/// Import extra command line arguments to configuration. These are command line arguments after --.
|
||||
|
@ -124,21 +124,37 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
|
||||
// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader
|
||||
/// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
|
||||
/// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader
|
||||
#if defined(__clang__)
|
||||
# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) // data is protected by given capability
|
||||
# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) // pointed-to data is protected by the given capability
|
||||
# define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) // thread needs exclusive possession of given capability
|
||||
# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) // thread needs shared possession of given capability
|
||||
# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) // annotated lock must be locked after given lock
|
||||
# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) // disable TSA for a function
|
||||
# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability
|
||||
# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability
|
||||
# define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability
|
||||
# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability
|
||||
# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock
|
||||
# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function
|
||||
|
||||
/// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function)
|
||||
/// Consider adding a comment before using these macros.
|
||||
# define TSA_SUPPRESS_WARNING_FOR_READ(x) [&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()
|
||||
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) [&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()
|
||||
|
||||
/// This macro is useful when only one thread writes to a member
|
||||
/// and you want to read this member from the same thread without locking a mutex.
|
||||
/// It's safe (because no concurrent writes are possible), but TSA generates a warning.
|
||||
/// (Seems like there's no way to verify it, but it makes sense to distinguish it from TSA_SUPPRESS_WARNING_FOR_READ for readability)
|
||||
# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x)
|
||||
|
||||
#else
|
||||
# define TSA_GUARDED_BY(...)
|
||||
# define TSA_PT_GUARDED_BY(...)
|
||||
# define TSA_REQUIRES(...)
|
||||
# define TSA_REQUIRES_SHARED(...)
|
||||
# define TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
|
||||
# define TSA_SUPPRESS_WARNING_FOR_READ(x)
|
||||
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x)
|
||||
# define TSA_READ_ONE_THREAD(x)
|
||||
#endif
|
||||
|
||||
/// A template function for suppressing warnings about unused variables or function results.
|
||||
|
@ -27,6 +27,6 @@ struct FreeingDeleter
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::unique_ptr<char, FreeingDeleter> DemangleResult;
|
||||
using DemangleResult = std::unique_ptr<char, FreeingDeleter>;
|
||||
|
||||
DemangleResult tryDemangle(const char * name);
|
||||
|
@ -23,10 +23,10 @@ public:
|
||||
constexpr StrongTypedef(): t() {}
|
||||
|
||||
constexpr StrongTypedef(const Self &) = default;
|
||||
constexpr StrongTypedef(Self &&) = default;
|
||||
constexpr StrongTypedef(Self &&) noexcept(std::is_nothrow_move_constructible_v<T>) = default;
|
||||
|
||||
Self & operator=(const Self &) = default;
|
||||
Self & operator=(Self &&) = default;
|
||||
Self & operator=(Self &&) noexcept(std::is_nothrow_move_assignable_v<T>)= default;
|
||||
|
||||
template <class Enable = typename std::is_copy_assignable<T>::type>
|
||||
Self & operator=(const T & rhs) { t = rhs; return *this;}
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <time.h>
|
||||
#include <ctime>
|
||||
|
||||
#if defined (OS_DARWIN) || defined (OS_SUNOS)
|
||||
# define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <string.h>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
|
||||
|
||||
|
@ -27,6 +27,8 @@
|
||||
#include <type_traits>
|
||||
#include <initializer_list>
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
namespace wide
|
||||
{
|
||||
template <size_t Bits, typename Signed>
|
||||
@ -257,4 +259,7 @@ struct hash<wide::integer<Bits, Signed>>;
|
||||
|
||||
}
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#include "wide_integer_impl.h"
|
||||
|
||||
|
@ -15,6 +15,8 @@
|
||||
#include <boost/multiprecision/cpp_bin_float.hpp>
|
||||
#include <boost/math/special_functions/fpclassify.hpp>
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
/// Use same extended double for all platforms
|
||||
#if (LDBL_MANT_DIG == 64)
|
||||
#define CONSTEXPR_FROM_DOUBLE constexpr
|
||||
@ -1478,3 +1480,5 @@ struct hash<wide::integer<Bits, Signed>>
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
@ -90,6 +90,7 @@
|
||||
#define PCG_EMULATED_128BIT_MATH 1
|
||||
#endif
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
namespace pcg_extras {
|
||||
|
||||
@ -552,4 +553,6 @@ std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
|
||||
|
||||
} // namespace pcg_extras
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#endif // PCG_EXTRAS_HPP_INCLUDED
|
||||
|
@ -113,6 +113,8 @@
|
||||
|
||||
#include "pcg_extras.hpp"
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct PcgSerializer;
|
||||
@ -1777,4 +1779,6 @@ typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14,32,true> pcg32_k16384_fast;
|
||||
#pragma warning(default:4146)
|
||||
#endif
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#endif // PCG_RAND_HPP_INCLUDED
|
||||
|
@ -16,6 +16,8 @@
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
/* Special width values */
|
||||
enum {
|
||||
widechar_nonprint = -1, // The character is not printable.
|
||||
@ -518,4 +520,6 @@ inline int widechar_wcwidth(wchar_t c) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#endif // WIDECHAR_WIDTH_H
|
||||
|
@ -1,180 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# script to run query to databases
|
||||
|
||||
function usage()
|
||||
{
|
||||
cat <<EOF
|
||||
usage: $0 options
|
||||
|
||||
This script run benhmark for database
|
||||
|
||||
OPTIONS:
|
||||
-c config file where some script variables are defined
|
||||
-n table name
|
||||
|
||||
-h Show this message
|
||||
-t how many times execute each query. default is '3'
|
||||
-q query file
|
||||
-e expect file
|
||||
-s /etc/init.d/service
|
||||
-p table name pattern to be replaced to name. default is 'hits_10m'
|
||||
EOF
|
||||
}
|
||||
|
||||
TIMES=3
|
||||
table_name_pattern=hits_10m
|
||||
|
||||
while getopts “c:ht:n:q:e:s:r” OPTION
|
||||
do
|
||||
case $OPTION in
|
||||
c)
|
||||
source $OPTARG
|
||||
;;
|
||||
?)
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
OPTIND=1
|
||||
|
||||
while getopts “c:ht:n:q:e:s:r” OPTION
|
||||
do
|
||||
case $OPTION in
|
||||
h)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
t)
|
||||
TIMES=$OPTARG
|
||||
;;
|
||||
n)
|
||||
table_name=$OPTARG
|
||||
;;
|
||||
q)
|
||||
test_file=$OPTARG
|
||||
;;
|
||||
e)
|
||||
expect_file=$OPTARG
|
||||
;;
|
||||
s)
|
||||
etc_init_d_service=$OPTARG
|
||||
;;
|
||||
p)
|
||||
table_name_pattern=$OPTARG
|
||||
;;
|
||||
c)
|
||||
;;
|
||||
r)
|
||||
restart_server_each_query=1
|
||||
;;
|
||||
?)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ! -f $expect_file ]]; then
|
||||
echo "Not found: expect file"
|
||||
exit 1
|
||||
fi
|
||||
if [[ ! -f $test_file ]]; then
|
||||
echo "Not found: test file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ ! -f $etc_init_d_service ]]; then
|
||||
echo "Not found: /etc/init.d/service with path=$etc_init_d_service"
|
||||
use_service=0
|
||||
else
|
||||
use_service=1
|
||||
fi
|
||||
|
||||
if [[ "$table_name_pattern" == "" ]]; then
|
||||
echo "Empty table_name_pattern"
|
||||
exit 1
|
||||
fi
|
||||
if [[ "$table_name" == "" ]]; then
|
||||
echo "Empty table_name"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
function execute()
|
||||
{
|
||||
queries=("${@}")
|
||||
queries_count=${#queries[@]}
|
||||
|
||||
if [ -z $TIMES ]; then
|
||||
TIMES=1
|
||||
fi
|
||||
|
||||
index=0
|
||||
while [ "$index" -lt "$queries_count" ]; do
|
||||
query=${queries[$index]}
|
||||
|
||||
if [[ $query == "" ]]; then
|
||||
let "index = $index + 1"
|
||||
continue
|
||||
fi
|
||||
|
||||
comment_re='--.*'
|
||||
if [[ $query =~ $comment_re ]]; then
|
||||
echo "$query"
|
||||
echo
|
||||
else
|
||||
sync
|
||||
sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"
|
||||
|
||||
if [[ "$restart_server_each_query" == "1" && "$use_service" == "1" ]]; then
|
||||
echo "restart server: $etc_init_d_service restart"
|
||||
sudo $etc_init_d_service restart
|
||||
fi
|
||||
|
||||
for i in $(seq $TIMES)
|
||||
do
|
||||
if [[ -f $etc_init_d_service && "$use_service" == "1" ]]; then
|
||||
sudo $etc_init_d_service status
|
||||
server_status=$?
|
||||
expect -f $expect_file ""
|
||||
|
||||
if [[ "$?" != "0" || $server_status != "0" ]]; then
|
||||
echo "restart server: $etc_init_d_service restart"
|
||||
sudo $etc_init_d_service restart
|
||||
fi
|
||||
|
||||
#wait until can connect to server
|
||||
restart_timer=0
|
||||
restart_limit=60
|
||||
expect -f $expect_file "" &> /dev/null
|
||||
while [ "$?" != "0" ]; do
|
||||
echo "waiting"
|
||||
sleep 1
|
||||
let "restart_timer = $restart_timer + 1"
|
||||
if (( $restart_limit < $restart_timer )); then
|
||||
sudo $etc_init_d_service restart
|
||||
restart_timer=0
|
||||
fi
|
||||
expect -f $expect_file "" &> /dev/null
|
||||
done
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "times: $i"
|
||||
|
||||
echo "query:" "$query"
|
||||
expect -f $expect_file "$query"
|
||||
|
||||
done
|
||||
fi
|
||||
|
||||
let "index = $index + 1"
|
||||
done
|
||||
}
|
||||
|
||||
temp_test_file=temp_queries_$table_name
|
||||
cat $test_file | sed s/$table_name_pattern/$table_name/g > $temp_test_file
|
||||
mapfile -t test_queries < $temp_test_file
|
||||
|
||||
echo "start time: $(date)"
|
||||
time execute "${test_queries[@]}"
|
||||
echo "stop time: $(date)"
|
@ -1,3 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
table=hits_10m; time clickhouse-client --max_bytes_before_external_sort=30000000000 --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, (EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID FROM $table ORDER BY rand()" | corrector_utf8 > /opt/dumps/${table}_corrected.tsv
|
@ -1,107 +1,25 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
if [[ -n $1 ]]; then
|
||||
SCALE=$1
|
||||
else
|
||||
SCALE=100
|
||||
fi
|
||||
|
||||
TABLE="hits_${SCALE}m_obfuscated"
|
||||
DATASET="${TABLE}_v1.tar.xz"
|
||||
QUERIES_FILE="queries.sql"
|
||||
TRIES=3
|
||||
|
||||
# Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'.
|
||||
mkdir -p clickhouse-benchmark
|
||||
pushd clickhouse-benchmark
|
||||
|
||||
FASTER_DOWNLOAD=wget
|
||||
if command -v axel >/dev/null; then
|
||||
FASTER_DOWNLOAD=axel
|
||||
else
|
||||
echo "It's recommended to install 'axel' for faster downloads."
|
||||
# Download the binary
|
||||
if [[ ! -x clickhouse ]]; then
|
||||
curl https://clickhouse.com/ | sh
|
||||
fi
|
||||
|
||||
if command -v pixz >/dev/null; then
|
||||
TAR_PARAMS='-Ipixz'
|
||||
else
|
||||
echo "It's recommended to install 'pixz' for faster decompression of the dataset."
|
||||
fi
|
||||
|
||||
mkdir -p clickhouse-benchmark-$SCALE
|
||||
pushd clickhouse-benchmark-$SCALE
|
||||
|
||||
OS=$(uname -s)
|
||||
ARCH=$(uname -m)
|
||||
|
||||
DIR=
|
||||
|
||||
if [ "${OS}" = "Linux" ]
|
||||
then
|
||||
if [ "${ARCH}" = "x86_64" ]
|
||||
then
|
||||
DIR="amd64"
|
||||
elif [ "${ARCH}" = "aarch64" ]
|
||||
then
|
||||
DIR="aarch64"
|
||||
elif [ "${ARCH}" = "powerpc64le" ]
|
||||
then
|
||||
DIR="powerpc64le"
|
||||
fi
|
||||
elif [ "${OS}" = "FreeBSD" ]
|
||||
then
|
||||
if [ "${ARCH}" = "x86_64" ]
|
||||
then
|
||||
DIR="freebsd"
|
||||
elif [ "${ARCH}" = "aarch64" ]
|
||||
then
|
||||
DIR="freebsd-aarch64"
|
||||
elif [ "${ARCH}" = "powerpc64le" ]
|
||||
then
|
||||
DIR="freebsd-powerpc64le"
|
||||
fi
|
||||
elif [ "${OS}" = "Darwin" ]
|
||||
then
|
||||
if [ "${ARCH}" = "x86_64" ]
|
||||
then
|
||||
DIR="macos"
|
||||
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
|
||||
then
|
||||
DIR="macos-aarch64"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "${DIR}" ]
|
||||
then
|
||||
echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
|
||||
echo
|
||||
echo "Will download ${URL}"
|
||||
echo
|
||||
curl -O "${URL}" && chmod a+x clickhouse || exit 1
|
||||
echo
|
||||
echo "Successfully downloaded the ClickHouse binary"
|
||||
|
||||
chmod a+x clickhouse
|
||||
|
||||
if [[ ! -f $QUERIES_FILE ]]; then
|
||||
wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/$QUERIES_FILE"
|
||||
fi
|
||||
|
||||
if [[ ! -d data ]]; then
|
||||
if [[ ! -f $DATASET ]]; then
|
||||
$FASTER_DOWNLOAD "https://datasets.clickhouse.com/hits/partitions/$DATASET"
|
||||
fi
|
||||
|
||||
tar $TAR_PARAMS --strip-components=1 --directory=. -x -v -f $DATASET
|
||||
fi
|
||||
|
||||
uptime
|
||||
|
||||
echo "Starting clickhouse-server"
|
||||
|
||||
./clickhouse server > server.log 2>&1 &
|
||||
./clickhouse server >/dev/null 2>&1 &
|
||||
PID=$!
|
||||
|
||||
function finish {
|
||||
@ -114,18 +32,45 @@ echo "Waiting for clickhouse-server to start"
|
||||
|
||||
for i in {1..30}; do
|
||||
sleep 1
|
||||
./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM $TABLE" 2>/dev/null && break || echo '.'
|
||||
./clickhouse client --query "SELECT 'Ok.'" 2>/dev/null && break || echo -n '.'
|
||||
if [[ $i == 30 ]]; then exit 1; fi
|
||||
done
|
||||
|
||||
if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse client --query "SELECT count() FROM hits") == '100000000' ]]; then
|
||||
echo "Dataset already downloaded"
|
||||
else
|
||||
echo "Will download the dataset"
|
||||
if [ "`uname`" = "Darwin" ]
|
||||
then
|
||||
./clickhouse client --receive_timeout 1000 --max_insert_threads $(sysctl -n hw.ncpu) --progress --query "
|
||||
CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime)
|
||||
AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')"
|
||||
else
|
||||
./clickhouse client --receive_timeout 1000 --max_insert_threads $(nproc || 4) --progress --query "
|
||||
CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime)
|
||||
AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')"
|
||||
fi
|
||||
./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM hits"
|
||||
fi
|
||||
|
||||
if [[ $(./clickhouse client --query "SELECT count() FROM system.parts WHERE table = 'hits' AND database = 'default' AND active") == '1' ]]; then
|
||||
echo "Dataset already prepared"
|
||||
else
|
||||
echo "Will prepare the dataset"
|
||||
./clickhouse client --receive_timeout 1000 --query "OPTIMIZE TABLE hits FINAL"
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "Will perform benchmark. Results:"
|
||||
echo
|
||||
|
||||
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
|
||||
>result.csv
|
||||
QUERY_NUM=1
|
||||
|
||||
cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do
|
||||
sync
|
||||
if [ "${OS}" = "Darwin" ]
|
||||
then
|
||||
if [ "`uname`" = "Darwin" ]
|
||||
then
|
||||
sudo purge > /dev/null
|
||||
else
|
||||
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
|
||||
@ -133,11 +78,15 @@ cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
|
||||
|
||||
echo -n "["
|
||||
for i in $(seq 1 $TRIES); do
|
||||
RES=$(./clickhouse client --max_memory_usage 100G --time --format=Null --query="$query" 2>&1 ||:)
|
||||
RES=$(./clickhouse client --time --format=Null --query="$query" 2>&1 ||:)
|
||||
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
|
||||
[[ "$i" != $TRIES ]] && echo -n ", "
|
||||
|
||||
echo "${QUERY_NUM},${i},${RES}" >> result.csv
|
||||
done
|
||||
echo "],"
|
||||
|
||||
QUERY_NUM=$((QUERY_NUM + 1))
|
||||
done
|
||||
|
||||
|
||||
@ -145,22 +94,23 @@ echo
|
||||
echo "Benchmark complete. System info:"
|
||||
echo
|
||||
|
||||
if [ "${OS}" = "Darwin" ]
|
||||
then
|
||||
touch {cpu_model,cpu,df,memory,memory_total,blk,mdstat,instance}.txt
|
||||
|
||||
if [ "`uname`" = "Darwin" ]
|
||||
then
|
||||
echo '----Version, build id-----------'
|
||||
./clickhouse local --query "SELECT format('Version: {}', version())"
|
||||
sw_vers | grep BuildVersion
|
||||
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
|
||||
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
|
||||
echo '----CPU-------------------------'
|
||||
sysctl hw.model
|
||||
sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize'
|
||||
sysctl hw.model | tee cpu_model.txt
|
||||
sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' | tee cpu.txt
|
||||
echo '----Disk Free and Total--------'
|
||||
df -h .
|
||||
df -h . | tee df.txt
|
||||
echo '----Memory Free and Total-------'
|
||||
vm_stat
|
||||
vm_stat | tee memory.txt
|
||||
echo '----Physical Memory Amount------'
|
||||
ls -l /var/vm
|
||||
ls -l /var/vm | tee memory_total.txt
|
||||
echo '--------------------------------'
|
||||
else
|
||||
echo '----Version, build id-----------'
|
||||
@ -168,22 +118,130 @@ else
|
||||
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
|
||||
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
|
||||
echo '----CPU-------------------------'
|
||||
cat /proc/cpuinfo | grep -i -F 'model name' | uniq
|
||||
lscpu
|
||||
cat /proc/cpuinfo | grep -i -F 'model name' | uniq | tee cpu_model.txt
|
||||
lscpu | tee cpu.txt
|
||||
echo '----Block Devices---------------'
|
||||
lsblk
|
||||
lsblk | tee blk.txt
|
||||
echo '----Disk Free and Total--------'
|
||||
df -h .
|
||||
df -h . | tee df.txt
|
||||
echo '----Memory Free and Total-------'
|
||||
free -h
|
||||
free -h | tee memory.txt
|
||||
echo '----Physical Memory Amount------'
|
||||
cat /proc/meminfo | grep MemTotal
|
||||
cat /proc/meminfo | grep MemTotal | tee memory_total.txt
|
||||
echo '----RAID Info-------------------'
|
||||
cat /proc/mdstat
|
||||
#echo '----PCI-------------------------'
|
||||
#lspci
|
||||
#echo '----All Hardware Info-----------'
|
||||
#lshw
|
||||
cat /proc/mdstat| tee mdstat.txt
|
||||
echo '--------------------------------'
|
||||
fi
|
||||
echo
|
||||
|
||||
echo "Instance type from IMDS (if available):"
|
||||
curl -s --connect-timeout 1 'http://169.254.169.254/latest/meta-data/instance-type' | tee instance.txt
|
||||
echo
|
||||
|
||||
echo "Uploading the results (if possible)"
|
||||
|
||||
UUID=$(./clickhouse local --query "SELECT generateUUIDv4()")
|
||||
|
||||
./clickhouse local --query "
|
||||
SELECT
|
||||
'${UUID}' AS run_id,
|
||||
version() AS version,
|
||||
now() AS test_time,
|
||||
(SELECT value FROM system.settings WHERE name = 'max_threads') AS threads,
|
||||
filesystemCapacity() AS fs_capacity,
|
||||
filesystemAvailable() AS fs_available,
|
||||
file('cpu_model.txt') AS cpu_model,
|
||||
file('cpu.txt') AS cpu,
|
||||
file('df.txt') AS df,
|
||||
file('memory.txt') AS memory,
|
||||
file('memory_total.txt') AS memory_total,
|
||||
file('blk.txt') AS blk,
|
||||
file('mdstat.txt') AS mdstat,
|
||||
file('instance.txt') AS instance
|
||||
" | tee meta.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query "
|
||||
INSERT INTO benchmark_runs
|
||||
(run_id, version, test_time, threads, fs_capacity, fs_available, cpu_model, cpu, df, memory, memory_total, blk, mdstat, instance)
|
||||
FORMAT TSV" || echo "Cannot upload results."
|
||||
|
||||
./clickhouse local --query "
|
||||
SELECT
|
||||
'${UUID}' AS run_id,
|
||||
c1 AS query_num,
|
||||
c2 AS try_num,
|
||||
c3 AS time
|
||||
FROM file('result.csv')
|
||||
" | tee results.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query "
|
||||
INSERT INTO benchmark_results
|
||||
(run_id, query_num, try_num, time)
|
||||
FORMAT TSV" || echo "Cannot upload results. Please send the output to feedback@clickhouse.com"
|
||||
|
||||
<<////
|
||||
|
||||
Server Setup:
|
||||
|
||||
CREATE TABLE benchmark_runs
|
||||
(
|
||||
run_id UUID,
|
||||
version String,
|
||||
test_time DateTime,
|
||||
threads String,
|
||||
fs_capacity UInt64,
|
||||
fs_available UInt64,
|
||||
cpu_model String,
|
||||
cpu String,
|
||||
df String,
|
||||
memory String,
|
||||
memory_total String,
|
||||
blk String,
|
||||
mdstat String,
|
||||
instance String
|
||||
) ENGINE = ReplicatedMergeTree ORDER BY run_id;
|
||||
|
||||
CREATE TABLE benchmark_results
|
||||
(
|
||||
run_id UUID,
|
||||
query_num UInt8,
|
||||
try_num UInt8,
|
||||
time Decimal32(3)
|
||||
) ENGINE = ReplicatedMergeTree ORDER BY (run_id, query_num, try_num);
|
||||
|
||||
CREATE USER benchmark IDENTIFIED WITH no_password SETTINGS max_rows_to_read = 1, max_result_rows = 1, max_execution_time = 1;
|
||||
|
||||
CREATE QUOTA benchmark
|
||||
KEYED BY ip_address
|
||||
FOR RANDOMIZED INTERVAL 1 MINUTE MAX query_inserts = 4, written_bytes = 100000,
|
||||
FOR RANDOMIZED INTERVAL 1 HOUR MAX query_inserts = 10, written_bytes = 500000,
|
||||
FOR RANDOMIZED INTERVAL 1 DAY MAX query_inserts = 50, written_bytes = 2000000
|
||||
TO benchmark;
|
||||
|
||||
GRANT INSERT ON benchmark_runs TO benchmark;
|
||||
GRANT INSERT ON benchmark_results TO benchmark;
|
||||
|
||||
Example query:
|
||||
|
||||
SELECT
|
||||
cpu_model,
|
||||
threads,
|
||||
instance,
|
||||
k
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
run_id,
|
||||
exp(avg(log(adjusted_time / best_time))) AS k
|
||||
FROM
|
||||
(
|
||||
WITH greatest(time, 0.01) AS adjusted_time
|
||||
SELECT
|
||||
run_id,
|
||||
adjusted_time,
|
||||
min(adjusted_time) OVER (PARTITION BY query_num, try_num) AS best_time
|
||||
FROM benchmark_results
|
||||
WHERE try_num > 1
|
||||
)
|
||||
GROUP BY run_id
|
||||
ORDER BY k ASC
|
||||
) AS t
|
||||
INNER JOIN benchmark_runs USING (run_id)
|
||||
|
||||
////
|
||||
|
@ -1,4 +1,4 @@
|
||||
macro(clickhouse_strip_binary)
|
||||
macro(clickhouse_split_debug_symbols)
|
||||
set(oneValueArgs TARGET DESTINATION_DIR BINARY_PATH)
|
||||
|
||||
cmake_parse_arguments(STRIP "" "${oneValueArgs}" "" ${ARGN})
|
3
contrib/CMakeLists.txt
vendored
3
contrib/CMakeLists.txt
vendored
@ -58,7 +58,7 @@ add_contrib (boost-cmake boost)
|
||||
add_contrib (cctz-cmake cctz)
|
||||
add_contrib (consistent-hashing)
|
||||
add_contrib (dragonbox-cmake dragonbox)
|
||||
add_contrib (hyperscan-cmake hyperscan)
|
||||
add_contrib (vectorscan-cmake vectorscan)
|
||||
add_contrib (jemalloc-cmake jemalloc)
|
||||
add_contrib (libcpuid-cmake libcpuid)
|
||||
add_contrib (libdivide)
|
||||
@ -155,6 +155,7 @@ endif()
|
||||
|
||||
add_contrib (sqlite-cmake sqlite-amalgamation)
|
||||
add_contrib (s2geometry-cmake s2geometry)
|
||||
add_contrib (base-x-cmake base-x)
|
||||
|
||||
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
|
||||
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
|
||||
|
1
contrib/base-x
vendored
Submodule
1
contrib/base-x
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit a85f98fb4ed52c2f4029a4b6ac1ef0bafdfc56f5
|
28
contrib/base-x-cmake/CMakeLists.txt
Normal file
28
contrib/base-x-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,28 @@
|
||||
option (ENABLE_BASEX "Enable base-x" ${ENABLE_LIBRARIES})
|
||||
|
||||
if (NOT ENABLE_BASEX)
|
||||
message(STATUS "Not using base-x")
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base-x")
|
||||
|
||||
set (SRCS
|
||||
${LIBRARY_DIR}/base_x.hh
|
||||
${LIBRARY_DIR}/uinteger_t.hh
|
||||
)
|
||||
|
||||
add_library(_base-x INTERFACE)
|
||||
target_include_directories(_base-x SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/base-x")
|
||||
|
||||
if (XCODE OR XCODE_VERSION)
|
||||
# https://gitlab.kitware.com/cmake/cmake/issues/17457
|
||||
# Some native build systems may not like targets that have only object files, so consider adding at least one real source file
|
||||
# This applies to Xcode.
|
||||
if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
|
||||
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "")
|
||||
endif ()
|
||||
target_sources(_base-x PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
|
||||
endif ()
|
||||
|
||||
add_library(ch_contrib::base-x ALIAS _base-x)
|
2
contrib/boringssl
vendored
2
contrib/boringssl
vendored
@ -1 +1 @@
|
||||
Subproject commit 9c0715ce459de443e7b08f270a518c1702f1a380
|
||||
Subproject commit c1e01a441d6db234f4f12e63a7657d1f9e6db9c1
|
@ -159,14 +159,12 @@ set(
|
||||
ios-aarch64/crypto/fipsmodule/sha512-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
|
||||
ios-aarch64/crypto/test/trampoline-armv8.S
|
||||
ios-aarch64/crypto/third_party/sike/asm/fp-armv8.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_ios_arm_SOURCES
|
||||
|
||||
ios-arm/crypto/chacha/chacha-armv4.S
|
||||
ios-arm/crypto/fipsmodule/aes-armv4.S
|
||||
ios-arm/crypto/fipsmodule/aesv8-armx32.S
|
||||
ios-arm/crypto/fipsmodule/armv4-mont.S
|
||||
ios-arm/crypto/fipsmodule/bsaes-armv7.S
|
||||
@ -192,14 +190,12 @@ set(
|
||||
linux-aarch64/crypto/fipsmodule/sha512-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
|
||||
linux-aarch64/crypto/test/trampoline-armv8.S
|
||||
linux-aarch64/crypto/third_party/sike/asm/fp-armv8.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_arm_SOURCES
|
||||
|
||||
linux-arm/crypto/chacha/chacha-armv4.S
|
||||
linux-arm/crypto/fipsmodule/aes-armv4.S
|
||||
linux-arm/crypto/fipsmodule/aesv8-armx32.S
|
||||
linux-arm/crypto/fipsmodule/armv4-mont.S
|
||||
linux-arm/crypto/fipsmodule/bsaes-armv7.S
|
||||
@ -219,13 +215,13 @@ set(
|
||||
|
||||
linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
|
||||
linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
|
||||
linux-ppc64le/crypto/test/trampoline-ppc.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_x86_SOURCES
|
||||
|
||||
linux-x86/crypto/chacha/chacha-x86.S
|
||||
linux-x86/crypto/fipsmodule/aes-586.S
|
||||
linux-x86/crypto/fipsmodule/aesni-x86.S
|
||||
linux-x86/crypto/fipsmodule/bn-586.S
|
||||
linux-x86/crypto/fipsmodule/co-586.S
|
||||
@ -246,7 +242,6 @@ set(
|
||||
linux-x86_64/crypto/chacha/chacha-x86_64.S
|
||||
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
|
||||
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/aes-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
|
||||
@ -263,7 +258,6 @@ set(
|
||||
linux-x86_64/crypto/fipsmodule/x86_64-mont.S
|
||||
linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
|
||||
linux-x86_64/crypto/test/trampoline-x86_64.S
|
||||
linux-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S"
|
||||
)
|
||||
|
||||
@ -271,7 +265,6 @@ set(
|
||||
CRYPTO_mac_x86_SOURCES
|
||||
|
||||
mac-x86/crypto/chacha/chacha-x86.S
|
||||
mac-x86/crypto/fipsmodule/aes-586.S
|
||||
mac-x86/crypto/fipsmodule/aesni-x86.S
|
||||
mac-x86/crypto/fipsmodule/bn-586.S
|
||||
mac-x86/crypto/fipsmodule/co-586.S
|
||||
@ -292,7 +285,6 @@ set(
|
||||
mac-x86_64/crypto/chacha/chacha-x86_64.S
|
||||
mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
|
||||
mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/aes-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
|
||||
@ -309,7 +301,6 @@ set(
|
||||
mac-x86_64/crypto/fipsmodule/x86_64-mont.S
|
||||
mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
|
||||
mac-x86_64/crypto/test/trampoline-x86_64.S
|
||||
mac-x86_64/crypto/third_party/sike/asm/fp-x86_64.S
|
||||
)
|
||||
|
||||
set(
|
||||
@ -331,7 +322,6 @@ set(
|
||||
CRYPTO_win_x86_SOURCES
|
||||
|
||||
win-x86/crypto/chacha/chacha-x86.asm
|
||||
win-x86/crypto/fipsmodule/aes-586.asm
|
||||
win-x86/crypto/fipsmodule/aesni-x86.asm
|
||||
win-x86/crypto/fipsmodule/bn-586.asm
|
||||
win-x86/crypto/fipsmodule/co-586.asm
|
||||
@ -352,7 +342,6 @@ set(
|
||||
win-x86_64/crypto/chacha/chacha-x86_64.asm
|
||||
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
|
||||
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/aes-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
|
||||
@ -369,7 +358,6 @@ set(
|
||||
win-x86_64/crypto/fipsmodule/x86_64-mont.asm
|
||||
win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
|
||||
win-x86_64/crypto/test/trampoline-x86_64.asm
|
||||
win-x86_64/crypto/third_party/sike/asm/fp-x86_64.asm
|
||||
)
|
||||
|
||||
if(APPLE AND ARCH STREQUAL "aarch64")
|
||||
@ -401,6 +389,7 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c"
|
||||
@ -430,6 +419,7 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c"
|
||||
@ -454,20 +444,22 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/crypto.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh/check.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh/dh.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh/dh_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh/params.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c"
|
||||
@ -492,8 +484,8 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/mem.c"
|
||||
@ -519,6 +511,7 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c"
|
||||
@ -532,15 +525,18 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c"
|
||||
@ -606,19 +602,11 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pku.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_sxnet.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/fiat/curve25519.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/sike/asm/fp_generic.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/sike/curve_params.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/sike/fpx.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/sike/isogeny.c"
|
||||
"${BORINGSSL_SOURCE_DIR}/third_party/sike/sike.c"
|
||||
)
|
||||
|
||||
add_library(
|
||||
@ -631,6 +619,8 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc"
|
||||
@ -653,7 +643,6 @@ add_library(
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc"
|
||||
@ -674,7 +663,9 @@ add_executable(
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/client.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/const.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/digest.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/fd.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/file.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc"
|
||||
"${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc"
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@ -33,6 +33,7 @@ Lone:
|
||||
|
||||
.align 5
|
||||
_ChaCha20_ctr32:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
cbz x2,Labort
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x5,:pg_hi21_nc:_OPENSSL_armcap_P
|
||||
@ -46,6 +47,7 @@ _ChaCha20_ctr32:
|
||||
b.ne ChaCha20_neon
|
||||
|
||||
Lshort:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -258,6 +260,7 @@ Loop:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
Labort:
|
||||
ret
|
||||
|
||||
@ -314,12 +317,14 @@ Loop_tail:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
|
||||
.align 5
|
||||
ChaCha20_neon:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -700,6 +705,7 @@ Loop_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
Ltail_neon:
|
||||
@ -809,11 +815,13 @@ Ldone_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.align 5
|
||||
ChaCha20_512_neon:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1977,6 +1985,7 @@ Ldone_512_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
|
@ -32,6 +32,8 @@ Lrcon:
|
||||
.align 5
|
||||
_aes_hw_set_encrypt_key:
|
||||
Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
mov x3,#-1
|
||||
@ -200,6 +202,7 @@ Lenc_key_abort:
|
||||
|
||||
.align 5
|
||||
_aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
bl Lenc_key
|
||||
@ -233,6 +236,7 @@ Loop_imc:
|
||||
eor x0,x0,x0 // return value
|
||||
Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.globl _aes_hw_encrypt
|
||||
@ -240,6 +244,7 @@ Ldec_key_abort:
|
||||
|
||||
.align 5
|
||||
_aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@ -270,6 +275,7 @@ Loop_enc:
|
||||
|
||||
.align 5
|
||||
_aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@ -300,6 +306,8 @@ Loop_dec:
|
||||
|
||||
.align 5
|
||||
_aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
subs x2,x2,#16
|
||||
@ -591,6 +599,8 @@ Lcbc_abort:
|
||||
|
||||
.align 5
|
||||
_aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
|
@ -12,6 +12,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl _bn_mul_mont
|
||||
@ -19,6 +21,7 @@
|
||||
|
||||
.align 5
|
||||
_bn_mul_mont:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
tst x5,#7
|
||||
b.eq __bn_sqr8x_mont
|
||||
tst x5,#3
|
||||
@ -216,11 +219,14 @@ Lcond_copy:
|
||||
mov x0,#1
|
||||
ldp x23,x24,[x29,#48]
|
||||
ldr x29,[sp],#64
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.align 5
|
||||
__bn_sqr8x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
|
||||
// only from bn_mul_mont which has already signed the return address.
|
||||
cmp x1,x2
|
||||
b.ne __bn_mul4x_mont
|
||||
Lsqr8x_mont:
|
||||
@ -974,11 +980,16 @@ Lsqr8x_done:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.align 5
|
||||
__bn_mul4x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
|
||||
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
|
||||
// return address.
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
stp x19,x20,[sp,#16]
|
||||
@ -1412,6 +1423,8 @@ Lmul4x_done:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
|
@ -12,6 +12,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl _gcm_init_neon
|
||||
@ -19,6 +21,7 @@
|
||||
|
||||
.align 4
|
||||
_gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
@ -44,6 +47,7 @@ _gcm_init_neon:
|
||||
|
||||
.align 4
|
||||
_gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
@ -63,6 +67,7 @@ _gcm_gmult_neon:
|
||||
|
||||
.align 4
|
||||
_gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
.align 4
|
||||
_gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
@ -72,6 +73,7 @@ _gcm_init_v8:
|
||||
|
||||
.align 4
|
||||
_gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
@ -114,6 +116,7 @@ _gcm_gmult_v8:
|
||||
|
||||
.align 4
|
||||
_gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
|
@ -22,6 +22,8 @@
|
||||
|
||||
.align 6
|
||||
_sha1_block_data_order:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
|
||||
#else
|
||||
@ -1089,6 +1091,8 @@ Loop:
|
||||
|
||||
.align 6
|
||||
sha1_block_armv8:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
Lv8_entry:
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
@ -63,6 +63,7 @@
|
||||
|
||||
.align 6
|
||||
_sha256_block_data_order:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#ifndef __KERNEL__
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P
|
||||
@ -73,6 +74,7 @@ _sha256_block_data_order:
|
||||
tst w16,#ARMV8_SHA256
|
||||
b.ne Lv8_entry
|
||||
#endif
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1033,6 +1035,7 @@ Loop_16_xx:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -1067,6 +1070,7 @@ LK256:
|
||||
.align 6
|
||||
sha256_block_armv8:
|
||||
Lv8_entry:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
|
@ -63,6 +63,7 @@
|
||||
|
||||
.align 6
|
||||
_sha512_block_data_order:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1023,6 +1024,7 @@ Loop_16_xx:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
|
@ -12,6 +12,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.section __TEXT,__const
|
||||
|
||||
|
||||
@ -214,6 +216,7 @@ Lenc_entry:
|
||||
|
||||
.align 4
|
||||
_vpaes_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -223,6 +226,7 @@ _vpaes_encrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -451,6 +455,7 @@ Ldec_entry:
|
||||
|
||||
.align 4
|
||||
_vpaes_decrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -460,6 +465,7 @@ _vpaes_decrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -629,6 +635,7 @@ _vpaes_key_preheat:
|
||||
|
||||
.align 4
|
||||
_vpaes_schedule_core:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -798,6 +805,7 @@ Lschedule_mangle_last_dec:
|
||||
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
|
||||
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
|
||||
ldp x29, x30, [sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -1011,6 +1019,7 @@ Lschedule_mangle_both:
|
||||
|
||||
.align 4
|
||||
_vpaes_set_encrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1026,6 +1035,7 @@ _vpaes_set_encrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -1034,6 +1044,7 @@ _vpaes_set_encrypt_key:
|
||||
|
||||
.align 4
|
||||
_vpaes_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1053,6 +1064,7 @@ _vpaes_set_decrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.globl _vpaes_cbc_encrypt
|
||||
@ -1060,6 +1072,7 @@ _vpaes_set_decrypt_key:
|
||||
|
||||
.align 4
|
||||
_vpaes_cbc_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
cbz x2, Lcbc_abort
|
||||
cmp w5, #0 // check direction
|
||||
b.eq vpaes_cbc_decrypt
|
||||
@ -1086,6 +1099,7 @@ Lcbc_enc_loop:
|
||||
st1 {v0.16b}, [x4] // write ivec
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
Lcbc_abort:
|
||||
ret
|
||||
|
||||
@ -1093,6 +1107,8 @@ Lcbc_abort:
|
||||
|
||||
.align 4
|
||||
vpaes_cbc_decrypt:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
|
||||
// only from vpaes_cbc_encrypt which has already signed the return address.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1134,6 +1150,7 @@ Lcbc_dec_done:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.globl _vpaes_ctr32_encrypt_blocks
|
||||
@ -1141,6 +1158,7 @@ Lcbc_dec_done:
|
||||
|
||||
.align 4
|
||||
_vpaes_ctr32_encrypt_blocks:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1208,6 +1226,7 @@ Lctr32_done:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
|
@ -12,6 +12,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ -26,6 +28,7 @@
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
Labi_test_trampoline_begin:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
// Stack layout (low to high addresses)
|
||||
// x29,x30 (16 bytes)
|
||||
// d8-d15 (64 bytes)
|
||||
@ -128,6 +131,7 @@ Lx29_ok:
|
||||
ldp x27, x28, [sp, #144]
|
||||
|
||||
ldp x29, x30, [sp], #176
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
@ -135,6 +139,7 @@ Lx29_ok:
|
||||
.private_extern _abi_test_clobber_x0
|
||||
.align 4
|
||||
_abi_test_clobber_x0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x0, xzr
|
||||
ret
|
||||
|
||||
@ -143,6 +148,7 @@ _abi_test_clobber_x0:
|
||||
.private_extern _abi_test_clobber_x1
|
||||
.align 4
|
||||
_abi_test_clobber_x1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x1, xzr
|
||||
ret
|
||||
|
||||
@ -151,6 +157,7 @@ _abi_test_clobber_x1:
|
||||
.private_extern _abi_test_clobber_x2
|
||||
.align 4
|
||||
_abi_test_clobber_x2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x2, xzr
|
||||
ret
|
||||
|
||||
@ -159,6 +166,7 @@ _abi_test_clobber_x2:
|
||||
.private_extern _abi_test_clobber_x3
|
||||
.align 4
|
||||
_abi_test_clobber_x3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x3, xzr
|
||||
ret
|
||||
|
||||
@ -167,6 +175,7 @@ _abi_test_clobber_x3:
|
||||
.private_extern _abi_test_clobber_x4
|
||||
.align 4
|
||||
_abi_test_clobber_x4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x4, xzr
|
||||
ret
|
||||
|
||||
@ -175,6 +184,7 @@ _abi_test_clobber_x4:
|
||||
.private_extern _abi_test_clobber_x5
|
||||
.align 4
|
||||
_abi_test_clobber_x5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x5, xzr
|
||||
ret
|
||||
|
||||
@ -183,6 +193,7 @@ _abi_test_clobber_x5:
|
||||
.private_extern _abi_test_clobber_x6
|
||||
.align 4
|
||||
_abi_test_clobber_x6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x6, xzr
|
||||
ret
|
||||
|
||||
@ -191,6 +202,7 @@ _abi_test_clobber_x6:
|
||||
.private_extern _abi_test_clobber_x7
|
||||
.align 4
|
||||
_abi_test_clobber_x7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x7, xzr
|
||||
ret
|
||||
|
||||
@ -199,6 +211,7 @@ _abi_test_clobber_x7:
|
||||
.private_extern _abi_test_clobber_x8
|
||||
.align 4
|
||||
_abi_test_clobber_x8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x8, xzr
|
||||
ret
|
||||
|
||||
@ -207,6 +220,7 @@ _abi_test_clobber_x8:
|
||||
.private_extern _abi_test_clobber_x9
|
||||
.align 4
|
||||
_abi_test_clobber_x9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x9, xzr
|
||||
ret
|
||||
|
||||
@ -215,6 +229,7 @@ _abi_test_clobber_x9:
|
||||
.private_extern _abi_test_clobber_x10
|
||||
.align 4
|
||||
_abi_test_clobber_x10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x10, xzr
|
||||
ret
|
||||
|
||||
@ -223,6 +238,7 @@ _abi_test_clobber_x10:
|
||||
.private_extern _abi_test_clobber_x11
|
||||
.align 4
|
||||
_abi_test_clobber_x11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x11, xzr
|
||||
ret
|
||||
|
||||
@ -231,6 +247,7 @@ _abi_test_clobber_x11:
|
||||
.private_extern _abi_test_clobber_x12
|
||||
.align 4
|
||||
_abi_test_clobber_x12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x12, xzr
|
||||
ret
|
||||
|
||||
@ -239,6 +256,7 @@ _abi_test_clobber_x12:
|
||||
.private_extern _abi_test_clobber_x13
|
||||
.align 4
|
||||
_abi_test_clobber_x13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x13, xzr
|
||||
ret
|
||||
|
||||
@ -247,6 +265,7 @@ _abi_test_clobber_x13:
|
||||
.private_extern _abi_test_clobber_x14
|
||||
.align 4
|
||||
_abi_test_clobber_x14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x14, xzr
|
||||
ret
|
||||
|
||||
@ -255,6 +274,7 @@ _abi_test_clobber_x14:
|
||||
.private_extern _abi_test_clobber_x15
|
||||
.align 4
|
||||
_abi_test_clobber_x15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x15, xzr
|
||||
ret
|
||||
|
||||
@ -263,6 +283,7 @@ _abi_test_clobber_x15:
|
||||
.private_extern _abi_test_clobber_x16
|
||||
.align 4
|
||||
_abi_test_clobber_x16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x16, xzr
|
||||
ret
|
||||
|
||||
@ -271,6 +292,7 @@ _abi_test_clobber_x16:
|
||||
.private_extern _abi_test_clobber_x17
|
||||
.align 4
|
||||
_abi_test_clobber_x17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x17, xzr
|
||||
ret
|
||||
|
||||
@ -279,6 +301,7 @@ _abi_test_clobber_x17:
|
||||
.private_extern _abi_test_clobber_x19
|
||||
.align 4
|
||||
_abi_test_clobber_x19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x19, xzr
|
||||
ret
|
||||
|
||||
@ -287,6 +310,7 @@ _abi_test_clobber_x19:
|
||||
.private_extern _abi_test_clobber_x20
|
||||
.align 4
|
||||
_abi_test_clobber_x20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x20, xzr
|
||||
ret
|
||||
|
||||
@ -295,6 +319,7 @@ _abi_test_clobber_x20:
|
||||
.private_extern _abi_test_clobber_x21
|
||||
.align 4
|
||||
_abi_test_clobber_x21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x21, xzr
|
||||
ret
|
||||
|
||||
@ -303,6 +328,7 @@ _abi_test_clobber_x21:
|
||||
.private_extern _abi_test_clobber_x22
|
||||
.align 4
|
||||
_abi_test_clobber_x22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x22, xzr
|
||||
ret
|
||||
|
||||
@ -311,6 +337,7 @@ _abi_test_clobber_x22:
|
||||
.private_extern _abi_test_clobber_x23
|
||||
.align 4
|
||||
_abi_test_clobber_x23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x23, xzr
|
||||
ret
|
||||
|
||||
@ -319,6 +346,7 @@ _abi_test_clobber_x23:
|
||||
.private_extern _abi_test_clobber_x24
|
||||
.align 4
|
||||
_abi_test_clobber_x24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x24, xzr
|
||||
ret
|
||||
|
||||
@ -327,6 +355,7 @@ _abi_test_clobber_x24:
|
||||
.private_extern _abi_test_clobber_x25
|
||||
.align 4
|
||||
_abi_test_clobber_x25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x25, xzr
|
||||
ret
|
||||
|
||||
@ -335,6 +364,7 @@ _abi_test_clobber_x25:
|
||||
.private_extern _abi_test_clobber_x26
|
||||
.align 4
|
||||
_abi_test_clobber_x26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x26, xzr
|
||||
ret
|
||||
|
||||
@ -343,6 +373,7 @@ _abi_test_clobber_x26:
|
||||
.private_extern _abi_test_clobber_x27
|
||||
.align 4
|
||||
_abi_test_clobber_x27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x27, xzr
|
||||
ret
|
||||
|
||||
@ -351,6 +382,7 @@ _abi_test_clobber_x27:
|
||||
.private_extern _abi_test_clobber_x28
|
||||
.align 4
|
||||
_abi_test_clobber_x28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x28, xzr
|
||||
ret
|
||||
|
||||
@ -359,6 +391,7 @@ _abi_test_clobber_x28:
|
||||
.private_extern _abi_test_clobber_x29
|
||||
.align 4
|
||||
_abi_test_clobber_x29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x29, xzr
|
||||
ret
|
||||
|
||||
@ -367,6 +400,7 @@ _abi_test_clobber_x29:
|
||||
.private_extern _abi_test_clobber_d0
|
||||
.align 4
|
||||
_abi_test_clobber_d0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d0, xzr
|
||||
ret
|
||||
|
||||
@ -375,6 +409,7 @@ _abi_test_clobber_d0:
|
||||
.private_extern _abi_test_clobber_d1
|
||||
.align 4
|
||||
_abi_test_clobber_d1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d1, xzr
|
||||
ret
|
||||
|
||||
@ -383,6 +418,7 @@ _abi_test_clobber_d1:
|
||||
.private_extern _abi_test_clobber_d2
|
||||
.align 4
|
||||
_abi_test_clobber_d2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d2, xzr
|
||||
ret
|
||||
|
||||
@ -391,6 +427,7 @@ _abi_test_clobber_d2:
|
||||
.private_extern _abi_test_clobber_d3
|
||||
.align 4
|
||||
_abi_test_clobber_d3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d3, xzr
|
||||
ret
|
||||
|
||||
@ -399,6 +436,7 @@ _abi_test_clobber_d3:
|
||||
.private_extern _abi_test_clobber_d4
|
||||
.align 4
|
||||
_abi_test_clobber_d4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d4, xzr
|
||||
ret
|
||||
|
||||
@ -407,6 +445,7 @@ _abi_test_clobber_d4:
|
||||
.private_extern _abi_test_clobber_d5
|
||||
.align 4
|
||||
_abi_test_clobber_d5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d5, xzr
|
||||
ret
|
||||
|
||||
@ -415,6 +454,7 @@ _abi_test_clobber_d5:
|
||||
.private_extern _abi_test_clobber_d6
|
||||
.align 4
|
||||
_abi_test_clobber_d6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d6, xzr
|
||||
ret
|
||||
|
||||
@ -423,6 +463,7 @@ _abi_test_clobber_d6:
|
||||
.private_extern _abi_test_clobber_d7
|
||||
.align 4
|
||||
_abi_test_clobber_d7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d7, xzr
|
||||
ret
|
||||
|
||||
@ -431,6 +472,7 @@ _abi_test_clobber_d7:
|
||||
.private_extern _abi_test_clobber_d8
|
||||
.align 4
|
||||
_abi_test_clobber_d8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d8, xzr
|
||||
ret
|
||||
|
||||
@ -439,6 +481,7 @@ _abi_test_clobber_d8:
|
||||
.private_extern _abi_test_clobber_d9
|
||||
.align 4
|
||||
_abi_test_clobber_d9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d9, xzr
|
||||
ret
|
||||
|
||||
@ -447,6 +490,7 @@ _abi_test_clobber_d9:
|
||||
.private_extern _abi_test_clobber_d10
|
||||
.align 4
|
||||
_abi_test_clobber_d10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d10, xzr
|
||||
ret
|
||||
|
||||
@ -455,6 +499,7 @@ _abi_test_clobber_d10:
|
||||
.private_extern _abi_test_clobber_d11
|
||||
.align 4
|
||||
_abi_test_clobber_d11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d11, xzr
|
||||
ret
|
||||
|
||||
@ -463,6 +508,7 @@ _abi_test_clobber_d11:
|
||||
.private_extern _abi_test_clobber_d12
|
||||
.align 4
|
||||
_abi_test_clobber_d12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d12, xzr
|
||||
ret
|
||||
|
||||
@ -471,6 +517,7 @@ _abi_test_clobber_d12:
|
||||
.private_extern _abi_test_clobber_d13
|
||||
.align 4
|
||||
_abi_test_clobber_d13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d13, xzr
|
||||
ret
|
||||
|
||||
@ -479,6 +526,7 @@ _abi_test_clobber_d13:
|
||||
.private_extern _abi_test_clobber_d14
|
||||
.align 4
|
||||
_abi_test_clobber_d14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d14, xzr
|
||||
ret
|
||||
|
||||
@ -487,6 +535,7 @@ _abi_test_clobber_d14:
|
||||
.private_extern _abi_test_clobber_d15
|
||||
.align 4
|
||||
_abi_test_clobber_d15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d15, xzr
|
||||
ret
|
||||
|
||||
@ -495,6 +544,7 @@ _abi_test_clobber_d15:
|
||||
.private_extern _abi_test_clobber_d16
|
||||
.align 4
|
||||
_abi_test_clobber_d16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d16, xzr
|
||||
ret
|
||||
|
||||
@ -503,6 +553,7 @@ _abi_test_clobber_d16:
|
||||
.private_extern _abi_test_clobber_d17
|
||||
.align 4
|
||||
_abi_test_clobber_d17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d17, xzr
|
||||
ret
|
||||
|
||||
@ -511,6 +562,7 @@ _abi_test_clobber_d17:
|
||||
.private_extern _abi_test_clobber_d18
|
||||
.align 4
|
||||
_abi_test_clobber_d18:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d18, xzr
|
||||
ret
|
||||
|
||||
@ -519,6 +571,7 @@ _abi_test_clobber_d18:
|
||||
.private_extern _abi_test_clobber_d19
|
||||
.align 4
|
||||
_abi_test_clobber_d19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d19, xzr
|
||||
ret
|
||||
|
||||
@ -527,6 +580,7 @@ _abi_test_clobber_d19:
|
||||
.private_extern _abi_test_clobber_d20
|
||||
.align 4
|
||||
_abi_test_clobber_d20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d20, xzr
|
||||
ret
|
||||
|
||||
@ -535,6 +589,7 @@ _abi_test_clobber_d20:
|
||||
.private_extern _abi_test_clobber_d21
|
||||
.align 4
|
||||
_abi_test_clobber_d21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d21, xzr
|
||||
ret
|
||||
|
||||
@ -543,6 +598,7 @@ _abi_test_clobber_d21:
|
||||
.private_extern _abi_test_clobber_d22
|
||||
.align 4
|
||||
_abi_test_clobber_d22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d22, xzr
|
||||
ret
|
||||
|
||||
@ -551,6 +607,7 @@ _abi_test_clobber_d22:
|
||||
.private_extern _abi_test_clobber_d23
|
||||
.align 4
|
||||
_abi_test_clobber_d23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d23, xzr
|
||||
ret
|
||||
|
||||
@ -559,6 +616,7 @@ _abi_test_clobber_d23:
|
||||
.private_extern _abi_test_clobber_d24
|
||||
.align 4
|
||||
_abi_test_clobber_d24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d24, xzr
|
||||
ret
|
||||
|
||||
@ -567,6 +625,7 @@ _abi_test_clobber_d24:
|
||||
.private_extern _abi_test_clobber_d25
|
||||
.align 4
|
||||
_abi_test_clobber_d25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d25, xzr
|
||||
ret
|
||||
|
||||
@ -575,6 +634,7 @@ _abi_test_clobber_d25:
|
||||
.private_extern _abi_test_clobber_d26
|
||||
.align 4
|
||||
_abi_test_clobber_d26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d26, xzr
|
||||
ret
|
||||
|
||||
@ -583,6 +643,7 @@ _abi_test_clobber_d26:
|
||||
.private_extern _abi_test_clobber_d27
|
||||
.align 4
|
||||
_abi_test_clobber_d27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d27, xzr
|
||||
ret
|
||||
|
||||
@ -591,6 +652,7 @@ _abi_test_clobber_d27:
|
||||
.private_extern _abi_test_clobber_d28
|
||||
.align 4
|
||||
_abi_test_clobber_d28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d28, xzr
|
||||
ret
|
||||
|
||||
@ -599,6 +661,7 @@ _abi_test_clobber_d28:
|
||||
.private_extern _abi_test_clobber_d29
|
||||
.align 4
|
||||
_abi_test_clobber_d29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d29, xzr
|
||||
ret
|
||||
|
||||
@ -607,6 +670,7 @@ _abi_test_clobber_d29:
|
||||
.private_extern _abi_test_clobber_d30
|
||||
.align 4
|
||||
_abi_test_clobber_d30:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d30, xzr
|
||||
ret
|
||||
|
||||
@ -615,6 +679,7 @@ _abi_test_clobber_d30:
|
||||
.private_extern _abi_test_clobber_d31
|
||||
.align 4
|
||||
_abi_test_clobber_d31:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d31, xzr
|
||||
ret
|
||||
|
||||
@ -623,6 +688,7 @@ _abi_test_clobber_d31:
|
||||
.private_extern _abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v8_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -631,6 +697,7 @@ _abi_test_clobber_v8_upper:
|
||||
.private_extern _abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v9_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -639,6 +706,7 @@ _abi_test_clobber_v9_upper:
|
||||
.private_extern _abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v10_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -647,6 +715,7 @@ _abi_test_clobber_v10_upper:
|
||||
.private_extern _abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v11_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -655,6 +724,7 @@ _abi_test_clobber_v11_upper:
|
||||
.private_extern _abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v12_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -663,6 +733,7 @@ _abi_test_clobber_v12_upper:
|
||||
.private_extern _abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v13_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -671,6 +742,7 @@ _abi_test_clobber_v13_upper:
|
||||
.private_extern _abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v14_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
|
||||
@ -679,6 +751,7 @@ _abi_test_clobber_v14_upper:
|
||||
.private_extern _abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v15_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
|
||||
|
@ -1,996 +0,0 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.section __TEXT,__const
|
||||
|
||||
# p434 x 2
|
||||
Lp434x2:
|
||||
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
|
||||
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
|
||||
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
|
||||
|
||||
# p434 + 1
|
||||
Lp434p1:
|
||||
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
|
||||
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
|
||||
|
||||
.text
|
||||
.globl _sike_mpmul
|
||||
.private_extern _sike_mpmul
|
||||
.align 4
|
||||
_sike_mpmul:
|
||||
stp x29, x30, [sp,#-96]!
|
||||
add x29, sp, #0
|
||||
stp x19, x20, [sp,#16]
|
||||
stp x21, x22, [sp,#32]
|
||||
stp x23, x24, [sp,#48]
|
||||
stp x25, x26, [sp,#64]
|
||||
stp x27, x28, [sp,#80]
|
||||
|
||||
ldp x3, x4, [x0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x10, x11, [x1,#0]
|
||||
ldp x12, x13, [x1,#16]
|
||||
ldp x14, x15, [x1,#32]
|
||||
ldr x16, [x1,#48]
|
||||
|
||||
// x3-x7 <- AH + AL, x7 <- carry
|
||||
adds x3, x3, x7
|
||||
adcs x4, x4, x8
|
||||
adcs x5, x5, x9
|
||||
adcs x6, x6, xzr
|
||||
adc x7, xzr, xzr
|
||||
|
||||
// x10-x13 <- BH + BL, x8 <- carry
|
||||
adds x10, x10, x14
|
||||
adcs x11, x11, x15
|
||||
adcs x12, x12, x16
|
||||
adcs x13, x13, xzr
|
||||
adc x8, xzr, xzr
|
||||
|
||||
// x9 <- combined carry
|
||||
and x9, x7, x8
|
||||
// x7-x8 <- mask
|
||||
sub x7, xzr, x7
|
||||
sub x8, xzr, x8
|
||||
|
||||
// x15-x19 <- masked (BH + BL)
|
||||
and x14, x10, x7
|
||||
and x15, x11, x7
|
||||
and x16, x12, x7
|
||||
and x17, x13, x7
|
||||
|
||||
// x20-x23 <- masked (AH + AL)
|
||||
and x20, x3, x8
|
||||
and x21, x4, x8
|
||||
and x22, x5, x8
|
||||
and x23, x6, x8
|
||||
|
||||
// x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
|
||||
adds x14, x14, x20
|
||||
adcs x15, x15, x21
|
||||
adcs x16, x16, x22
|
||||
adcs x17, x17, x23
|
||||
adc x7, x9, xzr
|
||||
|
||||
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
|
||||
stp x3, x4, [x2,#0]
|
||||
// A0-A1 <- AH + AL, T0 <- mask
|
||||
adds x3, x3, x5
|
||||
adcs x4, x4, x6
|
||||
adc x25, xzr, xzr
|
||||
|
||||
// C6, T1 <- BH + BL, C7 <- mask
|
||||
adds x23, x10, x12
|
||||
adcs x26, x11, x13
|
||||
adc x24, xzr, xzr
|
||||
|
||||
// C0-C1 <- masked (BH + BL)
|
||||
sub x19, xzr, x25
|
||||
sub x20, xzr, x24
|
||||
and x8, x23, x19
|
||||
and x9, x26, x19
|
||||
|
||||
// C4-C5 <- masked (AH + AL), T0 <- combined carry
|
||||
and x21, x3, x20
|
||||
and x22, x4, x20
|
||||
mul x19, x3, x23
|
||||
mul x20, x3, x26
|
||||
and x25, x25, x24
|
||||
|
||||
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
|
||||
adds x8, x21, x8
|
||||
umulh x21, x3, x26
|
||||
adcs x9, x22, x9
|
||||
umulh x22, x3, x23
|
||||
adc x25, x25, xzr
|
||||
|
||||
// C2-C5 <- (AH+AL) x (BH+BL), low part
|
||||
mul x3, x4, x23
|
||||
umulh x23, x4, x23
|
||||
adds x20, x20, x22
|
||||
adc x21, x21, xzr
|
||||
|
||||
mul x24, x4, x26
|
||||
umulh x26, x4, x26
|
||||
adds x20, x20, x3
|
||||
adcs x21, x21, x23
|
||||
adc x22, xzr, xzr
|
||||
|
||||
adds x21, x21, x24
|
||||
adc x22, x22, x26
|
||||
|
||||
ldp x3, x4, [x2,#0]
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
|
||||
adds x21, x8, x21
|
||||
umulh x24, x3, x10
|
||||
umulh x26, x3, x11
|
||||
adcs x22, x9, x22
|
||||
mul x8, x3, x10
|
||||
mul x9, x3, x11
|
||||
adc x25, x25, xzr
|
||||
|
||||
// C0-C1, T1, C7 <- AL x BL
|
||||
mul x3, x4, x10
|
||||
umulh x10, x4, x10
|
||||
adds x9, x9, x24
|
||||
adc x26, x26, xzr
|
||||
|
||||
mul x23, x4, x11
|
||||
umulh x11, x4, x11
|
||||
adds x9, x9, x3
|
||||
adcs x26, x26, x10
|
||||
adc x24, xzr, xzr
|
||||
|
||||
adds x26, x26, x23
|
||||
adc x24, x24, x11
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
|
||||
mul x3, x5, x12
|
||||
umulh x10, x5, x12
|
||||
subs x19, x19, x8
|
||||
sbcs x20, x20, x9
|
||||
sbcs x21, x21, x26
|
||||
mul x4, x5, x13
|
||||
umulh x23, x5, x13
|
||||
sbcs x22, x22, x24
|
||||
sbc x25, x25, xzr
|
||||
|
||||
// A0, A1, C6, B0 <- AH x BH
|
||||
mul x5, x6, x12
|
||||
umulh x12, x6, x12
|
||||
adds x4, x4, x10
|
||||
adc x23, x23, xzr
|
||||
|
||||
mul x11, x6, x13
|
||||
umulh x13, x6, x13
|
||||
adds x4, x4, x5
|
||||
adcs x23, x23, x12
|
||||
adc x10, xzr, xzr
|
||||
|
||||
adds x23, x23, x11
|
||||
adc x10, x10, x13
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x19, x19, x3
|
||||
sbcs x20, x20, x4
|
||||
sbcs x21, x21, x23
|
||||
sbcs x22, x22, x10
|
||||
sbc x25, x25, xzr
|
||||
|
||||
adds x19, x19, x26
|
||||
adcs x20, x20, x24
|
||||
adcs x21, x21, x3
|
||||
adcs x22, x22, x4
|
||||
adcs x23, x25, x23
|
||||
adc x24, x10, xzr
|
||||
|
||||
|
||||
// x15-x19, x7 <- (AH+AL) x (BH+BL), final step
|
||||
adds x14, x14, x21
|
||||
adcs x15, x15, x22
|
||||
adcs x16, x16, x23
|
||||
adcs x17, x17, x24
|
||||
adc x7, x7, xzr
|
||||
|
||||
// Load AL
|
||||
ldp x3, x4, [x0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
// Load BL
|
||||
ldp x10, x11, [x1,#0]
|
||||
ldp x12, x13, [x1,#16]
|
||||
|
||||
// Temporarily store x8 in x2
|
||||
stp x8, x9, [x2,#0]
|
||||
// x21-x28 <- AL x BL
|
||||
// A0-A1 <- AH + AL, T0 <- mask
|
||||
adds x3, x3, x5
|
||||
adcs x4, x4, x6
|
||||
adc x8, xzr, xzr
|
||||
|
||||
// C6, T1 <- BH + BL, C7 <- mask
|
||||
adds x27, x10, x12
|
||||
adcs x9, x11, x13
|
||||
adc x28, xzr, xzr
|
||||
|
||||
// C0-C1 <- masked (BH + BL)
|
||||
sub x23, xzr, x8
|
||||
sub x24, xzr, x28
|
||||
and x21, x27, x23
|
||||
and x22, x9, x23
|
||||
|
||||
// C4-C5 <- masked (AH + AL), T0 <- combined carry
|
||||
and x25, x3, x24
|
||||
and x26, x4, x24
|
||||
mul x23, x3, x27
|
||||
mul x24, x3, x9
|
||||
and x8, x8, x28
|
||||
|
||||
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
|
||||
adds x21, x25, x21
|
||||
umulh x25, x3, x9
|
||||
adcs x22, x26, x22
|
||||
umulh x26, x3, x27
|
||||
adc x8, x8, xzr
|
||||
|
||||
// C2-C5 <- (AH+AL) x (BH+BL), low part
|
||||
mul x3, x4, x27
|
||||
umulh x27, x4, x27
|
||||
adds x24, x24, x26
|
||||
adc x25, x25, xzr
|
||||
|
||||
mul x28, x4, x9
|
||||
umulh x9, x4, x9
|
||||
adds x24, x24, x3
|
||||
adcs x25, x25, x27
|
||||
adc x26, xzr, xzr
|
||||
|
||||
adds x25, x25, x28
|
||||
adc x26, x26, x9
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
|
||||
adds x25, x21, x25
|
||||
umulh x28, x3, x10
|
||||
umulh x9, x3, x11
|
||||
adcs x26, x22, x26
|
||||
mul x21, x3, x10
|
||||
mul x22, x3, x11
|
||||
adc x8, x8, xzr
|
||||
|
||||
// C0-C1, T1, C7 <- AL x BL
|
||||
mul x3, x4, x10
|
||||
umulh x10, x4, x10
|
||||
adds x22, x22, x28
|
||||
adc x9, x9, xzr
|
||||
|
||||
mul x27, x4, x11
|
||||
umulh x11, x4, x11
|
||||
adds x22, x22, x3
|
||||
adcs x9, x9, x10
|
||||
adc x28, xzr, xzr
|
||||
|
||||
adds x9, x9, x27
|
||||
adc x28, x28, x11
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
|
||||
mul x3, x5, x12
|
||||
umulh x10, x5, x12
|
||||
subs x23, x23, x21
|
||||
sbcs x24, x24, x22
|
||||
sbcs x25, x25, x9
|
||||
mul x4, x5, x13
|
||||
umulh x27, x5, x13
|
||||
sbcs x26, x26, x28
|
||||
sbc x8, x8, xzr
|
||||
|
||||
// A0, A1, C6, B0 <- AH x BH
|
||||
mul x5, x6, x12
|
||||
umulh x12, x6, x12
|
||||
adds x4, x4, x10
|
||||
adc x27, x27, xzr
|
||||
|
||||
mul x11, x6, x13
|
||||
umulh x13, x6, x13
|
||||
adds x4, x4, x5
|
||||
adcs x27, x27, x12
|
||||
adc x10, xzr, xzr
|
||||
|
||||
adds x27, x27, x11
|
||||
adc x10, x10, x13
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x23, x23, x3
|
||||
sbcs x24, x24, x4
|
||||
sbcs x25, x25, x27
|
||||
sbcs x26, x26, x10
|
||||
sbc x8, x8, xzr
|
||||
|
||||
adds x23, x23, x9
|
||||
adcs x24, x24, x28
|
||||
adcs x25, x25, x3
|
||||
adcs x26, x26, x4
|
||||
adcs x27, x8, x27
|
||||
adc x28, x10, xzr
|
||||
|
||||
// Restore x8
|
||||
ldp x8, x9, [x2,#0]
|
||||
|
||||
// x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL
|
||||
subs x8, x8, x21
|
||||
sbcs x9, x9, x22
|
||||
sbcs x19, x19, x23
|
||||
sbcs x20, x20, x24
|
||||
sbcs x14, x14, x25
|
||||
sbcs x15, x15, x26
|
||||
sbcs x16, x16, x27
|
||||
sbcs x17, x17, x28
|
||||
sbc x7, x7, xzr
|
||||
|
||||
// Store ALxBL, low
|
||||
stp x21, x22, [x2]
|
||||
stp x23, x24, [x2,#16]
|
||||
|
||||
// Load AH
|
||||
ldp x3, x4, [x0,#32]
|
||||
ldr x5, [x0,#48]
|
||||
// Load BH
|
||||
ldp x10, x11, [x1,#32]
|
||||
ldr x12, [x1,#48]
|
||||
|
||||
adds x8, x8, x25
|
||||
adcs x9, x9, x26
|
||||
adcs x19, x19, x27
|
||||
adcs x20, x20, x28
|
||||
adc x1, xzr, xzr
|
||||
|
||||
add x0, x0, #32
|
||||
// Temporarily store x8,x9 in x2
|
||||
stp x8,x9, [x2,#32]
|
||||
// x21-x28 <- AH x BH
|
||||
|
||||
// A0 * B0
|
||||
mul x21, x3, x10 // C0
|
||||
umulh x24, x3, x10
|
||||
|
||||
// A0 * B1
|
||||
mul x22, x3, x11
|
||||
umulh x23, x3, x11
|
||||
|
||||
// A1 * B0
|
||||
mul x8, x4, x10
|
||||
umulh x9, x4, x10
|
||||
adds x22, x22, x24
|
||||
adc x23, x23, xzr
|
||||
|
||||
// A0 * B2
|
||||
mul x27, x3, x12
|
||||
umulh x28, x3, x12
|
||||
adds x22, x22, x8 // C1
|
||||
adcs x23, x23, x9
|
||||
adc x24, xzr, xzr
|
||||
|
||||
// A2 * B0
|
||||
mul x8, x5, x10
|
||||
umulh x25, x5, x10
|
||||
adds x23, x23, x27
|
||||
adcs x24, x24, x25
|
||||
adc x25, xzr, xzr
|
||||
|
||||
// A1 * B1
|
||||
mul x27, x4, x11
|
||||
umulh x9, x4, x11
|
||||
adds x23, x23, x8
|
||||
adcs x24, x24, x28
|
||||
adc x25, x25, xzr
|
||||
|
||||
// A1 * B2
|
||||
mul x8, x4, x12
|
||||
umulh x28, x4, x12
|
||||
adds x23, x23, x27 // C2
|
||||
adcs x24, x24, x9
|
||||
adc x25, x25, xzr
|
||||
|
||||
// A2 * B1
|
||||
mul x27, x5, x11
|
||||
umulh x9, x5, x11
|
||||
adds x24, x24, x8
|
||||
adcs x25, x25, x28
|
||||
adc x26, xzr, xzr
|
||||
|
||||
// A2 * B2
|
||||
mul x8, x5, x12
|
||||
umulh x28, x5, x12
|
||||
adds x24, x24, x27 // C3
|
||||
adcs x25, x25, x9
|
||||
adc x26, x26, xzr
|
||||
|
||||
adds x25, x25, x8 // C4
|
||||
adc x26, x26, x28 // C5
|
||||
|
||||
// Restore x8,x9
|
||||
ldp x8,x9, [x2,#32]
|
||||
|
||||
neg x1, x1
|
||||
|
||||
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x8, x8, x21
|
||||
sbcs x9, x9, x22
|
||||
sbcs x19, x19, x23
|
||||
sbcs x20, x20, x24
|
||||
sbcs x14, x14, x25
|
||||
sbcs x15, x15, x26
|
||||
sbcs x16, x16, xzr
|
||||
sbcs x17, x17, xzr
|
||||
sbc x7, x7, xzr
|
||||
|
||||
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
|
||||
stp x8, x9, [x2,#32]
|
||||
stp x19, x20, [x2,#48]
|
||||
|
||||
adds x1, x1, #1
|
||||
adcs x14, x14, x21
|
||||
adcs x15, x15, x22
|
||||
adcs x16, x16, x23
|
||||
adcs x17, x17, x24
|
||||
adcs x25, x7, x25
|
||||
adc x26, x26, xzr
|
||||
|
||||
stp x14, x15, [x2,#64]
|
||||
stp x16, x17, [x2,#80]
|
||||
stp x25, x26, [x2,#96]
|
||||
|
||||
ldp x19, x20, [x29,#16]
|
||||
ldp x21, x22, [x29,#32]
|
||||
ldp x23, x24, [x29,#48]
|
||||
ldp x25, x26, [x29,#64]
|
||||
ldp x27, x28, [x29,#80]
|
||||
ldp x29, x30, [sp],#96
|
||||
ret
|
||||
.globl _sike_fprdc
|
||||
.private_extern _sike_fprdc
|
||||
.align 4
|
||||
_sike_fprdc:
|
||||
stp x29, x30, [sp, #-96]!
|
||||
add x29, sp, xzr
|
||||
stp x19, x20, [sp,#16]
|
||||
stp x21, x22, [sp,#32]
|
||||
stp x23, x24, [sp,#48]
|
||||
stp x25, x26, [sp,#64]
|
||||
stp x27, x28, [sp,#80]
|
||||
|
||||
ldp x2, x3, [x0,#0] // a[0-1]
|
||||
|
||||
// Load the prime constant
|
||||
adrp x26, Lp434p1@PAGE
|
||||
add x26, x26, Lp434p1@PAGEOFF
|
||||
ldp x23, x24, [x26, #0x0]
|
||||
ldp x25, x26, [x26,#0x10]
|
||||
|
||||
// a[0-1] * p434+1
|
||||
mul x4, x2, x23 // C0
|
||||
umulh x7, x2, x23
|
||||
|
||||
mul x5, x2, x24
|
||||
umulh x6, x2, x24
|
||||
|
||||
mul x10, x3, x23
|
||||
umulh x11, x3, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x2, x25
|
||||
umulh x28, x2, x25
|
||||
adds x5, x5, x10 // C1
|
||||
adcs x6, x6, x11
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x10, x3, x24
|
||||
umulh x11, x3, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x2, x26
|
||||
umulh x28, x2, x26
|
||||
adds x6, x6, x10 // C2
|
||||
adcs x7, x7, x11
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x10, x3, x25
|
||||
umulh x11, x3, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x3, x26
|
||||
umulh x28, x3, x26
|
||||
adds x7, x7, x10 // C3
|
||||
adcs x8, x8, x11
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
|
||||
ldp x10, x11, [x0, #0x18]
|
||||
ldp x12, x13, [x0, #0x28]
|
||||
ldp x14, x15, [x0, #0x38]
|
||||
ldp x16, x17, [x0, #0x48]
|
||||
ldp x19, x20, [x0, #0x58]
|
||||
ldr x21, [x0, #0x68]
|
||||
|
||||
adds x10, x10, x4
|
||||
adcs x11, x11, x5
|
||||
adcs x12, x12, x6
|
||||
adcs x13, x13, x7
|
||||
adcs x14, x14, x8
|
||||
adcs x15, x15, x9
|
||||
adcs x22, x16, xzr
|
||||
adcs x17, x17, xzr
|
||||
adcs x19, x19, xzr
|
||||
adcs x20, x20, xzr
|
||||
adc x21, x21, xzr
|
||||
|
||||
ldr x2, [x0,#0x10] // a[2]
|
||||
// a[2-3] * p434+1
|
||||
mul x4, x2, x23 // C0
|
||||
umulh x7, x2, x23
|
||||
|
||||
mul x5, x2, x24
|
||||
umulh x6, x2, x24
|
||||
|
||||
mul x0, x10, x23
|
||||
umulh x3, x10, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x2, x25
|
||||
umulh x28, x2, x25
|
||||
adds x5, x5, x0 // C1
|
||||
adcs x6, x6, x3
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x0, x10, x24
|
||||
umulh x3, x10, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x2, x26
|
||||
umulh x28, x2, x26
|
||||
adds x6, x6, x0 // C2
|
||||
adcs x7, x7, x3
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x0, x10, x25
|
||||
umulh x3, x10, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x10, x26
|
||||
umulh x28, x10, x26
|
||||
adds x7, x7, x0 // C3
|
||||
adcs x8, x8, x3
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
|
||||
adds x12, x12, x4
|
||||
adcs x13, x13, x5
|
||||
adcs x14, x14, x6
|
||||
adcs x15, x15, x7
|
||||
adcs x16, x22, x8
|
||||
adcs x17, x17, x9
|
||||
adcs x22, x19, xzr
|
||||
adcs x20, x20, xzr
|
||||
adc x21, x21, xzr
|
||||
|
||||
mul x4, x11, x23 // C0
|
||||
umulh x7, x11, x23
|
||||
|
||||
mul x5, x11, x24
|
||||
umulh x6, x11, x24
|
||||
|
||||
mul x10, x12, x23
|
||||
umulh x3, x12, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x11, x25
|
||||
umulh x28, x11, x25
|
||||
adds x5, x5, x10 // C1
|
||||
adcs x6, x6, x3
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x10, x12, x24
|
||||
umulh x3, x12, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x11, x26
|
||||
umulh x28, x11, x26
|
||||
adds x6, x6, x10 // C2
|
||||
adcs x7, x7, x3
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x10, x12, x25
|
||||
umulh x3, x12, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x12, x26
|
||||
umulh x28, x12, x26
|
||||
adds x7, x7, x10 // C3
|
||||
adcs x8, x8, x3
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
adds x14, x14, x4
|
||||
adcs x15, x15, x5
|
||||
adcs x16, x16, x6
|
||||
adcs x17, x17, x7
|
||||
adcs x19, x22, x8
|
||||
adcs x20, x20, x9
|
||||
adc x22, x21, xzr
|
||||
|
||||
stp x14, x15, [x1, #0x0] // C0, C1
|
||||
|
||||
mul x4, x13, x23 // C0
|
||||
umulh x10, x13, x23
|
||||
|
||||
mul x5, x13, x24
|
||||
umulh x27, x13, x24
|
||||
adds x5, x5, x10 // C1
|
||||
adc x10, xzr, xzr
|
||||
|
||||
mul x6, x13, x25
|
||||
umulh x28, x13, x25
|
||||
adds x27, x10, x27
|
||||
adcs x6, x6, x27 // C2
|
||||
adc x10, xzr, xzr
|
||||
|
||||
mul x7, x13, x26
|
||||
umulh x8, x13, x26
|
||||
adds x28, x10, x28
|
||||
adcs x7, x7, x28 // C3
|
||||
adc x8, x8, xzr // C4
|
||||
|
||||
adds x16, x16, x4
|
||||
adcs x17, x17, x5
|
||||
adcs x19, x19, x6
|
||||
adcs x20, x20, x7
|
||||
adc x21, x22, x8
|
||||
|
||||
str x16, [x1, #0x10]
|
||||
stp x17, x19, [x1, #0x18]
|
||||
stp x20, x21, [x1, #0x28]
|
||||
|
||||
ldp x19, x20, [x29,#16]
|
||||
ldp x21, x22, [x29,#32]
|
||||
ldp x23, x24, [x29,#48]
|
||||
ldp x25, x26, [x29,#64]
|
||||
ldp x27, x28, [x29,#80]
|
||||
ldp x29, x30, [sp],#96
|
||||
ret
|
||||
.globl _sike_fpadd
|
||||
.private_extern _sike_fpadd
|
||||
.align 4
|
||||
_sike_fpadd:
|
||||
stp x29,x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Add a + b
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
// Subtract 2xp434
|
||||
adrp x17, Lp434x2@PAGE
|
||||
add x17, x17, Lp434x2@PAGEOFF
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x12
|
||||
sbcs x6, x6, x13
|
||||
sbcs x7, x7, x14
|
||||
sbcs x8, x8, x15
|
||||
sbcs x9, x9, x16
|
||||
sbc x0, xzr, xzr // x0 can be reused now
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adc x9, x9, x16
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl _sike_fpsub
|
||||
.private_extern _sike_fpsub
|
||||
.align 4
|
||||
_sike_fpsub:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Subtract a - b
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
sbcs x9, x9, x17
|
||||
sbc x0, xzr, xzr
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
adrp x17, Lp434x2@PAGE
|
||||
add x17, x17, Lp434x2@PAGEOFF
|
||||
|
||||
// First half
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adc x9, x9, x16
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl _sike_mpadd_asm
|
||||
.private_extern _sike_mpadd_asm
|
||||
.align 4
|
||||
_sike_mpadd_asm:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl _sike_mpsubx2_asm
|
||||
.private_extern _sike_mpsubx2_asm
|
||||
.align 4
|
||||
_sike_mpsubx2_asm:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x11, x12, [x1,#32]
|
||||
ldp x13, x14, [x1,#48]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
sbcs x9, x9, x13
|
||||
sbcs x10, x10, x14
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
|
||||
ldp x3, x4, [x0,#64]
|
||||
ldp x5, x6, [x0,#80]
|
||||
ldp x11, x12, [x1,#64]
|
||||
ldp x13, x14, [x1,#80]
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#96]
|
||||
ldp x11, x12, [x1,#96]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
sbc x0, xzr, xzr
|
||||
|
||||
stp x3, x4, [x2,#64]
|
||||
stp x5, x6, [x2,#80]
|
||||
stp x7, x8, [x2,#96]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl _sike_mpdblsubx2_asm
|
||||
.private_extern _sike_mpdblsubx2_asm
|
||||
.align 4
|
||||
_sike_mpdblsubx2_asm:
|
||||
stp x29, x30, [sp, #-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x2, #0]
|
||||
ldp x5, x6, [x2,#16]
|
||||
ldp x7, x8, [x2,#32]
|
||||
|
||||
ldp x11, x12, [x0, #0]
|
||||
ldp x13, x14, [x0,#16]
|
||||
ldp x15, x16, [x0,#32]
|
||||
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
|
||||
// x9 stores carry
|
||||
adc x9, xzr, xzr
|
||||
|
||||
ldp x11, x12, [x1, #0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, x9, xzr
|
||||
|
||||
stp x3, x4, [x2, #0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
|
||||
ldp x3, x4, [x2,#48]
|
||||
ldp x5, x6, [x2,#64]
|
||||
ldp x7, x8, [x2,#80]
|
||||
|
||||
ldp x11, x12, [x0,#48]
|
||||
ldp x13, x14, [x0,#64]
|
||||
ldp x15, x16, [x0,#80]
|
||||
|
||||
// x9 = 2 - x9
|
||||
neg x9, x9
|
||||
add x9, x9, #2
|
||||
|
||||
subs x3, x3, x9
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, xzr, xzr
|
||||
|
||||
ldp x11, x12, [x1,#48]
|
||||
ldp x13, x14, [x1,#64]
|
||||
ldp x15, x16, [x1,#80]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, x9, xzr
|
||||
|
||||
stp x3, x4, [x2,#48]
|
||||
stp x5, x6, [x2,#64]
|
||||
stp x7, x8, [x2,#80]
|
||||
|
||||
ldp x3, x4, [x2,#96]
|
||||
ldp x11, x12, [x0,#96]
|
||||
ldp x13, x14, [x1,#96]
|
||||
|
||||
// x9 = 2 - x9
|
||||
neg x9, x9
|
||||
add x9, x9, #2
|
||||
|
||||
subs x3, x3, x9
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
subs x3, x3, x13
|
||||
sbc x4, x4, x14
|
||||
stp x3, x4, [x2,#96]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
#endif // !OPENSSL_NO_ASM
|
File diff suppressed because it is too large
Load Diff
@ -30,348 +30,6 @@
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
|
||||
.align 5
|
||||
rem_4bit:
|
||||
.short 0x0000,0x1C20,0x3840,0x2460
|
||||
.short 0x7080,0x6CA0,0x48C0,0x54E0
|
||||
.short 0xE100,0xFD20,0xD940,0xC560
|
||||
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func rem_4bit_get
|
||||
#endif
|
||||
rem_4bit_get:
|
||||
#if defined(__thumb2__)
|
||||
adr r2,rem_4bit
|
||||
#else
|
||||
sub r2,pc,#8+32 @ &rem_4bit
|
||||
#endif
|
||||
b Lrem_4bit_got
|
||||
nop
|
||||
nop
|
||||
|
||||
|
||||
.globl _gcm_ghash_4bit
|
||||
.private_extern _gcm_ghash_4bit
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_4bit
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_ghash_4bit:
|
||||
#if defined(__thumb2__)
|
||||
adr r12,rem_4bit
|
||||
#else
|
||||
sub r12,pc,#8+48 @ &rem_4bit
|
||||
#endif
|
||||
add r3,r2,r3 @ r3 to point at the end
|
||||
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
|
||||
|
||||
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
|
||||
|
||||
ldrb r12,[r2,#15]
|
||||
ldrb r14,[r0,#15]
|
||||
Louter:
|
||||
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
add r11,r1,r14
|
||||
ldrb r12,[r2,#14]
|
||||
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
add r14,r14,r14
|
||||
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[sp,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
ldrb r14,[r0,#14]
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16
|
||||
|
||||
Linner:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[sp,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r12,[r2,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r8,[r0,r3]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r9,[sp,r14]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
eorpl r12,r12,r8
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl Linner
|
||||
|
||||
ldr r3,[sp,#32] @ re-load r3/end
|
||||
add r2,r2,#16
|
||||
mov r14,r4
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
cmp r2,r3
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#ifdef __thumb2__
|
||||
it ne
|
||||
#endif
|
||||
ldrneb r12,[r2,#15]
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
bne Louter
|
||||
|
||||
add sp,sp,#36
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
|
||||
.globl _gcm_gmult_4bit
|
||||
.private_extern _gcm_gmult_4bit
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_4bit
|
||||
#endif
|
||||
_gcm_gmult_4bit:
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
ldrb r12,[r0,#15]
|
||||
b rem_4bit_get
|
||||
Lrem_4bit_got:
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
ldrb r12,[r0,#14]
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
add r14,r14,r14
|
||||
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
and r14,r12,#0xf0
|
||||
eor r7,r7,r8,lsl#16
|
||||
and r12,r12,#0x0f
|
||||
|
||||
Loop:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[r2,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r12,[r0,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl Loop
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
|
||||
|
||||
|
1265
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
1265
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -30,7 +30,6 @@
|
||||
.private_extern _abi_test_trampoline
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
Labi_test_trampoline_begin:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
@ -34,6 +34,7 @@
|
||||
.type ChaCha20_ctr32,%function
|
||||
.align 5
|
||||
ChaCha20_ctr32:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
cbz x2,.Labort
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x5,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
@ -47,6 +48,7 @@ ChaCha20_ctr32:
|
||||
b.ne ChaCha20_neon
|
||||
|
||||
.Lshort:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -259,6 +261,7 @@ ChaCha20_ctr32:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
.Labort:
|
||||
ret
|
||||
|
||||
@ -315,12 +318,14 @@ ChaCha20_ctr32:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size ChaCha20_ctr32,.-ChaCha20_ctr32
|
||||
|
||||
.type ChaCha20_neon,%function
|
||||
.align 5
|
||||
ChaCha20_neon:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -701,6 +706,7 @@ ChaCha20_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.Ltail_neon:
|
||||
@ -810,11 +816,13 @@ ChaCha20_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size ChaCha20_neon,.-ChaCha20_neon
|
||||
.type ChaCha20_512_neon,%function
|
||||
.align 5
|
||||
ChaCha20_512_neon:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-96]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1978,7 +1986,9 @@ ChaCha20_512_neon:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#96
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size ChaCha20_512_neon,.-ChaCha20_512_neon
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -33,6 +33,8 @@
|
||||
.align 5
|
||||
aes_hw_set_encrypt_key:
|
||||
.Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
mov x3,#-1
|
||||
@ -201,6 +203,7 @@ aes_hw_set_encrypt_key:
|
||||
.type aes_hw_set_decrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
bl .Lenc_key
|
||||
@ -234,6 +237,7 @@ aes_hw_set_decrypt_key:
|
||||
eor x0,x0,x0 // return value
|
||||
.Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
|
||||
.globl aes_hw_encrypt
|
||||
@ -241,6 +245,7 @@ aes_hw_set_decrypt_key:
|
||||
.type aes_hw_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@ -271,6 +276,7 @@ aes_hw_encrypt:
|
||||
.type aes_hw_decrypt,%function
|
||||
.align 5
|
||||
aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@ -301,6 +307,8 @@ aes_hw_decrypt:
|
||||
.type aes_hw_cbc_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
subs x2,x2,#16
|
||||
@ -592,6 +600,8 @@ aes_hw_cbc_encrypt:
|
||||
.type aes_hw_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
@ -772,3 +782,4 @@ aes_hw_ctr32_encrypt_blocks:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl bn_mul_mont
|
||||
@ -20,6 +22,7 @@
|
||||
.type bn_mul_mont,%function
|
||||
.align 5
|
||||
bn_mul_mont:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
tst x5,#7
|
||||
b.eq __bn_sqr8x_mont
|
||||
tst x5,#3
|
||||
@ -217,11 +220,14 @@ bn_mul_mont:
|
||||
mov x0,#1
|
||||
ldp x23,x24,[x29,#48]
|
||||
ldr x29,[sp],#64
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size bn_mul_mont,.-bn_mul_mont
|
||||
.type __bn_sqr8x_mont,%function
|
||||
.align 5
|
||||
__bn_sqr8x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
|
||||
// only from bn_mul_mont which has already signed the return address.
|
||||
cmp x1,x2
|
||||
b.ne __bn_mul4x_mont
|
||||
.Lsqr8x_mont:
|
||||
@ -975,11 +981,16 @@ __bn_sqr8x_mont:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
|
||||
.type __bn_mul4x_mont,%function
|
||||
.align 5
|
||||
__bn_mul4x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
|
||||
// only from bn_mul_mont or __bn_mul8x_mont which have already signed the
|
||||
// return address.
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
stp x19,x20,[sp,#16]
|
||||
@ -1413,6 +1424,8 @@ __bn_mul4x_mont:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size __bn_mul4x_mont,.-__bn_mul4x_mont
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
@ -1420,3 +1433,4 @@ __bn_mul4x_mont:
|
||||
.align 4
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl gcm_init_neon
|
||||
@ -20,6 +22,7 @@
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
@ -45,6 +48,7 @@ gcm_init_neon:
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
@ -64,6 +68,7 @@ gcm_gmult_neon:
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
@ -338,3 +343,4 @@ gcm_ghash_neon:
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -22,6 +22,7 @@
|
||||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
@ -73,6 +74,7 @@ gcm_init_v8:
|
||||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
@ -115,6 +117,7 @@ gcm_gmult_v8:
|
||||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
@ -246,3 +249,4 @@ gcm_ghash_v8:
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -23,6 +23,8 @@
|
||||
.type sha1_block_data_order,%function
|
||||
.align 6
|
||||
sha1_block_data_order:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
#else
|
||||
@ -1090,6 +1092,8 @@ sha1_block_data_order:
|
||||
.type sha1_block_armv8,%function
|
||||
.align 6
|
||||
sha1_block_armv8:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
.Lv8_entry:
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
@ -1232,3 +1236,4 @@ sha1_block_armv8:
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -64,6 +64,7 @@
|
||||
.type sha256_block_data_order,%function
|
||||
.align 6
|
||||
sha256_block_data_order:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#ifndef __KERNEL__
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
@ -74,6 +75,7 @@ sha256_block_data_order:
|
||||
tst w16,#ARMV8_SHA256
|
||||
b.ne .Lv8_entry
|
||||
#endif
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1034,6 +1036,7 @@ sha256_block_data_order:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size sha256_block_data_order,.-sha256_block_data_order
|
||||
|
||||
@ -1068,6 +1071,7 @@ sha256_block_data_order:
|
||||
.align 6
|
||||
sha256_block_armv8:
|
||||
.Lv8_entry:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1210,3 +1214,4 @@ sha256_block_armv8:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -64,6 +64,7 @@
|
||||
.type sha512_block_data_order,%function
|
||||
.align 6
|
||||
sha512_block_data_order:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -1024,6 +1025,7 @@ sha512_block_data_order:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size sha512_block_data_order,.-sha512_block_data_order
|
||||
|
||||
@ -1082,3 +1084,4 @@ sha512_block_data_order:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.section .rodata
|
||||
|
||||
.type _vpaes_consts,%object
|
||||
@ -215,6 +217,7 @@ _vpaes_encrypt_core:
|
||||
.type vpaes_encrypt,%function
|
||||
.align 4
|
||||
vpaes_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -224,6 +227,7 @@ vpaes_encrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_encrypt,.-vpaes_encrypt
|
||||
|
||||
@ -452,6 +456,7 @@ _vpaes_decrypt_core:
|
||||
.type vpaes_decrypt,%function
|
||||
.align 4
|
||||
vpaes_decrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -461,6 +466,7 @@ vpaes_decrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_decrypt,.-vpaes_decrypt
|
||||
|
||||
@ -630,6 +636,7 @@ _vpaes_key_preheat:
|
||||
.type _vpaes_schedule_core,%function
|
||||
.align 4
|
||||
_vpaes_schedule_core:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@ -799,6 +806,7 @@ _vpaes_schedule_core:
|
||||
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
|
||||
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
|
||||
ldp x29, x30, [sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size _vpaes_schedule_core,.-_vpaes_schedule_core
|
||||
|
||||
@ -1012,6 +1020,7 @@ _vpaes_schedule_mangle:
|
||||
.type vpaes_set_encrypt_key,%function
|
||||
.align 4
|
||||
vpaes_set_encrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1027,6 +1036,7 @@ vpaes_set_encrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
|
||||
|
||||
@ -1035,6 +1045,7 @@ vpaes_set_encrypt_key:
|
||||
.type vpaes_set_decrypt_key,%function
|
||||
.align 4
|
||||
vpaes_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1054,6 +1065,7 @@ vpaes_set_decrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
|
||||
.globl vpaes_cbc_encrypt
|
||||
@ -1061,6 +1073,7 @@ vpaes_set_decrypt_key:
|
||||
.type vpaes_cbc_encrypt,%function
|
||||
.align 4
|
||||
vpaes_cbc_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
cbz x2, .Lcbc_abort
|
||||
cmp w5, #0 // check direction
|
||||
b.eq vpaes_cbc_decrypt
|
||||
@ -1087,6 +1100,7 @@ vpaes_cbc_encrypt:
|
||||
st1 {v0.16b}, [x4] // write ivec
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
.Lcbc_abort:
|
||||
ret
|
||||
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
|
||||
@ -1094,6 +1108,8 @@ vpaes_cbc_encrypt:
|
||||
.type vpaes_cbc_decrypt,%function
|
||||
.align 4
|
||||
vpaes_cbc_decrypt:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
|
||||
// only from vpaes_cbc_encrypt which has already signed the return address.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1135,6 +1151,7 @@ vpaes_cbc_decrypt:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
|
||||
.globl vpaes_ctr32_encrypt_blocks
|
||||
@ -1142,6 +1159,7 @@ vpaes_cbc_decrypt:
|
||||
.type vpaes_ctr32_encrypt_blocks,%function
|
||||
.align 4
|
||||
vpaes_ctr32_encrypt_blocks:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@ -1209,7 +1227,9 @@ vpaes_ctr32_encrypt_blocks:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ -27,6 +29,7 @@
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
.Labi_test_trampoline_begin:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
// Stack layout (low to high addresses)
|
||||
// x29,x30 (16 bytes)
|
||||
// d8-d15 (64 bytes)
|
||||
@ -129,6 +132,7 @@ abi_test_trampoline:
|
||||
ldp x27, x28, [sp, #144]
|
||||
|
||||
ldp x29, x30, [sp], #176
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size abi_test_trampoline,.-abi_test_trampoline
|
||||
.type abi_test_clobber_x0, %function
|
||||
@ -136,6 +140,7 @@ abi_test_trampoline:
|
||||
.hidden abi_test_clobber_x0
|
||||
.align 4
|
||||
abi_test_clobber_x0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x0, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x0,.-abi_test_clobber_x0
|
||||
@ -144,6 +149,7 @@ abi_test_clobber_x0:
|
||||
.hidden abi_test_clobber_x1
|
||||
.align 4
|
||||
abi_test_clobber_x1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x1, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x1,.-abi_test_clobber_x1
|
||||
@ -152,6 +158,7 @@ abi_test_clobber_x1:
|
||||
.hidden abi_test_clobber_x2
|
||||
.align 4
|
||||
abi_test_clobber_x2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x2, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x2,.-abi_test_clobber_x2
|
||||
@ -160,6 +167,7 @@ abi_test_clobber_x2:
|
||||
.hidden abi_test_clobber_x3
|
||||
.align 4
|
||||
abi_test_clobber_x3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x3, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x3,.-abi_test_clobber_x3
|
||||
@ -168,6 +176,7 @@ abi_test_clobber_x3:
|
||||
.hidden abi_test_clobber_x4
|
||||
.align 4
|
||||
abi_test_clobber_x4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x4, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x4,.-abi_test_clobber_x4
|
||||
@ -176,6 +185,7 @@ abi_test_clobber_x4:
|
||||
.hidden abi_test_clobber_x5
|
||||
.align 4
|
||||
abi_test_clobber_x5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x5, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x5,.-abi_test_clobber_x5
|
||||
@ -184,6 +194,7 @@ abi_test_clobber_x5:
|
||||
.hidden abi_test_clobber_x6
|
||||
.align 4
|
||||
abi_test_clobber_x6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x6, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x6,.-abi_test_clobber_x6
|
||||
@ -192,6 +203,7 @@ abi_test_clobber_x6:
|
||||
.hidden abi_test_clobber_x7
|
||||
.align 4
|
||||
abi_test_clobber_x7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x7, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x7,.-abi_test_clobber_x7
|
||||
@ -200,6 +212,7 @@ abi_test_clobber_x7:
|
||||
.hidden abi_test_clobber_x8
|
||||
.align 4
|
||||
abi_test_clobber_x8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x8, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x8,.-abi_test_clobber_x8
|
||||
@ -208,6 +221,7 @@ abi_test_clobber_x8:
|
||||
.hidden abi_test_clobber_x9
|
||||
.align 4
|
||||
abi_test_clobber_x9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x9, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x9,.-abi_test_clobber_x9
|
||||
@ -216,6 +230,7 @@ abi_test_clobber_x9:
|
||||
.hidden abi_test_clobber_x10
|
||||
.align 4
|
||||
abi_test_clobber_x10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x10, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x10,.-abi_test_clobber_x10
|
||||
@ -224,6 +239,7 @@ abi_test_clobber_x10:
|
||||
.hidden abi_test_clobber_x11
|
||||
.align 4
|
||||
abi_test_clobber_x11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x11, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x11,.-abi_test_clobber_x11
|
||||
@ -232,6 +248,7 @@ abi_test_clobber_x11:
|
||||
.hidden abi_test_clobber_x12
|
||||
.align 4
|
||||
abi_test_clobber_x12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x12, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x12,.-abi_test_clobber_x12
|
||||
@ -240,6 +257,7 @@ abi_test_clobber_x12:
|
||||
.hidden abi_test_clobber_x13
|
||||
.align 4
|
||||
abi_test_clobber_x13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x13, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x13,.-abi_test_clobber_x13
|
||||
@ -248,6 +266,7 @@ abi_test_clobber_x13:
|
||||
.hidden abi_test_clobber_x14
|
||||
.align 4
|
||||
abi_test_clobber_x14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x14, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x14,.-abi_test_clobber_x14
|
||||
@ -256,6 +275,7 @@ abi_test_clobber_x14:
|
||||
.hidden abi_test_clobber_x15
|
||||
.align 4
|
||||
abi_test_clobber_x15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x15, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x15,.-abi_test_clobber_x15
|
||||
@ -264,6 +284,7 @@ abi_test_clobber_x15:
|
||||
.hidden abi_test_clobber_x16
|
||||
.align 4
|
||||
abi_test_clobber_x16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x16, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x16,.-abi_test_clobber_x16
|
||||
@ -272,6 +293,7 @@ abi_test_clobber_x16:
|
||||
.hidden abi_test_clobber_x17
|
||||
.align 4
|
||||
abi_test_clobber_x17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x17, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x17,.-abi_test_clobber_x17
|
||||
@ -280,6 +302,7 @@ abi_test_clobber_x17:
|
||||
.hidden abi_test_clobber_x19
|
||||
.align 4
|
||||
abi_test_clobber_x19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x19, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x19,.-abi_test_clobber_x19
|
||||
@ -288,6 +311,7 @@ abi_test_clobber_x19:
|
||||
.hidden abi_test_clobber_x20
|
||||
.align 4
|
||||
abi_test_clobber_x20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x20, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x20,.-abi_test_clobber_x20
|
||||
@ -296,6 +320,7 @@ abi_test_clobber_x20:
|
||||
.hidden abi_test_clobber_x21
|
||||
.align 4
|
||||
abi_test_clobber_x21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x21, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x21,.-abi_test_clobber_x21
|
||||
@ -304,6 +329,7 @@ abi_test_clobber_x21:
|
||||
.hidden abi_test_clobber_x22
|
||||
.align 4
|
||||
abi_test_clobber_x22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x22, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x22,.-abi_test_clobber_x22
|
||||
@ -312,6 +338,7 @@ abi_test_clobber_x22:
|
||||
.hidden abi_test_clobber_x23
|
||||
.align 4
|
||||
abi_test_clobber_x23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x23,.-abi_test_clobber_x23
|
||||
@ -320,6 +347,7 @@ abi_test_clobber_x23:
|
||||
.hidden abi_test_clobber_x24
|
||||
.align 4
|
||||
abi_test_clobber_x24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x24,.-abi_test_clobber_x24
|
||||
@ -328,6 +356,7 @@ abi_test_clobber_x24:
|
||||
.hidden abi_test_clobber_x25
|
||||
.align 4
|
||||
abi_test_clobber_x25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x25,.-abi_test_clobber_x25
|
||||
@ -336,6 +365,7 @@ abi_test_clobber_x25:
|
||||
.hidden abi_test_clobber_x26
|
||||
.align 4
|
||||
abi_test_clobber_x26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x26,.-abi_test_clobber_x26
|
||||
@ -344,6 +374,7 @@ abi_test_clobber_x26:
|
||||
.hidden abi_test_clobber_x27
|
||||
.align 4
|
||||
abi_test_clobber_x27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x27,.-abi_test_clobber_x27
|
||||
@ -352,6 +383,7 @@ abi_test_clobber_x27:
|
||||
.hidden abi_test_clobber_x28
|
||||
.align 4
|
||||
abi_test_clobber_x28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x28,.-abi_test_clobber_x28
|
||||
@ -360,6 +392,7 @@ abi_test_clobber_x28:
|
||||
.hidden abi_test_clobber_x29
|
||||
.align 4
|
||||
abi_test_clobber_x29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x29,.-abi_test_clobber_x29
|
||||
@ -368,6 +401,7 @@ abi_test_clobber_x29:
|
||||
.hidden abi_test_clobber_d0
|
||||
.align 4
|
||||
abi_test_clobber_d0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d0, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d0,.-abi_test_clobber_d0
|
||||
@ -376,6 +410,7 @@ abi_test_clobber_d0:
|
||||
.hidden abi_test_clobber_d1
|
||||
.align 4
|
||||
abi_test_clobber_d1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d1, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d1,.-abi_test_clobber_d1
|
||||
@ -384,6 +419,7 @@ abi_test_clobber_d1:
|
||||
.hidden abi_test_clobber_d2
|
||||
.align 4
|
||||
abi_test_clobber_d2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d2, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d2,.-abi_test_clobber_d2
|
||||
@ -392,6 +428,7 @@ abi_test_clobber_d2:
|
||||
.hidden abi_test_clobber_d3
|
||||
.align 4
|
||||
abi_test_clobber_d3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d3, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d3,.-abi_test_clobber_d3
|
||||
@ -400,6 +437,7 @@ abi_test_clobber_d3:
|
||||
.hidden abi_test_clobber_d4
|
||||
.align 4
|
||||
abi_test_clobber_d4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d4, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d4,.-abi_test_clobber_d4
|
||||
@ -408,6 +446,7 @@ abi_test_clobber_d4:
|
||||
.hidden abi_test_clobber_d5
|
||||
.align 4
|
||||
abi_test_clobber_d5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d5, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d5,.-abi_test_clobber_d5
|
||||
@ -416,6 +455,7 @@ abi_test_clobber_d5:
|
||||
.hidden abi_test_clobber_d6
|
||||
.align 4
|
||||
abi_test_clobber_d6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d6, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d6,.-abi_test_clobber_d6
|
||||
@ -424,6 +464,7 @@ abi_test_clobber_d6:
|
||||
.hidden abi_test_clobber_d7
|
||||
.align 4
|
||||
abi_test_clobber_d7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d7, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d7,.-abi_test_clobber_d7
|
||||
@ -432,6 +473,7 @@ abi_test_clobber_d7:
|
||||
.hidden abi_test_clobber_d8
|
||||
.align 4
|
||||
abi_test_clobber_d8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d8, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d8,.-abi_test_clobber_d8
|
||||
@ -440,6 +482,7 @@ abi_test_clobber_d8:
|
||||
.hidden abi_test_clobber_d9
|
||||
.align 4
|
||||
abi_test_clobber_d9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d9, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d9,.-abi_test_clobber_d9
|
||||
@ -448,6 +491,7 @@ abi_test_clobber_d9:
|
||||
.hidden abi_test_clobber_d10
|
||||
.align 4
|
||||
abi_test_clobber_d10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d10, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d10,.-abi_test_clobber_d10
|
||||
@ -456,6 +500,7 @@ abi_test_clobber_d10:
|
||||
.hidden abi_test_clobber_d11
|
||||
.align 4
|
||||
abi_test_clobber_d11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d11, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d11,.-abi_test_clobber_d11
|
||||
@ -464,6 +509,7 @@ abi_test_clobber_d11:
|
||||
.hidden abi_test_clobber_d12
|
||||
.align 4
|
||||
abi_test_clobber_d12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d12, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d12,.-abi_test_clobber_d12
|
||||
@ -472,6 +518,7 @@ abi_test_clobber_d12:
|
||||
.hidden abi_test_clobber_d13
|
||||
.align 4
|
||||
abi_test_clobber_d13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d13, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d13,.-abi_test_clobber_d13
|
||||
@ -480,6 +527,7 @@ abi_test_clobber_d13:
|
||||
.hidden abi_test_clobber_d14
|
||||
.align 4
|
||||
abi_test_clobber_d14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d14, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d14,.-abi_test_clobber_d14
|
||||
@ -488,6 +536,7 @@ abi_test_clobber_d14:
|
||||
.hidden abi_test_clobber_d15
|
||||
.align 4
|
||||
abi_test_clobber_d15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d15, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
@ -496,6 +545,7 @@ abi_test_clobber_d15:
|
||||
.hidden abi_test_clobber_d16
|
||||
.align 4
|
||||
abi_test_clobber_d16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d16, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d16,.-abi_test_clobber_d16
|
||||
@ -504,6 +554,7 @@ abi_test_clobber_d16:
|
||||
.hidden abi_test_clobber_d17
|
||||
.align 4
|
||||
abi_test_clobber_d17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d17, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d17,.-abi_test_clobber_d17
|
||||
@ -512,6 +563,7 @@ abi_test_clobber_d17:
|
||||
.hidden abi_test_clobber_d18
|
||||
.align 4
|
||||
abi_test_clobber_d18:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d18, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d18,.-abi_test_clobber_d18
|
||||
@ -520,6 +572,7 @@ abi_test_clobber_d18:
|
||||
.hidden abi_test_clobber_d19
|
||||
.align 4
|
||||
abi_test_clobber_d19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d19, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d19,.-abi_test_clobber_d19
|
||||
@ -528,6 +581,7 @@ abi_test_clobber_d19:
|
||||
.hidden abi_test_clobber_d20
|
||||
.align 4
|
||||
abi_test_clobber_d20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d20, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d20,.-abi_test_clobber_d20
|
||||
@ -536,6 +590,7 @@ abi_test_clobber_d20:
|
||||
.hidden abi_test_clobber_d21
|
||||
.align 4
|
||||
abi_test_clobber_d21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d21, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d21,.-abi_test_clobber_d21
|
||||
@ -544,6 +599,7 @@ abi_test_clobber_d21:
|
||||
.hidden abi_test_clobber_d22
|
||||
.align 4
|
||||
abi_test_clobber_d22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d22, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d22,.-abi_test_clobber_d22
|
||||
@ -552,6 +608,7 @@ abi_test_clobber_d22:
|
||||
.hidden abi_test_clobber_d23
|
||||
.align 4
|
||||
abi_test_clobber_d23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d23,.-abi_test_clobber_d23
|
||||
@ -560,6 +617,7 @@ abi_test_clobber_d23:
|
||||
.hidden abi_test_clobber_d24
|
||||
.align 4
|
||||
abi_test_clobber_d24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d24,.-abi_test_clobber_d24
|
||||
@ -568,6 +626,7 @@ abi_test_clobber_d24:
|
||||
.hidden abi_test_clobber_d25
|
||||
.align 4
|
||||
abi_test_clobber_d25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d25,.-abi_test_clobber_d25
|
||||
@ -576,6 +635,7 @@ abi_test_clobber_d25:
|
||||
.hidden abi_test_clobber_d26
|
||||
.align 4
|
||||
abi_test_clobber_d26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d26,.-abi_test_clobber_d26
|
||||
@ -584,6 +644,7 @@ abi_test_clobber_d26:
|
||||
.hidden abi_test_clobber_d27
|
||||
.align 4
|
||||
abi_test_clobber_d27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d27,.-abi_test_clobber_d27
|
||||
@ -592,6 +653,7 @@ abi_test_clobber_d27:
|
||||
.hidden abi_test_clobber_d28
|
||||
.align 4
|
||||
abi_test_clobber_d28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d28,.-abi_test_clobber_d28
|
||||
@ -600,6 +662,7 @@ abi_test_clobber_d28:
|
||||
.hidden abi_test_clobber_d29
|
||||
.align 4
|
||||
abi_test_clobber_d29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d29,.-abi_test_clobber_d29
|
||||
@ -608,6 +671,7 @@ abi_test_clobber_d29:
|
||||
.hidden abi_test_clobber_d30
|
||||
.align 4
|
||||
abi_test_clobber_d30:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d30, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d30,.-abi_test_clobber_d30
|
||||
@ -616,6 +680,7 @@ abi_test_clobber_d30:
|
||||
.hidden abi_test_clobber_d31
|
||||
.align 4
|
||||
abi_test_clobber_d31:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d31, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d31,.-abi_test_clobber_d31
|
||||
@ -624,6 +689,7 @@ abi_test_clobber_d31:
|
||||
.hidden abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
abi_test_clobber_v8_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
|
||||
@ -632,6 +698,7 @@ abi_test_clobber_v8_upper:
|
||||
.hidden abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
abi_test_clobber_v9_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
|
||||
@ -640,6 +707,7 @@ abi_test_clobber_v9_upper:
|
||||
.hidden abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
abi_test_clobber_v10_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
|
||||
@ -648,6 +716,7 @@ abi_test_clobber_v10_upper:
|
||||
.hidden abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
abi_test_clobber_v11_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
|
||||
@ -656,6 +725,7 @@ abi_test_clobber_v11_upper:
|
||||
.hidden abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
abi_test_clobber_v12_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
|
||||
@ -664,6 +734,7 @@ abi_test_clobber_v12_upper:
|
||||
.hidden abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
abi_test_clobber_v13_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
|
||||
@ -672,6 +743,7 @@ abi_test_clobber_v13_upper:
|
||||
.hidden abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
abi_test_clobber_v14_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
|
||||
@ -680,8 +752,10 @@ abi_test_clobber_v14_upper:
|
||||
.hidden abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
abi_test_clobber_v15_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -1,998 +0,0 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.section .rodata
|
||||
|
||||
# p434 x 2
|
||||
.Lp434x2:
|
||||
.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF
|
||||
.quad 0xFB82ECF5C5FFFFFF, 0xF78CB8F062B15D47
|
||||
.quad 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
|
||||
|
||||
# p434 + 1
|
||||
.Lp434p1:
|
||||
.quad 0xFDC1767AE3000000, 0x7BC65C783158AEA3
|
||||
.quad 0x6CFC5FD681C52056, 0x0002341F27177344
|
||||
|
||||
.text
|
||||
.globl sike_mpmul
|
||||
.hidden sike_mpmul
|
||||
.align 4
|
||||
sike_mpmul:
|
||||
stp x29, x30, [sp,#-96]!
|
||||
add x29, sp, #0
|
||||
stp x19, x20, [sp,#16]
|
||||
stp x21, x22, [sp,#32]
|
||||
stp x23, x24, [sp,#48]
|
||||
stp x25, x26, [sp,#64]
|
||||
stp x27, x28, [sp,#80]
|
||||
|
||||
ldp x3, x4, [x0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x10, x11, [x1,#0]
|
||||
ldp x12, x13, [x1,#16]
|
||||
ldp x14, x15, [x1,#32]
|
||||
ldr x16, [x1,#48]
|
||||
|
||||
// x3-x7 <- AH + AL, x7 <- carry
|
||||
adds x3, x3, x7
|
||||
adcs x4, x4, x8
|
||||
adcs x5, x5, x9
|
||||
adcs x6, x6, xzr
|
||||
adc x7, xzr, xzr
|
||||
|
||||
// x10-x13 <- BH + BL, x8 <- carry
|
||||
adds x10, x10, x14
|
||||
adcs x11, x11, x15
|
||||
adcs x12, x12, x16
|
||||
adcs x13, x13, xzr
|
||||
adc x8, xzr, xzr
|
||||
|
||||
// x9 <- combined carry
|
||||
and x9, x7, x8
|
||||
// x7-x8 <- mask
|
||||
sub x7, xzr, x7
|
||||
sub x8, xzr, x8
|
||||
|
||||
// x15-x19 <- masked (BH + BL)
|
||||
and x14, x10, x7
|
||||
and x15, x11, x7
|
||||
and x16, x12, x7
|
||||
and x17, x13, x7
|
||||
|
||||
// x20-x23 <- masked (AH + AL)
|
||||
and x20, x3, x8
|
||||
and x21, x4, x8
|
||||
and x22, x5, x8
|
||||
and x23, x6, x8
|
||||
|
||||
// x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1
|
||||
adds x14, x14, x20
|
||||
adcs x15, x15, x21
|
||||
adcs x16, x16, x22
|
||||
adcs x17, x17, x23
|
||||
adc x7, x9, xzr
|
||||
|
||||
// x8-x9,x19,x20-x24 <- (AH+AL) x (BH+BL), low part
|
||||
stp x3, x4, [x2,#0]
|
||||
// A0-A1 <- AH + AL, T0 <- mask
|
||||
adds x3, x3, x5
|
||||
adcs x4, x4, x6
|
||||
adc x25, xzr, xzr
|
||||
|
||||
// C6, T1 <- BH + BL, C7 <- mask
|
||||
adds x23, x10, x12
|
||||
adcs x26, x11, x13
|
||||
adc x24, xzr, xzr
|
||||
|
||||
// C0-C1 <- masked (BH + BL)
|
||||
sub x19, xzr, x25
|
||||
sub x20, xzr, x24
|
||||
and x8, x23, x19
|
||||
and x9, x26, x19
|
||||
|
||||
// C4-C5 <- masked (AH + AL), T0 <- combined carry
|
||||
and x21, x3, x20
|
||||
and x22, x4, x20
|
||||
mul x19, x3, x23
|
||||
mul x20, x3, x26
|
||||
and x25, x25, x24
|
||||
|
||||
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
|
||||
adds x8, x21, x8
|
||||
umulh x21, x3, x26
|
||||
adcs x9, x22, x9
|
||||
umulh x22, x3, x23
|
||||
adc x25, x25, xzr
|
||||
|
||||
// C2-C5 <- (AH+AL) x (BH+BL), low part
|
||||
mul x3, x4, x23
|
||||
umulh x23, x4, x23
|
||||
adds x20, x20, x22
|
||||
adc x21, x21, xzr
|
||||
|
||||
mul x24, x4, x26
|
||||
umulh x26, x4, x26
|
||||
adds x20, x20, x3
|
||||
adcs x21, x21, x23
|
||||
adc x22, xzr, xzr
|
||||
|
||||
adds x21, x21, x24
|
||||
adc x22, x22, x26
|
||||
|
||||
ldp x3, x4, [x2,#0]
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
|
||||
adds x21, x8, x21
|
||||
umulh x24, x3, x10
|
||||
umulh x26, x3, x11
|
||||
adcs x22, x9, x22
|
||||
mul x8, x3, x10
|
||||
mul x9, x3, x11
|
||||
adc x25, x25, xzr
|
||||
|
||||
// C0-C1, T1, C7 <- AL x BL
|
||||
mul x3, x4, x10
|
||||
umulh x10, x4, x10
|
||||
adds x9, x9, x24
|
||||
adc x26, x26, xzr
|
||||
|
||||
mul x23, x4, x11
|
||||
umulh x11, x4, x11
|
||||
adds x9, x9, x3
|
||||
adcs x26, x26, x10
|
||||
adc x24, xzr, xzr
|
||||
|
||||
adds x26, x26, x23
|
||||
adc x24, x24, x11
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
|
||||
mul x3, x5, x12
|
||||
umulh x10, x5, x12
|
||||
subs x19, x19, x8
|
||||
sbcs x20, x20, x9
|
||||
sbcs x21, x21, x26
|
||||
mul x4, x5, x13
|
||||
umulh x23, x5, x13
|
||||
sbcs x22, x22, x24
|
||||
sbc x25, x25, xzr
|
||||
|
||||
// A0, A1, C6, B0 <- AH x BH
|
||||
mul x5, x6, x12
|
||||
umulh x12, x6, x12
|
||||
adds x4, x4, x10
|
||||
adc x23, x23, xzr
|
||||
|
||||
mul x11, x6, x13
|
||||
umulh x13, x6, x13
|
||||
adds x4, x4, x5
|
||||
adcs x23, x23, x12
|
||||
adc x10, xzr, xzr
|
||||
|
||||
adds x23, x23, x11
|
||||
adc x10, x10, x13
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x19, x19, x3
|
||||
sbcs x20, x20, x4
|
||||
sbcs x21, x21, x23
|
||||
sbcs x22, x22, x10
|
||||
sbc x25, x25, xzr
|
||||
|
||||
adds x19, x19, x26
|
||||
adcs x20, x20, x24
|
||||
adcs x21, x21, x3
|
||||
adcs x22, x22, x4
|
||||
adcs x23, x25, x23
|
||||
adc x24, x10, xzr
|
||||
|
||||
|
||||
// x15-x19, x7 <- (AH+AL) x (BH+BL), final step
|
||||
adds x14, x14, x21
|
||||
adcs x15, x15, x22
|
||||
adcs x16, x16, x23
|
||||
adcs x17, x17, x24
|
||||
adc x7, x7, xzr
|
||||
|
||||
// Load AL
|
||||
ldp x3, x4, [x0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
// Load BL
|
||||
ldp x10, x11, [x1,#0]
|
||||
ldp x12, x13, [x1,#16]
|
||||
|
||||
// Temporarily store x8 in x2
|
||||
stp x8, x9, [x2,#0]
|
||||
// x21-x28 <- AL x BL
|
||||
// A0-A1 <- AH + AL, T0 <- mask
|
||||
adds x3, x3, x5
|
||||
adcs x4, x4, x6
|
||||
adc x8, xzr, xzr
|
||||
|
||||
// C6, T1 <- BH + BL, C7 <- mask
|
||||
adds x27, x10, x12
|
||||
adcs x9, x11, x13
|
||||
adc x28, xzr, xzr
|
||||
|
||||
// C0-C1 <- masked (BH + BL)
|
||||
sub x23, xzr, x8
|
||||
sub x24, xzr, x28
|
||||
and x21, x27, x23
|
||||
and x22, x9, x23
|
||||
|
||||
// C4-C5 <- masked (AH + AL), T0 <- combined carry
|
||||
and x25, x3, x24
|
||||
and x26, x4, x24
|
||||
mul x23, x3, x27
|
||||
mul x24, x3, x9
|
||||
and x8, x8, x28
|
||||
|
||||
// C0-C1, T0 <- (AH+AL) x (BH+BL), part 1
|
||||
adds x21, x25, x21
|
||||
umulh x25, x3, x9
|
||||
adcs x22, x26, x22
|
||||
umulh x26, x3, x27
|
||||
adc x8, x8, xzr
|
||||
|
||||
// C2-C5 <- (AH+AL) x (BH+BL), low part
|
||||
mul x3, x4, x27
|
||||
umulh x27, x4, x27
|
||||
adds x24, x24, x26
|
||||
adc x25, x25, xzr
|
||||
|
||||
mul x28, x4, x9
|
||||
umulh x9, x4, x9
|
||||
adds x24, x24, x3
|
||||
adcs x25, x25, x27
|
||||
adc x26, xzr, xzr
|
||||
|
||||
adds x25, x25, x28
|
||||
adc x26, x26, x9
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL), final part
|
||||
adds x25, x21, x25
|
||||
umulh x28, x3, x10
|
||||
umulh x9, x3, x11
|
||||
adcs x26, x22, x26
|
||||
mul x21, x3, x10
|
||||
mul x22, x3, x11
|
||||
adc x8, x8, xzr
|
||||
|
||||
// C0-C1, T1, C7 <- AL x BL
|
||||
mul x3, x4, x10
|
||||
umulh x10, x4, x10
|
||||
adds x22, x22, x28
|
||||
adc x9, x9, xzr
|
||||
|
||||
mul x27, x4, x11
|
||||
umulh x11, x4, x11
|
||||
adds x22, x22, x3
|
||||
adcs x9, x9, x10
|
||||
adc x28, xzr, xzr
|
||||
|
||||
adds x9, x9, x27
|
||||
adc x28, x28, x11
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL
|
||||
mul x3, x5, x12
|
||||
umulh x10, x5, x12
|
||||
subs x23, x23, x21
|
||||
sbcs x24, x24, x22
|
||||
sbcs x25, x25, x9
|
||||
mul x4, x5, x13
|
||||
umulh x27, x5, x13
|
||||
sbcs x26, x26, x28
|
||||
sbc x8, x8, xzr
|
||||
|
||||
// A0, A1, C6, B0 <- AH x BH
|
||||
mul x5, x6, x12
|
||||
umulh x12, x6, x12
|
||||
adds x4, x4, x10
|
||||
adc x27, x27, xzr
|
||||
|
||||
mul x11, x6, x13
|
||||
umulh x13, x6, x13
|
||||
adds x4, x4, x5
|
||||
adcs x27, x27, x12
|
||||
adc x10, xzr, xzr
|
||||
|
||||
adds x27, x27, x11
|
||||
adc x10, x10, x13
|
||||
|
||||
|
||||
// C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x23, x23, x3
|
||||
sbcs x24, x24, x4
|
||||
sbcs x25, x25, x27
|
||||
sbcs x26, x26, x10
|
||||
sbc x8, x8, xzr
|
||||
|
||||
adds x23, x23, x9
|
||||
adcs x24, x24, x28
|
||||
adcs x25, x25, x3
|
||||
adcs x26, x26, x4
|
||||
adcs x27, x8, x27
|
||||
adc x28, x10, xzr
|
||||
|
||||
// Restore x8
|
||||
ldp x8, x9, [x2,#0]
|
||||
|
||||
// x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL
|
||||
subs x8, x8, x21
|
||||
sbcs x9, x9, x22
|
||||
sbcs x19, x19, x23
|
||||
sbcs x20, x20, x24
|
||||
sbcs x14, x14, x25
|
||||
sbcs x15, x15, x26
|
||||
sbcs x16, x16, x27
|
||||
sbcs x17, x17, x28
|
||||
sbc x7, x7, xzr
|
||||
|
||||
// Store ALxBL, low
|
||||
stp x21, x22, [x2]
|
||||
stp x23, x24, [x2,#16]
|
||||
|
||||
// Load AH
|
||||
ldp x3, x4, [x0,#32]
|
||||
ldr x5, [x0,#48]
|
||||
// Load BH
|
||||
ldp x10, x11, [x1,#32]
|
||||
ldr x12, [x1,#48]
|
||||
|
||||
adds x8, x8, x25
|
||||
adcs x9, x9, x26
|
||||
adcs x19, x19, x27
|
||||
adcs x20, x20, x28
|
||||
adc x1, xzr, xzr
|
||||
|
||||
add x0, x0, #32
|
||||
// Temporarily store x8,x9 in x2
|
||||
stp x8,x9, [x2,#32]
|
||||
// x21-x28 <- AH x BH
|
||||
|
||||
// A0 * B0
|
||||
mul x21, x3, x10 // C0
|
||||
umulh x24, x3, x10
|
||||
|
||||
// A0 * B1
|
||||
mul x22, x3, x11
|
||||
umulh x23, x3, x11
|
||||
|
||||
// A1 * B0
|
||||
mul x8, x4, x10
|
||||
umulh x9, x4, x10
|
||||
adds x22, x22, x24
|
||||
adc x23, x23, xzr
|
||||
|
||||
// A0 * B2
|
||||
mul x27, x3, x12
|
||||
umulh x28, x3, x12
|
||||
adds x22, x22, x8 // C1
|
||||
adcs x23, x23, x9
|
||||
adc x24, xzr, xzr
|
||||
|
||||
// A2 * B0
|
||||
mul x8, x5, x10
|
||||
umulh x25, x5, x10
|
||||
adds x23, x23, x27
|
||||
adcs x24, x24, x25
|
||||
adc x25, xzr, xzr
|
||||
|
||||
// A1 * B1
|
||||
mul x27, x4, x11
|
||||
umulh x9, x4, x11
|
||||
adds x23, x23, x8
|
||||
adcs x24, x24, x28
|
||||
adc x25, x25, xzr
|
||||
|
||||
// A1 * B2
|
||||
mul x8, x4, x12
|
||||
umulh x28, x4, x12
|
||||
adds x23, x23, x27 // C2
|
||||
adcs x24, x24, x9
|
||||
adc x25, x25, xzr
|
||||
|
||||
// A2 * B1
|
||||
mul x27, x5, x11
|
||||
umulh x9, x5, x11
|
||||
adds x24, x24, x8
|
||||
adcs x25, x25, x28
|
||||
adc x26, xzr, xzr
|
||||
|
||||
// A2 * B2
|
||||
mul x8, x5, x12
|
||||
umulh x28, x5, x12
|
||||
adds x24, x24, x27 // C3
|
||||
adcs x25, x25, x9
|
||||
adc x26, x26, xzr
|
||||
|
||||
adds x25, x25, x8 // C4
|
||||
adc x26, x26, x28 // C5
|
||||
|
||||
// Restore x8,x9
|
||||
ldp x8,x9, [x2,#32]
|
||||
|
||||
neg x1, x1
|
||||
|
||||
// x8-x9,x19,x20,x14-x17 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH
|
||||
subs x8, x8, x21
|
||||
sbcs x9, x9, x22
|
||||
sbcs x19, x19, x23
|
||||
sbcs x20, x20, x24
|
||||
sbcs x14, x14, x25
|
||||
sbcs x15, x15, x26
|
||||
sbcs x16, x16, xzr
|
||||
sbcs x17, x17, xzr
|
||||
sbc x7, x7, xzr
|
||||
|
||||
// Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low
|
||||
stp x8, x9, [x2,#32]
|
||||
stp x19, x20, [x2,#48]
|
||||
|
||||
adds x1, x1, #1
|
||||
adcs x14, x14, x21
|
||||
adcs x15, x15, x22
|
||||
adcs x16, x16, x23
|
||||
adcs x17, x17, x24
|
||||
adcs x25, x7, x25
|
||||
adc x26, x26, xzr
|
||||
|
||||
stp x14, x15, [x2,#64]
|
||||
stp x16, x17, [x2,#80]
|
||||
stp x25, x26, [x2,#96]
|
||||
|
||||
ldp x19, x20, [x29,#16]
|
||||
ldp x21, x22, [x29,#32]
|
||||
ldp x23, x24, [x29,#48]
|
||||
ldp x25, x26, [x29,#64]
|
||||
ldp x27, x28, [x29,#80]
|
||||
ldp x29, x30, [sp],#96
|
||||
ret
|
||||
.globl sike_fprdc
|
||||
.hidden sike_fprdc
|
||||
.align 4
|
||||
sike_fprdc:
|
||||
stp x29, x30, [sp, #-96]!
|
||||
add x29, sp, xzr
|
||||
stp x19, x20, [sp,#16]
|
||||
stp x21, x22, [sp,#32]
|
||||
stp x23, x24, [sp,#48]
|
||||
stp x25, x26, [sp,#64]
|
||||
stp x27, x28, [sp,#80]
|
||||
|
||||
ldp x2, x3, [x0,#0] // a[0-1]
|
||||
|
||||
// Load the prime constant
|
||||
adrp x26, .Lp434p1
|
||||
add x26, x26, :lo12:.Lp434p1
|
||||
ldp x23, x24, [x26, #0x0]
|
||||
ldp x25, x26, [x26,#0x10]
|
||||
|
||||
// a[0-1] * p434+1
|
||||
mul x4, x2, x23 // C0
|
||||
umulh x7, x2, x23
|
||||
|
||||
mul x5, x2, x24
|
||||
umulh x6, x2, x24
|
||||
|
||||
mul x10, x3, x23
|
||||
umulh x11, x3, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x2, x25
|
||||
umulh x28, x2, x25
|
||||
adds x5, x5, x10 // C1
|
||||
adcs x6, x6, x11
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x10, x3, x24
|
||||
umulh x11, x3, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x2, x26
|
||||
umulh x28, x2, x26
|
||||
adds x6, x6, x10 // C2
|
||||
adcs x7, x7, x11
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x10, x3, x25
|
||||
umulh x11, x3, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x3, x26
|
||||
umulh x28, x3, x26
|
||||
adds x7, x7, x10 // C3
|
||||
adcs x8, x8, x11
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
|
||||
ldp x10, x11, [x0, #0x18]
|
||||
ldp x12, x13, [x0, #0x28]
|
||||
ldp x14, x15, [x0, #0x38]
|
||||
ldp x16, x17, [x0, #0x48]
|
||||
ldp x19, x20, [x0, #0x58]
|
||||
ldr x21, [x0, #0x68]
|
||||
|
||||
adds x10, x10, x4
|
||||
adcs x11, x11, x5
|
||||
adcs x12, x12, x6
|
||||
adcs x13, x13, x7
|
||||
adcs x14, x14, x8
|
||||
adcs x15, x15, x9
|
||||
adcs x22, x16, xzr
|
||||
adcs x17, x17, xzr
|
||||
adcs x19, x19, xzr
|
||||
adcs x20, x20, xzr
|
||||
adc x21, x21, xzr
|
||||
|
||||
ldr x2, [x0,#0x10] // a[2]
|
||||
// a[2-3] * p434+1
|
||||
mul x4, x2, x23 // C0
|
||||
umulh x7, x2, x23
|
||||
|
||||
mul x5, x2, x24
|
||||
umulh x6, x2, x24
|
||||
|
||||
mul x0, x10, x23
|
||||
umulh x3, x10, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x2, x25
|
||||
umulh x28, x2, x25
|
||||
adds x5, x5, x0 // C1
|
||||
adcs x6, x6, x3
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x0, x10, x24
|
||||
umulh x3, x10, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x2, x26
|
||||
umulh x28, x2, x26
|
||||
adds x6, x6, x0 // C2
|
||||
adcs x7, x7, x3
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x0, x10, x25
|
||||
umulh x3, x10, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x10, x26
|
||||
umulh x28, x10, x26
|
||||
adds x7, x7, x0 // C3
|
||||
adcs x8, x8, x3
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
|
||||
adds x12, x12, x4
|
||||
adcs x13, x13, x5
|
||||
adcs x14, x14, x6
|
||||
adcs x15, x15, x7
|
||||
adcs x16, x22, x8
|
||||
adcs x17, x17, x9
|
||||
adcs x22, x19, xzr
|
||||
adcs x20, x20, xzr
|
||||
adc x21, x21, xzr
|
||||
|
||||
mul x4, x11, x23 // C0
|
||||
umulh x7, x11, x23
|
||||
|
||||
mul x5, x11, x24
|
||||
umulh x6, x11, x24
|
||||
|
||||
mul x10, x12, x23
|
||||
umulh x3, x12, x23
|
||||
adds x5, x5, x7
|
||||
adc x6, x6, xzr
|
||||
|
||||
mul x27, x11, x25
|
||||
umulh x28, x11, x25
|
||||
adds x5, x5, x10 // C1
|
||||
adcs x6, x6, x3
|
||||
adc x7, xzr, xzr
|
||||
|
||||
mul x10, x12, x24
|
||||
umulh x3, x12, x24
|
||||
adds x6, x6, x27
|
||||
adcs x7, x7, x28
|
||||
adc x8, xzr, xzr
|
||||
|
||||
mul x27, x11, x26
|
||||
umulh x28, x11, x26
|
||||
adds x6, x6, x10 // C2
|
||||
adcs x7, x7, x3
|
||||
adc x8, x8, xzr
|
||||
|
||||
mul x10, x12, x25
|
||||
umulh x3, x12, x25
|
||||
adds x7, x7, x27
|
||||
adcs x8, x8, x28
|
||||
adc x9, xzr, xzr
|
||||
|
||||
mul x27, x12, x26
|
||||
umulh x28, x12, x26
|
||||
adds x7, x7, x10 // C3
|
||||
adcs x8, x8, x3
|
||||
adc x9, x9, xzr
|
||||
adds x8, x8, x27 // C4
|
||||
adc x9, x9, x28 // C5
|
||||
|
||||
|
||||
adds x14, x14, x4
|
||||
adcs x15, x15, x5
|
||||
adcs x16, x16, x6
|
||||
adcs x17, x17, x7
|
||||
adcs x19, x22, x8
|
||||
adcs x20, x20, x9
|
||||
adc x22, x21, xzr
|
||||
|
||||
stp x14, x15, [x1, #0x0] // C0, C1
|
||||
|
||||
mul x4, x13, x23 // C0
|
||||
umulh x10, x13, x23
|
||||
|
||||
mul x5, x13, x24
|
||||
umulh x27, x13, x24
|
||||
adds x5, x5, x10 // C1
|
||||
adc x10, xzr, xzr
|
||||
|
||||
mul x6, x13, x25
|
||||
umulh x28, x13, x25
|
||||
adds x27, x10, x27
|
||||
adcs x6, x6, x27 // C2
|
||||
adc x10, xzr, xzr
|
||||
|
||||
mul x7, x13, x26
|
||||
umulh x8, x13, x26
|
||||
adds x28, x10, x28
|
||||
adcs x7, x7, x28 // C3
|
||||
adc x8, x8, xzr // C4
|
||||
|
||||
adds x16, x16, x4
|
||||
adcs x17, x17, x5
|
||||
adcs x19, x19, x6
|
||||
adcs x20, x20, x7
|
||||
adc x21, x22, x8
|
||||
|
||||
str x16, [x1, #0x10]
|
||||
stp x17, x19, [x1, #0x18]
|
||||
stp x20, x21, [x1, #0x28]
|
||||
|
||||
ldp x19, x20, [x29,#16]
|
||||
ldp x21, x22, [x29,#32]
|
||||
ldp x23, x24, [x29,#48]
|
||||
ldp x25, x26, [x29,#64]
|
||||
ldp x27, x28, [x29,#80]
|
||||
ldp x29, x30, [sp],#96
|
||||
ret
|
||||
.globl sike_fpadd
|
||||
.hidden sike_fpadd
|
||||
.align 4
|
||||
sike_fpadd:
|
||||
stp x29,x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Add a + b
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
// Subtract 2xp434
|
||||
adrp x17, .Lp434x2
|
||||
add x17, x17, :lo12:.Lp434x2
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x12
|
||||
sbcs x6, x6, x13
|
||||
sbcs x7, x7, x14
|
||||
sbcs x8, x8, x15
|
||||
sbcs x9, x9, x16
|
||||
sbc x0, xzr, xzr // x0 can be reused now
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adc x9, x9, x16
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_fpsub
|
||||
.hidden sike_fpsub
|
||||
.align 4
|
||||
sike_fpsub:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
// Subtract a - b
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
sbcs x9, x9, x17
|
||||
sbc x0, xzr, xzr
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
adrp x17, .Lp434x2
|
||||
add x17, x17, :lo12:.Lp434x2
|
||||
|
||||
// First half
|
||||
ldp x11, x12, [x17, #0]
|
||||
ldp x13, x14, [x17, #16]
|
||||
ldp x15, x16, [x17, #32]
|
||||
|
||||
// Add 2xp434 anded with the mask in x0
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adc x9, x9, x16
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_mpadd_asm
|
||||
.hidden sike_mpadd_asm
|
||||
.align 4
|
||||
sike_mpadd_asm:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldr x9, [x0,#48]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldr x17, [x1,#48]
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adc x9, x9, x17
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
str x9, [x2,#48]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_mpsubx2_asm
|
||||
.hidden sike_mpsubx2_asm
|
||||
.align 4
|
||||
sike_mpsubx2_asm:
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x11, x12, [x1,#32]
|
||||
ldp x13, x14, [x1,#48]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
sbcs x9, x9, x13
|
||||
sbcs x10, x10, x14
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
|
||||
ldp x3, x4, [x0,#64]
|
||||
ldp x5, x6, [x0,#80]
|
||||
ldp x11, x12, [x1,#64]
|
||||
ldp x13, x14, [x1,#80]
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#96]
|
||||
ldp x11, x12, [x1,#96]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
sbc x0, xzr, xzr
|
||||
|
||||
stp x3, x4, [x2,#64]
|
||||
stp x5, x6, [x2,#80]
|
||||
stp x7, x8, [x2,#96]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
.globl sike_mpdblsubx2_asm
|
||||
.hidden sike_mpdblsubx2_asm
|
||||
.align 4
|
||||
sike_mpdblsubx2_asm:
|
||||
stp x29, x30, [sp, #-16]!
|
||||
add x29, sp, #0
|
||||
|
||||
ldp x3, x4, [x2, #0]
|
||||
ldp x5, x6, [x2,#16]
|
||||
ldp x7, x8, [x2,#32]
|
||||
|
||||
ldp x11, x12, [x0, #0]
|
||||
ldp x13, x14, [x0,#16]
|
||||
ldp x15, x16, [x0,#32]
|
||||
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
|
||||
// x9 stores carry
|
||||
adc x9, xzr, xzr
|
||||
|
||||
ldp x11, x12, [x1, #0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
ldp x15, x16, [x1,#32]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, x9, xzr
|
||||
|
||||
stp x3, x4, [x2, #0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
|
||||
ldp x3, x4, [x2,#48]
|
||||
ldp x5, x6, [x2,#64]
|
||||
ldp x7, x8, [x2,#80]
|
||||
|
||||
ldp x11, x12, [x0,#48]
|
||||
ldp x13, x14, [x0,#64]
|
||||
ldp x15, x16, [x0,#80]
|
||||
|
||||
// x9 = 2 - x9
|
||||
neg x9, x9
|
||||
add x9, x9, #2
|
||||
|
||||
subs x3, x3, x9
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, xzr, xzr
|
||||
|
||||
ldp x11, x12, [x1,#48]
|
||||
ldp x13, x14, [x1,#64]
|
||||
ldp x15, x16, [x1,#80]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
adc x9, x9, xzr
|
||||
|
||||
stp x3, x4, [x2,#48]
|
||||
stp x5, x6, [x2,#64]
|
||||
stp x7, x8, [x2,#80]
|
||||
|
||||
ldp x3, x4, [x2,#96]
|
||||
ldp x11, x12, [x0,#96]
|
||||
ldp x13, x14, [x1,#96]
|
||||
|
||||
// x9 = 2 - x9
|
||||
neg x9, x9
|
||||
add x9, x9, #2
|
||||
|
||||
subs x3, x3, x9
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
subs x3, x3, x13
|
||||
sbc x4, x4, x14
|
||||
stp x3, x4, [x2,#96]
|
||||
|
||||
ldp x29, x30, [sp],#16
|
||||
ret
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
@ -1490,3 +1490,4 @@ ChaCha20_neon:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -778,3 +778,4 @@ aes_hw_ctr32_encrypt_blocks:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -974,3 +974,4 @@ bn_mul8x_mont_neon:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -1526,3 +1526,4 @@ bsaes_ctr32_encrypt_blocks:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -31,342 +31,6 @@
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
.type rem_4bit,%object
|
||||
.align 5
|
||||
rem_4bit:
|
||||
.short 0x0000,0x1C20,0x3840,0x2460
|
||||
.short 0x7080,0x6CA0,0x48C0,0x54E0
|
||||
.short 0xE100,0xFD20,0xD940,0xC560
|
||||
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
|
||||
.size rem_4bit,.-rem_4bit
|
||||
|
||||
.type rem_4bit_get,%function
|
||||
rem_4bit_get:
|
||||
#if defined(__thumb2__)
|
||||
adr r2,rem_4bit
|
||||
#else
|
||||
sub r2,pc,#8+32 @ &rem_4bit
|
||||
#endif
|
||||
b .Lrem_4bit_got
|
||||
nop
|
||||
nop
|
||||
.size rem_4bit_get,.-rem_4bit_get
|
||||
|
||||
.globl gcm_ghash_4bit
|
||||
.hidden gcm_ghash_4bit
|
||||
.type gcm_ghash_4bit,%function
|
||||
.align 4
|
||||
gcm_ghash_4bit:
|
||||
#if defined(__thumb2__)
|
||||
adr r12,rem_4bit
|
||||
#else
|
||||
sub r12,pc,#8+48 @ &rem_4bit
|
||||
#endif
|
||||
add r3,r2,r3 @ r3 to point at the end
|
||||
stmdb sp!,{r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} @ save r3/end too
|
||||
|
||||
ldmia r12,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy rem_4bit ...
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ ... to stack
|
||||
|
||||
ldrb r12,[r2,#15]
|
||||
ldrb r14,[r0,#15]
|
||||
.Louter:
|
||||
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
add r11,r1,r14
|
||||
ldrb r12,[r2,#14]
|
||||
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
add r14,r14,r14
|
||||
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[sp,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
ldrb r14,[r0,#14]
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
eor r12,r12,r14
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16
|
||||
|
||||
.Linner:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[sp,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r12,[r2,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r8,[r0,r3]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r9,[sp,r14]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
eorpl r12,r12,r8
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl .Linner
|
||||
|
||||
ldr r3,[sp,#32] @ re-load r3/end
|
||||
add r2,r2,#16
|
||||
mov r14,r4
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
cmp r2,r3
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#ifdef __thumb2__
|
||||
it ne
|
||||
#endif
|
||||
ldrneb r12,[r2,#15]
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
bne .Louter
|
||||
|
||||
add sp,sp,#36
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size gcm_ghash_4bit,.-gcm_ghash_4bit
|
||||
|
||||
.globl gcm_gmult_4bit
|
||||
.hidden gcm_gmult_4bit
|
||||
.type gcm_gmult_4bit,%function
|
||||
gcm_gmult_4bit:
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
ldrb r12,[r0,#15]
|
||||
b rem_4bit_get
|
||||
.Lrem_4bit_got:
|
||||
and r14,r12,#0xf0
|
||||
and r12,r12,#0x0f
|
||||
mov r3,#14
|
||||
|
||||
add r7,r1,r12,lsl#4
|
||||
ldmia r7,{r4,r5,r6,r7} @ load Htbl[nlo]
|
||||
ldrb r12,[r0,#14]
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
add r14,r14,r14
|
||||
eor r4,r8,r4,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
and r14,r12,#0xf0
|
||||
eor r7,r7,r8,lsl#16
|
||||
and r12,r12,#0x0f
|
||||
|
||||
.Loop:
|
||||
add r11,r1,r12,lsl#4
|
||||
and r12,r4,#0xf @ rem
|
||||
subs r3,r3,#1
|
||||
add r12,r12,r12
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nlo]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
eor r5,r5,r6,lsl#28
|
||||
ldrh r8,[r2,r12] @ rem_4bit[rem]
|
||||
eor r6,r10,r6,lsr#4
|
||||
#ifdef __thumb2__
|
||||
it pl
|
||||
#endif
|
||||
ldrplb r12,[r0,r3]
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
|
||||
add r11,r1,r14
|
||||
and r14,r4,#0xf @ rem
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
add r14,r14,r14
|
||||
ldmia r11,{r8,r9,r10,r11} @ load Htbl[nhi]
|
||||
eor r4,r8,r4,lsr#4
|
||||
eor r4,r4,r5,lsl#28
|
||||
eor r5,r9,r5,lsr#4
|
||||
ldrh r8,[r2,r14] @ rem_4bit[rem]
|
||||
eor r5,r5,r6,lsl#28
|
||||
eor r6,r10,r6,lsr#4
|
||||
eor r6,r6,r7,lsl#28
|
||||
eor r7,r11,r7,lsr#4
|
||||
#ifdef __thumb2__
|
||||
itt pl
|
||||
#endif
|
||||
andpl r14,r12,#0xf0
|
||||
andpl r12,r12,#0x0f
|
||||
eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
|
||||
bpl .Loop
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r4,r4
|
||||
str r4,[r0,#12]
|
||||
#elif defined(__ARMEB__)
|
||||
str r4,[r0,#12]
|
||||
#else
|
||||
mov r9,r4,lsr#8
|
||||
strb r4,[r0,#12+3]
|
||||
mov r10,r4,lsr#16
|
||||
strb r9,[r0,#12+2]
|
||||
mov r11,r4,lsr#24
|
||||
strb r10,[r0,#12+1]
|
||||
strb r11,[r0,#12]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r5,r5
|
||||
str r5,[r0,#8]
|
||||
#elif defined(__ARMEB__)
|
||||
str r5,[r0,#8]
|
||||
#else
|
||||
mov r9,r5,lsr#8
|
||||
strb r5,[r0,#8+3]
|
||||
mov r10,r5,lsr#16
|
||||
strb r9,[r0,#8+2]
|
||||
mov r11,r5,lsr#24
|
||||
strb r10,[r0,#8+1]
|
||||
strb r11,[r0,#8]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r6,r6
|
||||
str r6,[r0,#4]
|
||||
#elif defined(__ARMEB__)
|
||||
str r6,[r0,#4]
|
||||
#else
|
||||
mov r9,r6,lsr#8
|
||||
strb r6,[r0,#4+3]
|
||||
mov r10,r6,lsr#16
|
||||
strb r9,[r0,#4+2]
|
||||
mov r11,r6,lsr#24
|
||||
strb r10,[r0,#4+1]
|
||||
strb r11,[r0,#4]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
||||
rev r7,r7
|
||||
str r7,[r0,#0]
|
||||
#elif defined(__ARMEB__)
|
||||
str r7,[r0,#0]
|
||||
#else
|
||||
mov r9,r7,lsr#8
|
||||
strb r7,[r0,#0+3]
|
||||
mov r10,r7,lsr#16
|
||||
strb r9,[r0,#0+2]
|
||||
mov r11,r7,lsr#24
|
||||
strb r10,[r0,#0+1]
|
||||
strb r11,[r0,#0]
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__>=5
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
||||
#else
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size gcm_gmult_4bit,.-gcm_gmult_4bit
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
@ -588,3 +252,4 @@ gcm_ghash_neon:
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -250,3 +250,4 @@ gcm_ghash_v8:
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -1508,3 +1508,4 @@ sha1_block_data_order_armv8:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -2836,3 +2836,4 @@ sha256_block_data_order_armv8:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -1891,3 +1891,4 @@ sha512_block_data_order_neon:
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
1236
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
1236
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -31,7 +31,6 @@
|
||||
.hidden abi_test_trampoline
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
.Labi_test_trampoline_begin:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
@ -377,3 +376,4 @@ abi_test_clobber_d15:
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
@ -3667,3 +3667,4 @@ _aesp8_xts_dec5x:
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,0,0
|
||||
#endif // !OPENSSL_NO_ASM && __powerpc64__
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -584,3 +584,4 @@ gcm_ghash_p8:
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM && __powerpc64__
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
1410
contrib/boringssl-cmake/linux-ppc64le/crypto/test/trampoline-ppc.S
Normal file
1410
contrib/boringssl-cmake/linux-ppc64le/crypto/test/trampoline-ppc.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -972,3 +972,4 @@ ChaCha20_ssse3:
|
||||
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
|
||||
.byte 114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
#endif
|
||||
.globl aes_hw_encrypt
|
||||
.hidden aes_hw_encrypt
|
||||
@ -14,7 +14,7 @@
|
||||
.align 16
|
||||
aes_hw_encrypt:
|
||||
.L_aes_hw_encrypt_begin:
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L000pic
|
||||
@ -845,7 +845,7 @@ aes_hw_ctr32_encrypt_blocks:
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L038pic
|
||||
@ -2440,7 +2440,7 @@ _aesni_set_encrypt_key:
|
||||
.align 16
|
||||
aes_hw_set_encrypt_key:
|
||||
.L_aes_hw_set_encrypt_key_begin:
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L116pic
|
||||
@ -2510,3 +2510,4 @@ aes_hw_set_decrypt_key:
|
||||
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
|
||||
.byte 115,108,46,111,114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -993,551 +993,5 @@ bn_sub_words:
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_sub_words,.-.L_bn_sub_words_begin
|
||||
.globl bn_sub_part_words
|
||||
.hidden bn_sub_part_words
|
||||
.type bn_sub_part_words,@function
|
||||
.align 16
|
||||
bn_sub_part_words:
|
||||
.L_bn_sub_part_words_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl 20(%esp),%ebx
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%edi
|
||||
movl 32(%esp),%ebp
|
||||
xorl %eax,%eax
|
||||
andl $4294967288,%ebp
|
||||
jz .L029aw_finish
|
||||
.L030aw_loop:
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
|
||||
movl 4(%esi),%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,4(%ebx)
|
||||
|
||||
movl 8(%esi),%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,8(%ebx)
|
||||
|
||||
movl 12(%esi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,12(%ebx)
|
||||
|
||||
movl 16(%esi),%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,16(%ebx)
|
||||
|
||||
movl 20(%esi),%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,20(%ebx)
|
||||
|
||||
movl 24(%esi),%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
|
||||
movl 28(%esi),%ecx
|
||||
movl 28(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,28(%ebx)
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%edi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz .L030aw_loop
|
||||
.L029aw_finish:
|
||||
movl 32(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
decl %ebp
|
||||
jz .L031aw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
addl $4,%esi
|
||||
addl $4,%edi
|
||||
addl $4,%ebx
|
||||
.L031aw_end:
|
||||
cmpl $0,36(%esp)
|
||||
je .L032pw_end
|
||||
movl 36(%esp),%ebp
|
||||
cmpl $0,%ebp
|
||||
je .L032pw_end
|
||||
jge .L033pw_pos
|
||||
|
||||
movl $0,%edx
|
||||
subl %ebp,%edx
|
||||
movl %edx,%ebp
|
||||
andl $4294967288,%ebp
|
||||
jz .L034pw_neg_finish
|
||||
.L035pw_neg_loop:
|
||||
|
||||
movl $0,%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,4(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,8(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,12(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,16(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,20(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
|
||||
movl $0,%ecx
|
||||
movl 28(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,28(%ebx)
|
||||
|
||||
addl $32,%edi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz .L035pw_neg_loop
|
||||
.L034pw_neg_finish:
|
||||
movl 36(%esp),%edx
|
||||
movl $0,%ebp
|
||||
subl %edx,%ebp
|
||||
andl $7,%ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl (%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 4(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,4(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 8(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,8(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,12(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 16(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,16(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 20(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
decl %ebp
|
||||
movl %ecx,20(%ebx)
|
||||
jz .L032pw_end
|
||||
|
||||
movl $0,%ecx
|
||||
movl 24(%edi),%edx
|
||||
subl %eax,%ecx
|
||||
movl $0,%eax
|
||||
adcl %eax,%eax
|
||||
subl %edx,%ecx
|
||||
adcl $0,%eax
|
||||
movl %ecx,24(%ebx)
|
||||
jmp .L032pw_end
|
||||
.L033pw_pos:
|
||||
andl $4294967288,%ebp
|
||||
jz .L036pw_pos_finish
|
||||
.L037pw_pos_loop:
|
||||
|
||||
movl (%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,(%ebx)
|
||||
jnc .L038pw_nc0
|
||||
|
||||
movl 4(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
jnc .L039pw_nc1
|
||||
|
||||
movl 8(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
jnc .L040pw_nc2
|
||||
|
||||
movl 12(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
jnc .L041pw_nc3
|
||||
|
||||
movl 16(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
jnc .L042pw_nc4
|
||||
|
||||
movl 20(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
jnc .L043pw_nc5
|
||||
|
||||
movl 24(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
jnc .L044pw_nc6
|
||||
|
||||
movl 28(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,28(%ebx)
|
||||
jnc .L045pw_nc7
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz .L037pw_pos_loop
|
||||
.L036pw_pos_finish:
|
||||
movl 36(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl (%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,(%ebx)
|
||||
jnc .L046pw_tail_nc0
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 4(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
jnc .L047pw_tail_nc1
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 8(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
jnc .L048pw_tail_nc2
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 12(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
jnc .L049pw_tail_nc3
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 16(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
jnc .L050pw_tail_nc4
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 20(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
jnc .L051pw_tail_nc5
|
||||
decl %ebp
|
||||
jz .L032pw_end
|
||||
|
||||
movl 24(%esi),%ecx
|
||||
subl %eax,%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
jnc .L052pw_tail_nc6
|
||||
movl $1,%eax
|
||||
jmp .L032pw_end
|
||||
.L053pw_nc_loop:
|
||||
movl (%esi),%ecx
|
||||
movl %ecx,(%ebx)
|
||||
.L038pw_nc0:
|
||||
movl 4(%esi),%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
.L039pw_nc1:
|
||||
movl 8(%esi),%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
.L040pw_nc2:
|
||||
movl 12(%esi),%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
.L041pw_nc3:
|
||||
movl 16(%esi),%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
.L042pw_nc4:
|
||||
movl 20(%esi),%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
.L043pw_nc5:
|
||||
movl 24(%esi),%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
.L044pw_nc6:
|
||||
movl 28(%esi),%ecx
|
||||
movl %ecx,28(%ebx)
|
||||
.L045pw_nc7:
|
||||
|
||||
addl $32,%esi
|
||||
addl $32,%ebx
|
||||
subl $8,%ebp
|
||||
jnz .L053pw_nc_loop
|
||||
movl 36(%esp),%ebp
|
||||
andl $7,%ebp
|
||||
jz .L054pw_nc_end
|
||||
movl (%esi),%ecx
|
||||
movl %ecx,(%ebx)
|
||||
.L046pw_tail_nc0:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 4(%esi),%ecx
|
||||
movl %ecx,4(%ebx)
|
||||
.L047pw_tail_nc1:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 8(%esi),%ecx
|
||||
movl %ecx,8(%ebx)
|
||||
.L048pw_tail_nc2:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 12(%esi),%ecx
|
||||
movl %ecx,12(%ebx)
|
||||
.L049pw_tail_nc3:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 16(%esi),%ecx
|
||||
movl %ecx,16(%ebx)
|
||||
.L050pw_tail_nc4:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 20(%esi),%ecx
|
||||
movl %ecx,20(%ebx)
|
||||
.L051pw_tail_nc5:
|
||||
decl %ebp
|
||||
jz .L054pw_nc_end
|
||||
movl 24(%esi),%ecx
|
||||
movl %ecx,24(%ebx)
|
||||
.L052pw_tail_nc6:
|
||||
.L054pw_nc_end:
|
||||
movl $0,%eax
|
||||
.L032pw_end:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -1263,3 +1263,4 @@ bn_sqr_comba4:
|
||||
ret
|
||||
.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -291,3 +291,4 @@ gcm_ghash_ssse3:
|
||||
.Llow4_mask:
|
||||
.long 252645135,252645135,252645135,252645135
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -6,711 +6,6 @@
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.globl gcm_gmult_4bit_mmx
|
||||
.hidden gcm_gmult_4bit_mmx
|
||||
.type gcm_gmult_4bit_mmx,@function
|
||||
.align 16
|
||||
gcm_gmult_4bit_mmx:
|
||||
.L_gcm_gmult_4bit_mmx_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
call .L000pic_point
|
||||
.L000pic_point:
|
||||
popl %eax
|
||||
leal .Lrem_4bit-.L000pic_point(%eax),%eax
|
||||
movzbl 15(%edi),%ebx
|
||||
xorl %ecx,%ecx
|
||||
movl %ebx,%edx
|
||||
movb %dl,%cl
|
||||
movl $14,%ebp
|
||||
shlb $4,%cl
|
||||
andl $240,%edx
|
||||
movq 8(%esi,%ecx,1),%mm0
|
||||
movq (%esi,%ecx,1),%mm1
|
||||
movd %mm0,%ebx
|
||||
jmp .L001mmx_loop
|
||||
.align 16
|
||||
.L001mmx_loop:
|
||||
psrlq $4,%mm0
|
||||
andl $15,%ebx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%edx,1),%mm0
|
||||
movb (%edi,%ebp,1),%cl
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
decl %ebp
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%edx,1),%mm1
|
||||
movl %ecx,%edx
|
||||
pxor %mm2,%mm0
|
||||
js .L002mmx_break
|
||||
shlb $4,%cl
|
||||
andl $15,%ebx
|
||||
psrlq $4,%mm0
|
||||
andl $240,%edx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%ecx,1),%mm0
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%ecx,1),%mm1
|
||||
pxor %mm2,%mm0
|
||||
jmp .L001mmx_loop
|
||||
.align 16
|
||||
.L002mmx_break:
|
||||
shlb $4,%cl
|
||||
andl $15,%ebx
|
||||
psrlq $4,%mm0
|
||||
andl $240,%edx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%ecx,1),%mm0
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%ecx,1),%mm1
|
||||
pxor %mm2,%mm0
|
||||
psrlq $4,%mm0
|
||||
andl $15,%ebx
|
||||
movq %mm1,%mm2
|
||||
psrlq $4,%mm1
|
||||
pxor 8(%esi,%edx,1),%mm0
|
||||
psllq $60,%mm2
|
||||
pxor (%eax,%ebx,8),%mm1
|
||||
movd %mm0,%ebx
|
||||
pxor (%esi,%edx,1),%mm1
|
||||
pxor %mm2,%mm0
|
||||
psrlq $32,%mm0
|
||||
movd %mm1,%edx
|
||||
psrlq $32,%mm1
|
||||
movd %mm0,%ecx
|
||||
movd %mm1,%ebp
|
||||
bswap %ebx
|
||||
bswap %edx
|
||||
bswap %ecx
|
||||
bswap %ebp
|
||||
emms
|
||||
movl %ebx,12(%edi)
|
||||
movl %edx,4(%edi)
|
||||
movl %ecx,8(%edi)
|
||||
movl %ebp,(%edi)
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
|
||||
.globl gcm_ghash_4bit_mmx
|
||||
.hidden gcm_ghash_4bit_mmx
|
||||
.type gcm_ghash_4bit_mmx,@function
|
||||
.align 16
|
||||
gcm_ghash_4bit_mmx:
|
||||
.L_gcm_ghash_4bit_mmx_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl 20(%esp),%eax
|
||||
movl 24(%esp),%ebx
|
||||
movl 28(%esp),%ecx
|
||||
movl 32(%esp),%edx
|
||||
movl %esp,%ebp
|
||||
call .L003pic_point
|
||||
.L003pic_point:
|
||||
popl %esi
|
||||
leal .Lrem_8bit-.L003pic_point(%esi),%esi
|
||||
subl $544,%esp
|
||||
andl $-64,%esp
|
||||
subl $16,%esp
|
||||
addl %ecx,%edx
|
||||
movl %eax,544(%esp)
|
||||
movl %edx,552(%esp)
|
||||
movl %ebp,556(%esp)
|
||||
addl $128,%ebx
|
||||
leal 144(%esp),%edi
|
||||
leal 400(%esp),%ebp
|
||||
movl -120(%ebx),%edx
|
||||
movq -120(%ebx),%mm0
|
||||
movq -128(%ebx),%mm3
|
||||
shll $4,%edx
|
||||
movb %dl,(%esp)
|
||||
movl -104(%ebx),%edx
|
||||
movq -104(%ebx),%mm2
|
||||
movq -112(%ebx),%mm5
|
||||
movq %mm0,-128(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,(%edi)
|
||||
movq %mm3,%mm7
|
||||
psrlq $4,%mm3
|
||||
shll $4,%edx
|
||||
movb %dl,1(%esp)
|
||||
movl -88(%ebx),%edx
|
||||
movq -88(%ebx),%mm1
|
||||
psllq $60,%mm7
|
||||
movq -96(%ebx),%mm4
|
||||
por %mm7,%mm0
|
||||
movq %mm2,-120(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,8(%edi)
|
||||
movq %mm5,%mm6
|
||||
movq %mm0,-128(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,2(%esp)
|
||||
movl -72(%ebx),%edx
|
||||
movq -72(%ebx),%mm0
|
||||
psllq $60,%mm6
|
||||
movq -80(%ebx),%mm3
|
||||
por %mm6,%mm2
|
||||
movq %mm1,-112(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,16(%edi)
|
||||
movq %mm4,%mm7
|
||||
movq %mm2,-120(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,8(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,3(%esp)
|
||||
movl -56(%ebx),%edx
|
||||
movq -56(%ebx),%mm2
|
||||
psllq $60,%mm7
|
||||
movq -64(%ebx),%mm5
|
||||
por %mm7,%mm1
|
||||
movq %mm0,-104(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,24(%edi)
|
||||
movq %mm3,%mm6
|
||||
movq %mm1,-112(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,16(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,4(%esp)
|
||||
movl -40(%ebx),%edx
|
||||
movq -40(%ebx),%mm1
|
||||
psllq $60,%mm6
|
||||
movq -48(%ebx),%mm4
|
||||
por %mm6,%mm0
|
||||
movq %mm2,-96(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,32(%edi)
|
||||
movq %mm5,%mm7
|
||||
movq %mm0,-104(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,24(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,5(%esp)
|
||||
movl -24(%ebx),%edx
|
||||
movq -24(%ebx),%mm0
|
||||
psllq $60,%mm7
|
||||
movq -32(%ebx),%mm3
|
||||
por %mm7,%mm2
|
||||
movq %mm1,-88(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,40(%edi)
|
||||
movq %mm4,%mm6
|
||||
movq %mm2,-96(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,32(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,6(%esp)
|
||||
movl -8(%ebx),%edx
|
||||
movq -8(%ebx),%mm2
|
||||
psllq $60,%mm6
|
||||
movq -16(%ebx),%mm5
|
||||
por %mm6,%mm1
|
||||
movq %mm0,-80(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,48(%edi)
|
||||
movq %mm3,%mm7
|
||||
movq %mm1,-88(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,40(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,7(%esp)
|
||||
movl 8(%ebx),%edx
|
||||
movq 8(%ebx),%mm1
|
||||
psllq $60,%mm7
|
||||
movq (%ebx),%mm4
|
||||
por %mm7,%mm0
|
||||
movq %mm2,-72(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,56(%edi)
|
||||
movq %mm5,%mm6
|
||||
movq %mm0,-80(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,48(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,8(%esp)
|
||||
movl 24(%ebx),%edx
|
||||
movq 24(%ebx),%mm0
|
||||
psllq $60,%mm6
|
||||
movq 16(%ebx),%mm3
|
||||
por %mm6,%mm2
|
||||
movq %mm1,-64(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,64(%edi)
|
||||
movq %mm4,%mm7
|
||||
movq %mm2,-72(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,56(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,9(%esp)
|
||||
movl 40(%ebx),%edx
|
||||
movq 40(%ebx),%mm2
|
||||
psllq $60,%mm7
|
||||
movq 32(%ebx),%mm5
|
||||
por %mm7,%mm1
|
||||
movq %mm0,-56(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,72(%edi)
|
||||
movq %mm3,%mm6
|
||||
movq %mm1,-64(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,64(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,10(%esp)
|
||||
movl 56(%ebx),%edx
|
||||
movq 56(%ebx),%mm1
|
||||
psllq $60,%mm6
|
||||
movq 48(%ebx),%mm4
|
||||
por %mm6,%mm0
|
||||
movq %mm2,-48(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,80(%edi)
|
||||
movq %mm5,%mm7
|
||||
movq %mm0,-56(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,72(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,11(%esp)
|
||||
movl 72(%ebx),%edx
|
||||
movq 72(%ebx),%mm0
|
||||
psllq $60,%mm7
|
||||
movq 64(%ebx),%mm3
|
||||
por %mm7,%mm2
|
||||
movq %mm1,-40(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,88(%edi)
|
||||
movq %mm4,%mm6
|
||||
movq %mm2,-48(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,80(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,12(%esp)
|
||||
movl 88(%ebx),%edx
|
||||
movq 88(%ebx),%mm2
|
||||
psllq $60,%mm6
|
||||
movq 80(%ebx),%mm5
|
||||
por %mm6,%mm1
|
||||
movq %mm0,-32(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,96(%edi)
|
||||
movq %mm3,%mm7
|
||||
movq %mm1,-40(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,88(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,13(%esp)
|
||||
movl 104(%ebx),%edx
|
||||
movq 104(%ebx),%mm1
|
||||
psllq $60,%mm7
|
||||
movq 96(%ebx),%mm4
|
||||
por %mm7,%mm0
|
||||
movq %mm2,-24(%edi)
|
||||
psrlq $4,%mm2
|
||||
movq %mm5,104(%edi)
|
||||
movq %mm5,%mm6
|
||||
movq %mm0,-32(%ebp)
|
||||
psrlq $4,%mm5
|
||||
movq %mm3,96(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,14(%esp)
|
||||
movl 120(%ebx),%edx
|
||||
movq 120(%ebx),%mm0
|
||||
psllq $60,%mm6
|
||||
movq 112(%ebx),%mm3
|
||||
por %mm6,%mm2
|
||||
movq %mm1,-16(%edi)
|
||||
psrlq $4,%mm1
|
||||
movq %mm4,112(%edi)
|
||||
movq %mm4,%mm7
|
||||
movq %mm2,-24(%ebp)
|
||||
psrlq $4,%mm4
|
||||
movq %mm5,104(%ebp)
|
||||
shll $4,%edx
|
||||
movb %dl,15(%esp)
|
||||
psllq $60,%mm7
|
||||
por %mm7,%mm1
|
||||
movq %mm0,-8(%edi)
|
||||
psrlq $4,%mm0
|
||||
movq %mm3,120(%edi)
|
||||
movq %mm3,%mm6
|
||||
movq %mm1,-16(%ebp)
|
||||
psrlq $4,%mm3
|
||||
movq %mm4,112(%ebp)
|
||||
psllq $60,%mm6
|
||||
por %mm6,%mm0
|
||||
movq %mm0,-8(%ebp)
|
||||
movq %mm3,120(%ebp)
|
||||
movq (%eax),%mm6
|
||||
movl 8(%eax),%ebx
|
||||
movl 12(%eax),%edx
|
||||
.align 16
|
||||
.L004outer:
|
||||
xorl 12(%ecx),%edx
|
||||
xorl 8(%ecx),%ebx
|
||||
pxor (%ecx),%mm6
|
||||
leal 16(%ecx),%ecx
|
||||
movl %ebx,536(%esp)
|
||||
movq %mm6,528(%esp)
|
||||
movl %ecx,548(%esp)
|
||||
xorl %eax,%eax
|
||||
roll $8,%edx
|
||||
movb %dl,%al
|
||||
movl %eax,%ebp
|
||||
andb $15,%al
|
||||
shrl $4,%ebp
|
||||
pxor %mm0,%mm0
|
||||
roll $8,%edx
|
||||
pxor %mm1,%mm1
|
||||
pxor %mm2,%mm2
|
||||
movq 16(%esp,%eax,8),%mm7
|
||||
movq 144(%esp,%eax,8),%mm6
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
shrl $4,%edi
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 536(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 532(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 528(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm1,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm0
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
movb %dl,%al
|
||||
movd %mm7,%ecx
|
||||
movzbl %bl,%ebx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%ebp
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%edi,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm0,%mm6
|
||||
shrl $4,%ebp
|
||||
pinsrw $2,(%esi,%ebx,2),%mm2
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
roll $8,%edx
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%edi,8),%mm6
|
||||
xorb (%esp,%edi,1),%cl
|
||||
movb %dl,%al
|
||||
movl 524(%esp),%edx
|
||||
movd %mm7,%ebx
|
||||
movzbl %cl,%ecx
|
||||
psrlq $8,%mm7
|
||||
movq %mm6,%mm3
|
||||
movl %eax,%edi
|
||||
psrlq $8,%mm6
|
||||
pxor 272(%esp,%ebp,8),%mm7
|
||||
andb $15,%al
|
||||
psllq $56,%mm3
|
||||
pxor %mm2,%mm6
|
||||
shrl $4,%edi
|
||||
pinsrw $2,(%esi,%ecx,2),%mm1
|
||||
pxor 16(%esp,%eax,8),%mm7
|
||||
pxor 144(%esp,%eax,8),%mm6
|
||||
xorb (%esp,%ebp,1),%bl
|
||||
pxor %mm3,%mm7
|
||||
pxor 400(%esp,%ebp,8),%mm6
|
||||
movzbl %bl,%ebx
|
||||
pxor %mm2,%mm2
|
||||
psllq $4,%mm1
|
||||
movd %mm7,%ecx
|
||||
psrlq $4,%mm7
|
||||
movq %mm6,%mm3
|
||||
psrlq $4,%mm6
|
||||
shll $4,%ecx
|
||||
pxor 16(%esp,%edi,8),%mm7
|
||||
psllq $60,%mm3
|
||||
movzbl %cl,%ecx
|
||||
pxor %mm3,%mm7
|
||||
pxor 144(%esp,%edi,8),%mm6
|
||||
pinsrw $2,(%esi,%ebx,2),%mm0
|
||||
pxor %mm1,%mm6
|
||||
movd %mm7,%edx
|
||||
pinsrw $3,(%esi,%ecx,2),%mm2
|
||||
psllq $12,%mm0
|
||||
pxor %mm0,%mm6
|
||||
psrlq $32,%mm7
|
||||
pxor %mm2,%mm6
|
||||
movl 548(%esp),%ecx
|
||||
movd %mm7,%ebx
|
||||
movq %mm6,%mm3
|
||||
psllw $8,%mm6
|
||||
psrlw $8,%mm3
|
||||
por %mm3,%mm6
|
||||
bswap %edx
|
||||
pshufw $27,%mm6,%mm6
|
||||
bswap %ebx
|
||||
cmpl 552(%esp),%ecx
|
||||
jne .L004outer
|
||||
movl 544(%esp),%eax
|
||||
movl %edx,12(%eax)
|
||||
movl %ebx,8(%eax)
|
||||
movq %mm6,(%eax)
|
||||
movl 556(%esp),%esp
|
||||
emms
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
|
||||
.globl gcm_init_clmul
|
||||
.hidden gcm_init_clmul
|
||||
.type gcm_init_clmul,@function
|
||||
@ -719,10 +14,10 @@ gcm_init_clmul:
|
||||
.L_gcm_init_clmul_begin:
|
||||
movl 4(%esp),%edx
|
||||
movl 8(%esp),%eax
|
||||
call .L005pic
|
||||
.L005pic:
|
||||
call .L000pic
|
||||
.L000pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L005pic(%ecx),%ecx
|
||||
leal .Lbswap-.L000pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm2
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $255,%xmm2,%xmm4
|
||||
@ -789,10 +84,10 @@ gcm_gmult_clmul:
|
||||
.L_gcm_gmult_clmul_begin:
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
call .L006pic
|
||||
.L006pic:
|
||||
call .L001pic
|
||||
.L001pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L006pic(%ecx),%ecx
|
||||
leal .Lbswap-.L001pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
movups (%edx),%xmm2
|
||||
@ -849,16 +144,16 @@ gcm_ghash_clmul:
|
||||
movl 24(%esp),%edx
|
||||
movl 28(%esp),%esi
|
||||
movl 32(%esp),%ebx
|
||||
call .L007pic
|
||||
.L007pic:
|
||||
call .L002pic
|
||||
.L002pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L007pic(%ecx),%ecx
|
||||
leal .Lbswap-.L002pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
movdqa (%ecx),%xmm5
|
||||
movdqu (%edx),%xmm2
|
||||
.byte 102,15,56,0,197
|
||||
subl $16,%ebx
|
||||
jz .L008odd_tail
|
||||
jz .L003odd_tail
|
||||
movdqu (%esi),%xmm3
|
||||
movdqu 16(%esi),%xmm6
|
||||
.byte 102,15,56,0,221
|
||||
@ -875,10 +170,10 @@ gcm_ghash_clmul:
|
||||
movups 16(%edx),%xmm2
|
||||
nop
|
||||
subl $32,%ebx
|
||||
jbe .L009even_tail
|
||||
jmp .L010mod_loop
|
||||
jbe .L004even_tail
|
||||
jmp .L005mod_loop
|
||||
.align 32
|
||||
.L010mod_loop:
|
||||
.L005mod_loop:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
@ -933,8 +228,8 @@ gcm_ghash_clmul:
|
||||
.byte 102,15,58,68,221,0
|
||||
leal 32(%esi),%esi
|
||||
subl $32,%ebx
|
||||
ja .L010mod_loop
|
||||
.L009even_tail:
|
||||
ja .L005mod_loop
|
||||
.L004even_tail:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
@ -973,9 +268,9 @@ gcm_ghash_clmul:
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
testl %ebx,%ebx
|
||||
jnz .L011done
|
||||
jnz .L006done
|
||||
movups (%edx),%xmm2
|
||||
.L008odd_tail:
|
||||
.L003odd_tail:
|
||||
movdqu (%esi),%xmm3
|
||||
.byte 102,15,56,0,221
|
||||
pxor %xmm3,%xmm0
|
||||
@ -1014,7 +309,7 @@ gcm_ghash_clmul:
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
.L011done:
|
||||
.L006done:
|
||||
.byte 102,15,56,0,197
|
||||
movdqu %xmm0,(%eax)
|
||||
popl %edi
|
||||
@ -1027,48 +322,9 @@ gcm_ghash_clmul:
|
||||
.Lbswap:
|
||||
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
|
||||
.align 64
|
||||
.Lrem_8bit:
|
||||
.value 0,450,900,582,1800,1738,1164,1358
|
||||
.value 3600,4050,3476,3158,2328,2266,2716,2910
|
||||
.value 7200,7650,8100,7782,6952,6890,6316,6510
|
||||
.value 4656,5106,4532,4214,5432,5370,5820,6014
|
||||
.value 14400,14722,15300,14854,16200,16010,15564,15630
|
||||
.value 13904,14226,13780,13334,12632,12442,13020,13086
|
||||
.value 9312,9634,10212,9766,9064,8874,8428,8494
|
||||
.value 10864,11186,10740,10294,11640,11450,12028,12094
|
||||
.value 28800,28994,29444,29382,30600,30282,29708,30158
|
||||
.value 32400,32594,32020,31958,31128,30810,31260,31710
|
||||
.value 27808,28002,28452,28390,27560,27242,26668,27118
|
||||
.value 25264,25458,24884,24822,26040,25722,26172,26622
|
||||
.value 18624,18690,19268,19078,20424,19978,19532,19854
|
||||
.value 18128,18194,17748,17558,16856,16410,16988,17310
|
||||
.value 21728,21794,22372,22182,21480,21034,20588,20910
|
||||
.value 23280,23346,22900,22710,24056,23610,24188,24510
|
||||
.value 57600,57538,57988,58182,58888,59338,58764,58446
|
||||
.value 61200,61138,60564,60758,59416,59866,60316,59998
|
||||
.value 64800,64738,65188,65382,64040,64490,63916,63598
|
||||
.value 62256,62194,61620,61814,62520,62970,63420,63102
|
||||
.value 55616,55426,56004,56070,56904,57226,56780,56334
|
||||
.value 55120,54930,54484,54550,53336,53658,54236,53790
|
||||
.value 50528,50338,50916,50982,49768,50090,49644,49198
|
||||
.value 52080,51890,51444,51510,52344,52666,53244,52798
|
||||
.value 37248,36930,37380,37830,38536,38730,38156,38094
|
||||
.value 40848,40530,39956,40406,39064,39258,39708,39646
|
||||
.value 36256,35938,36388,36838,35496,35690,35116,35054
|
||||
.value 33712,33394,32820,33270,33976,34170,34620,34558
|
||||
.value 43456,43010,43588,43910,44744,44810,44364,44174
|
||||
.value 42960,42514,42068,42390,41176,41242,41820,41630
|
||||
.value 46560,46114,46692,47014,45800,45866,45420,45230
|
||||
.value 48112,47666,47220,47542,48376,48442,49020,48830
|
||||
.align 64
|
||||
.Lrem_4bit:
|
||||
.long 0,0,0,471859200,0,943718400,0,610271232
|
||||
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
|
||||
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
|
||||
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
|
||||
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
|
||||
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
|
||||
.byte 0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -685,3 +685,4 @@ md5_block_asm_data_order:
|
||||
ret
|
||||
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -3805,3 +3805,4 @@ _sha1_block_data_order_avx:
|
||||
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
|
||||
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -5564,3 +5564,4 @@ sha256_block_data_order:
|
||||
ret
|
||||
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -2834,3 +2834,4 @@ sha512_block_data_order:
|
||||
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
|
||||
.byte 62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
#endif
|
||||
.align 64
|
||||
.L_vpaes_consts:
|
||||
@ -485,7 +485,7 @@ vpaes_set_encrypt_key:
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L016pic
|
||||
@ -570,7 +570,7 @@ vpaes_encrypt:
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
#ifndef NDEBUG
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
pushl %ebx
|
||||
pushl %edx
|
||||
call .L019pic
|
||||
@ -705,3 +705,4 @@ vpaes_cbc_encrypt:
|
||||
ret
|
||||
.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -481,3 +481,4 @@ bn_mul_mont:
|
||||
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
|
||||
.byte 111,114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -203,3 +203,4 @@ abi_test_clobber_xmm7:
|
||||
ret
|
||||
.size abi_test_clobber_xmm7,.-.L_abi_test_clobber_xmm7_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -1630,3 +1630,4 @@ ChaCha20_8x:
|
||||
.cfi_endproc
|
||||
.size ChaCha20_8x,.-ChaCha20_8x
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -3076,3 +3076,4 @@ aes256gcmsiv_kdf:
|
||||
.cfi_endproc
|
||||
.size aes256gcmsiv_kdf, .-aes256gcmsiv_kdf
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -3935,7 +3935,7 @@ do_length_block:
|
||||
popq %rbp
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_adjust_cfa_offset (8 * 6) + 288 + 32
|
||||
.cfi_adjust_cfa_offset (8 * 7) + 288 + 32
|
||||
|
||||
seal_sse_128:
|
||||
movdqu .chacha20_consts(%rip),%xmm0
|
||||
@ -8984,3 +8984,4 @@ seal_avx2_short_tail:
|
||||
jmp seal_sse_tail_16
|
||||
.cfi_endproc
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -556,12 +556,10 @@ _aesni_ctr32_6x:
|
||||
.align 32
|
||||
aesni_gcm_encrypt:
|
||||
.cfi_startproc
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
.extern BORINGSSL_function_hit
|
||||
.hidden BORINGSSL_function_hit
|
||||
movb $1,BORINGSSL_function_hit+2(%rip)
|
||||
#endif
|
||||
#endif
|
||||
xorq %r10,%r10
|
||||
|
||||
@ -851,3 +849,4 @@ aesni_gcm_encrypt:
|
||||
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -20,12 +20,10 @@
|
||||
.align 16
|
||||
aes_hw_encrypt:
|
||||
.cfi_startproc
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
.extern BORINGSSL_function_hit
|
||||
.hidden BORINGSSL_function_hit
|
||||
movb $1,BORINGSSL_function_hit+1(%rip)
|
||||
#endif
|
||||
#endif
|
||||
movups (%rdi),%xmm2
|
||||
movl 240(%rdx),%eax
|
||||
@ -887,10 +885,8 @@ aes_hw_ecb_encrypt:
|
||||
.align 16
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
.cfi_startproc
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
movb $1,BORINGSSL_function_hit(%rip)
|
||||
#endif
|
||||
#endif
|
||||
cmpq $1,%rdx
|
||||
jne .Lctr32_bulk
|
||||
@ -2111,11 +2107,9 @@ aes_hw_set_decrypt_key:
|
||||
aes_hw_set_encrypt_key:
|
||||
__aesni_set_encrypt_key:
|
||||
.cfi_startproc
|
||||
#ifndef NDEBUG
|
||||
#ifndef BORINGSSL_FIPS
|
||||
#ifdef BORINGSSL_DISPATCH_TEST
|
||||
movb $1,BORINGSSL_function_hit+3(%rip)
|
||||
#endif
|
||||
#endif
|
||||
.byte 0x48,0x83,0xEC,0x08
|
||||
.cfi_adjust_cfa_offset 8
|
||||
movq $-1,%rax
|
||||
@ -2509,3 +2503,4 @@ __aesni_set_encrypt_key:
|
||||
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -424,3 +424,4 @@ gcm_ghash_ssse3:
|
||||
.Llow4_mask:
|
||||
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -14,709 +14,6 @@
|
||||
.text
|
||||
.extern OPENSSL_ia32cap_P
|
||||
.hidden OPENSSL_ia32cap_P
|
||||
|
||||
.globl gcm_gmult_4bit
|
||||
.hidden gcm_gmult_4bit
|
||||
.type gcm_gmult_4bit,@function
|
||||
.align 16
|
||||
gcm_gmult_4bit:
|
||||
.cfi_startproc
|
||||
pushq %rbx
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r15,-56
|
||||
subq $280,%rsp
|
||||
.cfi_adjust_cfa_offset 280
|
||||
.Lgmult_prologue:
|
||||
|
||||
movzbq 15(%rdi),%r8
|
||||
leaq .Lrem_4bit(%rip),%r11
|
||||
xorq %rax,%rax
|
||||
xorq %rbx,%rbx
|
||||
movb %r8b,%al
|
||||
movb %r8b,%bl
|
||||
shlb $4,%al
|
||||
movq $14,%rcx
|
||||
movq 8(%rsi,%rax,1),%r8
|
||||
movq (%rsi,%rax,1),%r9
|
||||
andb $0xf0,%bl
|
||||
movq %r8,%rdx
|
||||
jmp .Loop1
|
||||
|
||||
.align 16
|
||||
.Loop1:
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
movb (%rdi,%rcx,1),%al
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rbx,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rbx,1),%r9
|
||||
movb %al,%bl
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
movq %r8,%rdx
|
||||
shlb $4,%al
|
||||
xorq %r10,%r8
|
||||
decq %rcx
|
||||
js .Lbreak1
|
||||
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
andb $0xf0,%bl
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
movq %r8,%rdx
|
||||
xorq %r10,%r8
|
||||
jmp .Loop1
|
||||
|
||||
.align 16
|
||||
.Lbreak1:
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
andb $0xf0,%bl
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
movq %r8,%rdx
|
||||
xorq %r10,%r8
|
||||
|
||||
shrq $4,%r8
|
||||
andq $0xf,%rdx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rbx,1),%r8
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rbx,1),%r9
|
||||
xorq %r10,%r8
|
||||
xorq (%r11,%rdx,8),%r9
|
||||
|
||||
bswapq %r8
|
||||
bswapq %r9
|
||||
movq %r8,8(%rdi)
|
||||
movq %r9,(%rdi)
|
||||
|
||||
leaq 280+48(%rsp),%rsi
|
||||
.cfi_def_cfa %rsi,8
|
||||
movq -8(%rsi),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq (%rsi),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lgmult_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size gcm_gmult_4bit,.-gcm_gmult_4bit
|
||||
.globl gcm_ghash_4bit
|
||||
.hidden gcm_ghash_4bit
|
||||
.type gcm_ghash_4bit,@function
|
||||
.align 16
|
||||
gcm_ghash_4bit:
|
||||
.cfi_startproc
|
||||
pushq %rbx
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbx,-16
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbp,-24
|
||||
pushq %r12
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r12,-32
|
||||
pushq %r13
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r13,-40
|
||||
pushq %r14
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r14,-48
|
||||
pushq %r15
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %r15,-56
|
||||
subq $280,%rsp
|
||||
.cfi_adjust_cfa_offset 280
|
||||
.Lghash_prologue:
|
||||
movq %rdx,%r14
|
||||
movq %rcx,%r15
|
||||
subq $-128,%rsi
|
||||
leaq 16+128(%rsp),%rbp
|
||||
xorl %edx,%edx
|
||||
movq 0+0-128(%rsi),%r8
|
||||
movq 0+8-128(%rsi),%rax
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq 16+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq 16+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,0(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,0(%rbp)
|
||||
movq 32+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,0-128(%rbp)
|
||||
movq 32+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,1(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,8(%rbp)
|
||||
movq 48+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,8-128(%rbp)
|
||||
movq 48+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,2(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,16(%rbp)
|
||||
movq 64+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,16-128(%rbp)
|
||||
movq 64+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,3(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,24(%rbp)
|
||||
movq 80+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,24-128(%rbp)
|
||||
movq 80+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,4(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,32(%rbp)
|
||||
movq 96+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,32-128(%rbp)
|
||||
movq 96+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,5(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,40(%rbp)
|
||||
movq 112+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,40-128(%rbp)
|
||||
movq 112+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,6(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,48(%rbp)
|
||||
movq 128+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,48-128(%rbp)
|
||||
movq 128+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,7(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,56(%rbp)
|
||||
movq 144+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,56-128(%rbp)
|
||||
movq 144+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,8(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,64(%rbp)
|
||||
movq 160+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,64-128(%rbp)
|
||||
movq 160+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,9(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,72(%rbp)
|
||||
movq 176+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,72-128(%rbp)
|
||||
movq 176+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,10(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,80(%rbp)
|
||||
movq 192+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,80-128(%rbp)
|
||||
movq 192+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,11(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,88(%rbp)
|
||||
movq 208+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,88-128(%rbp)
|
||||
movq 208+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,12(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,96(%rbp)
|
||||
movq 224+0-128(%rsi),%r8
|
||||
shlb $4,%dl
|
||||
movq %rax,96-128(%rbp)
|
||||
movq 224+8-128(%rsi),%rax
|
||||
shlq $60,%r10
|
||||
movb %dl,13(%rsp)
|
||||
orq %r10,%rbx
|
||||
movb %al,%dl
|
||||
shrq $4,%rax
|
||||
movq %r8,%r10
|
||||
shrq $4,%r8
|
||||
movq %r9,104(%rbp)
|
||||
movq 240+0-128(%rsi),%r9
|
||||
shlb $4,%dl
|
||||
movq %rbx,104-128(%rbp)
|
||||
movq 240+8-128(%rsi),%rbx
|
||||
shlq $60,%r10
|
||||
movb %dl,14(%rsp)
|
||||
orq %r10,%rax
|
||||
movb %bl,%dl
|
||||
shrq $4,%rbx
|
||||
movq %r9,%r10
|
||||
shrq $4,%r9
|
||||
movq %r8,112(%rbp)
|
||||
shlb $4,%dl
|
||||
movq %rax,112-128(%rbp)
|
||||
shlq $60,%r10
|
||||
movb %dl,15(%rsp)
|
||||
orq %r10,%rbx
|
||||
movq %r9,120(%rbp)
|
||||
movq %rbx,120-128(%rbp)
|
||||
addq $-128,%rsi
|
||||
movq 8(%rdi),%r8
|
||||
movq 0(%rdi),%r9
|
||||
addq %r14,%r15
|
||||
leaq .Lrem_8bit(%rip),%r11
|
||||
jmp .Louter_loop
|
||||
.align 16
|
||||
.Louter_loop:
|
||||
xorq (%r14),%r9
|
||||
movq 8(%r14),%rdx
|
||||
leaq 16(%r14),%r14
|
||||
xorq %r8,%rdx
|
||||
movq %r9,(%rdi)
|
||||
movq %rdx,8(%rdi)
|
||||
shrq $32,%rdx
|
||||
xorq %rax,%rax
|
||||
roll $8,%edx
|
||||
movb %dl,%al
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
shrl $4,%ebx
|
||||
roll $8,%edx
|
||||
movq 8(%rsi,%rax,1),%r8
|
||||
movq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl 8(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl 4(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl 0(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
shrl $4,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
movzbl %dl,%ebx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rcx,1),%r13
|
||||
shrl $4,%ebx
|
||||
shlq $48,%r12
|
||||
xorq %r8,%r13
|
||||
movq %r9,%r10
|
||||
xorq %r12,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r13b,%r13
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rcx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rcx,8),%r9
|
||||
roll $8,%edx
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
movb %dl,%al
|
||||
xorq %r10,%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
movzbl %dl,%ecx
|
||||
shlb $4,%al
|
||||
movzbq (%rsp,%rbx,1),%r12
|
||||
andl $240,%ecx
|
||||
shlq $48,%r13
|
||||
xorq %r8,%r12
|
||||
movq %r9,%r10
|
||||
xorq %r13,%r9
|
||||
shrq $8,%r8
|
||||
movzbq %r12b,%r12
|
||||
movl -4(%rdi),%edx
|
||||
shrq $8,%r9
|
||||
xorq -128(%rbp,%rbx,8),%r8
|
||||
shlq $56,%r10
|
||||
xorq (%rbp,%rbx,8),%r9
|
||||
movzwq (%r11,%r12,2),%r12
|
||||
xorq 8(%rsi,%rax,1),%r8
|
||||
xorq (%rsi,%rax,1),%r9
|
||||
shlq $48,%r12
|
||||
xorq %r10,%r8
|
||||
xorq %r12,%r9
|
||||
movzbq %r8b,%r13
|
||||
shrq $4,%r8
|
||||
movq %r9,%r10
|
||||
shlb $4,%r13b
|
||||
shrq $4,%r9
|
||||
xorq 8(%rsi,%rcx,1),%r8
|
||||
movzwq (%r11,%r13,2),%r13
|
||||
shlq $60,%r10
|
||||
xorq (%rsi,%rcx,1),%r9
|
||||
xorq %r10,%r8
|
||||
shlq $48,%r13
|
||||
bswapq %r8
|
||||
xorq %r13,%r9
|
||||
bswapq %r9
|
||||
cmpq %r15,%r14
|
||||
jb .Louter_loop
|
||||
movq %r8,8(%rdi)
|
||||
movq %r9,(%rdi)
|
||||
|
||||
leaq 280+48(%rsp),%rsi
|
||||
.cfi_def_cfa %rsi,8
|
||||
movq -48(%rsi),%r15
|
||||
.cfi_restore %r15
|
||||
movq -40(%rsi),%r14
|
||||
.cfi_restore %r14
|
||||
movq -32(%rsi),%r13
|
||||
.cfi_restore %r13
|
||||
movq -24(%rsi),%r12
|
||||
.cfi_restore %r12
|
||||
movq -16(%rsi),%rbp
|
||||
.cfi_restore %rbp
|
||||
movq -8(%rsi),%rbx
|
||||
.cfi_restore %rbx
|
||||
leaq 0(%rsi),%rsp
|
||||
.cfi_def_cfa_register %rsp
|
||||
.Lghash_epilogue:
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size gcm_ghash_4bit,.-gcm_ghash_4bit
|
||||
.globl gcm_init_clmul
|
||||
.hidden gcm_init_clmul
|
||||
.type gcm_init_clmul,@function
|
||||
@ -1822,50 +1119,9 @@ gcm_ghash_avx:
|
||||
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
|
||||
.L7_mask:
|
||||
.long 7,0,7,0
|
||||
.L7_mask_poly:
|
||||
.long 7,0,450,0
|
||||
.align 64
|
||||
.type .Lrem_4bit,@object
|
||||
.Lrem_4bit:
|
||||
.long 0,0,0,471859200,0,943718400,0,610271232
|
||||
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
|
||||
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
|
||||
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
|
||||
.type .Lrem_8bit,@object
|
||||
.Lrem_8bit:
|
||||
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
|
||||
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
|
||||
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
|
||||
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
|
||||
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
|
||||
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
|
||||
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
|
||||
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
|
||||
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
|
||||
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
|
||||
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
|
||||
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
|
||||
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
|
||||
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
|
||||
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
|
||||
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
|
||||
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
|
||||
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
|
||||
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
|
||||
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
|
||||
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
|
||||
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
|
||||
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
|
||||
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
|
||||
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
|
||||
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
|
||||
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
|
||||
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
|
||||
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
|
||||
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
|
||||
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
|
||||
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 64
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -699,3 +699,4 @@ md5_block_asm_data_order:
|
||||
.cfi_endproc
|
||||
.size md5_block_asm_data_order,.-md5_block_asm_data_order
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -4540,3 +4540,4 @@ ecp_nistz256_point_add_affinex:
|
||||
.cfi_endproc
|
||||
.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -340,3 +340,4 @@ beeu_mod_inverse_vartime:
|
||||
|
||||
.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -60,3 +60,4 @@ CRYPTO_rdrand_multiple8_buf:
|
||||
.cfi_endproc
|
||||
.size CRYPTO_rdrand_multiple8_buf,.-CRYPTO_rdrand_multiple8_buf
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
@ -1746,3 +1746,4 @@ rsaz_1024_gather5_avx2:
|
||||
.long 4,4,4,4, 4,4,4,4
|
||||
.align 64
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user