mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge branch 'master' into FawnD2-switch-upstream-for-arrow-submodule
This commit is contained in:
commit
bf2df558d4
9
.github/workflows/codeql-analysis.yml
vendored
9
.github/workflows/codeql-analysis.yml
vendored
@ -1,3 +1,5 @@
|
||||
# See the example here: https://github.com/github/codeql-action
|
||||
|
||||
name: "CodeQL Scanning"
|
||||
|
||||
on:
|
||||
@ -16,17 +18,14 @@ jobs:
|
||||
fetch-depth: 2
|
||||
submodules: 'recursive'
|
||||
|
||||
- run: git checkout HEAD^2
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v1
|
||||
|
||||
with:
|
||||
languages: cpp
|
||||
|
||||
- run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-9 g++-9 && mkdir build
|
||||
- run: cd build && CC=gcc-9 CXX=g++-9 cmake ..
|
||||
- run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-10 g++-10 && mkdir build
|
||||
- run: cd build && CC=gcc-10 CXX=g++-10 cmake ..
|
||||
- run: cd build && ninja
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
|
11
.gitignore
vendored
11
.gitignore
vendored
@ -125,4 +125,15 @@ website/package-lock.json
|
||||
# Toolchains
|
||||
/cmake/toolchain/*
|
||||
|
||||
# ANTLR extension cache
|
||||
.antlr
|
||||
|
||||
# ANTLR generated files
|
||||
/src/Parsers/New/*.interp
|
||||
/src/Parsers/New/*.tokens
|
||||
/src/Parsers/New/ClickHouseParserBaseVisitor.*
|
||||
|
||||
# pytest-profiling
|
||||
/prof
|
||||
|
||||
*.iml
|
||||
|
25
.gitmodules
vendored
25
.gitmodules
vendored
@ -125,9 +125,6 @@
|
||||
[submodule "contrib/curl"]
|
||||
path = contrib/curl
|
||||
url = https://github.com/curl/curl.git
|
||||
[submodule "contrib/openssl"]
|
||||
path = contrib/openssl
|
||||
url = https://github.com/ClickHouse-Extras/openssl.git
|
||||
[submodule "contrib/icudata"]
|
||||
path = contrib/icudata
|
||||
url = https://github.com/ClickHouse-Extras/icudata.git
|
||||
@ -143,9 +140,6 @@
|
||||
[submodule "contrib/replxx"]
|
||||
path = contrib/replxx
|
||||
url = https://github.com/ClickHouse-Extras/replxx.git
|
||||
[submodule "contrib/ryu"]
|
||||
path = contrib/ryu
|
||||
url = https://github.com/ClickHouse-Extras/ryu.git
|
||||
[submodule "contrib/avro"]
|
||||
path = contrib/avro
|
||||
url = https://github.com/ClickHouse-Extras/avro.git
|
||||
@ -158,7 +152,7 @@
|
||||
url = https://github.com/ClickHouse-Extras/libcpuid.git
|
||||
[submodule "contrib/openldap"]
|
||||
path = contrib/openldap
|
||||
url = https://github.com/openldap/openldap.git
|
||||
url = https://github.com/ClickHouse-Extras/openldap.git
|
||||
[submodule "contrib/AMQP-CPP"]
|
||||
path = contrib/AMQP-CPP
|
||||
url = https://github.com/ClickHouse-Extras/AMQP-CPP.git
|
||||
@ -173,6 +167,9 @@
|
||||
[submodule "contrib/fmtlib"]
|
||||
path = contrib/fmtlib
|
||||
url = https://github.com/fmtlib/fmt.git
|
||||
[submodule "contrib/antlr4-runtime"]
|
||||
path = contrib/antlr4-runtime
|
||||
url = https://github.com/ClickHouse-Extras/antlr4-runtime.git
|
||||
[submodule "contrib/sentry-native"]
|
||||
path = contrib/sentry-native
|
||||
url = https://github.com/ClickHouse-Extras/sentry-native.git
|
||||
@ -184,7 +181,7 @@
|
||||
url = https://github.com/kthohr/stats.git
|
||||
[submodule "contrib/krb5"]
|
||||
path = contrib/krb5
|
||||
url = https://github.com/krb5/krb5
|
||||
url = https://github.com/ClickHouse-Extras/krb5
|
||||
[submodule "contrib/cyrus-sasl"]
|
||||
path = contrib/cyrus-sasl
|
||||
url = https://github.com/cyrusimap/cyrus-sasl
|
||||
@ -198,8 +195,7 @@
|
||||
url = https://github.com/danlark1/miniselect
|
||||
[submodule "contrib/rocksdb"]
|
||||
path = contrib/rocksdb
|
||||
url = https://github.com/facebook/rocksdb
|
||||
branch = v6.14.5
|
||||
url = https://github.com/ClickHouse-Extras/rocksdb.git
|
||||
[submodule "contrib/xz"]
|
||||
path = contrib/xz
|
||||
url = https://github.com/xz-mirror/xz
|
||||
@ -207,3 +203,12 @@
|
||||
path = contrib/abseil-cpp
|
||||
url = https://github.com/ClickHouse-Extras/abseil-cpp.git
|
||||
branch = lts_2020_02_25
|
||||
[submodule "contrib/dragonbox"]
|
||||
path = contrib/dragonbox
|
||||
url = https://github.com/ClickHouse-Extras/dragonbox.git
|
||||
[submodule "contrib/fast_float"]
|
||||
path = contrib/fast_float
|
||||
url = https://github.com/fastfloat/fast_float
|
||||
[submodule "contrib/boringssl"]
|
||||
path = contrib/boringssl
|
||||
url = https://github.com/ClickHouse-Extras/boringssl.git
|
||||
|
264
CHANGELOG.md
264
CHANGELOG.md
@ -1,3 +1,126 @@
|
||||
### ClickHouse release 20.12
|
||||
|
||||
### ClickHouse release v20.12.3.3-stable, 2020-12-13
|
||||
|
||||
#### Backward Incompatible Change
|
||||
|
||||
* Enable `use_compact_format_in_distributed_parts_names` by default (see the documentation for the reference). [#16728](https://github.com/ClickHouse/ClickHouse/pull/16728) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Accept user settings related to file formats (e.g. `format_csv_delimiter`) in the `SETTINGS` clause when creating a table that uses `File` engine, and use these settings in all `INSERT`s and `SELECT`s. The file format settings changed in the current user session, or in the `SETTINGS` clause of a DML query itself, no longer affect the query. [#16591](https://github.com/ClickHouse/ClickHouse/pull/16591) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
|
||||
#### New Feature
|
||||
|
||||
* Add `*.xz` compression/decompression support. It enables using `*.xz` in `file()` function. This closes [#8828](https://github.com/ClickHouse/ClickHouse/issues/8828). [#16578](https://github.com/ClickHouse/ClickHouse/pull/16578) ([Abi Palagashvili](https://github.com/fibersel)).
|
||||
* Introduce the query `ALTER TABLE ... DROP|DETACH PART 'part_name'`. [#15511](https://github.com/ClickHouse/ClickHouse/pull/15511) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Added new ALTER UPDATE/DELETE IN PARTITION syntax. [#13403](https://github.com/ClickHouse/ClickHouse/pull/13403) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Allow formatting named tuples as JSON objects when using JSON input/output formats, controlled by the `output_format_json_named_tuples_as_objects` setting, disabled by default. [#17175](https://github.com/ClickHouse/ClickHouse/pull/17175) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Add a possibility to input enum value as its id in TSV and CSV formats by default. [#16834](https://github.com/ClickHouse/ClickHouse/pull/16834) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add COLLATE support for Nullable, LowCardinality, Array and Tuple, where nested type is String. Also refactor the code associated with collations in ColumnString.cpp. [#16273](https://github.com/ClickHouse/ClickHouse/pull/16273) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* New `tcpPort` function returns TCP port listened by this server. [#17134](https://github.com/ClickHouse/ClickHouse/pull/17134) ([Ivan](https://github.com/abyss7)).
|
||||
* Add new math functions: `acosh`, `asinh`, `atan2`, `atanh`, `cosh`, `hypot`, `log1p`, `sinh`. [#16636](https://github.com/ClickHouse/ClickHouse/pull/16636) ([Konstantin Malanchev](https://github.com/hombit)).
|
||||
* Possibility to distribute the merges between different replicas. Introduces the `execute_merges_on_single_replica_time_threshold` mergetree setting. [#16424](https://github.com/ClickHouse/ClickHouse/pull/16424) ([filimonov](https://github.com/filimonov)).
|
||||
* Add setting `aggregate_functions_null_for_empty` for SQL standard compatibility. This option will rewrite all aggregate functions in a query, adding -OrNull suffix to them. Implements [10273](https://github.com/ClickHouse/ClickHouse/issues/10273). [#16123](https://github.com/ClickHouse/ClickHouse/pull/16123) ([flynn](https://github.com/ucasFL)).
|
||||
* Updated DateTime, DateTime64 parsing to accept string Date literal format. [#16040](https://github.com/ClickHouse/ClickHouse/pull/16040) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Make it possible to change the path to history file in `clickhouse-client` using the `--history_file` parameter. [#15960](https://github.com/ClickHouse/ClickHouse/pull/15960) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix the issue when server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)).
|
||||
* fix incorrect initialization of `max_compress_block_size` of MergeTreeWriterSettings with `min_compress_block_size`. [#17833](https://github.com/ClickHouse/ClickHouse/pull/17833) ([flynn](https://github.com/ucasFL)).
|
||||
* Exception message about max table size to drop was displayed incorrectly. [#17764](https://github.com/ClickHouse/ClickHouse/pull/17764) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed possible segfault when there is not enough space when inserting into `Distributed` table. [#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)).
|
||||
* It might be determined incorrectly if cluster is circular- (cross-) replicated or not when executing `ON CLUSTER` query due to race condition when `pool_size` > 1. It's fixed. [#17640](https://github.com/ClickHouse/ClickHouse/pull/17640) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Exception `fmt::v7::format_error` can be logged in background for MergeTree tables. This fixes [#17613](https://github.com/ClickHouse/ClickHouse/issues/17613). [#17615](https://github.com/ClickHouse/ClickHouse/pull/17615) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* When clickhouse-client is used in interactive mode with multiline queries, single line comment was erroneously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix issue when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* fix `toInt256(inf)` stack overflow. Int256 is an experimental feature. Closed [#17235](https://github.com/ClickHouse/ClickHouse/issues/17235). [#17257](https://github.com/ClickHouse/ClickHouse/pull/17257) ([flynn](https://github.com/ucasFL)).
|
||||
* Fix possible `Unexpected packet Data received from client` error logged for Distributed queries with `LIMIT`. [#17254](https://github.com/ClickHouse/ClickHouse/pull/17254) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246). [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix possible wrong index analysis when the types of the index comparison are different. This fixes [#17122](https://github.com/ClickHouse/ClickHouse/issues/17122). [#17145](https://github.com/ClickHouse/ClickHouse/pull/17145) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Multiple fixes for MaterializeMySQL (experimental feature). Fixes [#16923](https://github.com/ClickHouse/ClickHouse/issues/16923) Fixes [#15883](https://github.com/ClickHouse/ClickHouse/issues/15883) Fix MaterializeMySQL SYNC failure when the modify MySQL binlog_checksum. [#17091](https://github.com/ClickHouse/ClickHouse/pull/17091) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()` Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix unfinished implementation for function fuzzBits, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Fix LLVM's libunwind in the case when CFA register is RAX. This is the [bug](https://bugs.llvm.org/show_bug.cgi?id=48186) in [LLVM's libunwind](https://github.com/llvm/llvm-project/tree/master/libunwind). We already have workarounds for this bug. [#17046](https://github.com/ClickHouse/ClickHouse/pull/17046) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix `optimize_distributed_group_by_sharding_key` setting (that is disabled by default) for query with OFFSET only. [#16996](https://github.com/ClickHouse/ClickHouse/pull/16996) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix for Merge tables over Distributed tables with JOIN. [#16993](https://github.com/ClickHouse/ClickHouse/pull/16993) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed wrong result in big integers (128, 256 bit) when casting from double. Big integers support is experimental. [#16986](https://github.com/ClickHouse/ClickHouse/pull/16986) ([Mike](https://github.com/myrrc)).
|
||||
* Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` has a `WHERE` expression on the column being altered and the alter hasn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Blame info was not calculated correctly in `clickhouse-git-import`. [#16959](https://github.com/ClickHouse/ClickHouse/pull/16959) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix order by optimization with monotonous functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. Fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix strange code in InterpreterShowAccessQuery. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Prevent clickhouse server crashes when using the function `timeSeriesGroupSum`. The function is removed from newer ClickHouse releases. [#16865](https://github.com/ClickHouse/ClickHouse/pull/16865) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)).
|
||||
* If no memory can be allocated while writing table metadata on disk, broken metadata file can be written. [#16772](https://github.com/ClickHouse/ClickHouse/pull/16772) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix trivial query optimization with partition predicate. [#16767](https://github.com/ClickHouse/ClickHouse/pull/16767) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix remote query failure when using 'if' suffix aggregate function. Fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) Fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix inconsistent behavior caused by `select_sequential_consistency` for optimized trivial count query and system.tables. [#16309](https://github.com/ClickHouse/ClickHouse/pull/16309) ([Hao Chen](https://github.com/haoch)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm. [#16895](https://github.com/ClickHouse/ClickHouse/pull/16895) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Enable compact format of directories for asynchronous sends in Distributed tables: `use_compact_format_in_distributed_parts_names` is set to 1 by default. [#16788](https://github.com/ClickHouse/ClickHouse/pull/16788) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Abort multipart upload if no data was written to S3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)).
|
||||
* Mask password in data_path in the system.distribution_queue. [#16727](https://github.com/ClickHouse/ClickHouse/pull/16727) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Throw error when use column transformer replaces non existing column. [#16183](https://github.com/ClickHouse/ClickHouse/pull/16183) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Turn off parallel parsing when there is not enough memory for all threads to work simultaneously. Also there could be exceptions like "Memory limit exceeded" when somebody will try to insert extremely huge rows (> min_chunk_bytes_for_parallel_parsing), because each piece to parse has to be independent set of strings (one or more). [#16721](https://github.com/ClickHouse/ClickHouse/pull/16721) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)).
|
||||
* Correct grammar in error message in JSONEachRow, JSONCompactEachRow, and RegexpRow input formats. [#17205](https://github.com/ClickHouse/ClickHouse/pull/17205) ([nico piderman](https://github.com/sneako)).
|
||||
* Set default `host` and `port` parameters for `SOURCE(CLICKHOUSE(...))` to current instance and set default `user` value to `'default'`. [#16997](https://github.com/ClickHouse/ClickHouse/pull/16997) ([vdimir](https://github.com/vdimir)).
|
||||
* Throw an informative error message when doing `ATTACH/DETACH TABLE <DICTIONARY>`. Before this PR, `detach table <dict>` works but leads to an ill-formed in-memory metadata. [#16885](https://github.com/ClickHouse/ClickHouse/pull/16885) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Add cutToFirstSignificantSubdomainWithWWW(). [#16845](https://github.com/ClickHouse/ClickHouse/pull/16845) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Server refused to startup with exception message if wrong config is given (`metric_log`.`collect_interval_milliseconds` is missing). [#16815](https://github.com/ClickHouse/ClickHouse/pull/16815) ([Ivan](https://github.com/abyss7)).
|
||||
* Better exception message when configuration for distributed DDL is absent. This fixes [#5075](https://github.com/ClickHouse/ClickHouse/issues/5075). [#16769](https://github.com/ClickHouse/ClickHouse/pull/16769) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Usability improvement: better suggestions in syntax error message when `CODEC` expression is misplaced in `CREATE TABLE` query. This fixes [#12493](https://github.com/ClickHouse/ClickHouse/issues/12493). [#16768](https://github.com/ClickHouse/ClickHouse/pull/16768) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove empty directories for async INSERT at start of Distributed engine. [#16729](https://github.com/ClickHouse/ClickHouse/pull/16729) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Workaround for use S3 with nginx server as proxy. Nginx currently does not accept urls with empty path like `http://domain.com?delete`, but vanilla aws-sdk-cpp produces this kind of urls. This commit uses patched aws-sdk-cpp version, which makes urls with "/" as path in these cases, like `http://domain.com/?delete`. [#16709](https://github.com/ClickHouse/ClickHouse/pull/16709) ([ianton-ru](https://github.com/ianton-ru)).
|
||||
* Allow `reinterpretAs*` functions to work for integers and floats of the same size. Implements [16640](https://github.com/ClickHouse/ClickHouse/issues/16640). [#16657](https://github.com/ClickHouse/ClickHouse/pull/16657) ([flynn](https://github.com/ucasFL)).
|
||||
* Now, `<auxiliary_zookeepers>` configuration can be changed in `config.xml` and reloaded without server startup. [#16627](https://github.com/ClickHouse/ClickHouse/pull/16627) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Support SNI in https connections to remote resources. This will allow to connect to Cloudflare servers that require SNI. This fixes [#10055](https://github.com/ClickHouse/ClickHouse/issues/10055). [#16252](https://github.com/ClickHouse/ClickHouse/pull/16252) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix possible stack overflow if a loop of materialized views is created. This closes [#15732](https://github.com/ClickHouse/ClickHouse/issues/15732). [#16048](https://github.com/ClickHouse/ClickHouse/pull/16048) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Simplify the implementation of background tasks processing for the MergeTree table engines family. There should be no visible changes for user. [#15983](https://github.com/ClickHouse/ClickHouse/pull/15983) ([alesapin](https://github.com/alesapin)).
|
||||
* Improvement for MaterializeMySQL (experimental feature). Throw exception about right sync privileges when MySQL sync user has error privileges. [#15977](https://github.com/ClickHouse/ClickHouse/pull/15977) ([TCeason](https://github.com/TCeason)).
|
||||
* Made `indexOf()` use BloomFilter. [#14977](https://github.com/ClickHouse/ClickHouse/pull/14977) ([achimbab](https://github.com/achimbab)).
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Use Floyd-Rivest algorithm, it is the best for the ClickHouse use case of partial sorting. Benchmarks are in https://github.com/danlark1/miniselect and [here](https://drive.google.com/drive/folders/1DHEaeXgZuX6AJ9eByeZ8iQVQv0ueP8XM). [#16825](https://github.com/ClickHouse/ClickHouse/pull/16825) ([Danila Kutenin](https://github.com/danlark1)).
|
||||
* Now `ReplicatedMergeTree` tree engines family uses a separate thread pool for replicated fetches. Size of the pool limited by setting `background_fetches_pool_size` which can be tuned with a server restart. The default value of the setting is 3 and it means that the maximum amount of parallel fetches is equal to 3 (and it allows to utilize 10G network). Fixes #520. [#16390](https://github.com/ClickHouse/ClickHouse/pull/16390) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed uncontrolled growth of the state of `quantileTDigest`. [#16680](https://github.com/ClickHouse/ClickHouse/pull/16680) ([hrissan](https://github.com/hrissan)).
|
||||
* Add `VIEW` subquery description to `EXPLAIN`. Limit push down optimisation for `VIEW`. Add local replicas of `Distributed` to query plan. [#14936](https://github.com/ClickHouse/ClickHouse/pull/14936) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix optimize_read_in_order/optimize_aggregation_in_order with max_threads > 0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Now we can safely prune partitions with exact match. Useful case: Suppose table is partitioned by `intHash64(x) % 100` and the query has condition on `intHash64(x) % 100` verbatim, not on x. [#16253](https://github.com/ClickHouse/ClickHouse/pull/16253) ([Amos Bird](https://github.com/amosbird)).
|
||||
|
||||
#### Experimental Feature
|
||||
|
||||
* Add `EmbeddedRocksDB` table engine (can be used for dictionaries). [#15073](https://github.com/ClickHouse/ClickHouse/pull/15073) ([sundyli](https://github.com/sundy-li)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
* Improvements in test coverage building images. [#17233](https://github.com/ClickHouse/ClickHouse/pull/17233) ([alesapin](https://github.com/alesapin)).
|
||||
* Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix UBSan report in Poco. This closes [#12719](https://github.com/ClickHouse/ClickHouse/issues/12719). [#16765](https://github.com/ClickHouse/ClickHouse/pull/16765) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Do not instrument 3rd-party libraries with UBSan. [#16764](https://github.com/ClickHouse/ClickHouse/pull/16764) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix UBSan report in cache dictionaries. This closes [#12641](https://github.com/ClickHouse/ClickHouse/issues/12641). [#16763](https://github.com/ClickHouse/ClickHouse/pull/16763) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix UBSan report when trying to convert infinite floating point number to integer. This closes [#14190](https://github.com/ClickHouse/ClickHouse/issues/14190). [#16677](https://github.com/ClickHouse/ClickHouse/pull/16677) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
|
||||
## ClickHouse release 20.11
|
||||
|
||||
### ClickHouse release v20.11.3.3-stable, 2020-11-13
|
||||
@ -15,7 +138,8 @@
|
||||
* Restrict to use of non-comparable data types (like `AggregateFunction`) in keys (Sorting key, Primary key, Partition key, and so on). [#16601](https://github.com/ClickHouse/ClickHouse/pull/16601) ([alesapin](https://github.com/alesapin)).
|
||||
* Remove `ANALYZE` and `AST` queries, and make the setting `enable_debug_queries` obsolete since now it is the part of full featured `EXPLAIN` query. [#16536](https://github.com/ClickHouse/ClickHouse/pull/16536) ([Ivan](https://github.com/abyss7)).
|
||||
* Aggregate functions `boundingRatio`, `rankCorr`, `retention`, `timeSeriesGroupSum`, `timeSeriesGroupRateSum`, `windowFunnel` were erroneously made case-insensitive. Now their names are made case sensitive as designed. Only functions that are specified in SQL standard or made for compatibility with other DBMS or functions similar to those should be case-insensitive. [#16407](https://github.com/ClickHouse/ClickHouse/pull/16407) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Make `rankCorr` function return nan on insufficient data https://github.com/ClickHouse/ClickHouse/issues/16124. [#16135](https://github.com/ClickHouse/ClickHouse/pull/16135) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Make `rankCorr` function return nan on insufficient data [#16124](https://github.com/ClickHouse/ClickHouse/issues/16124). [#16135](https://github.com/ClickHouse/ClickHouse/pull/16135) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
|
||||
|
||||
#### New Feature
|
||||
|
||||
@ -32,7 +156,7 @@
|
||||
* Now we can provide identifiers via query parameters. And these parameters can be used as table objects or columns. [#16594](https://github.com/ClickHouse/ClickHouse/pull/16594) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Added big integers (UInt256, Int128, Int256) and UUID data types support for MergeTree BloomFilter index. Big integers is an experimental feature. [#16642](https://github.com/ClickHouse/ClickHouse/pull/16642) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Add `farmFingerprint64` function (non-cryptographic string hashing). [#16570](https://github.com/ClickHouse/ClickHouse/pull/16570) ([Jacob Hayes](https://github.com/JacobHayes)).
|
||||
* Add `log_queries_min_query_duration_ms`, only queries slower then the value of this setting will go to `query_log`/`query_thread_log` (i.e. something like `slow_query_log` in mysql). [#16529](https://github.com/ClickHouse/ClickHouse/pull/16529) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Add `log_queries_min_query_duration_ms`, only queries slower than the value of this setting will go to `query_log`/`query_thread_log` (i.e. something like `slow_query_log` in mysql). [#16529](https://github.com/ClickHouse/ClickHouse/pull/16529) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Ability to create a docker image on the top of `Alpine`. Uses precompiled binary and glibc components from ubuntu 20.04. [#16479](https://github.com/ClickHouse/ClickHouse/pull/16479) ([filimonov](https://github.com/filimonov)).
|
||||
* Added `toUUIDOrNull`, `toUUIDOrZero` cast functions. [#16337](https://github.com/ClickHouse/ClickHouse/pull/16337) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Add `max_concurrent_queries_for_all_users` setting, see [#6636](https://github.com/ClickHouse/ClickHouse/issues/6636) for use cases. [#16154](https://github.com/ClickHouse/ClickHouse/pull/16154) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
@ -154,6 +278,7 @@
|
||||
* Change default value of `format_regexp_escaping_rule` setting (it's related to `Regexp` format) to `Raw` (it means - read whole subpattern as a value) to make the behaviour more like what users expect. [#15426](https://github.com/ClickHouse/ClickHouse/pull/15426) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Add support for nested multiline comments `/* comment /* comment */ */` in SQL. This conforms to the SQL standard. [#14655](https://github.com/ClickHouse/ClickHouse/pull/14655) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Added MergeTree settings (`max_replicated_merges_with_ttl_in_queue` and `max_number_of_merges_with_ttl_in_pool`) to control the number of merges with TTL in the background pool and replicated queue. This change breaks compatibility with older versions only if you use delete TTL. Otherwise, replication will stay compatible. You can avoid incompatibility issues if you update all shard replicas at once or execute `SYSTEM STOP TTL MERGES` until you finish the update of all replicas. If you'll get an incompatible entry in the replication queue, first of all, execute `SYSTEM STOP TTL MERGES` and after `ALTER TABLE ... DETACH PARTITION ...` the partition where incompatible TTL merge was assigned. Attach it back on a single replica. [#14490](https://github.com/ClickHouse/ClickHouse/pull/14490) ([alesapin](https://github.com/alesapin)).
|
||||
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
|
||||
|
||||
#### New Feature
|
||||
|
||||
@ -176,7 +301,7 @@
|
||||
* Add `JSONStrings` format which outputs data in arrays of strings. [#14333](https://github.com/ClickHouse/ClickHouse/pull/14333) ([hcz](https://github.com/hczhcz)).
|
||||
* Add support for "Raw" column format for `Regexp` format. It allows to simply extract subpatterns as a whole without any escaping rules. [#15363](https://github.com/ClickHouse/ClickHouse/pull/15363) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Allow configurable `NULL` representation for `TSV` output format. It is controlled by the setting `output_format_tsv_null_representation` which is `\N` by default. This closes [#9375](https://github.com/ClickHouse/ClickHouse/issues/9375). Note that the setting only controls output format and `\N` is the only supported `NULL` representation for `TSV` input format. [#14586](https://github.com/ClickHouse/ClickHouse/pull/14586) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Support Decimal data type for `MaterializedMySQL`. `MaterializedMySQL` is an experimental feature. [#14535](https://github.com/ClickHouse/ClickHouse/pull/14535) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Support Decimal data type for `MaterializeMySQL`. `MaterializeMySQL` is an experimental feature. [#14535](https://github.com/ClickHouse/ClickHouse/pull/14535) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Add new feature: `SHOW DATABASES LIKE 'xxx'`. [#14521](https://github.com/ClickHouse/ClickHouse/pull/14521) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Added a script to import (arbitrary) git repository to ClickHouse as a sample dataset. [#14471](https://github.com/ClickHouse/ClickHouse/pull/14471) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Now insert statements can have asterisk (or variants) with column transformers in the column list. [#14453](https://github.com/ClickHouse/ClickHouse/pull/14453) ([Amos Bird](https://github.com/amosbird)).
|
||||
@ -198,18 +323,18 @@
|
||||
* Fix very wrong code in TwoLevelStringHashTable implementation, which might lead to memory leak. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix segfault in some cases of wrong aggregation in lambdas. [#16082](https://github.com/ClickHouse/ClickHouse/pull/16082) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)).
|
||||
* `MaterializedMySQL` (experimental feature): Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* `MaterializeMySQL` (experimental feature): Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Allow to use `direct` layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* `MaterializedMySQL` (experimental feature): Fix crash on create database failure. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* `MaterializeMySQL` (experimental feature): Fix crash on create database failure. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) - Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixes [#12513](https://github.com/ClickHouse/ClickHouse/issues/12513): difference expressions with same alias when query is reanalyzed. [#15886](https://github.com/ClickHouse/ClickHouse/pull/15886) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix possible very rare deadlocks in RBAC implementation. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)).
|
||||
* `MaterializedMySQL` (experimental feature): Fix `select count()` inaccuracy. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([tavplubix](https://github.com/tavplubix)).
|
||||
* `MaterializeMySQL` (experimental feature): Fix `select count()` inaccuracy. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix some cases of queries, in which only virtual columns are selected. Previously `Not found column _nothing in block` exception may be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix drop of materialized view with inner table in Atomic database (hangs all subsequent DROP TABLE due to hang of the worker thread, due to recursive DROP TABLE for inner table of MV). [#15743](https://github.com/ClickHouse/ClickHouse/pull/15743) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Possibility to move part to another disk/volume if the first attempt failed. [#15723](https://github.com/ClickHouse/ClickHouse/pull/15723) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
@ -241,37 +366,37 @@
|
||||
* Fix hang of queries with a lot of subqueries to same table of `MySQL` engine. Previously, if there were more than 16 subqueries to same `MySQL` table in query, it hung forever. [#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix 'Unknown identifier' in GROUP BY when query has JOIN over Merge table. [#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fix instance crash when using `joinGet` with `LowCardinality` types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix instance crash when using `joinGet` with `LowCardinality` types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
|
||||
* Adjust Decimal field size in MySQL column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
|
||||
* Fixes `Data compressed with different methods` in `join_algorithm='auto'`. Keep LowCardinality as type for left table join key in `join_algorithm='partial_merge'`. [#15088](https://github.com/ClickHouse/ClickHouse/pull/15088) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Update `jemalloc` to fix `percpu_arena` with affinity mask. [#15035](https://github.com/ClickHouse/ClickHouse/pull/15035) ([Azat Khuzhin](https://github.com/azat)). [#14957](https://github.com/ClickHouse/ClickHouse/pull/14957) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes https://github.com/ClickHouse/ClickHouse/issues/14908. [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in Docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix crash in RIGHT or FULL JOIN with join_algorithm='auto' when memory limit exceeded and we should change HashJoin with MergeJoin. [#15002](https://github.com/ClickHouse/ClickHouse/pull/15002) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix to make predicate push down work when subquery contains `finalizeAggregation` function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes https://github.com/ClickHouse/ClickHouse/issues/14923. [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* `MaterializedMySQL` (experimental feature): Fixed `.metadata.tmp File exists` error. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* `MaterializeMySQL` (experimental feature): Fixed `.metadata.tmp File exists` error. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix SIGSEGV for an attempt to INSERT into StorageFile with file descriptor. [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed segfault in `cache` dictionary [#14837](https://github.com/ClickHouse/ClickHouse/issues/14837). [#14879](https://github.com/ClickHouse/ClickHouse/pull/14879) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* `MaterializedMySQL` (experimental feature): Fixed bug in parsing MySQL binlog events, which causes `Attempt to read after eof` and `Packet payload is not fully read` in `MaterializeMySQL` database engine. [#14852](https://github.com/ClickHouse/ClickHouse/pull/14852) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* `MaterializeMySQL` (experimental feature): Fixed bug in parsing MySQL binlog events, which causes `Attempt to read after eof` and `Packet payload is not fully read` in `MaterializeMySQL` database engine. [#14852](https://github.com/ClickHouse/ClickHouse/pull/14852) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix rare error in `SELECT` queries when the queried column has `DEFAULT` expression which depends on the other column which also has `DEFAULT` and not present in select query and not exists on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)).
|
||||
* `Replace` column transformer should replace identifiers with cloned ASTs. This fixes https://github.com/ClickHouse/ClickHouse/issues/14695 . [#14734](https://github.com/ClickHouse/ClickHouse/pull/14734) ([Amos Bird](https://github.com/amosbird)).
|
||||
* `Replace` column transformer should replace identifiers with cloned ASTs. This fixes [#14695](https://github.com/ClickHouse/ClickHouse/issues/14695). [#14734](https://github.com/ClickHouse/ClickHouse/pull/14734) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed missed default database name in metadata of materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix bug when `ALTER UPDATE` mutation with `Nullable` column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix wrong Decimal multiplication result caused by wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fix function `has` with `LowCardinality` of `Nullable`. [#14591](https://github.com/ClickHouse/ClickHouse/pull/14591) ([Mike](https://github.com/myrrc)).
|
||||
* Cleanup data directory after Zookeeper exceptions during CreateQuery for StorageReplicatedMergeTree Engine. [#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Fix rare segfaults in functions with combinator `-Resample`, which could appear in result of overflow with very large parameters. [#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix a bug when converting `Nullable(String)` to Enum. Introduced by https://github.com/ClickHouse/ClickHouse/pull/12745. This fixes https://github.com/ClickHouse/ClickHouse/issues/14435. [#14530](https://github.com/ClickHouse/ClickHouse/pull/14530) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix a bug when converting `Nullable(String)` to Enum. Introduced by [#12745](https://github.com/ClickHouse/ClickHouse/pull/12745). This fixes [#14435](https://github.com/ClickHouse/ClickHouse/issues/14435). [#14530](https://github.com/ClickHouse/ClickHouse/pull/14530) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix `currentDatabase()` function cannot be used in `ON CLUSTER` ddl query. [#14211](https://github.com/ClickHouse/ClickHouse/pull/14211) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* `MaterializedMySQL` (experimental feature): Fixed `Packet payload is not fully read` error in `MaterializeMySQL` database engine. [#14696](https://github.com/ClickHouse/ClickHouse/pull/14696) ([BohuTANG](https://github.com/BohuTANG)).
|
||||
* `MaterializeMySQL` (experimental feature): Fixed `Packet payload is not fully read` error in `MaterializeMySQL` database engine. [#14696](https://github.com/ClickHouse/ClickHouse/pull/14696) ([BohuTANG](https://github.com/BohuTANG)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
@ -306,7 +431,7 @@
|
||||
* Add an option to skip access checks for `DiskS3`. `s3` disk is an experimental feature. [#14497](https://github.com/ClickHouse/ClickHouse/pull/14497) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* Speed up server shutdown process if there are ongoing S3 requests. [#14496](https://github.com/ClickHouse/ClickHouse/pull/14496) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* `SYSTEM RELOAD CONFIG` now throws an exception if failed to reload and continues using the previous users.xml. The background periodic reloading also continues using the previous users.xml if failed to reload. [#14492](https://github.com/ClickHouse/ClickHouse/pull/14492) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* For INSERTs with inline data in VALUES format in the script mode of `clickhouse-client`, support semicolon as the data terminator, in addition to the new line. Closes https://github.com/ClickHouse/ClickHouse/issues/12288. [#13192](https://github.com/ClickHouse/ClickHouse/pull/13192) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* For INSERTs with inline data in VALUES format in the script mode of `clickhouse-client`, support semicolon as the data terminator, in addition to the new line. Closes [#12288](https://github.com/ClickHouse/ClickHouse/issues/12288). [#13192](https://github.com/ClickHouse/ClickHouse/pull/13192) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Support custom codecs in compact parts. [#12183](https://github.com/ClickHouse/ClickHouse/pull/12183) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### Performance Improvement
|
||||
@ -318,7 +443,7 @@
|
||||
* Improve performance of 256-bit types using (u)int64_t as base type for wide integers. Original wide integers use 8-bit types as base. [#14859](https://github.com/ClickHouse/ClickHouse/pull/14859) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Explicitly use a temporary disk to store vertical merge temporary data. [#15639](https://github.com/ClickHouse/ClickHouse/pull/15639) ([Grigory Pervakov](https://github.com/GrigoryPervakov)).
|
||||
* Use one S3 DeleteObjects request instead of multiple DeleteObject in a loop. No functionality changes, so covered by existing tests like integration/test_log_family_s3. [#15238](https://github.com/ClickHouse/ClickHouse/pull/15238) ([ianton-ru](https://github.com/ianton-ru)).
|
||||
* Fix `DateTime <op> DateTime` mistakenly choosing the slow generic implementation. This fixes https://github.com/ClickHouse/ClickHouse/issues/15153. [#15178](https://github.com/ClickHouse/ClickHouse/pull/15178) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix `DateTime <op> DateTime` mistakenly choosing the slow generic implementation. This fixes [#15153](https://github.com/ClickHouse/ClickHouse/issues/15153). [#15178](https://github.com/ClickHouse/ClickHouse/pull/15178) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Improve performance of GROUP BY key of type `FixedString`. [#15034](https://github.com/ClickHouse/ClickHouse/pull/15034) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Only `mlock` code segment when starting clickhouse-server. In previous versions, all mapped regions were locked in memory, including debug info. Debug info is usually split into a separate file but if it isn't, it led to +2..3 GiB memory usage. [#14929](https://github.com/ClickHouse/ClickHouse/pull/14929) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* ClickHouse binary became smaller due to link time optimization.
|
||||
@ -385,7 +510,7 @@
|
||||
* Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes https://github.com/ClickHouse/ClickHouse/issues/15628. [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`). Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -396,7 +521,7 @@
|
||||
* Fixed too low default value of `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix error `Cannot add simple transform to empty Pipe` which happened while reading from `Buffer` table which has different structure than destination table. It was possible if destination table returned empty result for query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed bug with globs in S3 table function, region from URL was not applied to S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes https://github.com/ClickHouse/ClickHouse/issues/15598. [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
|
||||
* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
|
||||
* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
|
||||
#### Improvement
|
||||
@ -420,11 +545,11 @@
|
||||
* Fix `Missing columns` errors when selecting columns which are absent in data, but depend on other columns which are also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)).
|
||||
* Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)).
|
||||
* Fix bug where queries like SELECT toStartOfDay(today()) fail complaining about empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Fix bug where queries like `SELECT toStartOfDay(today())` fail complaining about empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix instance crash when using joinGet with LowCardinality types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix bug in table engine `Buffer` which doesn't allow inserting data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
|
||||
* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
|
||||
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
|
||||
@ -438,6 +563,10 @@
|
||||
|
||||
### ClickHouse release v20.9.2.20, 2020-09-22
|
||||
|
||||
#### Backward Incompatible Change
|
||||
|
||||
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
|
||||
|
||||
#### New Feature
|
||||
|
||||
* Added column transformers `EXCEPT`, `REPLACE`, `APPLY`, which can be applied to the list of selected columns (after `*` or `COLUMNS(...)`). For example, you can write `SELECT * EXCEPT(URL) REPLACE(number + 1 AS number)`. Another example: `select * apply(length) apply(max) from wide_string_table` to find out the maximum length of all string columns. [#14233](https://github.com/ClickHouse/ClickHouse/pull/14233) ([Amos Bird](https://github.com/amosbird)).
|
||||
@ -449,10 +578,10 @@
|
||||
* Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix wrong Decimal multiplication result caused by wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fixed inconsistent comparison with primary key of type `FixedString` on index analysis if they're compered with a string of less size. This fixes https://github.com/ClickHouse/ClickHouse/issues/14908. [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed inconsistent comparison with primary key of type `FixedString` on index analysis if they're compared with a string of smaller size. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix bug which leads to wrong merges assignment if table has partitions with a single part. [#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)).
|
||||
* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes https://github.com/ClickHouse/ClickHouse/issues/14923. [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)).
|
||||
@ -495,7 +624,7 @@
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Optimize queries with LIMIT/LIMIT BY/ORDER BY for distributed with GROUP BY sharding_key (under optimize_skip_unused_shards and optimize_distributed_group_by_sharding_key). [#10373](https://github.com/ClickHouse/ClickHouse/pull/10373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Optimize queries with LIMIT/LIMIT BY/ORDER BY for distributed with GROUP BY sharding_key (under `optimize_skip_unused_shards` and `optimize_distributed_group_by_sharding_key`). [#10373](https://github.com/ClickHouse/ClickHouse/pull/10373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Creating sets for multiple `JOIN` and `IN` in parallel. It may slightly improve performance for queries with several different `IN subquery` expressions. [#14412](https://github.com/ClickHouse/ClickHouse/pull/14412) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Improve Kafka engine performance by providing independent thread for each consumer. Separate thread pool for streaming engines (like Kafka). [#13939](https://github.com/ClickHouse/ClickHouse/pull/13939) ([fastio](https://github.com/fastio)).
|
||||
|
||||
@ -573,15 +702,15 @@
|
||||
* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix instance crash when using joinGet with LowCardinality types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix bug in table engine `Buffer` which doesn't allow inserting data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
|
||||
* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
|
||||
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes https://github.com/ClickHouse/ClickHouse/issues/14908. [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* If function `bar` was called with specifically crafter arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes https://github.com/ClickHouse/ClickHouse/issues/14923. [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)).
|
||||
@ -621,6 +750,7 @@
|
||||
* Now `OPTIMIZE FINAL` query doesn't recalculate TTL for parts that were added before TTL was created. Use `ALTER TABLE ... MATERIALIZE TTL` once to calculate them, after that `OPTIMIZE FINAL` will evaluate TTL's properly. This behavior never worked for replicated tables. [#14220](https://github.com/ClickHouse/ClickHouse/pull/14220) ([alesapin](https://github.com/alesapin)).
|
||||
* Extend `parallel_distributed_insert_select` setting, adding an option to run `INSERT` into local table. The setting changes type from `Bool` to `UInt64`, so the values `false` and `true` are no longer supported. If you have these values in server configuration, the server will not start. Please replace them with `0` and `1`, respectively. [#14060](https://github.com/ClickHouse/ClickHouse/pull/14060) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Remove support for the `ODBCDriver` input/output format. This was a deprecated format once used for communication with the ClickHouse ODBC driver, now long superseded by the `ODBCDriver2` format. Resolves [#13629](https://github.com/ClickHouse/ClickHouse/issues/13629). [#13847](https://github.com/ClickHouse/ClickHouse/pull/13847) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
|
||||
|
||||
#### New Feature
|
||||
|
||||
@ -640,16 +770,16 @@
|
||||
|
||||
* Fix visible data clobbering by progress bar in client in interactive mode. This fixes [#12562](https://github.com/ClickHouse/ClickHouse/issues/12562) and [#13369](https://github.com/ClickHouse/ClickHouse/issues/13369) and [#13584](https://github.com/ClickHouse/ClickHouse/issues/13584) and fixes [#12964](https://github.com/ClickHouse/ClickHouse/issues/12964). [#13691](https://github.com/ClickHouse/ClickHouse/pull/13691) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed incorrect sorting order of `LowCardinality` column when sorting by multiple columns. This fixes [#13958](https://github.com/ClickHouse/ClickHouse/issues/13958). [#14223](https://github.com/ClickHouse/ClickHouse/pull/14223) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafter parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafted parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug which can lead to wrong merges assignment if table has partitions with a single part. [#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)).
|
||||
* Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent a rare possible query hang. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334). [#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) [#14334](https://github.com/ClickHouse/ClickHouse/pull/14334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix crash during `ALTER` query for table which was created `AS table_function`. Fixes [#14212](https://github.com/ClickHouse/ClickHouse/issues/14212). [#14326](https://github.com/ClickHouse/ClickHouse/pull/14326) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix exception during ALTER LIVE VIEW query with REFRESH command. Live view is an experimental feature. [#14320](https://github.com/ClickHouse/ClickHouse/pull/14320) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Fix QueryPlan lifetime (for EXPLAIN PIPELINE graph=1) for queries with nested interpreter. [#14315](https://github.com/ClickHouse/ClickHouse/pull/14315) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix segfault in `clickhouse-odbc-bridge` during schema fetch from some external sources. This PR fixes https://github.com/ClickHouse/ClickHouse/issues/13861. [#14267](https://github.com/ClickHouse/ClickHouse/pull/14267) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash in mark inclusion search introduced in https://github.com/ClickHouse/ClickHouse/pull/12277. [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix segfault in `clickhouse-odbc-bridge` during schema fetch from some external sources. This PR fixes [#13861](https://github.com/ClickHouse/ClickHouse/issues/13861). [#14267](https://github.com/ClickHouse/ClickHouse/pull/14267) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash in mark inclusion search introduced in [#12277](https://github.com/ClickHouse/ClickHouse/pull/12277). [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix creation of tables with named tuples. This fixes [#13027](https://github.com/ClickHouse/ClickHouse/issues/13027). [#14143](https://github.com/ClickHouse/ClickHouse/pull/14143) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix formatting of minimal negative decimal numbers. This fixes https://github.com/ClickHouse/ClickHouse/issues/14111. [#14119](https://github.com/ClickHouse/ClickHouse/pull/14119) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fix formatting of minimal negative decimal numbers. This fixes [#14111](https://github.com/ClickHouse/ClickHouse/issues/14111). [#14119](https://github.com/ClickHouse/ClickHouse/pull/14119) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fix `DistributedFilesToInsert` metric (zeroed when it should not). [#14095](https://github.com/ClickHouse/ClickHouse/pull/14095) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix `pointInPolygon` with const 2d array as polygon. [#14079](https://github.com/ClickHouse/ClickHouse/pull/14079) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fixed wrong mount point in extra info for `Poco::Exception: no space left on device`. [#14050](https://github.com/ClickHouse/ClickHouse/pull/14050) ([tavplubix](https://github.com/tavplubix)).
|
||||
@ -678,10 +808,10 @@
|
||||
* Fix wrong code in function `netloc`. This fixes [#13335](https://github.com/ClickHouse/ClickHouse/issues/13335). [#13446](https://github.com/ClickHouse/ClickHouse/pull/13446) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix possible race in `StorageMemory`. [#13416](https://github.com/ClickHouse/ClickHouse/pull/13416) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix missing or excessive headers in `TSV/CSVWithNames` formats in HTTP protocol. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504). [#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes https://github.com/ClickHouse/ClickHouse/issues/5779, https://github.com/ClickHouse/ClickHouse/issues/12527. [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix access to `redis` dictionary after connection was dropped once. It may happen with `cache` and `direct` dictionary layouts. [#13082](https://github.com/ClickHouse/ClickHouse/pull/13082) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Removed wrong auth access check when using ClickHouseDictionarySource to query remote tables. [#12756](https://github.com/ClickHouse/ClickHouse/pull/12756) ([sundyli](https://github.com/sundy-li)).
|
||||
* Properly distinguish subqueries in some cases for common subexpression elimination. https://github.com/ClickHouse/ClickHouse/issues/8333. [#8367](https://github.com/ClickHouse/ClickHouse/pull/8367) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Properly distinguish subqueries in some cases for common subexpression elimination. [#8333](https://github.com/ClickHouse/ClickHouse/issues/8333). [#8367](https://github.com/ClickHouse/ClickHouse/pull/8367) ([Amos Bird](https://github.com/amosbird)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
@ -749,7 +879,7 @@
|
||||
* Updating LDAP user authentication suite to check that it works with RBAC. [#13656](https://github.com/ClickHouse/ClickHouse/pull/13656) ([vzakaznikov](https://github.com/vzakaznikov)).
|
||||
* Removed `-DENABLE_CURL_CLIENT` for `contrib/aws`. [#13628](https://github.com/ClickHouse/ClickHouse/pull/13628) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Increasing health-check timeouts for ClickHouse nodes and adding support to dump docker-compose logs if unhealthy containers found. [#13612](https://github.com/ClickHouse/ClickHouse/pull/13612) ([vzakaznikov](https://github.com/vzakaznikov)).
|
||||
* Make sure https://github.com/ClickHouse/ClickHouse/issues/10977 is invalid. [#13539](https://github.com/ClickHouse/ClickHouse/pull/13539) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Make sure [#10977](https://github.com/ClickHouse/ClickHouse/issues/10977) is invalid. [#13539](https://github.com/ClickHouse/ClickHouse/pull/13539) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Skip PR's from robot-clickhouse. [#13489](https://github.com/ClickHouse/ClickHouse/pull/13489) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Move Dockerfiles from integration tests to `docker/test` directory. docker_compose files are available in `runner` docker container. Docker images are built in CI and not in integration tests. [#13448](https://github.com/ClickHouse/ClickHouse/pull/13448) ([Ilya Yatsishin](https://github.com/qoega)).
|
||||
|
||||
@ -765,6 +895,7 @@
|
||||
* The function `groupArrayMoving*` was not working for distributed queries. Its result was calculated within an incorrect data type (without promotion to the largest type). The function `groupArrayMovingAvg` was returning integer number that was inconsistent with the `avg` function. This fixes [#12568](https://github.com/ClickHouse/ClickHouse/issues/12568). [#12622](https://github.com/ClickHouse/ClickHouse/pull/12622) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Add sanity check for MergeTree settings. If the settings are incorrect, the server will refuse to start or to create a table, printing detailed explanation to the user. [#13153](https://github.com/ClickHouse/ClickHouse/pull/13153) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Protect from the cases when user may set `background_pool_size` to value lower than `number_of_free_entries_in_pool_to_execute_mutation` or `number_of_free_entries_in_pool_to_lower_max_size_of_merge`. In these cases ALTERs won't work or the maximum size of merge will be too limited. It will throw exception explaining what to do. This closes [#10897](https://github.com/ClickHouse/ClickHouse/issues/10897). [#12728](https://github.com/ClickHouse/ClickHouse/pull/12728) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
|
||||
|
||||
#### New Feature
|
||||
|
||||
@ -780,7 +911,7 @@
|
||||
* Add `FROM_UNIXTIME` function for compatibility with MySQL, related to [#12149](https://github.com/ClickHouse/ClickHouse/issues/12149). [#12484](https://github.com/ClickHouse/ClickHouse/pull/12484) ([flynn](https://github.com/ucasFL)).
|
||||
* Allow Nullable types as keys in MergeTree tables if `allow_nullable_key` table setting is enabled. Closes [#5319](https://github.com/ClickHouse/ClickHouse/issues/5319). [#12433](https://github.com/ClickHouse/ClickHouse/pull/12433) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Integration with [COS](https://intl.cloud.tencent.com/product/cos). [#12386](https://github.com/ClickHouse/ClickHouse/pull/12386) ([fastio](https://github.com/fastio)).
|
||||
* Add mapAdd and mapSubtract functions for adding/subtracting key-mapped values. [#11735](https://github.com/ClickHouse/ClickHouse/pull/11735) ([Ildus Kurbangaliev](https://github.com/ildus)).
|
||||
* Add `mapAdd` and `mapSubtract` functions for adding/subtracting key-mapped values. [#11735](https://github.com/ClickHouse/ClickHouse/pull/11735) ([Ildus Kurbangaliev](https://github.com/ildus)).
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
@ -951,6 +1082,10 @@
|
||||
|
||||
### ClickHouse release v20.6.3.28-stable
|
||||
|
||||
#### Backward Incompatible Change
|
||||
|
||||
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
|
||||
|
||||
#### New Feature
|
||||
|
||||
* Added an initial implementation of `EXPLAIN` query. Syntax: `EXPLAIN SELECT ...`. This fixes [#1118](https://github.com/ClickHouse/ClickHouse/issues/1118). [#11873](https://github.com/ClickHouse/ClickHouse/pull/11873) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -1059,7 +1194,7 @@
|
||||
|
||||
* Improved performance of 'ORDER BY' and 'GROUP BY' by prefix of sorting key (enabled with `optimize_aggregation_in_order` setting, disabled by default). [#11696](https://github.com/ClickHouse/ClickHouse/pull/11696) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Removed injective functions inside `uniq*()` if `set optimize_injective_functions_inside_uniq=1`. [#12337](https://github.com/ClickHouse/ClickHouse/pull/12337) ([Ruslan Kamalov](https://github.com/kamalov-ruslan)).
|
||||
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Implemented single part uploads for DiskS3 (experimental feature). [#12026](https://github.com/ClickHouse/ClickHouse/pull/12026) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
|
||||
#### Experimental Feature
|
||||
@ -1121,7 +1256,7 @@
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
@ -1139,6 +1274,7 @@
|
||||
* Update `zstd` to 1.4.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. This change is backward compatible but we list it here in changelog in case you will wonder about these messages. [#10663](https://github.com/ClickHouse/ClickHouse/pull/10663) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Added a check for meaningless codecs and a setting `allow_suspicious_codecs` to control this check. This closes [#4966](https://github.com/ClickHouse/ClickHouse/issues/4966). [#10645](https://github.com/ClickHouse/ClickHouse/pull/10645) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Several Kafka settings changed their defaults. See [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388).
|
||||
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
|
||||
|
||||
#### New Feature
|
||||
|
||||
@ -1200,7 +1336,7 @@
|
||||
* Fix wrong result of comparison of FixedString with constant String. This fixes [#11393](https://github.com/ClickHouse/ClickHouse/issues/11393). This bug appeared in version 20.4. [#11828](https://github.com/ClickHouse/ClickHouse/pull/11828) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix wrong result for `if` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed `Scalar doesn't exist` exception when using `WITH <scalar subquery> ...` in `SELECT ... FROM merge_tree_table ...` https://github.com/ClickHouse/ClickHouse/issues/11621. [#11767](https://github.com/ClickHouse/ClickHouse/pull/11767) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed `Scalar doesn't exist` exception when using `WITH <scalar subquery> ...` in `SELECT ... FROM merge_tree_table ...` [#11621](https://github.com/ClickHouse/ClickHouse/issues/11621). [#11767](https://github.com/ClickHouse/ClickHouse/pull/11767) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which succeeded while an error was expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Now replicated fetches will be cancelled during metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)).
|
||||
* Parse metadata stored in zookeeper before checking for equality. [#11739](https://github.com/ClickHouse/ClickHouse/pull/11739) ([Azat Khuzhin](https://github.com/azat)).
|
||||
@ -1251,8 +1387,8 @@
|
||||
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash when `SET DEFAULT ROLE` is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in `Protobuf` format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash when `SET DEFAULT ROLE` is called with wrong arguments. This fixes [#10586](https://github.com/ClickHouse/ClickHouse/issues/10586). [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in `Protobuf` format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fixed a bug when `cache` dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -1318,7 +1454,7 @@
|
||||
* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix SELECT of column ALIAS whose default expression type is different from column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Implemented comparison between DateTime64 and String values (just like for DateTime). [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Fix index corruption, which may accur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix index corruption, which may occur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Disable GROUP BY sharding_key optimization by default (`optimize_distributed_group_by_sharding_key` had been introduced and turned off by default, due to trickery of sharding_key analyzing, simple example is `if` in sharding key) and fix it for WITH ROLLUP/CUBE/TOTALS. [#10516](https://github.com/ClickHouse/ClickHouse/pull/10516) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixes: [#10263](https://github.com/ClickHouse/ClickHouse/issues/10263) (after that PR dist send via INSERT had been postponing on each INSERT) Fixes: [#8756](https://github.com/ClickHouse/ClickHouse/issues/8756) (that PR breaks distributed sends with all of the following conditions met (unlikely setup for now I guess): `internal_replication == false`, multiple local shards (activates the hardlinking code) and `distributed_storage_policy` (makes `link(2)` fails on `EXDEV`)). [#10486](https://github.com/ClickHouse/ClickHouse/pull/10486) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed error with "max_rows_to_sort" limit. [#10268](https://github.com/ClickHouse/ClickHouse/pull/10268) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
@ -1475,7 +1611,7 @@
|
||||
* Lower memory usage in tests. [#10617](https://github.com/ClickHouse/ClickHouse/pull/10617) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixing hard coded timeouts in new live view tests. [#10604](https://github.com/ClickHouse/ClickHouse/pull/10604) ([vzakaznikov](https://github.com/vzakaznikov)).
|
||||
* Increasing timeout when opening a client in tests/queries/0_stateless/helpers/client.py. [#10599](https://github.com/ClickHouse/ClickHouse/pull/10599) ([vzakaznikov](https://github.com/vzakaznikov)).
|
||||
* Enable ThinLTO for clang builds, continuation of https://github.com/ClickHouse/ClickHouse/pull/10435. [#10585](https://github.com/ClickHouse/ClickHouse/pull/10585) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Enable ThinLTO for clang builds, continuation of [#10435](https://github.com/ClickHouse/ClickHouse/pull/10435). [#10585](https://github.com/ClickHouse/ClickHouse/pull/10585) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Adding fuzzers and preparing for oss-fuzz integration. [#10546](https://github.com/ClickHouse/ClickHouse/pull/10546) ([kyprizel](https://github.com/kyprizel)).
|
||||
* Fix FreeBSD build. [#10150](https://github.com/ClickHouse/ClickHouse/pull/10150) ([Ivan](https://github.com/abyss7)).
|
||||
* Add new build for query tests using pytest framework. [#10039](https://github.com/ClickHouse/ClickHouse/pull/10039) ([Ivan](https://github.com/abyss7)).
|
||||
@ -1550,7 +1686,7 @@
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
@ -1604,7 +1740,7 @@
|
||||
* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fix pointInPolygon with nan as point. Fixes [#11375](https://github.com/ClickHouse/ClickHouse/issues/11375). [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fix potential uninitialized memory read in MergeTree shutdown if table was not created successfully. [#11420](https://github.com/ClickHouse/ClickHouse/pull/11420) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -1620,8 +1756,8 @@
|
||||
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes [#10586](https://github.com/ClickHouse/ClickHouse/issues/10586). [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -1666,7 +1802,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Now constraints are updated if the column participating in `CONSTRAINT` expression was renamed. Fixes [#10844](https://github.com/ClickHouse/ClickHouse/issues/10844). [#10847](https://github.com/ClickHouse/ClickHouse/pull/10847) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed potential read of uninitialized memory in cache-dictionary. [#10834](https://github.com/ClickHouse/ClickHouse/pull/10834) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed columns order after `Block::sortColumns()`. [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984] (https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed `UBSan` and `MSan` report in `DateLUT`. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed incorrect type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew)).
|
||||
* Fixed `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` without exception should be finished if exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -1694,15 +1830,15 @@ No changes compared to v20.4.3.16-stable.
|
||||
|
||||
#### New Feature
|
||||
* Add support for secured connection from ClickHouse to Zookeeper [#10184](https://github.com/ClickHouse/ClickHouse/pull/10184) ([Konstantin Lebedev](https://github.com/xzkostyan))
|
||||
* Support custom HTTP handlers. See ISSUES-5436 for description. [#7572](https://github.com/ClickHouse/ClickHouse/pull/7572) ([Winter Zhang](https://github.com/zhang2014))
|
||||
* Support custom HTTP handlers. See [#5436](https://github.com/ClickHouse/ClickHouse/issues/5436) for description. [#7572](https://github.com/ClickHouse/ClickHouse/pull/7572) ([Winter Zhang](https://github.com/zhang2014))
|
||||
* Add MessagePack Input/Output format. [#9889](https://github.com/ClickHouse/ClickHouse/pull/9889) ([Kruglov Pavel](https://github.com/Avogar))
|
||||
* Add Regexp input format. [#9196](https://github.com/ClickHouse/ClickHouse/pull/9196) ([Kruglov Pavel](https://github.com/Avogar))
|
||||
* Added output format `Markdown` for embedding tables in markdown documents. [#10317](https://github.com/ClickHouse/ClickHouse/pull/10317) ([Kruglov Pavel](https://github.com/Avogar))
|
||||
* Added support for custom settings section in dictionaries. Also fixes issue [#2829](https://github.com/ClickHouse/ClickHouse/issues/2829). [#10137](https://github.com/ClickHouse/ClickHouse/pull/10137) ([Artem Streltsov](https://github.com/kekekekule))
|
||||
* Added custom settings support in DDL-queries for CREATE DICTIONARY [#10465](https://github.com/ClickHouse/ClickHouse/pull/10465) ([Artem Streltsov](https://github.com/kekekekule))
|
||||
* Added custom settings support in DDL-queries for `CREATE DICTIONARY` [#10465](https://github.com/ClickHouse/ClickHouse/pull/10465) ([Artem Streltsov](https://github.com/kekekekule))
|
||||
* Add simple server-wide memory profiler that will collect allocation contexts when server memory usage becomes higher than the next allocation threshold. [#10444](https://github.com/ClickHouse/ClickHouse/pull/10444) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Add setting `always_fetch_merged_part` which restricts a replica from merging parts by itself and makes it always prefer downloading from other replicas. [#10379](https://github.com/ClickHouse/ClickHouse/pull/10379) ([alesapin](https://github.com/alesapin))
|
||||
* Add function JSONExtractKeysAndValuesRaw which extracts raw data from JSON objects [#10378](https://github.com/ClickHouse/ClickHouse/pull/10378) ([hcz](https://github.com/hczhcz))
|
||||
* Add function `JSONExtractKeysAndValuesRaw` which extracts raw data from JSON objects [#10378](https://github.com/ClickHouse/ClickHouse/pull/10378) ([hcz](https://github.com/hczhcz))
|
||||
* Add memory usage from OS to `system.asynchronous_metrics`. [#10361](https://github.com/ClickHouse/ClickHouse/pull/10361) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Added generic variants for functions `least` and `greatest`. Now they work with arbitrary number of arguments of arbitrary types. This fixes [#4767](https://github.com/ClickHouse/ClickHouse/issues/4767) [#10318](https://github.com/ClickHouse/ClickHouse/pull/10318) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Now ClickHouse controls timeouts of dictionary sources on its side. Two new settings added to cache dictionary configuration: `strict_max_lifetime_seconds`, which is `max_lifetime` by default, and `query_wait_timeout_milliseconds`, which is one minute by default. The first setting is also useful with the `allow_read_expired_keys` setting (to forbid reading very expired keys). [#10337](https://github.com/ClickHouse/ClickHouse/pull/10337) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
|
||||
@ -1715,7 +1851,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Add ability to query Distributed over Distributed (w/o `distributed_group_by_no_merge`) ... [#9923](https://github.com/ClickHouse/ClickHouse/pull/9923) ([Azat Khuzhin](https://github.com/azat))
|
||||
* Add function `arrayReduceInRanges` which aggregates array elements in given ranges. [#9598](https://github.com/ClickHouse/ClickHouse/pull/9598) ([hcz](https://github.com/hczhcz))
|
||||
* Add Dictionary Status on prometheus exporter. [#9622](https://github.com/ClickHouse/ClickHouse/pull/9622) ([Guillaume Tassery](https://github.com/YiuRULE))
|
||||
* Add function arrayAUC [#8698](https://github.com/ClickHouse/ClickHouse/pull/8698) ([taiyang-li](https://github.com/taiyang-li))
|
||||
* Add function `arrayAUC` [#8698](https://github.com/ClickHouse/ClickHouse/pull/8698) ([taiyang-li](https://github.com/taiyang-li))
|
||||
* Support `DROP VIEW` statement for better TPC-H compatibility. [#9831](https://github.com/ClickHouse/ClickHouse/pull/9831) ([Amos Bird](https://github.com/amosbird))
|
||||
* Add 'strict_order' option to windowFunnel() [#9773](https://github.com/ClickHouse/ClickHouse/pull/9773) ([achimbab](https://github.com/achimbab))
|
||||
* Support `DATE` and `TIMESTAMP` SQL operators, e.g. `SELECT date '2001-01-01'` [#9691](https://github.com/ClickHouse/ClickHouse/pull/9691) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
@ -1919,7 +2055,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Move integration tests docker files to docker/ directory. [#10335](https://github.com/ClickHouse/ClickHouse/pull/10335) ([Ilya Yatsishin](https://github.com/qoega))
|
||||
* Allow to use `clang-10` in CI. It ensures that [#10238](https://github.com/ClickHouse/ClickHouse/issues/10238) is fixed. [#10384](https://github.com/ClickHouse/ClickHouse/pull/10384) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Update OpenSSL to upstream master. Fixed the issue when TLS connections may fail with the message `OpenSSL SSL_read: error:14094438:SSL routines:ssl3_read_bytes:tlsv1 alert internal error` and `SSL Exception: error:2400006E:random number generator::error retrieving entropy`. The issue was present in version 20.1. [#8956](https://github.com/ClickHouse/ClickHouse/pull/8956) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Fix clang-10 build. https://github.com/ClickHouse/ClickHouse/issues/10238 [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird))
|
||||
* Fix clang-10 build. [#10238](https://github.com/ClickHouse/ClickHouse/issues/10238) [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird))
|
||||
* Add performance test for [Parallel INSERT for materialized view](https://github.com/ClickHouse/ClickHouse/pull/10052). [#10345](https://github.com/ClickHouse/ClickHouse/pull/10345) ([vxider](https://github.com/Vxider))
|
||||
* Fix flaky test `test_settings_constraints_distributed.test_insert_clamps_settings`. [#10346](https://github.com/ClickHouse/ClickHouse/pull/10346) ([Vitaly Baranov](https://github.com/vitlibar))
|
||||
* Add util to test results upload in CI ClickHouse [#10330](https://github.com/ClickHouse/ClickHouse/pull/10330) ([Ilya Yatsishin](https://github.com/qoega))
|
||||
@ -2093,7 +2229,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
|
||||
|
||||
### ClickHouse release v20.3.12.112-lts 2020-06-25
|
||||
@ -2135,7 +2271,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fix pointInPolygon with nan as point. Fixes [#11375](https://github.com/ClickHouse/ClickHouse/issues/11375). [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fix crash in JOIN over LowCardinality(T) and Nullable(T). [#11380](https://github.com/ClickHouse/ClickHouse/issues/11380). [#11414](https://github.com/ClickHouse/ClickHouse/pull/11414) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fix error code for wrong `USING` key. [#11373](https://github.com/ClickHouse/ClickHouse/issues/11373). [#11404](https://github.com/ClickHouse/ClickHouse/pull/11404) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
@ -2152,7 +2288,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -2183,7 +2319,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fixed `SIGSEGV` in `StringHashTable` if such a key does not exist. [#10870](https://github.com/ClickHouse/ClickHouse/pull/10870) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed bug in `ReplicatedMergeTree` which might cause some `ALTER` on `OPTIMIZE` query to hang waiting for some replica after it becomes inactive. [#10849](https://github.com/ClickHouse/ClickHouse/pull/10849) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fixed columns order after `Block::sortColumns()`. [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984] (https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed `UBSan` and `MSan` report in `DateLUT`. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed incorrect type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew))
|
||||
* Fixed `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` without exception should be finished if exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -2202,7 +2338,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fixed incorrect scalar results inside inner query of `MATERIALIZED VIEW` in case if this query contained dependent table. [#10603](https://github.com/ClickHouse/ClickHouse/pull/10603) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed `SELECT` of column `ALIAS` whose default expression type differs from the column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Implemented comparison between DateTime64 and String values. [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Fixed index corruption, which may accur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed index corruption, which may occur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed the situation, when mutation finished all parts, but hung up in `is_done=0`. [#10526](https://github.com/ClickHouse/ClickHouse/pull/10526) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed overflow at beginning of unix epoch for timezones with fractional offset from `UTC`. This fixes [#9335](https://github.com/ClickHouse/ClickHouse/issues/9335). [#10513](https://github.com/ClickHouse/ClickHouse/pull/10513) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed improper shutdown of `Distributed` storage. [#10491](https://github.com/ClickHouse/ClickHouse/pull/10491) ([Azat Khuzhin](https://github.com/azat)).
|
||||
@ -2212,14 +2348,14 @@ No changes compared to v20.4.3.16-stable.
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
* Fix UBSan report in LZ4 library. [#10631](https://github.com/ClickHouse/ClickHouse/pull/10631) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix clang-10 build. https://github.com/ClickHouse/ClickHouse/issues/10238. [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix clang-10 build. [#10238](https://github.com/ClickHouse/ClickHouse/issues/10238). [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Added failing tests about `max_rows_to_sort` setting. [#10268](https://github.com/ClickHouse/ClickHouse/pull/10268) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Added some improvements in printing diagnostic info in input formats. Fixes [#10204](https://github.com/ClickHouse/ClickHouse/issues/10204). [#10418](https://github.com/ClickHouse/ClickHouse/pull/10418) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Added CA certificates to clickhouse-server docker image. [#10476](https://github.com/ClickHouse/ClickHouse/pull/10476) ([filimonov](https://github.com/filimonov)).
|
||||
|
||||
#### Bug fix
|
||||
|
||||
* #10551. [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
|
||||
|
||||
### ClickHouse release v20.3.8.53, 2020-04-23
|
||||
@ -2411,7 +2547,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fixed the behaviour of `match` and `extract` functions when haystack has zero bytes. The behaviour was wrong when haystack was constant. This fixes [#9160](https://github.com/ClickHouse/ClickHouse/issues/9160) [#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#9345](https://github.com/ClickHouse/ClickHouse/pull/9345) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Avoid throwing from destructor in Apache Avro 3rd-party library. [#9066](https://github.com/ClickHouse/ClickHouse/pull/9066) ([Andrew Onyshchuk](https://github.com/oandrew))
|
||||
* Don't commit a batch polled from `Kafka` partially as it can lead to holes in data. [#8876](https://github.com/ClickHouse/ClickHouse/pull/8876) ([filimonov](https://github.com/filimonov))
|
||||
* Fix `joinGet` with nullable return types. https://github.com/ClickHouse/ClickHouse/issues/8919 [#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) ([Amos Bird](https://github.com/amosbird))
|
||||
* Fix `joinGet` with nullable return types. [#8919](https://github.com/ClickHouse/ClickHouse/issues/8919) [#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) ([Amos Bird](https://github.com/amosbird))
|
||||
* Fix data incompatibility when compressed with `T64` codec. [#9016](https://github.com/ClickHouse/ClickHouse/pull/9016) ([Artem Zuikov](https://github.com/4ertus2)) Fix data type ids in `T64` compression codec that leads to wrong (de)compression in affected versions. [#9033](https://github.com/ClickHouse/ClickHouse/pull/9033) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
* Add setting `enable_early_constant_folding` and disable it in some cases that lead to errors. [#9010](https://github.com/ClickHouse/ClickHouse/pull/9010) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
* Fix pushdown predicate optimizer with VIEW and enable the test [#9011](https://github.com/ClickHouse/ClickHouse/pull/9011) ([Winter Zhang](https://github.com/zhang2014))
|
||||
@ -2613,7 +2749,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fix pointInPolygon with nan as point. Fixes [#11375](https://github.com/ClickHouse/ClickHouse/issues/11375). [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -2623,7 +2759,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Remove logging from mutation finalization task if nothing was finalized. [#11109](https://github.com/ClickHouse/ClickHouse/pull/11109) ([alesapin](https://github.com/alesapin)).
|
||||
@ -2901,7 +3037,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Several improvements to ClickHouse grammar in `.g4` file. [#8294](https://github.com/ClickHouse/ClickHouse/pull/8294) ([taiyang-li](https://github.com/taiyang-li))
|
||||
* Fix bug that leads to crashes in `JOIN`s with tables with engine `Join`. This fixes [#7556](https://github.com/ClickHouse/ClickHouse/issues/7556) [#8254](https://github.com/ClickHouse/ClickHouse/issues/8254) [#7915](https://github.com/ClickHouse/ClickHouse/issues/7915) [#8100](https://github.com/ClickHouse/ClickHouse/issues/8100). [#8298](https://github.com/ClickHouse/ClickHouse/pull/8298) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
* Fix redundant dictionaries reload on `CREATE DATABASE`. [#7916](https://github.com/ClickHouse/ClickHouse/pull/7916) ([Azat Khuzhin](https://github.com/azat))
|
||||
* Limit maximum number of streams for read from `StorageFile` and `StorageHDFS`. Fixes https://github.com/ClickHouse/ClickHouse/issues/7650. [#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
|
||||
* Limit maximum number of streams for read from `StorageFile` and `StorageHDFS`. Fixes [#7650](https://github.com/ClickHouse/ClickHouse/issues/7650). [#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
|
||||
* Fix bug in `ALTER ... MODIFY ... CODEC` query, when user specifies both default expression and codec. Fixes [#8593](https://github.com/ClickHouse/ClickHouse/issues/8593). [#8614](https://github.com/ClickHouse/ClickHouse/pull/8614) ([alesapin](https://github.com/alesapin))
|
||||
* Fix error in background merge of columns with `SimpleAggregateFunction(LowCardinality)` type. [#8613](https://github.com/ClickHouse/ClickHouse/pull/8613) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
|
||||
* Fixed type check in function `toDateTime64`. [#8375](https://github.com/ClickHouse/ClickHouse/pull/8375) ([Vasily Nemkov](https://github.com/Enmk))
|
||||
@ -2985,7 +3121,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Added check for extra parts of `MergeTree` at different disks, in order to not allow to miss data parts at undefined disks. [#8118](https://github.com/ClickHouse/ClickHouse/pull/8118) ([Vladimir Chebotarev](https://github.com/excitoon))
|
||||
* Enable SSL support for Mac client and server. [#8297](https://github.com/ClickHouse/ClickHouse/pull/8297) ([Ivan](https://github.com/abyss7))
|
||||
* Now ClickHouse can work as MySQL federated server (see https://dev.mysql.com/doc/refman/5.7/en/federated-create-server.html). [#7717](https://github.com/ClickHouse/ClickHouse/pull/7717) ([Maxim Fedotov](https://github.com/MaxFedotov))
|
||||
* `clickhouse-client` now only enable `bracketed-paste` when multiquery is on and multiline is off. This fixes (#7757)[https://github.com/ClickHouse/ClickHouse/issues/7757]. [#7761](https://github.com/ClickHouse/ClickHouse/pull/7761) ([Amos Bird](https://github.com/amosbird))
|
||||
* `clickhouse-client` now only enable `bracketed-paste` when multiquery is on and multiline is off. This fixes [#7757](https://github.com/ClickHouse/ClickHouse/issues/7757). [#7761](https://github.com/ClickHouse/ClickHouse/pull/7761) ([Amos Bird](https://github.com/amosbird))
|
||||
* Support `Array(Decimal)` in `if` function. [#7721](https://github.com/ClickHouse/ClickHouse/pull/7721) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
* Support Decimals in `arrayDifference`, `arrayCumSum` and `arrayCumSumNegative` functions. [#7724](https://github.com/ClickHouse/ClickHouse/pull/7724) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
* Added `lifetime` column to `system.dictionaries` table. [#6820](https://github.com/ClickHouse/ClickHouse/issues/6820) [#7727](https://github.com/ClickHouse/ClickHouse/pull/7727) ([kekekekule](https://github.com/kekekekule))
|
||||
|
@ -112,6 +112,12 @@ if (ENABLE_FUZZING)
|
||||
set (FUZZER "libfuzzer")
|
||||
endif()
|
||||
|
||||
# Global libraries
|
||||
# See:
|
||||
# - default_libs.cmake
|
||||
# - sanitize.cmake
|
||||
add_library(global-libs INTERFACE)
|
||||
|
||||
include (cmake/fuzzer.cmake)
|
||||
include (cmake/sanitize.cmake)
|
||||
|
||||
@ -223,16 +229,16 @@ if (ARCH_NATIVE)
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
|
||||
endif ()
|
||||
|
||||
if (UNBUNDLED AND (COMPILER_GCC OR COMPILER_CLANG))
|
||||
# to make numeric_limits<__int128> works for unbundled build
|
||||
set (_CXX_STANDARD "-std=gnu++2a")
|
||||
if (COMPILER_GCC OR COMPILER_CLANG)
|
||||
# to make numeric_limits<__int128> works with GCC
|
||||
set (_CXX_STANDARD "gnu++2a")
|
||||
else()
|
||||
set (_CXX_STANDARD "-std=c++2a")
|
||||
set (_CXX_STANDARD "c++2a")
|
||||
endif()
|
||||
|
||||
# cmake < 3.12 doesn't support 20. We'll set CMAKE_CXX_FLAGS for now
|
||||
# set (CMAKE_CXX_STANDARD 20)
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_CXX_STANDARD}")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=${_CXX_STANDARD}")
|
||||
|
||||
set (CMAKE_CXX_EXTENSIONS 0) # https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html#prop_tgt:CXX_EXTENSIONS
|
||||
set (CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
@ -257,6 +263,8 @@ if (WITH_COVERAGE AND COMPILER_GCC)
|
||||
set(WITHOUT_COVERAGE "-fno-profile-arcs -fno-test-coverage")
|
||||
endif()
|
||||
|
||||
set(COMPILER_FLAGS "${COMPILER_FLAGS}")
|
||||
|
||||
set (CMAKE_BUILD_COLOR_MAKEFILE ON)
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}")
|
||||
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_CXX_FLAGS_ADD}")
|
||||
@ -455,6 +463,7 @@ include (cmake/find/s3.cmake)
|
||||
include (cmake/find/base64.cmake)
|
||||
include (cmake/find/parquet.cmake)
|
||||
include (cmake/find/simdjson.cmake)
|
||||
include (cmake/find/fast_float.cmake)
|
||||
include (cmake/find/rapidjson.cmake)
|
||||
include (cmake/find/fastops.cmake)
|
||||
include (cmake/find/odbc.cmake)
|
||||
@ -510,8 +519,11 @@ macro (add_executable target)
|
||||
|
||||
get_target_property (type ${target} TYPE)
|
||||
if (${type} STREQUAL EXECUTABLE)
|
||||
# operator::new/delete for executables (MemoryTracker stuff)
|
||||
target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES})
|
||||
# disabled for TSAN and gcc since libtsan.a provides overrides too
|
||||
if (TARGET clickhouse_new_delete)
|
||||
# operator::new/delete for executables (MemoryTracker stuff)
|
||||
target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES})
|
||||
endif()
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
@ -522,8 +534,8 @@ include_directories(${ConfigIncludePath})
|
||||
include (cmake/warnings.cmake)
|
||||
|
||||
add_subdirectory (base)
|
||||
add_subdirectory (programs)
|
||||
add_subdirectory (src)
|
||||
add_subdirectory (programs)
|
||||
add_subdirectory (tests)
|
||||
add_subdirectory (utils)
|
||||
|
||||
|
@ -14,3 +14,6 @@ ClickHouse® is an open-source column-oriented database management system that a
|
||||
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
|
||||
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
|
||||
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
|
||||
|
||||
## Upcoming Events
|
||||
* [SF Bay Area ClickHouse Virtual Office Hours (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/274273549/) on 20 January 2021.
|
||||
|
@ -6,6 +6,7 @@ set (SRCS
|
||||
demangle.cpp
|
||||
getFQDNOrHostName.cpp
|
||||
getMemoryAmount.cpp
|
||||
getPageSize.cpp
|
||||
getThreadId.cpp
|
||||
JSON.cpp
|
||||
LineReader.cpp
|
||||
|
@ -127,7 +127,7 @@ String LineReader::readLine(const String & first_prompt, const String & second_p
|
||||
}
|
||||
#endif
|
||||
|
||||
line += (line.empty() ? "" : " ") + input;
|
||||
line += (line.empty() ? "" : "\n") + input;
|
||||
|
||||
if (!need_next_line)
|
||||
break;
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <common/ReadlineLineReader.h>
|
||||
#include <common/errnoToString.h>
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
#include <errno.h>
|
||||
@ -69,7 +70,7 @@ ReadlineLineReader::ReadlineLineReader(
|
||||
{
|
||||
int res = read_history(history_file_path.c_str());
|
||||
if (res)
|
||||
std::cerr << "Cannot read history from file " + history_file_path + ": "+ strerror(errno) << std::endl;
|
||||
std::cerr << "Cannot read history from file " + history_file_path + ": "+ errnoToString(errno) << std::endl;
|
||||
}
|
||||
|
||||
/// Added '.' to the default list. Because it is used to separate database and table.
|
||||
@ -107,7 +108,7 @@ ReadlineLineReader::ReadlineLineReader(
|
||||
};
|
||||
|
||||
if (signal(SIGINT, clear_prompt_or_exit) == SIG_ERR)
|
||||
throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + strerror(errno));
|
||||
throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + errnoToString(errno));
|
||||
|
||||
rl_variable_bind("completion-ignore-case", "on");
|
||||
// TODO: it doesn't work
|
||||
|
@ -47,7 +47,7 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
{
|
||||
if (!rx.history_load(history_file_path))
|
||||
{
|
||||
rx.print("Loading history failed: %s\n", strerror(errno));
|
||||
rx.print("Loading history failed: %s\n", errnoToString(errno).c_str());
|
||||
}
|
||||
|
||||
if (flock(history_file_fd, LOCK_UN))
|
||||
@ -58,6 +58,8 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
}
|
||||
}
|
||||
|
||||
rx.install_window_change_handler();
|
||||
|
||||
auto callback = [&suggest] (const String & context, size_t context_size)
|
||||
{
|
||||
if (auto range = suggest.getCompletions(context, context_size))
|
||||
@ -86,7 +88,7 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
ReplxxLineReader::~ReplxxLineReader()
|
||||
{
|
||||
if (close(history_file_fd))
|
||||
rx.print("Close of history file failed: %s\n", strerror(errno));
|
||||
rx.print("Close of history file failed: %s\n", errnoToString(errno).c_str());
|
||||
}
|
||||
|
||||
LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
|
||||
@ -111,7 +113,7 @@ void ReplxxLineReader::addToHistory(const String & line)
|
||||
// and that is why flock() is added here.
|
||||
bool locked = false;
|
||||
if (flock(history_file_fd, LOCK_EX))
|
||||
rx.print("Lock of history file failed: %s\n", strerror(errno));
|
||||
rx.print("Lock of history file failed: %s\n", errnoToString(errno).c_str());
|
||||
else
|
||||
locked = true;
|
||||
|
||||
@ -119,10 +121,10 @@ void ReplxxLineReader::addToHistory(const String & line)
|
||||
|
||||
// flush changes to the disk
|
||||
if (!rx.history_save(history_file_path))
|
||||
rx.print("Saving history failed: %s\n", strerror(errno));
|
||||
rx.print("Saving history failed: %s\n", errnoToString(errno).c_str());
|
||||
|
||||
if (locked && 0 != flock(history_file_fd, LOCK_UN))
|
||||
rx.print("Unlock of history file failed: %s\n", strerror(errno));
|
||||
rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
|
||||
}
|
||||
|
||||
void ReplxxLineReader::enableBracketedPaste()
|
||||
|
@ -76,12 +76,8 @@
|
||||
# define NO_SANITIZE_THREAD
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__ && !defined __clang__
|
||||
# define OPTIMIZE(x) __attribute__((__optimize__(x)))
|
||||
#else
|
||||
# define OPTIMIZE(x)
|
||||
#endif
|
||||
|
||||
/// A macro for suppressing warnings about unused variables or function results.
|
||||
/// Useful for structured bindings which have no standard way to declare this.
|
||||
#define UNUSED(...) (void)(__VA_ARGS__)
|
||||
/// A template function for suppressing warnings about unused variables or function results.
|
||||
template <typename... Args>
|
||||
constexpr void UNUSED(Args &&... args [[maybe_unused]])
|
||||
{
|
||||
}
|
||||
|
@ -1,100 +1,29 @@
|
||||
#include <stdexcept>
|
||||
#include "common/getMemoryAmount.h"
|
||||
#include "common/getPageSize.h"
|
||||
|
||||
// http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system
|
||||
|
||||
/*
|
||||
* Author: David Robert Nadeau
|
||||
* Site: http://NadeauSoftware.com/
|
||||
* License: Creative Commons Attribution 3.0 Unported License
|
||||
* http://creativecommons.org/licenses/by/3.0/deed.en_US
|
||||
*/
|
||||
|
||||
#if defined(WIN32) || defined(_WIN32)
|
||||
#include <Windows.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#if defined(BSD)
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Returns the size of physical memory (RAM) in bytes.
|
||||
* Returns 0 on unsupported platform
|
||||
*/
|
||||
/** Returns the size of physical memory (RAM) in bytes.
|
||||
* Returns 0 on unsupported platform
|
||||
*/
|
||||
uint64_t getMemoryAmountOrZero()
|
||||
{
|
||||
#if defined(_WIN32) && (defined(__CYGWIN__) || defined(__CYGWIN32__))
|
||||
/* Cygwin under Windows. ------------------------------------ */
|
||||
/* New 64-bit MEMORYSTATUSEX isn't available. Use old 32.bit */
|
||||
MEMORYSTATUS status;
|
||||
status.dwLength = sizeof(status);
|
||||
GlobalMemoryStatus(&status);
|
||||
return status.dwTotalPhys;
|
||||
int64_t num_pages = sysconf(_SC_PHYS_PAGES);
|
||||
if (num_pages <= 0)
|
||||
return 0;
|
||||
|
||||
#elif defined(WIN32) || defined(_WIN32)
|
||||
/* Windows. ------------------------------------------------- */
|
||||
/* Use new 64-bit MEMORYSTATUSEX, not old 32-bit MEMORYSTATUS */
|
||||
MEMORYSTATUSEX status;
|
||||
status.dwLength = sizeof(status);
|
||||
GlobalMemoryStatusEx(&status);
|
||||
return status.ullTotalPhys;
|
||||
int64_t page_size = getPageSize();
|
||||
if (page_size <= 0)
|
||||
return 0;
|
||||
|
||||
#else
|
||||
/* UNIX variants. ------------------------------------------- */
|
||||
/* Prefer sysctl() over sysconf() except sysctl() HW_REALMEM and HW_PHYSMEM */
|
||||
|
||||
#if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64))
|
||||
int mib[2];
|
||||
mib[0] = CTL_HW;
|
||||
#if defined(HW_MEMSIZE)
|
||||
mib[1] = HW_MEMSIZE; /* OSX. --------------------- */
|
||||
#elif defined(HW_PHYSMEM64)
|
||||
mib[1] = HW_PHYSMEM64; /* NetBSD, OpenBSD. --------- */
|
||||
#endif
|
||||
uint64_t size = 0; /* 64-bit */
|
||||
size_t len = sizeof(size);
|
||||
if (sysctl(mib, 2, &size, &len, nullptr, 0) == 0)
|
||||
return size;
|
||||
|
||||
return 0; /* Failed? */
|
||||
|
||||
#elif defined(_SC_AIX_REALMEM)
|
||||
/* AIX. ----------------------------------------------------- */
|
||||
return sysconf(_SC_AIX_REALMEM) * 1024;
|
||||
|
||||
#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
|
||||
/* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */
|
||||
return uint64_t(sysconf(_SC_PHYS_PAGES))
|
||||
*uint64_t(sysconf(_SC_PAGESIZE));
|
||||
|
||||
#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGE_SIZE)
|
||||
/* Legacy. -------------------------------------------------- */
|
||||
return uint64_t(sysconf(_SC_PHYS_PAGES))
|
||||
* uint64_t(sysconf(_SC_PAGE_SIZE));
|
||||
|
||||
#elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM))
|
||||
/* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */
|
||||
int mib[2];
|
||||
mib[0] = CTL_HW;
|
||||
#if defined(HW_REALMEM)
|
||||
mib[1] = HW_REALMEM; /* FreeBSD. ----------------- */
|
||||
#elif defined(HW_PYSMEM)
|
||||
mib[1] = HW_PHYSMEM; /* Others. ------------------ */
|
||||
#endif
|
||||
unsigned int size = 0; /* 32-bit */
|
||||
size_t len = sizeof(size);
|
||||
if (sysctl(mib, 2, &size, &len, nullptr, 0) == 0)
|
||||
return size;
|
||||
|
||||
return 0; /* Failed? */
|
||||
#endif /* sysctl and sysconf variants */
|
||||
|
||||
#endif
|
||||
return num_pages * page_size;
|
||||
}
|
||||
|
||||
|
||||
|
8
base/common/getPageSize.cpp
Normal file
8
base/common/getPageSize.cpp
Normal file
@ -0,0 +1,8 @@
|
||||
#include "common/getPageSize.h"
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
Int64 getPageSize()
|
||||
{
|
||||
return sysconf(_SC_PAGESIZE);
|
||||
}
|
6
base/common/getPageSize.h
Normal file
6
base/common/getPageSize.h
Normal file
@ -0,0 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
/// Get memory page size
|
||||
Int64 getPageSize();
|
@ -1,6 +1,7 @@
|
||||
// https://stackoverflow.com/questions/1413445/reading-a-password-from-stdcin
|
||||
|
||||
#include <common/setTerminalEcho.h>
|
||||
#include <common/errnoToString.h>
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
@ -31,7 +32,7 @@ void setTerminalEcho(bool enable)
|
||||
#else
|
||||
struct termios tty;
|
||||
if (tcgetattr(STDIN_FILENO, &tty))
|
||||
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + strerror(errno));
|
||||
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + errnoToString(errno));
|
||||
if (!enable)
|
||||
tty.c_lflag &= ~ECHO;
|
||||
else
|
||||
@ -39,6 +40,6 @@ void setTerminalEcho(bool enable)
|
||||
|
||||
auto ret = tcsetattr(STDIN_FILENO, TCSANOW, &tty);
|
||||
if (ret)
|
||||
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + strerror(errno));
|
||||
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + errnoToString(errno));
|
||||
#endif
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ using Int16 = int16_t;
|
||||
using Int32 = int32_t;
|
||||
using Int64 = int64_t;
|
||||
|
||||
#if __cplusplus <= 201703L
|
||||
#ifndef __cpp_char8_t
|
||||
using char8_t = unsigned char;
|
||||
#endif
|
||||
|
||||
|
@ -58,8 +58,7 @@ public:
|
||||
using signed_base_type = int64_t;
|
||||
|
||||
// ctors
|
||||
integer() = default;
|
||||
|
||||
constexpr integer() noexcept;
|
||||
template <typename T>
|
||||
constexpr integer(T rhs) noexcept;
|
||||
template <typename T>
|
||||
|
@ -916,6 +916,11 @@ public:
|
||||
|
||||
// Members
|
||||
|
||||
template <size_t Bits, typename Signed>
|
||||
constexpr integer<Bits, Signed>::integer() noexcept
|
||||
: items{}
|
||||
{}
|
||||
|
||||
template <size_t Bits, typename Signed>
|
||||
template <typename T>
|
||||
constexpr integer<Bits, Signed>::integer(T rhs) noexcept
|
||||
|
@ -5,7 +5,6 @@ LIBRARY()
|
||||
|
||||
ADDINCL(
|
||||
GLOBAL clickhouse/base
|
||||
GLOBAL contrib/libs/cctz/include
|
||||
)
|
||||
|
||||
CFLAGS (GLOBAL -DARCADIA_BUILD)
|
||||
@ -24,7 +23,7 @@ ELSEIF (OS_LINUX)
|
||||
ENDIF ()
|
||||
|
||||
PEERDIR(
|
||||
contrib/libs/cctz/src
|
||||
contrib/libs/cctz
|
||||
contrib/libs/cxxsupp/libcxx-filesystem
|
||||
contrib/libs/poco/Net
|
||||
contrib/libs/poco/Util
|
||||
@ -48,6 +47,7 @@ SRCS(
|
||||
errnoToString.cpp
|
||||
getFQDNOrHostName.cpp
|
||||
getMemoryAmount.cpp
|
||||
getPageSize.cpp
|
||||
getResource.cpp
|
||||
getThreadId.cpp
|
||||
mremap.cpp
|
||||
|
@ -4,7 +4,6 @@ LIBRARY()
|
||||
|
||||
ADDINCL(
|
||||
GLOBAL clickhouse/base
|
||||
GLOBAL contrib/libs/cctz/include
|
||||
)
|
||||
|
||||
CFLAGS (GLOBAL -DARCADIA_BUILD)
|
||||
@ -23,7 +22,7 @@ ELSEIF (OS_LINUX)
|
||||
ENDIF ()
|
||||
|
||||
PEERDIR(
|
||||
contrib/libs/cctz/src
|
||||
contrib/libs/cctz
|
||||
contrib/libs/cxxsupp/libcxx-filesystem
|
||||
contrib/libs/poco/Net
|
||||
contrib/libs/poco/Util
|
||||
|
@ -761,14 +761,14 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
|
||||
static KillingErrorHandler killing_error_handler;
|
||||
Poco::ErrorHandler::set(&killing_error_handler);
|
||||
|
||||
signal_pipe.setNonBlocking();
|
||||
signal_pipe.setNonBlockingWrite();
|
||||
signal_pipe.tryIncreaseSize(1 << 20);
|
||||
|
||||
signal_listener = std::make_unique<SignalListener>(*this);
|
||||
signal_listener_thread.start(*signal_listener);
|
||||
|
||||
#if defined(__ELF__) && !defined(__FreeBSD__)
|
||||
String build_id_hex = DB::SymbolIndex::instance().getBuildIDHex();
|
||||
String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
|
||||
if (build_id_hex.empty())
|
||||
build_id_info = "no build id";
|
||||
else
|
||||
|
@ -6,10 +6,12 @@
|
||||
|
||||
#include <common/defines.h>
|
||||
#include <common/getFQDNOrHostName.h>
|
||||
#include <common/getMemoryAmount.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
#include <Common/SymbolIndex.h>
|
||||
#include <Common/StackTrace.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include "Common/config_version.h"
|
||||
@ -28,14 +30,13 @@ namespace
|
||||
|
||||
bool initialized = false;
|
||||
bool anonymize = false;
|
||||
std::string server_data_path;
|
||||
|
||||
void setExtras()
|
||||
{
|
||||
|
||||
if (!anonymize)
|
||||
{
|
||||
sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str()));
|
||||
}
|
||||
|
||||
sentry_set_tag("version", VERSION_STRING);
|
||||
sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH));
|
||||
sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE));
|
||||
@ -44,6 +45,15 @@ void setExtras()
|
||||
sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR));
|
||||
sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR));
|
||||
sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH));
|
||||
sentry_set_extra("version_official", sentry_value_new_string(VERSION_OFFICIAL));
|
||||
|
||||
/// Sentry does not support 64-bit integers.
|
||||
sentry_set_extra("total_ram", sentry_value_new_string(formatReadableSizeWithBinarySuffix(getMemoryAmountOrZero()).c_str()));
|
||||
sentry_set_extra("physical_cpu_cores", sentry_value_new_int32(getNumberOfPhysicalCPUCores()));
|
||||
|
||||
if (!server_data_path.empty())
|
||||
sentry_set_extra("disk_free_space", sentry_value_new_string(formatReadableSizeWithBinarySuffix(
|
||||
Poco::File(server_data_path).freeSpace()).c_str()));
|
||||
}
|
||||
|
||||
void sentry_logger(sentry_level_e level, const char * message, va_list args, void *)
|
||||
@ -98,6 +108,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)
|
||||
}
|
||||
if (enabled)
|
||||
{
|
||||
server_data_path = config.getString("path", "");
|
||||
const std::filesystem::path & default_tmp_path = std::filesystem::path(config.getString("tmp_path", Poco::Path::temp())) / "sentry";
|
||||
const std::string & endpoint
|
||||
= config.getString("send_crash_reports.endpoint");
|
||||
@ -168,7 +179,7 @@ void SentryWriter::onFault(int sig, const std::string & error_message, const Sta
|
||||
sentry_set_extra("signal_number", sentry_value_new_int32(sig));
|
||||
|
||||
#if defined(__ELF__) && !defined(__FreeBSD__)
|
||||
const String & build_id_hex = DB::SymbolIndex::instance().getBuildIDHex();
|
||||
const String & build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
|
||||
sentry_set_tag("build_id", build_id_hex.c_str());
|
||||
#endif
|
||||
|
||||
|
@ -104,6 +104,11 @@ void Connection::connect(const char* db,
|
||||
if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg))
|
||||
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
|
||||
|
||||
/// Enables auto-reconnect.
|
||||
bool reconnect = true;
|
||||
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
|
||||
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
|
||||
|
||||
/// Specifies a particular SSL key and certificate, if needed
|
||||
if (mysql_ssl_set(driver.get(), ifNotEmpty(ssl_key), ifNotEmpty(ssl_cert), ifNotEmpty(ssl_ca), nullptr, nullptr))
|
||||
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
|
||||
@ -115,11 +120,6 @@ void Connection::connect(const char* db,
|
||||
if (mysql_set_character_set(driver.get(), "UTF8"))
|
||||
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
|
||||
|
||||
/// Enables auto-reconnect.
|
||||
bool reconnect = true;
|
||||
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
|
||||
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
|
||||
|
||||
is_connected = true;
|
||||
}
|
||||
|
||||
|
@ -26,6 +26,7 @@ void Pool::Entry::incrementRefCount()
|
||||
mysql_thread_init();
|
||||
}
|
||||
|
||||
|
||||
void Pool::Entry::decrementRefCount()
|
||||
{
|
||||
if (!data)
|
||||
@ -150,28 +151,39 @@ Pool::Entry Pool::tryGet()
|
||||
|
||||
initialize();
|
||||
|
||||
/// Searching for connection which was established but wasn't used.
|
||||
for (auto & connection : connections)
|
||||
/// Try to pick an idle connection from already allocated
|
||||
for (auto connection_it = connections.cbegin(); connection_it != connections.cend();)
|
||||
{
|
||||
if (connection->ref_count == 0)
|
||||
Connection * connection_ptr = *connection_it;
|
||||
/// Fixme: There is a race condition here b/c we do not synchronize with Pool::Entry's copy-assignment operator
|
||||
if (connection_ptr->ref_count == 0)
|
||||
{
|
||||
Entry res(connection, this);
|
||||
return res.tryForceConnected() ? res : Entry();
|
||||
Entry res(connection_ptr, this);
|
||||
if (res.tryForceConnected()) /// Tries to reestablish connection as well
|
||||
return res;
|
||||
|
||||
auto & logger = Poco::Util::Application::instance().logger();
|
||||
logger.information("Idle connection to mysql server cannot be recovered, dropping it.");
|
||||
|
||||
/// This one is disconnected, cannot be reestablished and so needs to be disposed of.
|
||||
connection_it = connections.erase(connection_it);
|
||||
::delete connection_ptr; /// TODO: Manual memory management is awkward (matches allocConnection() method)
|
||||
}
|
||||
else
|
||||
++connection_it;
|
||||
}
|
||||
|
||||
/// Throws if pool is overflowed.
|
||||
if (connections.size() >= max_connections)
|
||||
throw Poco::Exception("mysqlxx::Pool is full");
|
||||
|
||||
/// Allocates new connection.
|
||||
Connection * conn = allocConnection(true);
|
||||
if (conn)
|
||||
return Entry(conn, this);
|
||||
Connection * connection_ptr = allocConnection(true);
|
||||
if (connection_ptr)
|
||||
return {connection_ptr, this};
|
||||
|
||||
return Entry();
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
void Pool::removeConnection(Connection* connection)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
@ -199,11 +211,9 @@ void Pool::Entry::forceConnected() const
|
||||
throw Poco::RuntimeException("Tried to access NULL database connection.");
|
||||
|
||||
Poco::Util::Application & app = Poco::Util::Application::instance();
|
||||
if (data->conn.ping())
|
||||
return;
|
||||
|
||||
bool first = true;
|
||||
do
|
||||
while (!tryForceConnected())
|
||||
{
|
||||
if (first)
|
||||
first = false;
|
||||
@ -225,7 +235,26 @@ void Pool::Entry::forceConnected() const
|
||||
pool->rw_timeout,
|
||||
pool->enable_local_infile);
|
||||
}
|
||||
while (!data->conn.ping());
|
||||
}
|
||||
|
||||
|
||||
bool Pool::Entry::tryForceConnected() const
|
||||
{
|
||||
auto * const mysql_driver = data->conn.getDriver();
|
||||
const auto prev_connection_id = mysql_thread_id(mysql_driver);
|
||||
if (data->conn.ping()) /// Attempts to reestablish lost connection
|
||||
{
|
||||
const auto current_connection_id = mysql_thread_id(mysql_driver);
|
||||
if (prev_connection_id != current_connection_id)
|
||||
{
|
||||
auto & logger = Poco::Util::Application::instance().logger();
|
||||
logger.information("Connection to mysql server has been reestablished. Connection id changed: %lu -> %lu",
|
||||
prev_connection_id, current_connection_id);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -127,10 +127,7 @@ public:
|
||||
void forceConnected() const;
|
||||
|
||||
/// Connects to the database. Returns false if the connection fails.
|
||||
bool tryForceConnected() const
|
||||
{
|
||||
return data->conn.ping();
|
||||
}
|
||||
bool tryForceConnected() const;
|
||||
|
||||
void incrementRefCount();
|
||||
void decrementRefCount();
|
||||
|
@ -22,4 +22,12 @@ ResultBase::~ResultBase()
|
||||
mysql_free_result(res);
|
||||
}
|
||||
|
||||
std::string ResultBase::getFieldName(size_t n) const
|
||||
{
|
||||
if (num_fields <= n)
|
||||
throw Exception(std::string("Unknown column position ") + std::to_string(n));
|
||||
|
||||
return fields[n].name;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -31,6 +31,8 @@ public:
|
||||
MYSQL_RES * getRes() { return res; }
|
||||
const Query * getQuery() const { return query; }
|
||||
|
||||
std::string getFieldName(size_t n) const;
|
||||
|
||||
virtual ~ResultBase();
|
||||
|
||||
protected:
|
||||
|
@ -1,2 +1,5 @@
|
||||
add_executable (mysqlxx_test mysqlxx_test.cpp)
|
||||
target_link_libraries (mysqlxx_test PRIVATE mysqlxx)
|
||||
|
||||
add_executable (mysqlxx_pool_test mysqlxx_pool_test.cpp)
|
||||
target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx)
|
||||
|
98
base/mysqlxx/tests/mysqlxx_pool_test.cpp
Normal file
98
base/mysqlxx/tests/mysqlxx_pool_test.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
#include <mysqlxx/mysqlxx.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
|
||||
constexpr size_t max_tries = 3;
|
||||
|
||||
mysqlxx::Pool::Entry worker_connection;
|
||||
|
||||
for (size_t try_no = 1; try_no <= max_tries; ++try_no)
|
||||
{
|
||||
try
|
||||
{
|
||||
worker_connection = connections_pool.tryGet();
|
||||
|
||||
if (!worker_connection.isNull())
|
||||
{
|
||||
return worker_connection;
|
||||
}
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
if (e.displayText().find("mysqlxx::Pool is full") != std::string::npos)
|
||||
{
|
||||
std::cerr << e.displayText() << std::endl;
|
||||
}
|
||||
|
||||
std::cerr << "Connection to " << connections_pool.getDescription() << " failed: " << e.displayText() << std::endl;
|
||||
}
|
||||
|
||||
std::clog << "Connection to all replicas failed " << try_no << " times" << std::endl;
|
||||
std::this_thread::sleep_for(1s);
|
||||
}
|
||||
|
||||
std::stringstream message;
|
||||
message << "Connections to all replicas failed: " << connections_pool.getDescription();
|
||||
|
||||
throw Poco::Exception(message.str());
|
||||
}
|
||||
}
|
||||
|
||||
int main(int, char **)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
|
||||
const char * remote_mysql = "localhost";
|
||||
const std::string test_query = "SHOW DATABASES";
|
||||
|
||||
mysqlxx::Pool mysql_conn_pool("", remote_mysql, "default", "10203040", 3306);
|
||||
|
||||
size_t iteration = 0;
|
||||
while (++iteration)
|
||||
{
|
||||
std::clog << "Iteration: " << iteration << std::endl;
|
||||
try
|
||||
{
|
||||
std::clog << "Acquiring DB connection ...";
|
||||
mysqlxx::Pool::Entry worker = getWithFailover(mysql_conn_pool);
|
||||
std::clog << "ok" << std::endl;
|
||||
|
||||
std::clog << "Preparing query (5s sleep) ...";
|
||||
std::this_thread::sleep_for(5s);
|
||||
mysqlxx::Query query = worker->query();
|
||||
query << test_query;
|
||||
std::clog << "ok" << std::endl;
|
||||
|
||||
std::clog << "Querying result (5s sleep) ...";
|
||||
std::this_thread::sleep_for(5s);
|
||||
mysqlxx::UseQueryResult result = query.use();
|
||||
std::clog << "ok" << std::endl;
|
||||
|
||||
std::clog << "Fetching result data (5s sleep) ...";
|
||||
std::this_thread::sleep_for(5s);
|
||||
size_t rows_count = 0;
|
||||
while (result.fetch())
|
||||
++rows_count;
|
||||
std::clog << "ok" << std::endl;
|
||||
|
||||
std::clog << "Read " << rows_count << " rows." << std::endl;
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
std::cerr << "Iteration FAILED:\n" << e.displayText() << std::endl;
|
||||
}
|
||||
|
||||
std::clog << "====================" << std::endl;
|
||||
std::this_thread::sleep_for(3s);
|
||||
}
|
||||
}
|
@ -14,10 +14,6 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
# Minimal supported SDK version
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
|
||||
|
||||
# Global libraries
|
||||
|
||||
add_library(global-libs INTERFACE)
|
||||
|
||||
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
|
||||
# Just make sure we have pthreads at all.
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
|
6
cmake/find/fast_float.cmake
Normal file
6
cmake/find/fast_float.cmake
Normal file
@ -0,0 +1,6 @@
|
||||
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/fast_float/fast_float.h")
|
||||
message (FATAL_ERROR "submodule contrib/fast_float is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
endif ()
|
||||
|
||||
set(FAST_FLOAT_LIBRARY fast_float)
|
||||
set(FAST_FLOAT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/")
|
@ -1,4 +1,11 @@
|
||||
option(ENABLE_GRPC "Use gRPC" ${ENABLE_LIBRARIES})
|
||||
# disable grpc due to conflicts of abseil (required by grpc) dynamic annotations with libtsan.a
|
||||
if (SANITIZE STREQUAL "thread" AND COMPILER_GCC)
|
||||
set(ENABLE_GRPC_DEFAULT OFF)
|
||||
else()
|
||||
set(ENABLE_GRPC_DEFAULT ${ENABLE_LIBRARIES})
|
||||
endif()
|
||||
|
||||
option(ENABLE_GRPC "Use gRPC" ${ENABLE_GRPC_DEFAULT})
|
||||
|
||||
if(NOT ENABLE_GRPC)
|
||||
if(USE_INTERNAL_GRPC_LIBRARY)
|
||||
|
@ -11,9 +11,9 @@ endif()
|
||||
|
||||
option(USE_INTERNAL_SSL_LIBRARY "Set to FALSE to use system *ssl library instead of bundled" ${NOT_UNBUNDLED})
|
||||
|
||||
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/openssl/README")
|
||||
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boringssl/README.md")
|
||||
if(USE_INTERNAL_SSL_LIBRARY)
|
||||
message(WARNING "submodule contrib/openssl is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
message(WARNING "submodule contrib/boringssl is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ssl library")
|
||||
endif()
|
||||
set(USE_INTERNAL_SSL_LIBRARY 0)
|
||||
@ -52,12 +52,12 @@ endif ()
|
||||
|
||||
if (NOT OPENSSL_FOUND AND NOT MISSING_INTERNAL_SSL_LIBRARY)
|
||||
set (USE_INTERNAL_SSL_LIBRARY 1)
|
||||
set (OPENSSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl")
|
||||
set (OPENSSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/boringssl")
|
||||
|
||||
if (ARCH_AMD64)
|
||||
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include" "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake/linux_x86_64/include")
|
||||
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include")
|
||||
elseif (ARCH_AARCH64)
|
||||
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include" "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake/linux_aarch64/include")
|
||||
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include")
|
||||
endif ()
|
||||
set (OPENSSL_CRYPTO_LIBRARY crypto)
|
||||
set (OPENSSL_SSL_LIBRARY ssl)
|
||||
|
@ -17,10 +17,6 @@ message(STATUS "Default libraries: ${DEFAULT_LIBS}")
|
||||
set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
|
||||
# Global libraries
|
||||
|
||||
add_library(global-libs INTERFACE)
|
||||
|
||||
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
|
||||
# Just make sure we have pthreads at all.
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
|
@ -35,6 +35,15 @@ if (NOT PARALLEL_LINK_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# ThinLTO provides its own parallel linking
|
||||
# But use 2 parallel jobs, since:
|
||||
# - this is what llvm does
|
||||
# - and I've verified that lld-11 does not use all available CPU time (in peak) while linking one binary
|
||||
if (ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2)
|
||||
message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
|
||||
set (PARALLEL_LINK_JOBS 2)
|
||||
endif()
|
||||
|
||||
if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
|
||||
set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR})
|
||||
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK})
|
||||
|
@ -12,10 +12,10 @@ else ()
|
||||
endif ()
|
||||
|
||||
if (OS_ANDROID)
|
||||
# pthread and rt are included in libc
|
||||
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -ldl")
|
||||
# pthread and rt are included in libc
|
||||
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -ldl")
|
||||
else ()
|
||||
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl")
|
||||
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl")
|
||||
endif ()
|
||||
|
||||
message(STATUS "Default libraries: ${DEFAULT_LIBS}")
|
||||
@ -31,10 +31,6 @@ if (ARCH_AMD64 AND NOT_UNBUNDLED)
|
||||
set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
|
||||
endif ()
|
||||
|
||||
# Global libraries
|
||||
|
||||
add_library(global-libs INTERFACE)
|
||||
|
||||
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
|
||||
# Just make sure we have pthreads at all.
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
|
@ -1,18 +1,34 @@
|
||||
# Possible values: `address` (ASan), `memory` (MSan), `thread` (TSan), `undefined` (UBSan), and "" (no sanitizing)
|
||||
# Possible values:
|
||||
# - `address` (ASan)
|
||||
# - `memory` (MSan)
|
||||
# - `thread` (TSan)
|
||||
# - `undefined` (UBSan)
|
||||
# - "" (no sanitizing)
|
||||
option (SANITIZE "Enable one of the code sanitizers" "")
|
||||
|
||||
set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
|
||||
|
||||
# gcc with -nodefaultlibs does not add sanitizer libraries
|
||||
# with -static-libasan and similar
|
||||
macro(add_explicit_sanitizer_library lib)
|
||||
target_link_libraries(global-libs INTERFACE "-Wl,-static -l${lib} -Wl,-Bdynamic")
|
||||
endmacro()
|
||||
|
||||
if (SANITIZE)
|
||||
if (SANITIZE STREQUAL "address")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
|
||||
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_FLAGS}")
|
||||
endif()
|
||||
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan")
|
||||
endif ()
|
||||
if (COMPILER_GCC)
|
||||
add_explicit_sanitizer_library(asan)
|
||||
endif()
|
||||
|
||||
elseif (SANITIZE STREQUAL "memory")
|
||||
# MemorySanitizer flags are set according to the official documentation:
|
||||
@ -41,9 +57,10 @@ if (SANITIZE)
|
||||
if (COMPILER_CLANG)
|
||||
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt")
|
||||
else()
|
||||
message (WARNING "TSAN suppressions was not passed to the compiler (since the compiler is not clang)")
|
||||
message (WARNING "Use the following command to pass them manually:")
|
||||
message (WARNING " export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"")
|
||||
set (MESSAGE "TSAN suppressions was not passed to the compiler (since the compiler is not clang)\n")
|
||||
set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n")
|
||||
set (MESSAGE "${MESSAGE} export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"")
|
||||
message (WARNING "${MESSAGE}")
|
||||
endif()
|
||||
|
||||
|
||||
@ -55,16 +72,32 @@ if (SANITIZE)
|
||||
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan")
|
||||
endif ()
|
||||
if (COMPILER_GCC)
|
||||
add_explicit_sanitizer_library(tsan)
|
||||
endif()
|
||||
|
||||
elseif (SANITIZE STREQUAL "undefined")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
set (UBSAN_FLAGS "-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
|
||||
if (COMPILER_CLANG)
|
||||
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
else()
|
||||
set (MESSAGE "UBSAN suppressions was not passed to the compiler (since the compiler is not clang)\n")
|
||||
set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n")
|
||||
set (MESSAGE "${MESSAGE} export UBSAN_OPTIONS=\"$UBSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt\"")
|
||||
message (WARNING "${MESSAGE}")
|
||||
endif()
|
||||
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
|
||||
endif()
|
||||
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan")
|
||||
endif ()
|
||||
if (COMPILER_GCC)
|
||||
add_explicit_sanitizer_library(ubsan)
|
||||
endif()
|
||||
|
||||
# llvm-tblgen, that is used during LLVM build, doesn't work with UBSan.
|
||||
set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "")
|
||||
|
@ -24,7 +24,7 @@ option (WEVERYTHING "Enable -Weverything option with some exceptions." ON)
|
||||
# Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size.
|
||||
# Only in release build because debug has too large stack frames.
|
||||
if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang"))
|
||||
add_warning(frame-larger-than=32768)
|
||||
add_warning(frame-larger-than=65536)
|
||||
endif ()
|
||||
|
||||
if (COMPILER_CLANG)
|
||||
|
2
contrib/AMQP-CPP
vendored
2
contrib/AMQP-CPP
vendored
@ -1 +1 @@
|
||||
Subproject commit d63e1f016582e9faaaf279aa24513087a07bc6e7
|
||||
Subproject commit 03781aaff0f10ef41f902b8cf865fe0067180c10
|
20
contrib/CMakeLists.txt
vendored
20
contrib/CMakeLists.txt
vendored
@ -21,10 +21,12 @@ endif()
|
||||
|
||||
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
|
||||
|
||||
add_subdirectory (antlr4-runtime-cmake)
|
||||
add_subdirectory (boost-cmake)
|
||||
add_subdirectory (cctz-cmake)
|
||||
add_subdirectory (consistent-hashing-sumbur)
|
||||
add_subdirectory (consistent-hashing)
|
||||
add_subdirectory (dragonbox-cmake)
|
||||
add_subdirectory (FastMemcpy)
|
||||
add_subdirectory (hyperscan-cmake)
|
||||
add_subdirectory (jemalloc-cmake)
|
||||
@ -34,7 +36,6 @@ add_subdirectory (libmetrohash)
|
||||
add_subdirectory (lz4-cmake)
|
||||
add_subdirectory (murmurhash)
|
||||
add_subdirectory (replxx-cmake)
|
||||
add_subdirectory (ryu-cmake)
|
||||
add_subdirectory (unixodbc-cmake)
|
||||
add_subdirectory (xz)
|
||||
|
||||
@ -98,10 +99,10 @@ if (USE_INTERNAL_H3_LIBRARY)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_SSL_LIBRARY)
|
||||
add_subdirectory (openssl-cmake)
|
||||
add_subdirectory (boringssl-cmake)
|
||||
|
||||
add_library(OpenSSL::Crypto ALIAS ${OPENSSL_CRYPTO_LIBRARY})
|
||||
add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY})
|
||||
add_library(OpenSSL::Crypto ALIAS crypto)
|
||||
add_library(OpenSSL::SSL ALIAS ssl)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_LDAP_LIBRARY)
|
||||
@ -209,6 +210,14 @@ if (USE_EMBEDDED_COMPILER AND USE_INTERNAL_LLVM_LIBRARY)
|
||||
set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "")
|
||||
set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "")
|
||||
set (LLVM_TARGETS_TO_BUILD "X86;AArch64" CACHE STRING "")
|
||||
# Yes it is set globally, but this is not enough, since llvm will add -std=c++11 after default
|
||||
# And c++2a cannot be used, due to ambiguous operator !=
|
||||
if (COMPILER_GCC OR COMPILER_CLANG)
|
||||
set (_CXX_STANDARD "gnu++17")
|
||||
else()
|
||||
set (_CXX_STANDARD "c++17")
|
||||
endif()
|
||||
set (LLVM_CXX_STD ${_CXX_STANDARD} CACHE STRING "" FORCE)
|
||||
add_subdirectory (llvm/llvm)
|
||||
target_include_directories(LLVMSupport SYSTEM BEFORE PRIVATE ${ZLIB_INCLUDE_DIR})
|
||||
endif ()
|
||||
@ -290,3 +299,6 @@ endif()
|
||||
if (USE_INTERNAL_ROCKSDB_LIBRARY)
|
||||
add_subdirectory(rocksdb-cmake)
|
||||
endif()
|
||||
|
||||
add_subdirectory(fast_float)
|
||||
|
||||
|
1
contrib/antlr4-runtime
vendored
Submodule
1
contrib/antlr4-runtime
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit a2fa7b76e2ee16d2ad955e9214a90bbf79da66fc
|
156
contrib/antlr4-runtime-cmake/CMakeLists.txt
Normal file
156
contrib/antlr4-runtime-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,156 @@
|
||||
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/antlr4-runtime)
|
||||
|
||||
set (SRCS
|
||||
${LIBRARY_DIR}/ANTLRErrorListener.cpp
|
||||
${LIBRARY_DIR}/ANTLRErrorStrategy.cpp
|
||||
${LIBRARY_DIR}/ANTLRFileStream.cpp
|
||||
${LIBRARY_DIR}/ANTLRInputStream.cpp
|
||||
${LIBRARY_DIR}/atn/AbstractPredicateTransition.cpp
|
||||
${LIBRARY_DIR}/atn/ActionTransition.cpp
|
||||
${LIBRARY_DIR}/atn/AmbiguityInfo.cpp
|
||||
${LIBRARY_DIR}/atn/ArrayPredictionContext.cpp
|
||||
${LIBRARY_DIR}/atn/ATN.cpp
|
||||
${LIBRARY_DIR}/atn/ATNConfig.cpp
|
||||
${LIBRARY_DIR}/atn/ATNConfigSet.cpp
|
||||
${LIBRARY_DIR}/atn/ATNDeserializationOptions.cpp
|
||||
${LIBRARY_DIR}/atn/ATNDeserializer.cpp
|
||||
${LIBRARY_DIR}/atn/ATNSerializer.cpp
|
||||
${LIBRARY_DIR}/atn/ATNSimulator.cpp
|
||||
${LIBRARY_DIR}/atn/ATNState.cpp
|
||||
${LIBRARY_DIR}/atn/AtomTransition.cpp
|
||||
${LIBRARY_DIR}/atn/BasicBlockStartState.cpp
|
||||
${LIBRARY_DIR}/atn/BasicState.cpp
|
||||
${LIBRARY_DIR}/atn/BlockEndState.cpp
|
||||
${LIBRARY_DIR}/atn/BlockStartState.cpp
|
||||
${LIBRARY_DIR}/atn/ContextSensitivityInfo.cpp
|
||||
${LIBRARY_DIR}/atn/DecisionEventInfo.cpp
|
||||
${LIBRARY_DIR}/atn/DecisionInfo.cpp
|
||||
${LIBRARY_DIR}/atn/DecisionState.cpp
|
||||
${LIBRARY_DIR}/atn/EmptyPredictionContext.cpp
|
||||
${LIBRARY_DIR}/atn/EpsilonTransition.cpp
|
||||
${LIBRARY_DIR}/atn/ErrorInfo.cpp
|
||||
${LIBRARY_DIR}/atn/LexerAction.cpp
|
||||
${LIBRARY_DIR}/atn/LexerActionExecutor.cpp
|
||||
${LIBRARY_DIR}/atn/LexerATNConfig.cpp
|
||||
${LIBRARY_DIR}/atn/LexerATNSimulator.cpp
|
||||
${LIBRARY_DIR}/atn/LexerChannelAction.cpp
|
||||
${LIBRARY_DIR}/atn/LexerCustomAction.cpp
|
||||
${LIBRARY_DIR}/atn/LexerIndexedCustomAction.cpp
|
||||
${LIBRARY_DIR}/atn/LexerModeAction.cpp
|
||||
${LIBRARY_DIR}/atn/LexerMoreAction.cpp
|
||||
${LIBRARY_DIR}/atn/LexerPopModeAction.cpp
|
||||
${LIBRARY_DIR}/atn/LexerPushModeAction.cpp
|
||||
${LIBRARY_DIR}/atn/LexerSkipAction.cpp
|
||||
${LIBRARY_DIR}/atn/LexerTypeAction.cpp
|
||||
${LIBRARY_DIR}/atn/LL1Analyzer.cpp
|
||||
${LIBRARY_DIR}/atn/LookaheadEventInfo.cpp
|
||||
${LIBRARY_DIR}/atn/LoopEndState.cpp
|
||||
${LIBRARY_DIR}/atn/NotSetTransition.cpp
|
||||
${LIBRARY_DIR}/atn/OrderedATNConfigSet.cpp
|
||||
${LIBRARY_DIR}/atn/ParseInfo.cpp
|
||||
${LIBRARY_DIR}/atn/ParserATNSimulator.cpp
|
||||
${LIBRARY_DIR}/atn/PlusBlockStartState.cpp
|
||||
${LIBRARY_DIR}/atn/PlusLoopbackState.cpp
|
||||
${LIBRARY_DIR}/atn/PrecedencePredicateTransition.cpp
|
||||
${LIBRARY_DIR}/atn/PredicateEvalInfo.cpp
|
||||
${LIBRARY_DIR}/atn/PredicateTransition.cpp
|
||||
${LIBRARY_DIR}/atn/PredictionContext.cpp
|
||||
${LIBRARY_DIR}/atn/PredictionMode.cpp
|
||||
${LIBRARY_DIR}/atn/ProfilingATNSimulator.cpp
|
||||
${LIBRARY_DIR}/atn/RangeTransition.cpp
|
||||
${LIBRARY_DIR}/atn/RuleStartState.cpp
|
||||
${LIBRARY_DIR}/atn/RuleStopState.cpp
|
||||
${LIBRARY_DIR}/atn/RuleTransition.cpp
|
||||
${LIBRARY_DIR}/atn/SemanticContext.cpp
|
||||
${LIBRARY_DIR}/atn/SetTransition.cpp
|
||||
${LIBRARY_DIR}/atn/SingletonPredictionContext.cpp
|
||||
${LIBRARY_DIR}/atn/StarBlockStartState.cpp
|
||||
${LIBRARY_DIR}/atn/StarLoopbackState.cpp
|
||||
${LIBRARY_DIR}/atn/StarLoopEntryState.cpp
|
||||
${LIBRARY_DIR}/atn/TokensStartState.cpp
|
||||
${LIBRARY_DIR}/atn/Transition.cpp
|
||||
${LIBRARY_DIR}/atn/WildcardTransition.cpp
|
||||
${LIBRARY_DIR}/BailErrorStrategy.cpp
|
||||
${LIBRARY_DIR}/BaseErrorListener.cpp
|
||||
${LIBRARY_DIR}/BufferedTokenStream.cpp
|
||||
${LIBRARY_DIR}/CharStream.cpp
|
||||
${LIBRARY_DIR}/CommonToken.cpp
|
||||
${LIBRARY_DIR}/CommonTokenFactory.cpp
|
||||
${LIBRARY_DIR}/CommonTokenStream.cpp
|
||||
${LIBRARY_DIR}/ConsoleErrorListener.cpp
|
||||
${LIBRARY_DIR}/DefaultErrorStrategy.cpp
|
||||
${LIBRARY_DIR}/dfa/DFA.cpp
|
||||
${LIBRARY_DIR}/dfa/DFASerializer.cpp
|
||||
${LIBRARY_DIR}/dfa/DFAState.cpp
|
||||
${LIBRARY_DIR}/dfa/LexerDFASerializer.cpp
|
||||
${LIBRARY_DIR}/DiagnosticErrorListener.cpp
|
||||
${LIBRARY_DIR}/Exceptions.cpp
|
||||
${LIBRARY_DIR}/FailedPredicateException.cpp
|
||||
${LIBRARY_DIR}/InputMismatchException.cpp
|
||||
${LIBRARY_DIR}/InterpreterRuleContext.cpp
|
||||
${LIBRARY_DIR}/IntStream.cpp
|
||||
${LIBRARY_DIR}/Lexer.cpp
|
||||
${LIBRARY_DIR}/LexerInterpreter.cpp
|
||||
${LIBRARY_DIR}/LexerNoViableAltException.cpp
|
||||
${LIBRARY_DIR}/ListTokenSource.cpp
|
||||
${LIBRARY_DIR}/misc/InterpreterDataReader.cpp
|
||||
${LIBRARY_DIR}/misc/Interval.cpp
|
||||
${LIBRARY_DIR}/misc/IntervalSet.cpp
|
||||
${LIBRARY_DIR}/misc/MurmurHash.cpp
|
||||
${LIBRARY_DIR}/misc/Predicate.cpp
|
||||
${LIBRARY_DIR}/NoViableAltException.cpp
|
||||
${LIBRARY_DIR}/Parser.cpp
|
||||
${LIBRARY_DIR}/ParserInterpreter.cpp
|
||||
${LIBRARY_DIR}/ParserRuleContext.cpp
|
||||
${LIBRARY_DIR}/ProxyErrorListener.cpp
|
||||
${LIBRARY_DIR}/RecognitionException.cpp
|
||||
${LIBRARY_DIR}/Recognizer.cpp
|
||||
${LIBRARY_DIR}/RuleContext.cpp
|
||||
${LIBRARY_DIR}/RuleContextWithAltNum.cpp
|
||||
${LIBRARY_DIR}/RuntimeMetaData.cpp
|
||||
${LIBRARY_DIR}/support/Any.cpp
|
||||
${LIBRARY_DIR}/support/Arrays.cpp
|
||||
${LIBRARY_DIR}/support/CPPUtils.cpp
|
||||
${LIBRARY_DIR}/support/guid.cpp
|
||||
${LIBRARY_DIR}/support/StringUtils.cpp
|
||||
${LIBRARY_DIR}/Token.cpp
|
||||
${LIBRARY_DIR}/TokenSource.cpp
|
||||
${LIBRARY_DIR}/TokenStream.cpp
|
||||
${LIBRARY_DIR}/TokenStreamRewriter.cpp
|
||||
${LIBRARY_DIR}/tree/ErrorNode.cpp
|
||||
${LIBRARY_DIR}/tree/ErrorNodeImpl.cpp
|
||||
${LIBRARY_DIR}/tree/IterativeParseTreeWalker.cpp
|
||||
${LIBRARY_DIR}/tree/ParseTree.cpp
|
||||
${LIBRARY_DIR}/tree/ParseTreeListener.cpp
|
||||
${LIBRARY_DIR}/tree/ParseTreeVisitor.cpp
|
||||
${LIBRARY_DIR}/tree/ParseTreeWalker.cpp
|
||||
${LIBRARY_DIR}/tree/pattern/Chunk.cpp
|
||||
${LIBRARY_DIR}/tree/pattern/ParseTreeMatch.cpp
|
||||
${LIBRARY_DIR}/tree/pattern/ParseTreePattern.cpp
|
||||
${LIBRARY_DIR}/tree/pattern/ParseTreePatternMatcher.cpp
|
||||
${LIBRARY_DIR}/tree/pattern/RuleTagToken.cpp
|
||||
${LIBRARY_DIR}/tree/pattern/TagChunk.cpp
|
||||
${LIBRARY_DIR}/tree/pattern/TextChunk.cpp
|
||||
${LIBRARY_DIR}/tree/pattern/TokenTagToken.cpp
|
||||
${LIBRARY_DIR}/tree/TerminalNode.cpp
|
||||
${LIBRARY_DIR}/tree/TerminalNodeImpl.cpp
|
||||
${LIBRARY_DIR}/tree/Trees.cpp
|
||||
${LIBRARY_DIR}/tree/xpath/XPath.cpp
|
||||
${LIBRARY_DIR}/tree/xpath/XPathElement.cpp
|
||||
${LIBRARY_DIR}/tree/xpath/XPathLexer.cpp
|
||||
${LIBRARY_DIR}/tree/xpath/XPathLexerErrorListener.cpp
|
||||
${LIBRARY_DIR}/tree/xpath/XPathRuleAnywhereElement.cpp
|
||||
${LIBRARY_DIR}/tree/xpath/XPathRuleElement.cpp
|
||||
${LIBRARY_DIR}/tree/xpath/XPathTokenAnywhereElement.cpp
|
||||
${LIBRARY_DIR}/tree/xpath/XPathTokenElement.cpp
|
||||
${LIBRARY_DIR}/tree/xpath/XPathWildcardAnywhereElement.cpp
|
||||
${LIBRARY_DIR}/tree/xpath/XPathWildcardElement.cpp
|
||||
${LIBRARY_DIR}/UnbufferedCharStream.cpp
|
||||
${LIBRARY_DIR}/UnbufferedTokenStream.cpp
|
||||
${LIBRARY_DIR}/Vocabulary.cpp
|
||||
${LIBRARY_DIR}/WritableToken.cpp
|
||||
)
|
||||
|
||||
add_library (antlr4-runtime ${SRCS})
|
||||
|
||||
target_include_directories (antlr4-runtime SYSTEM PUBLIC ${LIBRARY_DIR})
|
2
contrib/boost
vendored
2
contrib/boost
vendored
@ -1 +1 @@
|
||||
Subproject commit a04e72c0464f0c31d3384f18f0c0db36a05538e0
|
||||
Subproject commit 0b98b443aa7bb77d65efd7b23b3b8c8a0ab5f1f3
|
1
contrib/boringssl
vendored
Submodule
1
contrib/boringssl
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 8b2bf912ba04823cfe9e7e8f5bb60cb7f6252449
|
661
contrib/boringssl-cmake/CMakeLists.txt
Normal file
661
contrib/boringssl-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,661 @@
|
||||
# Copyright (c) 2019 The Chromium Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
# This file is created by generate_build_files.py and edited accordingly.
|
||||
|
||||
cmake_minimum_required(VERSION 3.0)
|
||||
|
||||
project(BoringSSL LANGUAGES C CXX)
|
||||
|
||||
set(BORINGSSL_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/boringssl)
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
set(CLANG 1)
|
||||
endif()
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX OR CLANG)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fvisibility=hidden -fno-common -fno-exceptions -fno-rtti")
|
||||
if(APPLE)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -fno-common")
|
||||
if((CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.8.99") OR CLANG)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
|
||||
else()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# pthread_rwlock_t requires a feature flag.
|
||||
if(NOT WIN32)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_XOPEN_SOURCE=700")
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
add_definitions(-D_HAS_EXCEPTIONS=0)
|
||||
add_definitions(-DWIN32_LEAN_AND_MEAN)
|
||||
add_definitions(-DNOMINMAX)
|
||||
# Allow use of fopen.
|
||||
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
|
||||
# VS 2017 and higher supports STL-only warning suppressions.
|
||||
# A bug in CMake < 3.13.0 may cause the space in this value to
|
||||
# cause issues when building with NASM. In that case, update CMake.
|
||||
add_definitions("-D_STL_EXTRA_DISABLED_WARNINGS=4774 4987")
|
||||
endif()
|
||||
|
||||
add_definitions(-DBORINGSSL_IMPLEMENTATION)
|
||||
|
||||
# CMake's iOS support uses Apple's multiple-architecture toolchain. It takes an
|
||||
# architecture list from CMAKE_OSX_ARCHITECTURES, leaves CMAKE_SYSTEM_PROCESSOR
|
||||
# alone, and expects all architecture-specific logic to be conditioned within
|
||||
# the source files rather than the build. This does not work for our assembly
|
||||
# files, so we fix CMAKE_SYSTEM_PROCESSOR and only support single-architecture
|
||||
# builds.
|
||||
if(NOT OPENSSL_NO_ASM AND CMAKE_OSX_ARCHITECTURES)
|
||||
list(LENGTH CMAKE_OSX_ARCHITECTURES NUM_ARCHES)
|
||||
if(NOT ${NUM_ARCHES} EQUAL 1)
|
||||
message(FATAL_ERROR "Universal binaries not supported.")
|
||||
endif()
|
||||
list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR)
|
||||
endif()
|
||||
|
||||
# Map CMAKE_SYSTEM_PROCESSOR onto the architecture name stored in ARCH, which
|
||||
# later selects one of the CRYPTO_<os>_<arch>_SOURCES lists. The variable is
|
||||
# referenced by bare name (not ${...}) so that an empty or unusual value
|
||||
# cannot turn the if() into a syntax error or be re-read as a variable name.
|
||||
if(OPENSSL_NO_ASM)
|
||||
add_definitions(-DOPENSSL_NO_ASM)
|
||||
set(ARCH "generic")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
|
||||
set(ARCH "x86_64")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
|
||||
set(ARCH "x86_64")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64")
|
||||
# cmake reports AMD64 on Windows, but we might be building for 32-bit.
|
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(ARCH "x86_64")
|
||||
else()
|
||||
set(ARCH "x86")
|
||||
endif()
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86")
|
||||
set(ARCH "x86")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "i386")
|
||||
set(ARCH "x86")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686")
|
||||
set(ARCH "x86")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
|
||||
set(ARCH "aarch64")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
|
||||
set(ARCH "aarch64")
|
||||
# Apple A12 Bionic chipset which is added in iPhone XS/XS Max/XR uses arm64e architecture.
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64e")
|
||||
set(ARCH "aarch64")
|
||||
# Any remaining name that starts with "arm" (the 64-bit names were handled above).
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
|
||||
set(ARCH "arm")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "mips")
|
||||
# Just to avoid the “unknown processor” error.
|
||||
set(ARCH "generic")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le")
|
||||
set(ARCH "ppc64le")
|
||||
else()
|
||||
message(FATAL_ERROR "Unknown processor: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
endif()
|
||||
|
||||
if(NOT OPENSSL_NO_ASM)
|
||||
if(UNIX)
|
||||
enable_language(ASM)
|
||||
|
||||
# Clang's integrated assembler does not support debug symbols.
|
||||
if(NOT CMAKE_ASM_COMPILER_ID MATCHES "Clang")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,-g")
|
||||
endif()
|
||||
|
||||
# CMake does not add -isysroot and -arch flags to assembly.
|
||||
if(APPLE)
|
||||
if(CMAKE_OSX_SYSROOT)
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -isysroot \"${CMAKE_OSX_SYSROOT}\"")
|
||||
endif()
|
||||
foreach(arch ${CMAKE_OSX_ARCHITECTURES})
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -arch ${arch}")
|
||||
endforeach()
|
||||
endif()
|
||||
else()
|
||||
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -gcv8")
|
||||
enable_language(ASM_NASM)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_definitions(-DBORINGSSL_SHARED_LIBRARY)
|
||||
# Enable position-independent code globally. This is needed because
|
||||
# some library targets are OBJECT libraries.
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
|
||||
endif()
|
||||
|
||||
include_directories(${BORINGSSL_SOURCE_DIR}/include)
|
||||
|
||||
set(
|
||||
CRYPTO_ios_aarch64_SOURCES
|
||||
|
||||
ios-aarch64/crypto/chacha/chacha-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
|
||||
ios-aarch64/crypto/fipsmodule/armv8-mont.S
|
||||
ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
|
||||
ios-aarch64/crypto/fipsmodule/sha1-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/sha256-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/sha512-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
|
||||
ios-aarch64/crypto/test/trampoline-armv8.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_ios_arm_SOURCES
|
||||
|
||||
ios-arm/crypto/chacha/chacha-armv4.S
|
||||
ios-arm/crypto/fipsmodule/aesv8-armx32.S
|
||||
ios-arm/crypto/fipsmodule/armv4-mont.S
|
||||
ios-arm/crypto/fipsmodule/bsaes-armv7.S
|
||||
ios-arm/crypto/fipsmodule/ghash-armv4.S
|
||||
ios-arm/crypto/fipsmodule/ghashv8-armx32.S
|
||||
ios-arm/crypto/fipsmodule/sha1-armv4-large.S
|
||||
ios-arm/crypto/fipsmodule/sha256-armv4.S
|
||||
ios-arm/crypto/fipsmodule/sha512-armv4.S
|
||||
ios-arm/crypto/fipsmodule/vpaes-armv7.S
|
||||
ios-arm/crypto/test/trampoline-armv4.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_aarch64_SOURCES
|
||||
|
||||
linux-aarch64/crypto/chacha/chacha-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
|
||||
linux-aarch64/crypto/fipsmodule/armv8-mont.S
|
||||
linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
|
||||
linux-aarch64/crypto/fipsmodule/sha1-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/sha256-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/sha512-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
|
||||
linux-aarch64/crypto/test/trampoline-armv8.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_arm_SOURCES
|
||||
|
||||
linux-arm/crypto/chacha/chacha-armv4.S
|
||||
linux-arm/crypto/fipsmodule/aesv8-armx32.S
|
||||
linux-arm/crypto/fipsmodule/armv4-mont.S
|
||||
linux-arm/crypto/fipsmodule/bsaes-armv7.S
|
||||
linux-arm/crypto/fipsmodule/ghash-armv4.S
|
||||
linux-arm/crypto/fipsmodule/ghashv8-armx32.S
|
||||
linux-arm/crypto/fipsmodule/sha1-armv4-large.S
|
||||
linux-arm/crypto/fipsmodule/sha256-armv4.S
|
||||
linux-arm/crypto/fipsmodule/sha512-armv4.S
|
||||
linux-arm/crypto/fipsmodule/vpaes-armv7.S
|
||||
linux-arm/crypto/test/trampoline-armv4.S
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/curve25519/asm/x25519-asm-arm.S
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm_asm.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_ppc64le_SOURCES
|
||||
|
||||
linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
|
||||
linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
|
||||
linux-ppc64le/crypto/test/trampoline-ppc.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_x86_SOURCES
|
||||
|
||||
linux-x86/crypto/chacha/chacha-x86.S
|
||||
linux-x86/crypto/fipsmodule/aesni-x86.S
|
||||
linux-x86/crypto/fipsmodule/bn-586.S
|
||||
linux-x86/crypto/fipsmodule/co-586.S
|
||||
linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S
|
||||
linux-x86/crypto/fipsmodule/ghash-x86.S
|
||||
linux-x86/crypto/fipsmodule/md5-586.S
|
||||
linux-x86/crypto/fipsmodule/sha1-586.S
|
||||
linux-x86/crypto/fipsmodule/sha256-586.S
|
||||
linux-x86/crypto/fipsmodule/sha512-586.S
|
||||
linux-x86/crypto/fipsmodule/vpaes-x86.S
|
||||
linux-x86/crypto/fipsmodule/x86-mont.S
|
||||
linux-x86/crypto/test/trampoline-x86.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_x86_64_SOURCES
|
||||
|
||||
linux-x86_64/crypto/chacha/chacha-x86_64.S
|
||||
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
|
||||
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/md5-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
|
||||
linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
|
||||
linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
|
||||
linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/sha256-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/sha512-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/x86_64-mont.S
|
||||
linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
|
||||
linux-x86_64/crypto/test/trampoline-x86_64.S
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_mac_x86_SOURCES
|
||||
|
||||
mac-x86/crypto/chacha/chacha-x86.S
|
||||
mac-x86/crypto/fipsmodule/aesni-x86.S
|
||||
mac-x86/crypto/fipsmodule/bn-586.S
|
||||
mac-x86/crypto/fipsmodule/co-586.S
|
||||
mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S
|
||||
mac-x86/crypto/fipsmodule/ghash-x86.S
|
||||
mac-x86/crypto/fipsmodule/md5-586.S
|
||||
mac-x86/crypto/fipsmodule/sha1-586.S
|
||||
mac-x86/crypto/fipsmodule/sha256-586.S
|
||||
mac-x86/crypto/fipsmodule/sha512-586.S
|
||||
mac-x86/crypto/fipsmodule/vpaes-x86.S
|
||||
mac-x86/crypto/fipsmodule/x86-mont.S
|
||||
mac-x86/crypto/test/trampoline-x86.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_mac_x86_64_SOURCES
|
||||
|
||||
mac-x86_64/crypto/chacha/chacha-x86_64.S
|
||||
mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
|
||||
mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/ghash-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/md5-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
|
||||
mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
|
||||
mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/rsaz-avx2.S
|
||||
mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/sha256-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/sha512-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/x86_64-mont.S
|
||||
mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
|
||||
mac-x86_64/crypto/test/trampoline-x86_64.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_win_x86_SOURCES
|
||||
|
||||
win-x86/crypto/chacha/chacha-x86.asm
|
||||
win-x86/crypto/fipsmodule/aesni-x86.asm
|
||||
win-x86/crypto/fipsmodule/bn-586.asm
|
||||
win-x86/crypto/fipsmodule/co-586.asm
|
||||
win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm
|
||||
win-x86/crypto/fipsmodule/ghash-x86.asm
|
||||
win-x86/crypto/fipsmodule/md5-586.asm
|
||||
win-x86/crypto/fipsmodule/sha1-586.asm
|
||||
win-x86/crypto/fipsmodule/sha256-586.asm
|
||||
win-x86/crypto/fipsmodule/sha512-586.asm
|
||||
win-x86/crypto/fipsmodule/vpaes-x86.asm
|
||||
win-x86/crypto/fipsmodule/x86-mont.asm
|
||||
win-x86/crypto/test/trampoline-x86.asm
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_win_x86_64_SOURCES
|
||||
|
||||
win-x86_64/crypto/chacha/chacha-x86_64.asm
|
||||
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
|
||||
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/md5-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm
|
||||
win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm
|
||||
win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/rsaz-avx2.asm
|
||||
win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/sha256-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/sha512-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/x86_64-mont.asm
|
||||
win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
|
||||
win-x86_64/crypto/test/trampoline-x86_64.asm
|
||||
)
|
||||
|
||||
# Pick the pre-generated assembly list matching the target OS and ARCH.
|
||||
# ARCH is referenced by bare name inside if() so its value is compared as a
|
||||
# string and never re-interpreted as another variable's name.
|
||||
if(APPLE AND ARCH STREQUAL "aarch64")
|
||||
set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_aarch64_SOURCES})
|
||||
elseif(APPLE AND ARCH STREQUAL "arm")
|
||||
set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_arm_SOURCES})
|
||||
elseif(APPLE)
|
||||
set(CRYPTO_ARCH_SOURCES ${CRYPTO_mac_${ARCH}_SOURCES})
|
||||
elseif(UNIX)
|
||||
set(CRYPTO_ARCH_SOURCES ${CRYPTO_linux_${ARCH}_SOURCES})
|
||||
elseif(WIN32)
|
||||
set(CRYPTO_ARCH_SOURCES ${CRYPTO_win_${ARCH}_SOURCES})
|
||||
endif()
|
||||
|
||||
add_library(
|
||||
crypto
|
||||
|
||||
${CRYPTO_ARCH_SOURCES}
|
||||
err_data.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bitstr.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bool.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_d2i_fp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_dup.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_enum.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_gentm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_i2d_fp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_int.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_mbstr.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utctm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utf8.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_lib.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_par.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn_pack.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_enum.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_int.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_string.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_dec.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_enc.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_fre.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_new.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_typ.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_utl.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/time_support.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/base64/base64.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/bio.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/bio_mem.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/connect.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/fd.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/file.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/hexdump.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/pair.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/asn1_compat.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/ber.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbb.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbs.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/unicode.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/chacha/chacha.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/cipher_extra.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/derive_key.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesccm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesctrhmac.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesgcmsiv.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_chacha20poly1305.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_null.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc2.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc4.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_tls.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/tls_cbc.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cmac/cmac.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/crypto.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/err/err.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/digestsign.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/evp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_ctx.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_dsa_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/pbkdf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/print.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/scrypt.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/sign.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/mem.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/obj/obj.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/obj/obj_xref.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_all.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_info.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_lib.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_oth.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pk8.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pkey.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_xaux.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7_x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/p5_pbev2.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8_x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_vec.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pool/pool.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/refcount_c11.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/refcount_lock.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_print.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/siphash/siphash.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/stack/stack.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/thread.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509a.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_att.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_cmp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_d2.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_def.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_ext.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_lu.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_obj.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_r2x.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_req.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_set.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_trs.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_txt.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_v3.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vfy.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vpm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509cset.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509name.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509rset.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509spki.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_algor.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_all.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_attrib.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_crl.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_exten.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_info.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_name.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pkey.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pubkey.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_req.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_sig.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_spki.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_val.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509a.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_cache.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_data.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_lib.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_map.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_node.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_tree.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akey.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akeya.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_alt.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bcons.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bitst.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_conf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_cpols.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_crld.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_enum.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_extku.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_genn.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ia5.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_info.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_int.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_lib.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ncons.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ocsp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c
|
||||
)
|
||||
|
||||
add_library(
|
||||
ssl
|
||||
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/bio_ssl.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/d1_both.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/d1_lib.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/d1_pkt.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/handshake_server.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/s3_both.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/s3_lib.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/s3_pkt.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_aead_ctx.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_asn1.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_buffer.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_cert.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_cipher.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_file.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_key_share.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_lib.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_privkey.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_session.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_stat.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_transcript.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls13_server.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls_method.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls_record.cc
|
||||
|
||||
${BORINGSSL_SOURCE_DIR}/decrepit/ssl/ssl_decrepit.c
|
||||
${BORINGSSL_SOURCE_DIR}/decrepit/cfb/cfb.c
|
||||
)
|
||||
|
||||
add_executable(
|
||||
bssl
|
||||
|
||||
${BORINGSSL_SOURCE_DIR}/tool/args.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/ciphers.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/client.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/const.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/digest.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/fd.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/file.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/rand.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/server.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/sign.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/speed.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/tool.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/transport_common.cc
|
||||
)
|
||||
|
||||
target_link_libraries(ssl crypto)
|
||||
target_link_libraries(bssl ssl)
|
||||
|
||||
if(NOT WIN32 AND NOT ANDROID)
|
||||
target_link_libraries(crypto pthread)
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
target_link_libraries(bssl ws2_32)
|
||||
endif()
|
||||
|
||||
target_include_directories(crypto SYSTEM PUBLIC ${BORINGSSL_SOURCE_DIR}/include)
|
||||
target_include_directories(ssl SYSTEM PUBLIC ${BORINGSSL_SOURCE_DIR}/include)
|
||||
|
||||
target_compile_options(crypto PRIVATE -Wno-gnu-anonymous-struct)
|
5012
contrib/boringssl-cmake/crypto_test_data.cc
Normal file
5012
contrib/boringssl-cmake/crypto_test_data.cc
Normal file
File diff suppressed because one or more lines are too long
1457
contrib/boringssl-cmake/err_data.c
Normal file
1457
contrib/boringssl-cmake/err_data.c
Normal file
File diff suppressed because it is too large
Load Diff
1991
contrib/boringssl-cmake/ios-aarch64/crypto/chacha/chacha-armv8.S
Normal file
1991
contrib/boringssl-cmake/ios-aarch64/crypto/chacha/chacha-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,782 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
|
||||
.section __TEXT,__const
|
||||
.align 5
|
||||
Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
// _aes_hw_set_encrypt_key: expand a user key into an AES encryption key schedule.
//
// In:   x0 = user key bytes
//       w1 = key length in bits (128, 192 or 256)
//       x2 = key schedule output buffer
// Out:  x0 =  0 on success
//            -1 if x0 or x2 is NULL
//            -2 if w1 is below 128, above 256, or not a multiple of 64
// On success the round count (10/12/14) is written as a 32-bit word 240 bytes
// past the start of the schedule. x2 is left pointing at that word and w12 holds
// the round count; _aes_hw_set_decrypt_key depends on both when it enters through
// the Lenc_key label with bl.
// Scratch: x3, w1, w12, v0-v6, flags.
.globl _aes_hw_set_encrypt_key
|
||||
.private_extern _aes_hw_set_encrypt_key
|
||||
|
||||
.align 5
|
||||
_aes_hw_set_encrypt_key:
|
||||
Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
// x3 carries the prospective return value through the argument checks.
mov x3,#-1
|
||||
cmp x0,#0
|
||||
b.eq Lenc_key_abort
|
||||
cmp x2,#0
|
||||
b.eq Lenc_key_abort
|
||||
mov x3,#-2
|
||||
cmp w1,#128
|
||||
b.lt Lenc_key_abort
|
||||
cmp w1,#256
|
||||
b.gt Lenc_key_abort
|
||||
tst w1,#0x3f
|
||||
b.ne Lenc_key_abort
|
||||
|
||||
// Arguments are valid: x3 is reused as the pointer into the Lrcon table.
adrp x3,Lrcon@PAGE
|
||||
add x3,x3,Lrcon@PAGEOFF
|
||||
// The flags set here stay live until the b.lt/b.eq dispatch below; the
// intervening vector ops, loads and mov do not modify them.
cmp w1,#192
|
||||
|
||||
// v0 = 0. It serves as the zero source for ext and as an all-zero round key, so
// "aese vN,v0" applies only the substitution step to vN.
eor v0.16b,v0.16b,v0.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
mov w1,#8 // reuse w1
|
||||
// v1 = round constant (Lrcon row 0), v2 = tbl mask (Lrcon row 1); x3 now points at row 2.
ld1 {v1.4s,v2.4s},[x3],#32
|
||||
|
||||
b.lt Loop128
|
||||
b.eq L192
|
||||
b L256
|
||||
|
||||
.align 4
|
||||
// 128-bit key: eight iterations (w1 = 8). Each one stores the current round key
// from v3 and derives the next one into v3; v1 is doubled every pass.
Loop128:
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
b.ne Loop128
|
||||
|
||||
// Switch the round constant to 0x1b (Lrcon row 2) for the last two derivations,
// which are unrolled below.
ld1 {v1.4s},[x3]
|
||||
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
// Final (11th) round key is stored without post-increment; x2 is at start+160 here,
// so adding 0x50 moves it to start+240 where Ldone writes the round count.
st1 {v3.4s},[x2]
|
||||
add x2,x2,#0x50
|
||||
|
||||
mov w12,#10
|
||||
b Ldone
|
||||
|
||||
.align 4
|
||||
// 192-bit key: load the remaining 8 key bytes into v4 and shift every tbl index
// down by 8 so the mask addresses the bytes held in the low half of v4.
L192:
|
||||
ld1 {v4.8b},[x0],#8
|
||||
movi v6.16b,#8 // borrow v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
sub v2.16b,v2.16b,v6.16b // adjust the mask
|
||||
|
||||
// Eight iterations (w1 = 8); each stores 8 bytes from v4 and 16 bytes from v3,
// i.e. 24 bytes of schedule per pass.
Loop192:
|
||||
tbl v6.16b,{v4.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v4.8b},[x2],#8
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
|
||||
dup v5.4s,v3.s[3]
|
||||
eor v5.16b,v5.16b,v4.16b
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
ext v4.16b,v0.16b,v4.16b,#12
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
eor v4.16b,v4.16b,v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
b.ne Loop192
|
||||
|
||||
// x2 is at start+208 after the loop; +0x20 brings it to start+240.
mov w12,#12
|
||||
add x2,x2,#0x20
|
||||
b Ldone
|
||||
|
||||
.align 4
|
||||
// 256-bit key: v3 holds the first 16 key bytes, v4 the second 16. Seven passes (w1 = 7).
L256:
|
||||
ld1 {v4.16b},[x0]
|
||||
mov w1,#7
|
||||
mov w12,#14
|
||||
st1 {v3.4s},[x2],#16
|
||||
|
||||
Loop256:
|
||||
tbl v6.16b,{v4.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v4.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
// Exit after the seventh pass: 16 + 7*32 = 240 bytes written, so x2 already
// points at the round-count slot.
b.eq Ldone
|
||||
|
||||
// Second half of the pass: derive the next v4 from the new v3 (splat of its last
// word, substituted via aese, no rotation and no round constant).
dup v6.4s,v3.s[3] // just splat
|
||||
ext v5.16b,v0.16b,v4.16b,#12
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
|
||||
eor v4.16b,v4.16b,v6.16b
|
||||
b Loop256
|
||||
|
||||
// Common success exit: x2 = schedule start + 240, w12 = round count.
Ldone:
|
||||
str w12,[x2]
|
||||
mov x3,#0
|
||||
|
||||
Lenc_key_abort:
|
||||
mov x0,x3 // return value
|
||||
// Only x29 is reloaded. x30 is not written anywhere in this routine, so ret
// uses the link register exactly as it was on entry.
ldr x29,[sp],#16
|
||||
ret
|
||||
|
||||
|
||||
// _aes_hw_set_decrypt_key: build an AES decryption key schedule.
//
// Arguments are passed through unchanged to Lenc_key (x0 = user key, w1 = key
// bits, x2 = key schedule output). On failure the error code from Lenc_key is
// returned in x0; on success x0 = 0.
//
// After the encryption schedule has been generated it is converted in place:
// the round keys are reversed end-for-end, and aesimc is applied to every round
// key except the first and the last.
// Scratch: x4 plus everything Lenc_key uses (x3, w1, w12, v0-v6).
.globl _aes_hw_set_decrypt_key
|
||||
.private_extern _aes_hw_set_decrypt_key
|
||||
|
||||
.align 5
|
||||
_aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
// bl overwrites x30, which is why this routine (unlike Lenc_key) reloads x30
// in its epilogue.
bl Lenc_key
|
||||
|
||||
cmp x0,#0
|
||||
b.ne Ldec_key_abort
|
||||
|
||||
// Lenc_key leaves x2 = schedule start + 240 and x12 = round count.
sub x2,x2,#240 // restore original x2
|
||||
mov x4,#-16
|
||||
add x0,x2,x12,lsl#4 // end of key schedule
|
||||
|
||||
// Swap the first and last round keys as-is (no aesimc). x2 walks forward, x0 walks
// backward (post-index by x4 = -16).
ld1 {v0.4s},[x2]
|
||||
ld1 {v1.4s},[x0]
|
||||
st1 {v0.4s},[x0],x4
|
||||
st1 {v1.4s},[x2],#16
|
||||
|
||||
// Swap the remaining pairs moving inward, applying aesimc to both keys of each pair.
Loop_imc:
|
||||
ld1 {v0.4s},[x2]
|
||||
ld1 {v1.4s},[x0]
|
||||
aesimc v0.16b,v0.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
st1 {v0.4s},[x0],x4
|
||||
st1 {v1.4s},[x2],#16
|
||||
cmp x0,x2
|
||||
b.hi Loop_imc
|
||||
|
||||
// With the even round counts Lenc_key produces (10/12/14) the two pointers meet on
// the middle round key, which is transformed in place.
ld1 {v0.4s},[x2]
|
||||
aesimc v0.16b,v0.16b
|
||||
st1 {v0.4s},[x0]
|
||||
|
||||
eor x0,x0,x0 // return value
|
||||
Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
// _aes_hw_encrypt: encrypt a single 16-byte block.
//
// In:   x0 = input block, x1 = output block,
//       x2 = key schedule (round count is read from the 32-bit word at [x2,#240])
// Out:  16 bytes stored at [x1]; no return value is set.
// Scratch: w3, v0-v2; x2 is advanced through the schedule.
.globl _aes_hw_encrypt
|
||||
.private_extern _aes_hw_encrypt
|
||||
|
||||
.align 5
|
||||
_aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
// w3 = rounds - 2: the loop below consumes two rounds per pass and the last two
// rounds are handled after it.
sub w3,w3,#2
|
||||
ld1 {v1.4s},[x2],#16
|
||||
|
||||
// Two full rounds per iteration; v0/v1 alternate as the current round key and are
// reloaded from the schedule as soon as they have been used.
Loop_enc:
|
||||
aese v2.16b,v0.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2],#16
|
||||
subs w3,w3,#2
|
||||
aese v2.16b,v1.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v1.4s},[x2],#16
|
||||
b.gt Loop_enc
|
||||
|
||||
// Last two rounds: the final aese is not followed by aesmc, and the last round
// key is applied with a plain eor.
aese v2.16b,v0.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2]
|
||||
aese v2.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
|
||||
st1 {v2.16b},[x1]
|
||||
ret
|
||||
|
||||
// _aes_hw_decrypt: decrypt a single 16-byte block.
//
// In:   x0 = input block, x1 = output block,
//       x2 = key schedule (round count is read from the 32-bit word at [x2,#240]);
//       presumably a schedule prepared by _aes_hw_set_decrypt_key - confirm at call sites.
// Out:  16 bytes stored at [x1]; no return value is set.
// Scratch: w3, v0-v2; x2 is advanced through the schedule.
// Same structure as _aes_hw_encrypt, using aesd/aesimc instead of aese/aesmc.
.globl _aes_hw_decrypt
|
||||
.private_extern _aes_hw_decrypt
|
||||
|
||||
.align 5
|
||||
_aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
// w3 = rounds - 2: two rounds per loop pass, last two rounds handled after the loop.
sub w3,w3,#2
|
||||
ld1 {v1.4s},[x2],#16
|
||||
|
||||
Loop_dec:
|
||||
aesd v2.16b,v0.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2],#16
|
||||
subs w3,w3,#2
|
||||
aesd v2.16b,v1.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v1.4s},[x2],#16
|
||||
b.gt Loop_dec
|
||||
|
||||
// Last two rounds: the final aesd is not followed by aesimc, and the last round
// key is applied with a plain eor.
aesd v2.16b,v0.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2]
|
||||
aesd v2.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
|
||||
st1 {v2.16b},[x1]
|
||||
ret
|
||||
|
||||
// _aes_hw_cbc_encrypt: AES-CBC encryption or decryption of a buffer.
//
// In:   x0 = input, x1 = output, x2 = length in bytes,
//       x3 = key schedule (round count at [x3,#240]),
//       x4 = 16-byte IV (read here, written back at Lcbc_done),
//       w5 = direction flag: zero selects the decrypt path (Lcbc_dec), non-zero encrypts.
// A length below 16 branches straight to Lcbc_abort without touching any buffer;
// otherwise the length is rounded down to a multiple of 16.
//
// Register roles established by this setup section:
//   x8        input post-increment step: 16, or 0 when exactly one block remains
//   v6        current IV / chaining value
//   v0        first input block
//   v16,v17   round keys 0 and 1
//   v18-v23   six of the last seven round keys, v7 = the very last round key
//   w5,w6     round count minus 8
//   x7        pointer into the key schedule for the remaining middle round keys
.globl _aes_hw_cbc_encrypt
|
||||
.private_extern _aes_hw_cbc_encrypt
|
||||
|
||||
.align 5
|
||||
_aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
// x2 is biased by one block; "lo" means fewer than 16 bytes were supplied.
subs x2,x2,#16
|
||||
mov x8,#16
|
||||
b.lo Lcbc_abort
|
||||
csel x8,xzr,x8,eq
|
||||
|
||||
// The result of this compare is consumed by "b.eq Lcbc_dec" further down; none of
// the instructions in between modify the flags. w5 itself is immediately reused
// for the round count.
cmp w5,#0 // en- or decrypting?
|
||||
ldr w5,[x3,#240]
|
||||
and x2,x2,#-16
|
||||
ld1 {v6.16b},[x4]
|
||||
ld1 {v0.16b},[x0],x8
|
||||
|
||||
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
|
||||
sub w5,w5,#6
|
||||
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
|
||||
sub w5,w5,#2
|
||||
ld1 {v18.4s,v19.4s},[x7],#32
|
||||
ld1 {v20.4s,v21.4s},[x7],#32
|
||||
ld1 {v22.4s,v23.4s},[x7],#32
|
||||
ld1 {v7.4s},[x7]
|
||||
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
b.eq Lcbc_dec
|
||||
|
||||
// Encrypt path. w5 == 2 means a 10-round schedule, which has its own loop.
cmp w5,#2
|
||||
// v0 = first plaintext block ^ IV; v5 = round key 0 ^ last round key.
eor v0.16b,v0.16b,v6.16b
|
||||
eor v5.16b,v16.16b,v7.16b
|
||||
b.eq Lcbc_enc128
|
||||
|
||||
// Longer schedules: v2,v3 = round keys 2 and 3; x7, x6, x12, x14 and x3 are set to
// the addresses of round keys 1, 4, 5, 6 and 7 for use inside the loop.
ld1 {v2.4s,v3.4s},[x7]
|
||||
add x7,x3,#16
|
||||
add x6,x3,#16*4
|
||||
add x12,x3,#16*5
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
add x14,x3,#16*6
|
||||
add x3,x3,#16*7
|
||||
b Lenter_cbc_enc
|
||||
|
||||
.align 4
|
||||
Loop_cbc_enc:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
st1 {v6.16b},[x1],#16
|
||||
Lenter_cbc_enc:
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v2.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.4s},[x6]
|
||||
cmp w5,#4
|
||||
aese v0.16b,v3.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x12]
|
||||
b.eq Lcbc_enc192
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.4s},[x14]
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x3]
|
||||
nop
|
||||
|
||||
Lcbc_enc192:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
subs x2,x2,#16
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
csel x8,xzr,x8,eq
|
||||
aese v0.16b,v18.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v19.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.16b},[x0],x8
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
eor v16.16b,v16.16b,v5.16b
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v23.16b
|
||||
eor v6.16b,v0.16b,v7.16b
|
||||
b.hs Loop_cbc_enc
|
||||
|
||||
st1 {v6.16b},[x1],#16
|
||||
b Lcbc_done
|
||||
|
||||
.align 5
|
||||
Lcbc_enc128:
|
||||
ld1 {v2.4s,v3.4s},[x7]
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
b Lenter_cbc_enc128
|
||||
Loop_cbc_enc128:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
st1 {v6.16b},[x1],#16
|
||||
Lenter_cbc_enc128:
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
subs x2,x2,#16
|
||||
aese v0.16b,v2.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
csel x8,xzr,x8,eq
|
||||
aese v0.16b,v3.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v18.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v19.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.16b},[x0],x8
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
eor v16.16b,v16.16b,v5.16b
|
||||
aese v0.16b,v23.16b
|
||||
eor v6.16b,v0.16b,v7.16b
|
||||
b.hs Loop_cbc_enc128
|
||||
|
||||
st1 {v6.16b},[x1],#16
|
||||
b Lcbc_done
|
||||
.align 5
|
||||
Lcbc_dec:
|
||||
ld1 {v18.16b},[x0],#16
|
||||
subs x2,x2,#32 // bias
|
||||
add w6,w5,#2
|
||||
orr v3.16b,v0.16b,v0.16b
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
orr v19.16b,v18.16b,v18.16b
|
||||
b.lo Lcbc_dec_tail
|
||||
|
||||
orr v1.16b,v18.16b,v18.16b
|
||||
ld1 {v18.16b},[x0],#16
|
||||
orr v2.16b,v0.16b,v0.16b
|
||||
orr v3.16b,v1.16b,v1.16b
|
||||
orr v19.16b,v18.16b,v18.16b
|
||||
|
||||
Loop3x_cbc_dec:
|
||||
aesd v0.16b,v16.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aesd v0.16b,v17.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt Loop3x_cbc_dec
|
||||
|
||||
aesd v0.16b,v16.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v4.16b,v6.16b,v7.16b
|
||||
subs x2,x2,#0x30
|
||||
eor v5.16b,v2.16b,v7.16b
|
||||
csel x6,x2,x6,lo // x6, w6, is zero at this point
|
||||
aesd v0.16b,v17.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v17.16b,v3.16b,v7.16b
|
||||
add x0,x0,x6 // x0 is adjusted in such way that
|
||||
// at exit from the loop v1.16b-v18.16b
|
||||
// are loaded with last "words"
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
mov x7,x3
|
||||
aesd v0.16b,v20.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v20.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v20.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
aesd v0.16b,v21.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v21.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v21.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
aesd v0.16b,v22.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v22.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v22.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v19.16b},[x0],#16
|
||||
aesd v0.16b,v23.16b
|
||||
aesd v1.16b,v23.16b
|
||||
aesd v18.16b,v23.16b
|
||||
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
|
||||
add w6,w5,#2
|
||||
eor v4.16b,v4.16b,v0.16b
|
||||
eor v5.16b,v5.16b,v1.16b
|
||||
eor v18.16b,v18.16b,v17.16b
|
||||
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
|
||||
st1 {v4.16b},[x1],#16
|
||||
orr v0.16b,v2.16b,v2.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
orr v1.16b,v3.16b,v3.16b
|
||||
st1 {v18.16b},[x1],#16
|
||||
orr v18.16b,v19.16b,v19.16b
|
||||
b.hs Loop3x_cbc_dec
|
||||
|
||||
cmn x2,#0x30
|
||||
b.eq Lcbc_done
|
||||
nop
|
||||
|
||||
Lcbc_dec_tail:
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt Lcbc_dec_tail
|
||||
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
aesd v1.16b,v20.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v20.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
cmn x2,#0x20
|
||||
aesd v1.16b,v21.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v21.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v5.16b,v6.16b,v7.16b
|
||||
aesd v1.16b,v22.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v22.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v17.16b,v3.16b,v7.16b
|
||||
aesd v1.16b,v23.16b
|
||||
aesd v18.16b,v23.16b
|
||||
b.eq Lcbc_dec_one
|
||||
eor v5.16b,v5.16b,v1.16b
|
||||
eor v17.16b,v17.16b,v18.16b
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
st1 {v17.16b},[x1],#16
|
||||
b Lcbc_done
|
||||
|
||||
Lcbc_dec_one:
|
||||
eor v5.16b,v5.16b,v18.16b
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
|
||||
Lcbc_done:
|
||||
st1 {v6.16b},[x4]
|
||||
Lcbc_abort:
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
|
||||
// _aes_hw_ctr32_encrypt_blocks: AES-CTR over whole 16-byte blocks, incrementing
// only the last 32-bit word of the counter block.
//
// In:   x0 = input, x1 = output, x2 = number of 16-byte blocks,
//       x3 = key schedule (round count at [x3,#240]),
//       x4 = 16-byte initial counter block (read only in this section).
//
// Register roles established by this setup section:
//   v16,v17   round keys 0 and 1
//   v20-v23   four of the last five round keys, v7 = the very last round key
//   w5,w6     round count minus 6
//   x7        pointer to round key 2
//   w8        32-bit counter word taken from byte offset 12 of the counter block
//             (byte-reversed unless __ARMEB__ is defined), advanced by 2 here
//   v0        counter block as loaded; v1 = same with last word set from counter+1;
//             v18 = same with last word set from counter+2 (3x path only)
//   v6        untouched copy of the loaded counter block
//   x12       16, or 0 when fewer than 2 blocks were requested (tail-path input step)
.globl _aes_hw_ctr32_encrypt_blocks
|
||||
.private_extern _aes_hw_ctr32_encrypt_blocks
|
||||
|
||||
.align 5
|
||||
_aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
|
||||
ldr w8, [x4, #12]
|
||||
ld1 {v0.4s},[x4]
|
||||
|
||||
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
|
||||
sub w5,w5,#4
|
||||
mov x12,#16
|
||||
// Flags from this compare feed both the csel below and the "b.ls Lctr32_tail"
// dispatch; nothing in between modifies them.
cmp x2,#2
|
||||
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
|
||||
sub w5,w5,#2
|
||||
ld1 {v20.4s,v21.4s},[x7],#32
|
||||
ld1 {v22.4s,v23.4s},[x7],#32
|
||||
ld1 {v7.4s},[x7]
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
csel x12,xzr,x12,lo
|
||||
#ifndef __ARMEB__
|
||||
rev w8, w8
|
||||
#endif
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
add w10, w8, #1
|
||||
orr v18.16b,v0.16b,v0.16b
|
||||
add w8, w8, #2
|
||||
orr v6.16b,v0.16b,v0.16b
|
||||
rev w10, w10
|
||||
mov v1.s[3],w10
|
||||
// One or two blocks: handled entirely by the two-lane tail code.
b.ls Lctr32_tail
|
||||
// Three or more blocks: x12 is reused as scratch for the third counter word and
// x2 is biased by 3 for the three-blocks-per-pass loop.
rev w12, w8
|
||||
sub x2,x2,#3 // bias
|
||||
mov v18.s[3],w12
|
||||
b Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
Loop3x_ctr32:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt Loop3x_ctr32
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v4.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v5.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
orr v0.16b,v6.16b,v6.16b
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
aese v4.16b,v17.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v17.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
ld1 {v19.16b},[x0],#16
|
||||
mov x7,x3
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v17.16b,v18.16b
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
add w9,w8,#1
|
||||
aese v4.16b,v20.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v20.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
eor v2.16b,v2.16b,v7.16b
|
||||
add w10,w8,#2
|
||||
aese v17.16b,v20.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
eor v3.16b,v3.16b,v7.16b
|
||||
add w8,w8,#3
|
||||
aese v4.16b,v21.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v21.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
eor v19.16b,v19.16b,v7.16b
|
||||
rev w9,w9
|
||||
aese v17.16b,v21.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v0.s[3], w9
|
||||
rev w10,w10
|
||||
aese v4.16b,v22.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v22.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
mov v1.s[3], w10
|
||||
rev w12,w8
|
||||
aese v17.16b,v22.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v18.s[3], w12
|
||||
subs x2,x2,#3
|
||||
aese v4.16b,v23.16b
|
||||
aese v5.16b,v23.16b
|
||||
aese v17.16b,v23.16b
|
||||
|
||||
eor v2.16b,v2.16b,v4.16b
|
||||
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
|
||||
st1 {v2.16b},[x1],#16
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
mov w6,w5
|
||||
st1 {v3.16b},[x1],#16
|
||||
eor v19.16b,v19.16b,v17.16b
|
||||
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
|
||||
st1 {v19.16b},[x1],#16
|
||||
b.hs Loop3x_ctr32
|
||||
|
||||
adds x2,x2,#3
|
||||
b.eq Lctr32_done
|
||||
cmp x2,#1
|
||||
mov x12,#16
|
||||
csel x12,xzr,x12,eq
|
||||
|
||||
Lctr32_tail:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt Lctr32_tail
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],x12
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v20.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v3.16b},[x0]
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v21.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v7.16b
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v22.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v7.16b
|
||||
aese v0.16b,v23.16b
|
||||
aese v1.16b,v23.16b
|
||||
|
||||
cmp x2,#1
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
eor v3.16b,v3.16b,v1.16b
|
||||
st1 {v2.16b},[x1],#16
|
||||
b.eq Lctr32_done
|
||||
st1 {v3.16b},[x1]
|
||||
|
||||
Lctr32_done:
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
1433
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/armv8-mont.S
Normal file
1433
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/armv8-mont.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,343 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// _gcm_init_neon: precompute the "twisted" GHASH key.
//
// In:   x0 = Htable output (16 bytes are written), x1 = hash key H (16 bytes are read)
// Out:  Htable[0] = twisted H; no return value is set.
// Scratch: v3, v5, v16-v19.
.globl _gcm_init_neon
|
||||
.private_extern _gcm_init_neon
|
||||
|
||||
.align 4
|
||||
_gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
shl v19.2d, v19.2d, #57 // 0xc2.0
|
||||
ext v3.16b, v17.16b, v17.16b, #8
|
||||
ushr v18.2d, v19.2d, #63
|
||||
dup v17.4s, v17.s[1]
|
||||
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
|
||||
ushr v18.2d, v3.2d, #63
|
||||
sshr v17.4s, v17.4s, #31 // broadcast carry bit
|
||||
and v18.16b, v18.16b, v16.16b
|
||||
shl v3.2d, v3.2d, #1
|
||||
ext v18.16b, v18.16b, v18.16b, #8
|
||||
and v16.16b, v16.16b, v17.16b
|
||||
orr v3.16b, v3.16b, v18.16b // H<<<=1
|
||||
eor v5.16b, v3.16b, v16.16b // twisted H
|
||||
st1 {v5.2d}, [x0] // store Htable[0]
|
||||
ret
|
||||
|
||||
|
||||
// _gcm_gmult_neon: multiply the GHASH accumulator Xi by H, in place.
//
// In:   x0 = Xi (16 bytes, read here and written back by the shared tail),
//       x1 = Htable (16 bytes of twisted H are read as two 64-bit halves)
//
// This routine has no body of its own: it loads its operands into the registers
// the shared multiply code expects (v3 = byte-swapped Xi, v5/v6 = H halves,
// v7 = v5 ^ v6, v24/v25 = Lmasks), sets x3 = 16, and branches into
// Lgmult_neon inside _gcm_ghash_neon. With x3 = 16 the "subs x3,x3,#16 / bne"
// at the end of that loop falls through after exactly one multiplication, and
// the ret there returns to this routine's caller.
.globl _gcm_gmult_neon
|
||||
.private_extern _gcm_gmult_neon
|
||||
|
||||
.align 4
|
||||
_gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, Lmasks@PAGE // load constants
|
||||
add x9, x9, Lmasks@PAGEOFF
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v3.16b, v3.16b // byteswap Xi
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
// Pretend exactly one 16-byte block remains so the shared loop runs once.
mov x3, #16
|
||||
b Lgmult_neon
|
||||
|
||||
|
||||
// _gcm_ghash_neon: fold a buffer into the GHASH accumulator Xi.
//
// In:   x0 = Xi (16 bytes, updated in place when the loop finishes),
//       x1 = Htable (16 bytes of twisted H are read as two 64-bit halves),
//       x2 = input data, consumed 16 bytes per iteration,
//       x3 = length in bytes, decremented by 16 per iteration.
// NOTE(review): the loop terminates only when x3 reaches exactly zero, so callers
// presumably always pass a non-zero multiple of 16 - confirm against call sites.
//
// Register roles established by this setup section and used by the loop:
//   v0        running Xi, byte-swapped
//   v5, v6    the two 64-bit halves of twisted H
//   v7        v5 ^ v6 (Karatsuba middle-term operand)
//   v24, v25  the four 64-bit masks from Lmasks (k48,k32 / k16,k0)
.globl _gcm_ghash_neon
|
||||
.private_extern _gcm_ghash_neon
|
||||
|
||||
.align 4
|
||||
_gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, Lmasks@PAGE // load constants
|
||||
add x9, x9, Lmasks@PAGEOFF
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v0.16b, v0.16b // byteswap Xi
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
Loop_neon:
|
||||
ld1 {v3.16b}, [x2], #16 // load inp
|
||||
rev64 v3.16b, v3.16b // byteswap inp
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
|
||||
|
||||
Lgmult_neon:
|
||||
// Split the input into v3 and v4. (The upper halves are unused,
|
||||
// so it is okay to leave them alone.)
|
||||
ins v4.d[0], v3.d[1]
|
||||
ext v16.8b, v5.8b, v5.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v0.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v0.8h, v5.8b, v0.8b // E = A*B1
|
||||
ext v17.8b, v5.8b, v5.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v5.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v5.8b, v5.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v0.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v0.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v0.8h, v5.8b, v0.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v0.16b // N = I + J
|
||||
pmull v19.8h, v5.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v0.8h, v5.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v0.16b, v0.16b, v16.16b
|
||||
eor v0.16b, v0.16b, v18.16b
|
||||
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
|
||||
ext v16.8b, v7.8b, v7.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v1.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v1.8h, v7.8b, v1.8b // E = A*B1
|
||||
ext v17.8b, v7.8b, v7.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v7.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v7.8b, v7.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v1.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v1.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v1.8h, v7.8b, v1.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v1.16b // N = I + J
|
||||
pmull v19.8h, v7.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v1.8h, v7.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v1.16b, v1.16b, v16.16b
|
||||
eor v1.16b, v1.16b, v18.16b
|
||||
ext v16.8b, v6.8b, v6.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v4.8b // F = A1*B
|
||||
ext v2.8b, v4.8b, v4.8b, #1 // B1
|
||||
pmull v2.8h, v6.8b, v2.8b // E = A*B1
|
||||
ext v17.8b, v6.8b, v6.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v4.8b // H = A2*B
|
||||
ext v19.8b, v4.8b, v4.8b, #2 // B2
|
||||
pmull v19.8h, v6.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v6.8b, v6.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v2.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v4.8b // J = A3*B
|
||||
ext v2.8b, v4.8b, v4.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v2.8h, v6.8b, v2.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v4.8b, v4.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v2.16b // N = I + J
|
||||
pmull v19.8h, v6.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v2.8h, v6.8b, v4.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v2.16b, v2.16b, v16.16b
|
||||
eor v2.16b, v2.16b, v18.16b
|
||||
ext v16.16b, v0.16b, v2.16b, #8
|
||||
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
|
||||
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
|
||||
// This is a no-op due to the ins instruction below.
|
||||
// ins v2.d[0], v1.d[1]
|
||||
|
||||
// equivalent of reduction_avx from ghash-x86_64.pl
|
||||
shl v17.2d, v0.2d, #57 // 1st phase
|
||||
shl v18.2d, v0.2d, #62
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
shl v17.2d, v0.2d, #63
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
// Note Xm contains {Xl.d[1], Xh.d[0]}.
|
||||
eor v18.16b, v18.16b, v1.16b
|
||||
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
|
||||
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
|
||||
|
||||
ushr v18.2d, v0.2d, #1 // 2nd phase
|
||||
eor v2.16b, v2.16b,v0.16b
|
||||
eor v0.16b, v0.16b,v18.16b //
|
||||
ushr v18.2d, v18.2d, #6
|
||||
ushr v0.2d, v0.2d, #1 //
|
||||
eor v0.16b, v0.16b, v2.16b //
|
||||
eor v0.16b, v0.16b, v18.16b //
|
||||
|
||||
subs x3, x3, #16
|
||||
bne Loop_neon
|
||||
|
||||
rev64 v0.16b, v0.16b // byteswap Xi and write
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
st1 {v0.16b}, [x0]
|
||||
|
||||
ret
|
||||
|
||||
|
||||
// Read-only constants for the NEON GHASH code. _gcm_gmult_neon and _gcm_ghash_neon
// load all four quads with one ld1 into v24 (k48,k32) and v25 (k16,k0); each kN
// has its low N bits set.
.section __TEXT,__const
|
||||
.align 4
|
||||
Lmasks:
|
||||
.quad 0x0000ffffffffffff // k48
|
||||
.quad 0x00000000ffffffff // k32
|
||||
.quad 0x000000000000ffff // k16
|
||||
.quad 0x0000000000000000 // k0
|
||||
// NUL-terminated ASCII identification string:
// "GHASH for ARMv8, derived from ARMv4 version by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
@ -0,0 +1,249 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl _gcm_init_v8
|
||||
.private_extern _gcm_init_v8
|
||||
|
||||
.align 4
|
||||
// gcm_init_v8 -- build the GHASH key table from the hash key H.
//   x0: output table; three 16-byte entries (48 bytes) are stored
//   x1: input hash key H; 16 bytes are loaded
// Table layout as stored here: [0] twisted H, [1] the Karatsuba
// pre-processed halves of H and H^2 packed into one vector, [2] H^2.
// Touches v0-v3 and v16-v22 only; x0 is advanced by 16.
_gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
ushr v18.2d,v19.2d,#63
|
||||
dup v17.4s,v17.s[1]
|
||||
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
|
||||
ushr v18.2d,v3.2d,#63
|
||||
sshr v17.4s,v17.4s,#31 //broadcast carry bit
|
||||
and v18.16b,v18.16b,v16.16b
|
||||
shl v3.2d,v3.2d,#1
|
||||
ext v18.16b,v18.16b,v18.16b,#8
|
||||
and v16.16b,v16.16b,v17.16b
|
||||
orr v3.16b,v3.16b,v18.16b //H<<<=1
|
||||
eor v20.16b,v3.16b,v16.16b //twisted H
|
||||
st1 {v20.2d},[x0],#16 //store Htable[0]
|
||||
|
||||
//calculate H^2
// (twisted H multiplied by itself, then reduced in two phases with v19)
|
||||
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
|
||||
pmull v0.1q,v20.1d,v20.1d
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
pmull2 v2.1q,v20.2d,v20.2d
|
||||
pmull v1.1q,v16.1d,v16.1d
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v22.16b,v0.16b,v18.16b
|
||||
|
||||
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
// v21 = {upper half of v16, lower half of v17}: the lo^hi term of H next
// to the lo^hi term of H^2.
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
|
||||
|
||||
ret
|
||||
|
||||
.globl _gcm_gmult_v8
|
||||
.private_extern _gcm_gmult_v8
|
||||
|
||||
.align 4
|
||||
// gcm_gmult_v8 -- multiply the GHASH state by H in place.
//   x0: Xi, 16 bytes; loaded on entry and overwritten with the product
//   x1: key table; the first 32 bytes are loaded (v20 = twisted H,
//       v21 = packed Karatsuba operand)
// One Karatsuba multiplication (three pmull/pmull2) followed by a two-phase
// reduction using the constant in v19. Touches v0-v3 and v17-v21 only.
_gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
shl v19.2d,v19.2d,#57
|
||||
// Byte-swap each 64-bit half unless the build is big-endian.
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
// Undo the entry byte-swap and half rotation before storing.
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
|
||||
.globl _gcm_ghash_v8
|
||||
.private_extern _gcm_ghash_v8
|
||||
|
||||
.align 4
|
||||
// gcm_ghash_v8 -- fold a run of 16-byte input blocks into the GHASH state.
//   x0: Xi, 16-byte state; loaded on entry, stored back at Ldone_v8
//   x1: key table; 48 bytes are loaded (v20, v21, v22)
//   x2: input pointer
//   x3: input length in bytes (consumed 32 at a time, 16-byte odd tail)
// The first input block is loaded unconditionally, so the caller presumably
// guarantees a non-zero length that is a multiple of 16 -- TODO confirm.
// Touches v0-v7, v16-v22, x1, x2, x3, x12 and the condition flags.
_gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
//to be rotated in order to
|
||||
//make it appear as in
|
||||
//algorithm specification
|
||||
// The flags set by this subs are consumed by the csel and the b.lo further
// down; nothing in between modifies them.
subs x3,x3,#32 //see if x3 is 32 or larger
|
||||
mov x12,#16 //x12 is used as post-
|
||||
//increment for input pointer;
|
||||
//as loop is modulo-scheduled
|
||||
//x12 is zeroed just in time
|
||||
//to preclude overstepping
|
||||
//inp[len], which means that
|
||||
//last block[s] are actually
|
||||
//loaded twice, but last
|
||||
//copy is not processed
|
||||
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v22.2d},[x1]
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
|
||||
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
|
||||
b.lo Lodd_tail_v8 //x3 was less than 32
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
// Two blocks per iteration: the block already XORed with Xi (v3) is
// multiplied by v22, the following block (v7) by v20, and v21 supplies the
// Karatsuba middle-term operand for both. The loads for the next iteration
// are interleaved with the current reduction.
Loop_mod2x_v8:
|
||||
ext v18.16b,v3.16b,v3.16b,#8
|
||||
subs x3,x3,#32 //is there more data?
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
|
||||
csel x12,xzr,x12,lo //is it time to zero x12?
|
||||
|
||||
pmull v5.1q,v21.1d,v17.1d
|
||||
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
|
||||
eor v0.16b,v0.16b,v4.16b //accumulate
|
||||
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
|
||||
|
||||
eor v2.16b,v2.16b,v6.16b
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
eor v1.16b,v1.16b,v5.16b
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
#endif
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v3.16b,v3.16b,v18.16b
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
eor v3.16b,v3.16b,v0.16b
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
// Still testing the flags from the subs at the top of the loop body.
b.hs Loop_mod2x_v8 //there was at least 32 more bytes
|
||||
|
||||
// Loop exit: the loop had already folded the reduction result into v3 for
// the next iteration, so rebuild the plain Xi (v0), the last loaded block
// (v3) and the true remaining length (x3).
eor v2.16b,v2.16b,v18.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
|
||||
adds x3,x3,#32 //re-construct x3
|
||||
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
|
||||
b.eq Ldone_v8 //is x3 zero?
|
||||
// Single remaining block: one multiplication using v20/v21, same sequence
// as in _gcm_gmult_v8 after XORing the block into Xi.
Lodd_tail_v8:
|
||||
ext v18.16b,v0.16b,v0.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //inp^=Xi
|
||||
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
// Common exit: undo the entry byte-swap/rotation and store Xi back.
Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
1236
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
Normal file
1236
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
1214
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
Normal file
1214
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
1084
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
Normal file
1084
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
1232
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
Normal file
1232
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,758 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
// with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
// the result of |func|. The |unwind| argument is unused.
|
||||
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
// const uint64_t *argv, size_t argc,
|
||||
// uint64_t unwind);
|
||||
|
||||
.globl _abi_test_trampoline
|
||||
.private_extern _abi_test_trampoline
|
||||
.align 4
|
||||
// Register roles on entry: x0 = func, x1 = state, x2 = argv, x3 = argc.
// At most eight arguments are forwarded (x0-x7); nothing is passed on the
// stack, so any argc above eight is silently truncated.
// The 176-byte frame keeps sp a multiple of 16.
_abi_test_trampoline:
|
||||
Labi_test_trampoline_begin:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
// Stack layout (low to high addresses)
|
||||
// x29,x30 (16 bytes)
|
||||
// d8-d15 (64 bytes)
|
||||
// x19-x28 (80 bytes)
|
||||
// x1 (8 bytes)
|
||||
// padding (8 bytes)
|
||||
stp x29, x30, [sp, #-176]!
|
||||
mov x29, sp
|
||||
|
||||
// Saved callee-saved registers and |state|.
|
||||
stp d8, d9, [sp, #16]
|
||||
stp d10, d11, [sp, #32]
|
||||
stp d12, d13, [sp, #48]
|
||||
stp d14, d15, [sp, #64]
|
||||
stp x19, x20, [sp, #80]
|
||||
stp x21, x22, [sp, #96]
|
||||
stp x23, x24, [sp, #112]
|
||||
stp x25, x26, [sp, #128]
|
||||
stp x27, x28, [sp, #144]
|
||||
str x1, [sp, #160]
|
||||
|
||||
// Load registers from |state|, with the exception of x29. x29 is the
|
||||
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
|
||||
// mandate that x29 always point to a frame. iOS64 does so, which means
|
||||
// we cannot fill x29 with entropy without violating ABI rules
|
||||
// ourselves. x29 is tested separately below.
|
||||
// (x1 is post-incremented through |state|; its original value was saved
// at [sp, #160] above.)
ldp d8, d9, [x1], #16
|
||||
ldp d10, d11, [x1], #16
|
||||
ldp d12, d13, [x1], #16
|
||||
ldp d14, d15, [x1], #16
|
||||
ldp x19, x20, [x1], #16
|
||||
ldp x21, x22, [x1], #16
|
||||
ldp x23, x24, [x1], #16
|
||||
ldp x25, x26, [x1], #16
|
||||
ldp x27, x28, [x1], #16
|
||||
|
||||
// Move parameters into temporary registers.
|
||||
// (x9 = func, x10 = argv cursor, x11 = arguments left to load)
mov x9, x0
|
||||
mov x10, x2
|
||||
mov x11, x3
|
||||
|
||||
// Load parameters into registers.
|
||||
// Unrolled: one argument per step, leaving as soon as x11 reaches zero.
cbz x11, Largs_done
|
||||
ldr x0, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x1, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x2, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x3, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x4, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x5, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x6, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x7, [x10], #8
|
||||
|
||||
Largs_done:
|
||||
// Call |func|; its x0 is left untouched below and becomes our return value.
blr x9
|
||||
|
||||
// Reload |state| and store registers.
|
||||
ldr x1, [sp, #160]
|
||||
stp d8, d9, [x1], #16
|
||||
stp d10, d11, [x1], #16
|
||||
stp d12, d13, [x1], #16
|
||||
stp d14, d15, [x1], #16
|
||||
stp x19, x20, [x1], #16
|
||||
stp x21, x22, [x1], #16
|
||||
stp x23, x24, [x1], #16
|
||||
stp x25, x26, [x1], #16
|
||||
stp x27, x28, [x1], #16
|
||||
|
||||
// |func| is required to preserve x29, the frame pointer. We cannot load
|
||||
// random values into x29 (see comment above), so compare it against the
|
||||
// expected value and zero the field of |state| if corrupted.
|
||||
// (x29 was set equal to sp in the prologue; after the nine pair stores
// above, x1 points 144 bytes into |state|, which is the field zeroed.)
mov x9, sp
|
||||
cmp x29, x9
|
||||
b.eq Lx29_ok
|
||||
str xzr, [x1]
|
||||
|
||||
Lx29_ok:
|
||||
// Restore callee-saved registers.
|
||||
ldp d8, d9, [sp, #16]
|
||||
ldp d10, d11, [sp, #32]
|
||||
ldp d12, d13, [sp, #48]
|
||||
ldp d14, d15, [sp, #64]
|
||||
ldp x19, x20, [sp, #80]
|
||||
ldp x21, x22, [sp, #96]
|
||||
ldp x23, x24, [sp, #112]
|
||||
ldp x25, x26, [sp, #128]
|
||||
ldp x27, x28, [sp, #144]
|
||||
|
||||
ldp x29, x30, [sp], #176
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x0
|
||||
.private_extern _abi_test_clobber_x0
|
||||
.align 4
|
||||
// Overwrites x0 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x0, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x1
|
||||
.private_extern _abi_test_clobber_x1
|
||||
.align 4
|
||||
// Overwrites x1 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x1, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x2
|
||||
.private_extern _abi_test_clobber_x2
|
||||
.align 4
|
||||
// Overwrites x2 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x2, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x3
|
||||
.private_extern _abi_test_clobber_x3
|
||||
.align 4
|
||||
// Overwrites x3 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x3, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x4
|
||||
.private_extern _abi_test_clobber_x4
|
||||
.align 4
|
||||
// Overwrites x4 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x4, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x5
|
||||
.private_extern _abi_test_clobber_x5
|
||||
.align 4
|
||||
// Overwrites x5 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x5, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x6
|
||||
.private_extern _abi_test_clobber_x6
|
||||
.align 4
|
||||
// Overwrites x6 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x6, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x7
|
||||
.private_extern _abi_test_clobber_x7
|
||||
.align 4
|
||||
// Overwrites x7 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x7, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x8
|
||||
.private_extern _abi_test_clobber_x8
|
||||
.align 4
|
||||
// Overwrites x8 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x8, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x9
|
||||
.private_extern _abi_test_clobber_x9
|
||||
.align 4
|
||||
// Overwrites x9 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x9, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x10
|
||||
.private_extern _abi_test_clobber_x10
|
||||
.align 4
|
||||
// Overwrites x10 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x10, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x11
|
||||
.private_extern _abi_test_clobber_x11
|
||||
.align 4
|
||||
// Overwrites x11 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x11, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x12
|
||||
.private_extern _abi_test_clobber_x12
|
||||
.align 4
|
||||
// Overwrites x12 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x12, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x13
|
||||
.private_extern _abi_test_clobber_x13
|
||||
.align 4
|
||||
// Overwrites x13 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x13, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x14
|
||||
.private_extern _abi_test_clobber_x14
|
||||
.align 4
|
||||
// Overwrites x14 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x14, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x15
|
||||
.private_extern _abi_test_clobber_x15
|
||||
.align 4
|
||||
// Overwrites x15 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x15, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x16
|
||||
.private_extern _abi_test_clobber_x16
|
||||
.align 4
|
||||
// Overwrites x16 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x16, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x17
|
||||
.private_extern _abi_test_clobber_x17
|
||||
.align 4
|
||||
// Overwrites x17 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x17, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x19
|
||||
.private_extern _abi_test_clobber_x19
|
||||
.align 4
|
||||
// Overwrites x19 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_x19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x19, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x20
|
||||
.private_extern _abi_test_clobber_x20
|
||||
.align 4
|
||||
// Overwrites x20 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_x20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x20, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x21
|
||||
.private_extern _abi_test_clobber_x21
|
||||
.align 4
|
||||
// Overwrites x21 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_x21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x21, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x22
|
||||
.private_extern _abi_test_clobber_x22
|
||||
.align 4
|
||||
// Overwrites x22 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_x22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x22, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x23
|
||||
.private_extern _abi_test_clobber_x23
|
||||
.align 4
|
||||
// Overwrites x23 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_x23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x23, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x24
|
||||
.private_extern _abi_test_clobber_x24
|
||||
.align 4
|
||||
// Overwrites x24 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_x24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x24, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x25
|
||||
.private_extern _abi_test_clobber_x25
|
||||
.align 4
|
||||
// Overwrites x25 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_x25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x25, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x26
|
||||
.private_extern _abi_test_clobber_x26
|
||||
.align 4
|
||||
// Overwrites x26 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_x26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x26, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x27
|
||||
.private_extern _abi_test_clobber_x27
|
||||
.align 4
|
||||
// Overwrites x27 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_x27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x27, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x28
|
||||
.private_extern _abi_test_clobber_x28
|
||||
.align 4
|
||||
// Overwrites x28 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_x28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x28, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x29
|
||||
.private_extern _abi_test_clobber_x29
|
||||
.align 4
|
||||
// Overwrites x29 (the frame pointer) with zero and returns without
// restoring it; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_x29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x29, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d0
|
||||
.private_extern _abi_test_clobber_d0
|
||||
.align 4
|
||||
// Overwrites d0 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d0, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d1
|
||||
.private_extern _abi_test_clobber_d1
|
||||
.align 4
|
||||
// Overwrites d1 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d1, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d2
|
||||
.private_extern _abi_test_clobber_d2
|
||||
.align 4
|
||||
// Overwrites d2 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d2, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d3
|
||||
.private_extern _abi_test_clobber_d3
|
||||
.align 4
|
||||
// Overwrites d3 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d3, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d4
|
||||
.private_extern _abi_test_clobber_d4
|
||||
.align 4
|
||||
// Overwrites d4 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d4, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d5
|
||||
.private_extern _abi_test_clobber_d5
|
||||
.align 4
|
||||
// Overwrites d5 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d5, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d6
|
||||
.private_extern _abi_test_clobber_d6
|
||||
.align 4
|
||||
// Overwrites d6 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d6, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d7
|
||||
.private_extern _abi_test_clobber_d7
|
||||
.align 4
|
||||
// Overwrites d7 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d7, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d8
|
||||
.private_extern _abi_test_clobber_d8
|
||||
.align 4
|
||||
// Overwrites d8 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_d8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d8, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d9
|
||||
.private_extern _abi_test_clobber_d9
|
||||
.align 4
|
||||
// Overwrites d9 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_d9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d9, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d10
|
||||
.private_extern _abi_test_clobber_d10
|
||||
.align 4
|
||||
// Overwrites d10 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_d10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d10, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d11
|
||||
.private_extern _abi_test_clobber_d11
|
||||
.align 4
|
||||
// Overwrites d11 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_d11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d11, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d12
|
||||
.private_extern _abi_test_clobber_d12
|
||||
.align 4
|
||||
// Overwrites d12 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_d12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d12, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d13
|
||||
.private_extern _abi_test_clobber_d13
|
||||
.align 4
|
||||
// Overwrites d13 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_d13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d13, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d14
|
||||
.private_extern _abi_test_clobber_d14
|
||||
.align 4
|
||||
// Overwrites d14 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_d14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d14, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d15
|
||||
.private_extern _abi_test_clobber_d15
|
||||
.align 4
|
||||
// Overwrites d15 with zero and returns without restoring it; presumably a
// deliberate clobber for ABI tests.
_abi_test_clobber_d15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d15, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d16
|
||||
.private_extern _abi_test_clobber_d16
|
||||
.align 4
|
||||
// Overwrites d16 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d16, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d17
|
||||
.private_extern _abi_test_clobber_d17
|
||||
.align 4
|
||||
// Overwrites d17 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d17, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d18
|
||||
.private_extern _abi_test_clobber_d18
|
||||
.align 4
|
||||
// Overwrites d18 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d18:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d18, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d19
|
||||
.private_extern _abi_test_clobber_d19
|
||||
.align 4
|
||||
// Overwrites d19 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d19, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d20
|
||||
.private_extern _abi_test_clobber_d20
|
||||
.align 4
|
||||
// Overwrites d20 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d20, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d21
|
||||
.private_extern _abi_test_clobber_d21
|
||||
.align 4
|
||||
// Overwrites d21 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d21, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d22
|
||||
.private_extern _abi_test_clobber_d22
|
||||
.align 4
|
||||
// Overwrites d22 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d22, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d23
|
||||
.private_extern _abi_test_clobber_d23
|
||||
.align 4
|
||||
// Overwrites d23 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d23, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d24
|
||||
.private_extern _abi_test_clobber_d24
|
||||
.align 4
|
||||
// Overwrites d24 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d24, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d25
|
||||
.private_extern _abi_test_clobber_d25
|
||||
.align 4
|
||||
// Overwrites d25 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d25, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d26
|
||||
.private_extern _abi_test_clobber_d26
|
||||
.align 4
|
||||
// Overwrites d26 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d26, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d27
|
||||
.private_extern _abi_test_clobber_d27
|
||||
.align 4
|
||||
// Overwrites d27 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d27, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d28
|
||||
.private_extern _abi_test_clobber_d28
|
||||
.align 4
|
||||
// Overwrites d28 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d28, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d29
|
||||
.private_extern _abi_test_clobber_d29
|
||||
.align 4
|
||||
// Overwrites d29 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d29, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d30
|
||||
.private_extern _abi_test_clobber_d30
|
||||
.align 4
|
||||
// Overwrites d30 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d30:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d30, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d31
|
||||
.private_extern _abi_test_clobber_d31
|
||||
.align 4
|
||||
// Overwrites d31 with zero and returns; presumably a deliberate clobber for ABI tests.
_abi_test_clobber_d31:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d31, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v8_upper
|
||||
.private_extern _abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
// Overwrites only the upper 64-bit lane (d[1]) of v8 with zero and returns;
// presumably a deliberate clobber for ABI tests.
_abi_test_clobber_v8_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v9_upper
|
||||
.private_extern _abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
// Overwrites only the upper 64-bit lane (d[1]) of v9 with zero and returns;
// presumably a deliberate clobber for ABI tests.
_abi_test_clobber_v9_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v10_upper
|
||||
.private_extern _abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
// Overwrites only the upper 64-bit lane (d[1]) of v10 with zero and returns;
// presumably a deliberate clobber for ABI tests.
_abi_test_clobber_v10_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v11_upper
|
||||
.private_extern _abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
// Overwrites only the upper 64-bit lane (d[1]) of v11 with zero and returns;
// presumably a deliberate clobber for ABI tests.
_abi_test_clobber_v11_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v12_upper
|
||||
.private_extern _abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
// Overwrites only the upper 64-bit lane (d[1]) of v12 with zero and returns;
// presumably a deliberate clobber for ABI tests.
_abi_test_clobber_v12_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v13_upper
|
||||
.private_extern _abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
// Overwrites only the upper 64-bit lane (d[1]) of v13 with zero and returns;
// presumably a deliberate clobber for ABI tests.
_abi_test_clobber_v13_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v14_upper
|
||||
.private_extern _abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
// Overwrites only the upper 64-bit lane (d[1]) of v14 with zero and returns;
// presumably a deliberate clobber for ABI tests.
_abi_test_clobber_v14_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v15_upper
|
||||
.private_extern _abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
// Overwrites only the upper 64-bit lane (d[1]) of v15 with zero and returns;
// presumably a deliberate clobber for ABI tests.
_abi_test_clobber_v15_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
1498
contrib/boringssl-cmake/ios-arm/crypto/chacha/chacha-armv4.S
Normal file
1498
contrib/boringssl-cmake/ios-arm/crypto/chacha/chacha-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
790
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/aesv8-armx32.S
Normal file
790
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/aesv8-armx32.S
Normal file
@ -0,0 +1,790 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
|
||||
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
.align 5
|
||||
Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
.globl _aes_hw_set_encrypt_key
|
||||
.private_extern _aes_hw_set_encrypt_key
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_set_encrypt_key
|
||||
#endif
|
||||
.align 5
|
||||
@ aes_hw_set_encrypt_key -- expand a user key into an AES key schedule.
@   r0: user key (read with vld1.8)     r1: key length in bits
@   r2: output key schedule
@ Returns in r0: 0 on success, -1 if r0 or r2 is NULL, -2 if r1 is below
@ 128, above 256, or not a multiple of 64.
@ On success the round count (10/12/14) is stored at schedule+240, r12
@ still holds it and r2 points at schedule+240; _aes_hw_set_decrypt_key
@ relies on both. Clobbers r1, r3, r12, q0-q3, q8-q10 and the flags.
_aes_hw_set_encrypt_key:
|
||||
@ Lenc_key is the local entry that _aes_hw_set_decrypt_key reaches via bl.
Lenc_key:
|
||||
mov r3,#-1
|
||||
cmp r0,#0
|
||||
beq Lenc_key_abort
|
||||
cmp r2,#0
|
||||
beq Lenc_key_abort
|
||||
mov r3,#-2
|
||||
cmp r1,#128
|
||||
blt Lenc_key_abort
|
||||
cmp r1,#256
|
||||
bgt Lenc_key_abort
|
||||
tst r1,#0x3f
|
||||
bne Lenc_key_abort
|
||||
|
||||
adr r3,Lrcon
|
||||
@ The flags from this compare select the key size at the blt/beq below;
@ the NEON instructions and the plain mov in between leave them intact.
cmp r1,#192
|
||||
|
||||
veor q0,q0,q0
|
||||
vld1.8 {q3},[r0]!
|
||||
mov r1,#8 @ reuse r1
|
||||
vld1.32 {q1,q2},[r3]!
|
||||
|
||||
blt Loop128
|
||||
beq L192
|
||||
b L256
|
||||
|
||||
.align 4
|
||||
@ 128-bit key: eight looped rounds, then two unrolled rounds after q1 is
@ reloaded with the third row of Lrcon (0x1b).
Loop128:
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
bne Loop128
|
||||
|
||||
vld1.32 {q1},[r3]
|
||||
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
veor q3,q3,q10
|
||||
vst1.32 {q3},[r2]
|
||||
@ Ten 16-byte stores with writeback precede this point (160 bytes);
@ adding 0x50 moves r2 to schedule+240.
add r2,r2,#0x50
|
||||
|
||||
mov r12,#10
|
||||
b Ldone
|
||||
|
||||
.align 4
|
||||
@ 192-bit key: load the extra 8 key bytes into d16 and run eight rounds.
L192:
|
||||
vld1.8 {d16},[r0]!
|
||||
vmov.i8 q10,#8 @ borrow q10
|
||||
vst1.32 {q3},[r2]!
|
||||
vsub.i8 q2,q2,q10 @ adjust the mask
|
||||
|
||||
Loop192:
|
||||
vtbl.8 d20,{q8},d4
|
||||
vtbl.8 d21,{q8},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {d16},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
|
||||
vdup.32 q9,d7[1]
|
||||
veor q9,q9,q8
|
||||
veor q10,q10,q1
|
||||
vext.8 q8,q0,q8,#12
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q8,q8,q9
|
||||
veor q3,q3,q10
|
||||
veor q8,q8,q10
|
||||
vst1.32 {q3},[r2]!
|
||||
bne Loop192
|
||||
|
||||
mov r12,#12
|
||||
@ 16 + 8*(8+16) = 208 bytes have been stored; +0x20 gives schedule+240.
add r2,r2,#0x20
|
||||
b Ldone
|
||||
|
||||
.align 4
|
||||
@ 256-bit key: second 16 key bytes go in q8; seven passes, leaving from the
@ middle of the loop body.
L256:
|
||||
vld1.8 {q8},[r0]
|
||||
mov r1,#7
|
||||
mov r12,#14
|
||||
vst1.32 {q3},[r2]!
|
||||
|
||||
Loop256:
|
||||
vtbl.8 d20,{q8},d4
|
||||
vtbl.8 d21,{q8},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q8},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
@ The flags from this subs are tested by the beq Ldone further down.
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
vst1.32 {q3},[r2]!
|
||||
beq Ldone
|
||||
|
||||
vdup.32 q10,d7[1]
|
||||
vext.8 q9,q0,q8,#12
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q8,q8,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q8,q8,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q8,q8,q9
|
||||
|
||||
veor q8,q8,q10
|
||||
b Loop256
|
||||
|
||||
@ All three paths arrive with r2 = schedule+240 and r12 = round count.
Ldone:
|
||||
str r12,[r2]
|
||||
mov r3,#0
|
||||
|
||||
Lenc_key_abort:
|
||||
mov r0,r3 @ return value
|
||||
|
||||
bx lr
|
||||
|
||||
|
||||
@ _aes_hw_set_decrypt_key
@ Builds a decryption key schedule in place.
@ It first calls Lenc_key (not visible in this excerpt; presumably the
@ shared key-expansion entry that finishes at Ldone/Lenc_key_abort).
@ If that call leaves a non-zero value in r0 it is returned unchanged.
@ Otherwise the 16-byte round keys are reversed end-for-end, and every
@ key except the two outermost ones is passed through aesimc.
@ Returns: r0 = 0 on success.
@ Saves/restores: r4 and lr. Also writes r0, r2, q0, q1 and the flags.
.globl _aes_hw_set_decrypt_key
|
||||
.private_extern _aes_hw_set_decrypt_key
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_set_decrypt_key
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_set_decrypt_key:
|
||||
stmdb sp!,{r4,lr}
|
||||
bl Lenc_key
|
||||
|
||||
cmp r0,#0
|
||||
bne Ldec_key_abort
|
||||
|
||||
@ After key expansion r2 points 240 bytes past the schedule start (the
@ slot where the round count is stored) and r12 is expected to still
@ hold that round count (Ldone stores r12 at [r2]).
sub r2,r2,#240 @ restore original r2
|
||||
mov r4,#-16
|
||||
add r0,r2,r12,lsl#4 @ end of key schedule
|
||||
|
||||
@ Swap the first and last round keys as they are (no aesimc).
@ r2 walks upwards by 16, r0 walks downwards by 16 (r4 = -16).
vld1.32 {q0},[r2]
|
||||
vld1.32 {q1},[r0]
|
||||
vst1.32 {q0},[r0],r4
|
||||
vst1.32 {q1},[r2]!
|
||||
|
||||
@ Swap the remaining pairs from the outside in, applying aesimc to
@ both keys of each pair, while the upper pointer is above the lower.
Loop_imc:
|
||||
vld1.32 {q0},[r2]
|
||||
vld1.32 {q1},[r0]
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
vst1.32 {q0},[r0],r4
|
||||
vst1.32 {q1},[r2]!
|
||||
cmp r0,r2
|
||||
bhi Loop_imc
|
||||
|
||||
@ Middle key: loaded via r2 and stored via r0. The two pointers are
@ equal here when the round count is even (10, 12 or 14).
vld1.32 {q0},[r2]
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
vst1.32 {q0},[r0]
|
||||
|
||||
eor r0,r0,r0 @ return value
|
||||
Ldec_key_abort:
|
||||
ldmia sp!,{r4,pc}
|
||||
|
||||
@ _aes_hw_encrypt
@ Encrypts one 16-byte block with the AES instructions.
@ In:  r0 = pointer to the input block
@      r1 = pointer to the output block
@      r2 = pointer to the key schedule; the word at offset 240 is read
@           as the round count
@ Writes r2, r3, q0, q1, q2 and the flags. No stack use.
.globl _aes_hw_encrypt
|
||||
.private_extern _aes_hw_encrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_encrypt
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_encrypt:
|
||||
ldr r3,[r2,#240]
|
||||
vld1.32 {q0},[r2]!
|
||||
vld1.8 {q2},[r0]
|
||||
@ Two rounds are peeled off below the loop, so count from rounds-2.
sub r3,r3,#2
|
||||
vld1.32 {q1},[r2]!
|
||||
|
||||
@ Two rounds per iteration. q0 and q1 alternate as the current round
@ key and are reloaded from the schedule as soon as they are consumed.
Loop_enc:
|
||||
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q0},[r2]!
|
||||
subs r3,r3,#2
|
||||
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q1},[r2]!
|
||||
bgt Loop_enc
|
||||
|
||||
@ Last two rounds: the final aese is not followed by aesmc; the last
@ round key is applied with a plain XOR.
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
|
||||
veor q2,q2,q0
|
||||
|
||||
vst1.8 {q2},[r1]
|
||||
bx lr
|
||||
|
||||
@ _aes_hw_decrypt
@ Decrypts one 16-byte block with the AES instructions. Same structure
@ as the single-block encrypt routine, using aesd/aesimc.
@ In:  r0 = pointer to the input block
@      r1 = pointer to the output block
@      r2 = pointer to the key schedule; the word at offset 240 is read
@           as the round count
@ Writes r2, r3, q0, q1, q2 and the flags. No stack use.
.globl _aes_hw_decrypt
|
||||
.private_extern _aes_hw_decrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_decrypt
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_decrypt:
|
||||
ldr r3,[r2,#240]
|
||||
vld1.32 {q0},[r2]!
|
||||
vld1.8 {q2},[r0]
|
||||
@ Two rounds are peeled off below the loop, so count from rounds-2.
sub r3,r3,#2
|
||||
vld1.32 {q1},[r2]!
|
||||
|
||||
@ Two rounds per iteration; q0 and q1 alternate as the round key.
Loop_dec:
|
||||
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q0},[r2]!
|
||||
subs r3,r3,#2
|
||||
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q1},[r2]!
|
||||
bgt Loop_dec
|
||||
|
||||
@ Last two rounds: the final aesd is not followed by aesimc; the last
@ round key is applied with a plain XOR.
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
|
||||
veor q2,q2,q0
|
||||
|
||||
vst1.8 {q2},[r1]
|
||||
bx lr
|
||||
|
||||
@ _aes_hw_cbc_encrypt -- entry point and common setup
@ In:  r0 = input pointer
@      r1 = output pointer
@      r2 = length in bytes; less than 16 returns immediately, and the
@           value is rounded down to a multiple of 16
@      r3 = key schedule; the word at offset 240 is read as the round
@           count
@      [sp]   = pointer to the 16-byte IV (kept in r4, written back at
@               Lcbc_done)
@      [sp,4] = direction flag; zero selects the decrypt path
@ Register roles after setup:
@      q6 = IV, q0 = first input block
@      q8,q9 = first two round keys, q10-q15 and q7 = last seven
@      r8 = post-increment used for input loads (16, or 0 when only
@           one block is left so r0 does not advance)
@      r5 = round count minus 8, r7 = key schedule + 32
.globl _aes_hw_cbc_encrypt
|
||||
.private_extern _aes_hw_cbc_encrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_cbc_encrypt
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_cbc_encrypt:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,lr}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load remaining args
|
||||
subs r2,r2,#16
|
||||
mov r8,#16
|
||||
blo Lcbc_abort
|
||||
moveq r8,#0
|
||||
|
||||
@ The result of this compare is consumed by the beq Lcbc_dec further
@ down; nothing in between updates the flags.
cmp r5,#0 @ en- or decrypting?
|
||||
ldr r5,[r3,#240]
|
||||
and r2,r2,#-16
|
||||
vld1.8 {q6},[r4]
|
||||
vld1.8 {q0},[r0],r8
|
||||
|
||||
vld1.32 {q8,q9},[r3] @ load key schedule...
|
||||
sub r5,r5,#6
|
||||
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
|
||||
sub r5,r5,#2
|
||||
vld1.32 {q10,q11},[r7]!
|
||||
vld1.32 {q12,q13},[r7]!
|
||||
vld1.32 {q14,q15},[r7]!
|
||||
vld1.32 {q7},[r7]
|
||||
|
||||
add r7,r3,#32
|
||||
mov r6,r5
|
||||
beq Lcbc_dec
|
||||
|
||||
@ Encrypt path. r5 == 2 corresponds to a round count of 10.
@ q0 = first block XOR IV; q5 = first round key XOR last round key,
@ which is later folded into each following input block.
cmp r5,#2
|
||||
veor q0,q0,q6
|
||||
veor q5,q8,q7
|
||||
beq Lcbc_enc128
|
||||
|
||||
@ Longer schedules: q2,q3 take the keys at offsets 32 and 48, and
@ r7, r6, r12, r14, r3 become pointers to the keys at offsets
@ 16, 64, 80, 96 and 112 so the loop can reload them each block.
vld1.32 {q2,q3},[r7]
|
||||
add r7,r3,#16
|
||||
add r6,r3,#16*4
|
||||
add r12,r3,#16*5
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
add r14,r3,#16*6
|
||||
add r3,r3,#16*7
|
||||
b Lenter_cbc_enc
|
||||
|
||||
@ CBC encrypt loop for the longer key schedules, one block per pass.
@ On each pass q8 holds (next input block XOR q5), where q5 is the
@ first round key XOR the last round key. Because aese XORs its key
@ operand into the state first, the leading aese q0,q8 applies the
@ final round-key XOR of the previous block, the CBC chaining XOR and
@ the first round key of the new block in a single step.
@ q6 carries the finished ciphertext block, stored at the top of the
@ next pass (or after the loop for the last block).
.align 4
|
||||
Loop_cbc_enc:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vst1.8 {q6},[r1]!
|
||||
Lenter_cbc_enc:
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
@ q8 and q9 are reused as scratch for round keys reloaded through
@ r6, r12, r14 and r3 (set up by the function entry code).
vld1.32 {q8},[r6]
|
||||
@ r5 == 4 corresponds to a round count of 12: skip two rounds.
cmp r5,#4
|
||||
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r12]
|
||||
beq Lcbc_enc192
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q8},[r14]
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r3]
|
||||
nop
|
||||
|
||||
Lcbc_enc192:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
@ Flags from this subs drive both the moveq below and the bhs at the
@ bottom of the loop; nothing in between updates the flags.
subs r2,r2,#16
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
@ When the block loaded next is the last one, stop advancing r0.
moveq r8,#0
|
||||
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.8 {q8},[r0],r8
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
veor q8,q8,q5
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
@ q6 = finished ciphertext block (state XOR last round key in q7).
veor q6,q0,q7
|
||||
bhs Loop_cbc_enc
|
||||
|
||||
vst1.8 {q6},[r1]!
|
||||
b Lcbc_done
|
||||
|
||||
@ CBC encrypt loop for the shortest key schedule (reached when the
@ round count minus 8 equals 2). All round keys stay in registers:
@ q9, q2, q3, q10-q15 and q7. q8 holds the first round key on entry
@ and is then reused for (next input block XOR q5), where q5 is the
@ first round key XOR the last round key, so the leading aese q0,q8
@ of each pass also performs the CBC chaining XOR.
.align 5
|
||||
Lcbc_enc128:
|
||||
@ r7 points at key schedule + 32 here (set by the entry code).
vld1.32 {q2,q3},[r7]
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
b Lenter_cbc_enc128
|
||||
Loop_cbc_enc128:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
@ Store the ciphertext block finished by the previous pass.
vst1.8 {q6},[r1]!
|
||||
Lenter_cbc_enc128:
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
@ Flags from this subs drive both the moveq below and the bhs at the
@ bottom of the loop; nothing in between updates the flags.
subs r2,r2,#16
|
||||
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
@ When the block loaded next is the last one, stop advancing r0.
moveq r8,#0
|
||||
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.8 {q8},[r0],r8
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
veor q8,q8,q5
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
@ q6 = finished ciphertext block (state XOR last round key in q7).
veor q6,q0,q7
|
||||
bhs Loop_cbc_enc128
|
||||
|
||||
vst1.8 {q6},[r1]!
|
||||
b Lcbc_done
|
||||
@ CBC decrypt setup. On entry q0 holds the first input block (loaded
@ by the function entry code) and q6 the IV. The vorr qX,qY,qY forms
@ are plain register copies.
@ r2 is biased by a further 32, so a borrow here means fewer than
@ three blocks are available and the two-block tail is used instead.
.align 5
|
||||
Lcbc_dec:
|
||||
vld1.8 {q10},[r0]!
|
||||
subs r2,r2,#32 @ bias
|
||||
@ r6 = inner round-loop counter (r5 + 2).
add r6,r5,#2
|
||||
vorr q3,q0,q0
|
||||
vorr q1,q0,q0
|
||||
vorr q11,q10,q10
|
||||
blo Lcbc_dec_tail
|
||||
|
||||
@ Three-block case: q0,q1,q10 become the working states and q2,q3,q11
@ keep copies of the same ciphertext blocks for the chaining XOR.
vorr q1,q10,q10
|
||||
vld1.8 {q10},[r0]!
|
||||
vorr q2,q0,q0
|
||||
vorr q3,q1,q1
|
||||
vorr q11,q10,q10
|
||||
|
||||
@ CBC decrypt main loop: three blocks in flight in q0, q1 and q10.
@ q2, q3 and q11 hold the ciphertext of those blocks and q6 the
@ previous ciphertext block (or IV); they supply the chaining XOR.
@ The first part is the inner round loop, two rounds per pass, with
@ q8/q9 reloaded from the schedule through r7 and counted by r6.
Loop3x_cbc_dec:
|
||||
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Loop3x_cbc_dec
|
||||
|
||||
@ Remaining rounds use the keys held in q8, q9 and q12-q15. The
@ chaining values are pre-XORed with the last round key (q7) so one
@ veor per block finishes both steps.
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q4,q6,q7
|
||||
@ Flags from this subs are consumed by movlo below and by the bhs at
@ the bottom of the loop; nothing in between updates the flags.
subs r2,r2,#0x30
|
||||
veor q5,q2,q7
|
||||
movlo r6,r2 @ r6 is zero at this point
|
||||
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q9,q3,q7
|
||||
add r0,r0,r6 @ r0 is adjusted in such way that
|
||||
@ at exit from the loop q1-q10
|
||||
@ are loaded with last "words"
|
||||
@ q6 = ciphertext of the third block, the chaining value for the next
@ group (and the IV written back at Lcbc_done).
vorr q6,q11,q11
|
||||
mov r7,r3
|
||||
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
@ Next three input blocks are fetched into q2, q3, q11 while the
@ current group finishes.
vld1.8 {q2},[r0]!
|
||||
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q3},[r0]!
|
||||
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q11},[r0]!
|
||||
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
|
||||
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
|
||||
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
|
||||
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
|
||||
add r6,r5,#2
|
||||
@ Final XORs produce the three plaintext blocks in q4, q5 and q10.
veor q4,q4,q0
|
||||
veor q5,q5,q1
|
||||
veor q10,q10,q9
|
||||
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
|
||||
vst1.8 {q4},[r1]!
|
||||
vorr q0,q2,q2
|
||||
vst1.8 {q5},[r1]!
|
||||
vorr q1,q3,q3
|
||||
vst1.8 {q10},[r1]!
|
||||
vorr q10,q11,q11
|
||||
bhs Loop3x_cbc_dec
|
||||
|
||||
@ r2 == -0x30 means no blocks are left; otherwise fall into the tail.
cmn r2,#0x30
|
||||
beq Lcbc_done
|
||||
nop
|
||||
|
||||
@ CBC decrypt tail: handles the last one or two blocks, carried in q1
@ and q10. q6 holds the chaining value for the first of them, q3 the
@ chaining value for the second, and q11 the last ciphertext block.
@ The first part is the inner round loop, two rounds per pass.
Lcbc_dec_tail:
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Lcbc_dec_tail
|
||||
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
@ r2 == -0x20 means exactly one block is left. The result of this
@ compare is consumed by the beq Lcbc_dec_one below; nothing in
@ between updates the flags.
cmn r2,#0x20
|
||||
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
@ Chaining values pre-XORed with the last round key (q7).
veor q5,q6,q7
|
||||
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q9,q3,q7
|
||||
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
|
||||
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
|
||||
beq Lcbc_dec_one
|
||||
@ Two blocks: finish both, remember the last ciphertext block in q6.
veor q5,q5,q1
|
||||
veor q9,q9,q10
|
||||
vorr q6,q11,q11
|
||||
vst1.8 {q5},[r1]!
|
||||
vst1.8 {q9},[r1]!
|
||||
b Lcbc_done
|
||||
|
||||
@ Single remaining block on the decrypt path: the result comes from
@ q10, XORed with the prepared chaining value in q5.
Lcbc_dec_one:
|
||||
veor q5,q5,q10
|
||||
vorr q6,q11,q11
|
||||
vst1.8 {q5},[r1]!
|
||||
|
||||
@ Common exit: write the updated chaining value in q6 back through
@ the IV pointer in r4.
Lcbc_done:
|
||||
vst1.8 {q6},[r4]
|
||||
@ Exit used directly when the length was below 16: only restores the
@ saved d8-d15 and core registers and returns via the saved lr.
Lcbc_abort:
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,pc}
|
||||
|
||||
@ _aes_hw_ctr32_encrypt_blocks -- entry point and setup
@ In:  r0 = input pointer
@      r1 = output pointer
@      r2 = number of 16-byte blocks (compared and counted in blocks)
@      r3 = key schedule; the word at offset 240 is read as the round
@           count
@      [sp] = pointer to the 16-byte counter block; only the 32-bit
@             word at offset 12 is incremented
@ Register roles after setup:
@      q6 = counter block as loaded, used as the template
@      q0, q1, q10 = counter blocks for three consecutive blocks
@      q8,q9 = first two round keys, q12-q15 and q7 = last five
@      r8 = counter word; byte-swapped to host order unless
@           __ARMEB__ is defined
@      r12 = stride between the two input loads in the tail
.globl _aes_hw_ctr32_encrypt_blocks
|
||||
.private_extern _aes_hw_ctr32_encrypt_blocks
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_ctr32_encrypt_blocks
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_ctr32_encrypt_blocks:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldr r4, [ip] @ load remaining arg
|
||||
ldr r5,[r3,#240]
|
||||
|
||||
ldr r8, [r4, #12]
|
||||
vld1.32 {q0},[r4]
|
||||
|
||||
vld1.32 {q8,q9},[r3] @ load key schedule...
|
||||
sub r5,r5,#4
|
||||
mov r12,#16
|
||||
@ The result of this compare is consumed by movlo and by bls further
@ down; nothing in between updates the flags.
cmp r2,#2
|
||||
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
|
||||
sub r5,r5,#2
|
||||
vld1.32 {q12,q13},[r7]!
|
||||
vld1.32 {q14,q15},[r7]!
|
||||
vld1.32 {q7},[r7]
|
||||
add r7,r3,#32
|
||||
mov r6,r5
|
||||
@ Fewer than two blocks: the tail must not step r0 to a second block.
movlo r12,#0
|
||||
#ifndef __ARMEB__
|
||||
rev r8, r8
|
||||
#endif
|
||||
vorr q1,q0,q0
|
||||
add r10, r8, #1
|
||||
vorr q10,q0,q0
|
||||
add r8, r8, #2
|
||||
vorr q6,q0,q0
|
||||
rev r10, r10
|
||||
@ d3[1] is the top 32-bit lane of q1: q1 = counter + 1.
vmov.32 d3[1],r10
|
||||
bls Lctr32_tail
|
||||
@ d21[1] is the top 32-bit lane of q10: q10 = counter + 2.
rev r12, r8
|
||||
sub r2,r2,#3 @ bias
|
||||
vmov.32 d21[1],r12
|
||||
b Loop3x_ctr32
|
||||
|
||||
@ CTR main loop: three counter blocks are encrypted in parallel in
@ q0, q1 and q10, then XORed with three input blocks.
@ The first part is the inner round loop, two rounds per pass, with
@ q8/q9 reloaded from the schedule through r7 and counted by r6.
.align 4
|
||||
Loop3x_ctr32:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Loop3x_ctr32
|
||||
|
||||
@ From here the three states move to q4, q5 and q9 (the aesmc results
@ are written to different registers), which frees q0, q1 and q10 to
@ be rebuilt from the template q6 as the next three counter blocks.
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
|
||||
vld1.8 {q2},[r0]!
|
||||
vorr q0,q6,q6
|
||||
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.8 {q3},[r0]!
|
||||
vorr q1,q6,q6
|
||||
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
vld1.8 {q11},[r0]!
|
||||
mov r7,r3
|
||||
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
|
||||
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
|
||||
vorr q10,q6,q6
|
||||
@ r9, r10 and the updated r8 are the next three counter values.
add r9,r8,#1
|
||||
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
@ Input blocks are pre-XORed with the last round key (q7) so a single
@ veor with the cipher state finishes each output block.
veor q2,q2,q7
|
||||
add r10,r8,#2
|
||||
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
veor q3,q3,q7
|
||||
add r8,r8,#3
|
||||
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
veor q11,q11,q7
|
||||
rev r9,r9
|
||||
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
@ Insert the new counter words into the top lane of q0, q1 and q10.
vmov.32 d1[1], r9
|
||||
rev r10,r10
|
||||
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
vmov.32 d3[1], r10
|
||||
rev r12,r8
|
||||
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
vmov.32 d21[1], r12
|
||||
@ Flags from this subs are consumed by the bhs at the bottom of the
@ loop; nothing in between updates the flags.
subs r2,r2,#3
|
||||
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
|
||||
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
|
||||
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
|
||||
|
||||
veor q2,q2,q4
|
||||
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
|
||||
vst1.8 {q2},[r1]!
|
||||
veor q3,q3,q5
|
||||
mov r6,r5
|
||||
vst1.8 {q3},[r1]!
|
||||
veor q11,q11,q9
|
||||
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
|
||||
vst1.8 {q11},[r1]!
|
||||
bhs Loop3x_ctr32
|
||||
|
||||
@ Undo the bias: r2 = blocks still to do (0, 1 or 2).
adds r2,r2,#3
|
||||
beq Lctr32_done
|
||||
@ r12 = stride between the two tail loads; 0 when one block is left.
cmp r2,#1
|
||||
mov r12,#16
|
||||
moveq r12,#0
|
||||
|
||||
@ CTR tail: encrypts the counter blocks in q0 and q1 and produces one
@ or two output blocks. Two blocks are always computed; when only one
@ is wanted, r12 is 0 so the second input load re-reads the same
@ block and the second store is skipped.
@ The first part is the inner round loop, two rounds per pass.
Lctr32_tail:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Lctr32_tail
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
@ First input block; r0 then advances by r12 (16 or 0).
vld1.8 {q2},[r0],r12
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.8 {q3},[r0]
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
@ Input blocks are pre-XORed with the last round key (q7).
veor q2,q2,q7
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
veor q3,q3,q7
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
|
||||
|
||||
@ r2 == 1 means only the first result is stored.
cmp r2,#1
|
||||
veor q2,q2,q0
|
||||
veor q3,q3,q1
|
||||
vst1.8 {q2},[r1]!
|
||||
beq Lctr32_done
|
||||
vst1.8 {q3},[r1]
|
||||
|
||||
@ Common exit: restore the saved d8-d15 and core registers, return.
Lctr32_done:
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
|
||||
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
982
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/armv4-mont.S
Normal file
982
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/armv4-mont.S
Normal file
@ -0,0 +1,982 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
|
||||
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__)
|
||||
.syntax unified
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.align 5
|
||||
LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-Lbn_mul_mont
|
||||
#endif
|
||||
|
||||
@ _bn_mul_mont -- entry, NEON dispatch and frame setup for the
@ integer-only Montgomery multiplication path.
@ In (as used by the code and its comments):
@      r0 = rp (result), r1 = ap, r2 = bp, r3 = np
@      [sp]   = pointer to n0
@      [sp,4] = num (number of 32-bit words)
@ Returns r0 = 0 without doing any work when num < 2; the success
@ path at the end of the function returns r0 = 1.
@ Stack frame once set up (word offsets from r0 = &tp[num-1]):
@      +1 tp[num], +2..+11 saved r4-r12,lr, +12 rp, +13 bp,
@      +14 &n0 (later the n0 value), +15 num (later &bp[num-1])
.globl _bn_mul_mont
|
||||
.private_extern _bn_mul_mont
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _bn_mul_mont
|
||||
#endif
|
||||
|
||||
.align 5
|
||||
_bn_mul_mont:
|
||||
Lbn_mul_mont:
|
||||
ldr ip,[sp,#4] @ load num
|
||||
stmdb sp!,{r0,r2} @ sp points at argument block
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
@ The NEON routine is only tried when num is a multiple of 8.
tst ip,#7
|
||||
bne Lialu
|
||||
@ Load the capability word: LOPENSSL_armcap holds the offset from
@ Lbn_mul_mont to OPENSSL_armcap_P; under __APPLE__ the loaded value
@ is dereferenced once more.
adr r0,Lbn_mul_mont
|
||||
ldr r2,LOPENSSL_armcap
|
||||
ldr r0,[r0,r2]
|
||||
#ifdef __APPLE__
|
||||
ldr r0,[r0]
|
||||
#endif
|
||||
tst r0,#ARMV7_NEON @ NEON available?
|
||||
@ r0 and r2 were used as scratch above; reload them (ldmia leaves
@ the flags from the tst intact).
ldmia sp, {r0,r2}
|
||||
beq Lialu
|
||||
add sp,sp,#8
|
||||
b bn_mul8x_mont_neon
|
||||
.align 4
|
||||
Lialu:
|
||||
#endif
|
||||
cmp ip,#2
|
||||
mov r0,ip @ load num
|
||||
#ifdef __thumb2__
|
||||
ittt lt
|
||||
#endif
|
||||
@ num < 2: drop the two pushed words and return 0.
movlt r0,#0
|
||||
addlt sp,sp,#2*4
|
||||
blt Labrt
|
||||
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
|
||||
|
||||
mov r0,r0,lsl#2 @ rescale r0 for byte count
|
||||
sub sp,sp,r0 @ alloca(4*num)
|
||||
sub sp,sp,#4 @ +extra dword
|
||||
sub r0,r0,#4 @ "num=num-1"
|
||||
add r4,r2,r0 @ &bp[num-1]
|
||||
|
||||
add r0,sp,r0 @ r0 to point at &tp[num-1]
|
||||
ldr r8,[r0,#14*4] @ &n0
|
||||
ldr r2,[r2] @ bp[0]
|
||||
ldr r5,[r1],#4 @ ap[0],ap++
|
||||
ldr r6,[r3],#4 @ np[0],np++
|
||||
ldr r8,[r8] @ *n0
|
||||
str r4,[r0,#15*4] @ save &bp[num-1] (outer-loop end marker)
|
||||
|
||||
umull r10,r11,r5,r2 @ ap[0]*bp[0]
|
||||
str r8,[r0,#14*4] @ save n0 value
|
||||
mul r8,r10,r8 @ "tp[0]"*n0
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
|
||||
@ r4 = write pointer into tp for the first pass.
mov r4,sp
|
||||
|
||||
@ First pass over the words of ap and np, using bp[0] only.
@ r10:r11 accumulates ap[j]*bp[0] and r12:r14 accumulates np[j]*r8,
@ where r8 is the per-pass multiplier computed from n0. Each iteration
@ stores one word of tp through r4 and runs until r4 reaches r0
@ (&tp[num-1]).
L1st:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
mov r10,r11
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne L1st
|
||||
|
||||
@ Fold the two remaining carries into tp[num-1] and tp[num], and
@ reload the saved bp pointer and n0 value for the outer loop.
adds r12,r12,r11
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
mov r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
@ r7 = sp is consumed by the first instruction of the outer loop.
mov r7,sp
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
@ Outer loop: one pass per remaining word of bp. On entry r7 must
@ equal sp, so r0 - r7 is the byte distance ap and np advanced during
@ the previous pass; both pointers are rewound by that amount.
Louter:
|
||||
sub r7,r0,r7 @ "original" r0-1 value
|
||||
sub r1,r1,r7 @ "rewind" ap to &ap[1]
|
||||
ldr r2,[r4,#4]! @ *(++bp)
|
||||
sub r3,r3,r7 @ "rewind" np to &np[1]
|
||||
ldr r5,[r1,#-4] @ ap[0]
|
||||
ldr r10,[sp] @ tp[0]
|
||||
ldr r6,[r3,#-4] @ np[0]
|
||||
ldr r7,[sp,#4] @ tp[1]
|
||||
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
|
||||
str r4,[r0,#13*4] @ save bp
|
||||
@ r8 = low word of the sum times n0: the multiplier for this pass.
mul r8,r10,r8
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
|
||||
mov r4,sp
|
||||
|
||||
@ Inner loop: same accumulation as the first pass, plus the previous
@ tp word (held in r7, prefetched one word ahead).
Linner:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
adds r10,r11,r7 @ +=tp[j]
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
@ Picks up the carry from the adds above; the instructions in
@ between do not update the flags.
adc r11,r11,#0
|
||||
ldr r7,[r4,#8] @ tp[j+1]
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne Linner
|
||||
|
||||
@ Fold the carries and the old top word into tp[num-1] and tp[num].
adds r12,r12,r11
|
||||
mov r14,#0
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
adc r14,r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adds r12,r12,r7
|
||||
ldr r7,[r0,#15*4] @ restore &bp[num-1] (end marker)
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
@ Done once the bp pointer has reached the saved end marker.
@ Otherwise set r7 = sp as the outer loop expects and go again.
cmp r4,r7
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
movne r7,sp
|
||||
bne Louter
|
||||
|
||||
@ Final reduction and exit of the integer-only path.
@ Lsub writes tp - np into rp word by word with borrow propagation.
@ Lcopy then keeps that difference, or overwrites it with tp when the
@ subtraction borrowed, and overwrites every tp word as it goes.
ldr r2,[r0,#12*4] @ pull rp
|
||||
mov r5,sp
|
||||
add r0,r0,#4 @ r0 to point at &tp[num]
|
||||
sub r5,r0,r5 @ "original" num value
|
||||
mov r4,sp @ "rewind" r4
|
||||
mov r1,r4 @ "borrow" r1
|
||||
sub r3,r3,r5 @ "rewind" r3 to &np[0]
|
||||
|
||||
subs r7,r7,r7 @ sets C=1, i.e. no borrow pending for the sbcs chain
|
||||
Lsub: ldr r7,[r4],#4
|
||||
ldr r6,[r3],#4
|
||||
sbcs r7,r7,r6 @ tp[j]-np[j]
|
||||
str r7,[r2],#4 @ rp[j]=
|
||||
teq r4,r0 @ preserve carry
|
||||
bne Lsub
|
||||
@ r14 still holds the top word tp[num]; after this the carry flag is
@ clear exactly when the whole subtraction borrowed.
sbcs r14,r14,#0 @ upmost carry
|
||||
mov r4,sp @ "rewind" r4
|
||||
sub r2,r2,r5 @ "rewind" r2
|
||||
|
||||
Lcopy: ldr r7,[r4] @ conditional copy
|
||||
ldr r5,[r2]
|
||||
@ Overwrite the temporary word with the value of sp.
str sp,[r4],#4 @ zap tp
|
||||
#ifdef __thumb2__
|
||||
it cc
|
||||
#endif
|
||||
@ Carry clear (borrow): the result word is taken from tp instead.
movcc r5,r7
|
||||
str r5,[r2],#4
|
||||
teq r4,r0 @ preserve carry
|
||||
bne Lcopy
|
||||
|
||||
@ Release the tp area, restore the saved registers, return 1.
mov sp,r0
|
||||
add sp,sp,#4 @ skip over tp[num+1]
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
|
||||
add sp,sp,#2*4 @ skip over {r0,r2}
|
||||
mov r0,#1
|
||||
@ Shared return point; the num < 2 early exit arrives here with r0 = 0.
Labrt:
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr @ bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func bn_mul8x_mont_neon
|
||||
#endif
|
||||
.align 5
|
||||
bn_mul8x_mont_neon:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load rest of parameter block
|
||||
mov ip,sp
|
||||
|
||||
cmp r5,#8
|
||||
bhi LNEON_8n
|
||||
|
||||
@ special case for r5==8, everything is in register bank...
|
||||
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
sub r7,sp,r5,lsl#4
|
||||
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
|
||||
and r7,r7,#-64
|
||||
vld1.32 {d30[0]}, [r4,:32]
|
||||
mov sp,r7 @ alloca
|
||||
vzip.16 d28,d8
|
||||
|
||||
vmull.u32 q6,d28,d0[0]
|
||||
vmull.u32 q7,d28,d0[1]
|
||||
vmull.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmull.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmull.u32 q10,d28,d2[0]
|
||||
vld1.32 {d4,d5,d6,d7}, [r3]!
|
||||
vmull.u32 q11,d28,d2[1]
|
||||
vmull.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmull.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
sub r9,r5,#1
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
b LNEON_outer8
|
||||
|
||||
.align 4
|
||||
LNEON_outer8:
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
vadd.u64 d12,d12,d10
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
subs r9,r9,#1
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
bne LNEON_outer8
|
||||
|
||||
vadd.u64 d12,d12,d10
|
||||
mov r7,sp
|
||||
vshr.u64 d10,d12,#16
|
||||
mov r8,r5
|
||||
vadd.u64 d13,d13,d10
|
||||
add r6,sp,#96
|
||||
vshr.u64 d10,d13,#16
|
||||
vzip.16 d12,d13
|
||||
|
||||
b LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
LNEON_8n:
|
||||
veor q6,q6,q6
|
||||
sub r7,sp,#128
|
||||
veor q7,q7,q7
|
||||
sub r7,r7,r5,lsl#4
|
||||
veor q8,q8,q8
|
||||
and r7,r7,#-64
|
||||
veor q9,q9,q9
|
||||
mov sp,r7 @ alloca
|
||||
veor q10,q10,q10
|
||||
add r7,r7,#256
|
||||
veor q11,q11,q11
|
||||
sub r8,r5,#8
|
||||
veor q12,q12,q12
|
||||
veor q13,q13,q13
|
||||
|
||||
LNEON_8n_init:
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
subs r8,r8,#8
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12,q13},[r7,:256]!
|
||||
bne LNEON_8n_init
|
||||
|
||||
add r6,sp,#256
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
add r10,sp,#8
|
||||
vld1.32 {d30[0]},[r4,:32]
|
||||
mov r9,r5
|
||||
b LNEON_8n_outer
|
||||
|
||||
.align 4
|
||||
LNEON_8n_outer:
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
add r7,sp,#128
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
vadd.u64 d29,d29,d12
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vadd.u64 d12,d12,d13
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vadd.u64 d14,d14,d12
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]!
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
vshl.i64 d29,d15,#16
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vadd.u64 d29,d29,d14
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vadd.u64 d14,d14,d15
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vadd.u64 d16,d16,d14
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]!
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
vshl.i64 d29,d17,#16
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vadd.u64 d29,d29,d16
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vadd.u64 d16,d16,d17
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vadd.u64 d18,d18,d16
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]!
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
vshl.i64 d29,d19,#16
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vadd.u64 d29,d29,d18
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vadd.u64 d18,d18,d19
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vadd.u64 d20,d20,d18
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]!
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
vshl.i64 d29,d21,#16
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vadd.u64 d29,d29,d20
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vadd.u64 d20,d20,d21
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vadd.u64 d22,d22,d20
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]!
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
vshl.i64 d29,d23,#16
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vadd.u64 d29,d29,d22
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vadd.u64 d22,d22,d23
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vadd.u64 d24,d24,d22
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]!
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
vshl.i64 d29,d25,#16
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vadd.u64 d29,d29,d24
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vadd.u64 d24,d24,d25
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vadd.u64 d26,d26,d24
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]!
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
vshl.i64 d29,d27,#16
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vadd.u64 d29,d29,d26
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vadd.u64 d26,d26,d27
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
vadd.u64 d12,d12,d26
|
||||
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
|
||||
add r10,sp,#8 @ rewind
|
||||
sub r8,r5,#8
|
||||
b LNEON_8n_inner
|
||||
|
||||
.align 4
|
||||
LNEON_8n_inner:
|
||||
subs r8,r8,#8
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vld1.64 {q13},[r6,:128]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vst1.64 {q6},[r7,:128]!
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vst1.64 {q7},[r7,:128]!
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vst1.64 {q8},[r7,:128]!
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vst1.64 {q9},[r7,:128]!
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vst1.64 {q10},[r7,:128]!
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vst1.64 {q11},[r7,:128]!
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vst1.64 {q12},[r7,:128]!
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
it eq
|
||||
subeq r1,r1,r5,lsl#2 @ rewind
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
add r10,sp,#8 @ rewind
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vst1.64 {q13},[r7,:128]!
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
|
||||
bne LNEON_8n_inner
|
||||
add r6,sp,#128
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
veor q2,q2,q2 @ d4-d5
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
veor q3,q3,q3 @ d6-d7
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12},[r7,:128]
|
||||
|
||||
subs r9,r9,#8
|
||||
vld1.64 {q6,q7},[r6,:256]!
|
||||
vld1.64 {q8,q9},[r6,:256]!
|
||||
vld1.64 {q10,q11},[r6,:256]!
|
||||
vld1.64 {q12,q13},[r6,:256]!
|
||||
|
||||
itt ne
|
||||
subne r3,r3,r5,lsl#2 @ rewind
|
||||
bne LNEON_8n_outer
|
||||
|
||||
add r7,sp,#128
|
||||
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
|
||||
vshr.u64 d10,d12,#16
|
||||
vst1.64 {q2,q3},[sp,:256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
mov r8,r5
|
||||
b LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
LNEON_tail:
|
||||
vadd.u64 d12,d12,d10
|
||||
vshr.u64 d10,d12,#16
|
||||
vld1.64 {q8,q9}, [r6, :256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vld1.64 {q10,q11}, [r6, :256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vld1.64 {q12,q13}, [r6, :256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
LNEON_tail_entry:
|
||||
vadd.u64 d14,d14,d10
|
||||
vst1.32 {d12[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d14,#16
|
||||
vadd.u64 d15,d15,d10
|
||||
vshr.u64 d10,d15,#16
|
||||
vzip.16 d14,d15
|
||||
vadd.u64 d16,d16,d10
|
||||
vst1.32 {d14[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d16,#16
|
||||
vadd.u64 d17,d17,d10
|
||||
vshr.u64 d10,d17,#16
|
||||
vzip.16 d16,d17
|
||||
vadd.u64 d18,d18,d10
|
||||
vst1.32 {d16[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d18,#16
|
||||
vadd.u64 d19,d19,d10
|
||||
vshr.u64 d10,d19,#16
|
||||
vzip.16 d18,d19
|
||||
vadd.u64 d20,d20,d10
|
||||
vst1.32 {d18[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d20,#16
|
||||
vadd.u64 d21,d21,d10
|
||||
vshr.u64 d10,d21,#16
|
||||
vzip.16 d20,d21
|
||||
vadd.u64 d22,d22,d10
|
||||
vst1.32 {d20[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d22,#16
|
||||
vadd.u64 d23,d23,d10
|
||||
vshr.u64 d10,d23,#16
|
||||
vzip.16 d22,d23
|
||||
vadd.u64 d24,d24,d10
|
||||
vst1.32 {d22[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d24,#16
|
||||
vadd.u64 d25,d25,d10
|
||||
vshr.u64 d10,d25,#16
|
||||
vzip.16 d24,d25
|
||||
vadd.u64 d26,d26,d10
|
||||
vst1.32 {d24[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d26,#16
|
||||
vadd.u64 d27,d27,d10
|
||||
vshr.u64 d10,d27,#16
|
||||
vzip.16 d26,d27
|
||||
vld1.64 {q6,q7}, [r6, :256]!
|
||||
subs r8,r8,#8
|
||||
vst1.32 {d26[0]}, [r7, :32]!
|
||||
bne LNEON_tail
|
||||
|
||||
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
|
||||
sub r3,r3,r5,lsl#2 @ rewind r3
|
||||
subs r1,sp,#0 @ clear carry flag
|
||||
add r2,sp,r5,lsl#2
|
||||
|
||||
LNEON_sub:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r3!, {r8,r9,r10,r11}
|
||||
sbcs r8, r4,r8
|
||||
sbcs r9, r5,r9
|
||||
sbcs r10,r6,r10
|
||||
sbcs r11,r7,r11
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne LNEON_sub
|
||||
|
||||
ldr r10, [r1] @ load top-most bit
|
||||
mov r11,sp
|
||||
veor q0,q0,q0
|
||||
sub r11,r2,r11 @ this is num*4
|
||||
veor q1,q1,q1
|
||||
mov r1,sp
|
||||
sub r0,r0,r11 @ rewind r0
|
||||
mov r3,r2 @ second 3/4th of frame
|
||||
sbcs r10,r10,#0 @ result is carry flag
|
||||
|
||||
LNEON_copy_n_zap:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
ldmia r1, {r4,r5,r6,r7}
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
sub r1,r1,#16
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r1,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne LNEON_copy_n_zap
|
||||
|
||||
mov sp,ip
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
bx lr @ bx lr
|
||||
|
||||
#endif
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.comm _OPENSSL_armcap_P,4
|
||||
.non_lazy_symbol_pointer
|
||||
OPENSSL_armcap_P:
|
||||
.indirect_symbol _OPENSSL_armcap_P
|
||||
.long 0
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
1536
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/bsaes-armv7.S
Normal file
1536
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/bsaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
258
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghash-armv4.S
Normal file
258
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghash-armv4.S
Normal file
@ -0,0 +1,258 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
|
||||
@ instructions are in aesv8-armx.pl.)
|
||||
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__) || defined(__clang__)
|
||||
.syntax unified
|
||||
#define ldrplb ldrbpl
|
||||
#define ldrneb ldrbne
|
||||
#endif
|
||||
#if defined(__thumb2__)
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
|
||||
|
||||
|
||||
@ _gcm_init_neon: derive the "twisted H" value consumed by the NEON GHASH code.
@   In:  r1 = address of H; 16 bytes are read (r1 is advanced by 8 on the way).
@   Out: the 16 bytes of q3 are stored at the address held in r0.
@   Touches q3, q8, q9 and d26 only; no stack use; returns with bx lr.
@ The 128-bit value d7:d6 (d7 is the upper half) is shifted left by one bit.
@ When the top bit of d7 was set before the shift, the constant t0
@ (d17 = 0xc2 in its top byte, d16 = 1) is XORed into the result.
.globl _gcm_init_neon
|
||||
.private_extern _gcm_init_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_init_neon
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_init_neon:
|
||||
vld1.64 d7,[r1]! @ load H
|
||||
vmov.i8 q8,#0xe1
|
||||
vld1.64 d6,[r1]
|
||||
vshl.i64 d17,#57
|
||||
vshr.u64 d16,#63 @ t0=0xc2....01
|
||||
@ q9 = top byte of d7 replicated; the signed shift below turns it into an
@ all-ones or all-zero mask taken from the bit that the shift is about to drop.
vdup.8 q9,d7[7]
|
||||
@ d26 = bit 63 of d6, i.e. the carry from the lower half into the upper half.
vshr.u64 d26,d6,#63
|
||||
vshr.s8 q9,#7 @ broadcast carry bit
|
||||
vshl.i64 q3,q3,#1
|
||||
@ Keep t0 only when the dropped top bit was 1.
vand q8,q8,q9
|
||||
vorr d7,d26 @ H<<<=1
|
||||
veor q3,q3,q8 @ twisted H
|
||||
vstmia r0,{q3}
|
||||
|
||||
bx lr @ bx lr
|
||||
|
||||
|
||||
@ _gcm_gmult_neon: one multiply-and-reduce of Xi by the twisted H, done by
@ jumping into the shared code at Lgmult_neon inside _gcm_ghash_neon.
@   In:  r0 = address of Xi (16 bytes read here; r0 ends up advanced by 16,
@             and the shared tail rewinds it and stores the result there),
@        r1 = address of the twisted H (16 bytes read into d26,d27).
@   Sets r3 = 16 so that the shared tail (subs r3,#16 / bne Loop_neon) falls
@   through to the write-out after a single pass.
@   d29, d30, d31 hold the 48-, 32- and 16-bit masks that the shared code ANDs
@   with; d28 = d26 ^ d27 is the Karatsuba middle operand.
@   The operand is placed in q3 (not q0) because Lgmult_neon multiplies q3.
.globl _gcm_gmult_neon
|
||||
.private_extern _gcm_gmult_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_neon
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_gmult_neon:
|
||||
vld1.64 d7,[r0]! @ load Xi
|
||||
vld1.64 d6,[r0]!
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff
|
||||
@ Byte-reverse each 64-bit half on little-endian builds only.
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
@ Exactly one 16-byte pass: the shared tail subtracts 16 and stops at zero.
mov r3,#16
|
||||
b Lgmult_neon
|
||||
|
||||
|
||||
@ _gcm_ghash_neon: fold a run of 16-byte input blocks into Xi.
@   In:  r0 = address of Xi (16 bytes read here, result written back by the
@             tail after Lgmult_neon),
@        r1 = address of the twisted H (16 bytes read into d26,d27),
@        r2 = input pointer, advanced by 16 per block,
@        r3 = remaining byte count, decremented by 16 per block.
@   NOTE(review): the loop exits only when r3 reaches exactly zero after a
@   subtraction of 16, so callers presumably pass a nonzero multiple of 16 -
@   confirm against the callers.
@   Xi is kept in q0 across iterations; each block is loaded into q3, XORed
@   with q0, and execution falls through into Lgmult_neon.
.globl _gcm_ghash_neon
|
||||
.private_extern _gcm_ghash_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_neon
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_ghash_neon:
|
||||
vld1.64 d1,[r0]! @ load Xi
|
||||
vld1.64 d0,[r0]!
|
||||
@ d29, d30, d31: 48-, 32- and 16-bit masks used by the multiply code.
vmov.i64 d29,#0x0000ffffffffffff
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
|
||||
@ Per-block entry: load the next 16 input bytes, byte-reverse them on
@ little-endian builds, and mix in the running Xi.
Loop_neon:
|
||||
vld1.64 d7,[r2]! @ load inp
|
||||
vld1.64 d6,[r2]!
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
veor q3,q0 @ inp^=Xi
|
||||
@ Lgmult_neon: shared multiply-and-reduce body, entered by fall-through from
@ Loop_neon and by a branch from _gcm_gmult_neon.
@   On entry: q3 (d6,d7) = operand, d26,d27 = twisted H, d28 = d26 ^ d27,
@             d29,d30,d31 = 48-, 32- and 16-bit masks, r3 = bytes remaining,
@             r0 = address of Xi plus 16.
@   Three 64x64 polynomial products are built from 8x8 vmull.p8 pieces:
@     q0 = d26 * d6,  q1 = d28 * (d6 ^ d7),  q2 = d27 * d7.
@   They are combined (Karatsuba), reduced back to 128 bits in q0, and then
@   either the next block is processed or q0 is written out as the new Xi.
@   Scratch: q8-q11 (d16-d23) in addition to q0-q3.
Lgmult_neon:
|
||||
@ First product: q0 = d26 * d6.
vext.8 d16, d26, d26, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d0, d6, d6, #1 @ B1
|
||||
vmull.p8 q0, d26, d0 @ E = A*B1
|
||||
vext.8 d18, d26, d26, #2 @ A2
|
||||
vmull.p8 q9, d18, d6 @ H = A2*B
|
||||
vext.8 d22, d6, d6, #2 @ B2
|
||||
vmull.p8 q11, d26, d22 @ G = A*B2
|
||||
vext.8 d20, d26, d26, #3 @ A3
|
||||
veor q8, q8, q0 @ L = E + F
|
||||
vmull.p8 q10, d20, d6 @ J = A3*B
|
||||
vext.8 d0, d6, d6, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q0, d26, d0 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d6, d6, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d26, d22 @ K = A*B4
|
||||
veor q10, q10, q0 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q0, d26, d6 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q0, q0, q8
|
||||
veor q0, q0, q10
|
||||
@ Second product: d6 becomes d6 ^ d7, then q1 = d28 * d6 (the middle term).
veor d6,d6,d7 @ Karatsuba pre-processing
|
||||
vext.8 d16, d28, d28, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d2, d6, d6, #1 @ B1
|
||||
vmull.p8 q1, d28, d2 @ E = A*B1
|
||||
vext.8 d18, d28, d28, #2 @ A2
|
||||
vmull.p8 q9, d18, d6 @ H = A2*B
|
||||
vext.8 d22, d6, d6, #2 @ B2
|
||||
vmull.p8 q11, d28, d22 @ G = A*B2
|
||||
vext.8 d20, d28, d28, #3 @ A3
|
||||
veor q8, q8, q1 @ L = E + F
|
||||
vmull.p8 q10, d20, d6 @ J = A3*B
|
||||
vext.8 d2, d6, d6, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q1, d28, d2 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d6, d6, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d28, d22 @ K = A*B4
|
||||
veor q10, q10, q1 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q1, d28, d6 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q1, q1, q8
|
||||
veor q1, q1, q10
|
||||
@ Third product: q2 = d27 * d7.
vext.8 d16, d27, d27, #1 @ A1
|
||||
vmull.p8 q8, d16, d7 @ F = A1*B
|
||||
vext.8 d4, d7, d7, #1 @ B1
|
||||
vmull.p8 q2, d27, d4 @ E = A*B1
|
||||
vext.8 d18, d27, d27, #2 @ A2
|
||||
vmull.p8 q9, d18, d7 @ H = A2*B
|
||||
vext.8 d22, d7, d7, #2 @ B2
|
||||
vmull.p8 q11, d27, d22 @ G = A*B2
|
||||
vext.8 d20, d27, d27, #3 @ A3
|
||||
veor q8, q8, q2 @ L = E + F
|
||||
vmull.p8 q10, d20, d7 @ J = A3*B
|
||||
vext.8 d4, d7, d7, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q2, d27, d4 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d7, d7, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d27, d22 @ K = A*B4
|
||||
veor q10, q10, q2 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q2, d27, d7 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q2, q2, q8
|
||||
veor q2, q2, q10
|
||||
@ Combine: the middle term q1 has both outer products removed and is then
@ folded across the boundary between q0 (d0,d1) and q2 (d4,d5).
veor q1,q1,q0 @ Karatsuba post-processing
|
||||
veor q1,q1,q2
|
||||
veor d1,d1,d2
|
||||
veor d4,d4,d3 @ Xh|Xl - 256-bit result
|
||||
|
||||
@ equivalent of reduction_avx from ghash-x86_64.pl
|
||||
vshl.i64 q9,q0,#57 @ 1st phase
|
||||
vshl.i64 q10,q0,#62
|
||||
veor q10,q10,q9 @
|
||||
vshl.i64 q9,q0,#63
|
||||
veor q10, q10, q9 @
|
||||
veor d1,d1,d20 @
|
||||
veor d4,d4,d21
|
||||
|
||||
vshr.u64 q10,q0,#1 @ 2nd phase
|
||||
veor q2,q2,q0
|
||||
veor q0,q0,q10 @
|
||||
vshr.u64 q10,q10,#6
|
||||
vshr.u64 q0,q0,#1 @
|
||||
veor q0,q0,q2 @
|
||||
veor q0,q0,q10 @
|
||||
|
||||
@ q0 now holds the reduced 128-bit result. Go around again while r3 is
@ nonzero; _gcm_gmult_neon set r3 = 16, so it always falls through here.
subs r3,#16
|
||||
bne Loop_neon
|
||||
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
@ Undo the two post-incremented 8-byte loads done at function entry.
sub r0,#16
|
||||
vst1.64 d1,[r0]! @ write out Xi
|
||||
vst1.64 d0,[r0]
|
||||
|
||||
bx lr @ bx lr
|
||||
|
||||
#endif
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
@ -0,0 +1,256 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
@ _gcm_init_v8: build the three-entry table used by the PMULL-based GHASH code.
@   In:  r1 = address of H (16 bytes read).
@   Out: 48 bytes stored starting at the address in r0:
@          entry 0 = q12, the twisted H,
@          entry 1 = q13, the packed Karatsuba operands for H and H^2,
@          entry 2 = q14, H^2.
@        r0 is left advanced by 16.
@   Touches q0-q3 and q8-q14 only; no stack use.
@ The .byte rows are hand-encoded polynomial multiplies; the mnemonic each one
@ stands for is given in the comment on the same line.
.globl _gcm_init_v8
|
||||
.private_extern _gcm_init_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_init_v8
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_init_v8:
|
||||
vld1.64 {q9},[r1] @ load input H
|
||||
vmov.i8 q11,#0xe1
|
||||
vshl.i64 q11,q11,#57 @ 0xc2.0
|
||||
@ q3 = H with its two 64-bit halves swapped.
vext.8 q3,q9,q9,#8
|
||||
vshr.u64 q10,q11,#63
|
||||
vdup.32 q9,d18[1]
|
||||
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
|
||||
vshr.u64 q10,q3,#63
|
||||
vshr.s32 q9,q9,#31 @ broadcast carry bit
|
||||
vand q10,q10,q8
|
||||
vshl.i64 q3,q3,#1
|
||||
vext.8 q10,q10,q10,#8
|
||||
vand q8,q8,q9
|
||||
vorr q3,q3,q10 @ H<<<=1
|
||||
veor q12,q3,q8 @ twisted H
|
||||
vst1.64 {q12},[r0]! @ store Htable[0]
|
||||
|
||||
@ calculate H^2
|
||||
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
|
||||
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
|
||||
veor q8,q8,q12
|
||||
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
|
||||
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q14,q0,q10
|
||||
|
||||
@ q14 = H^2. q8 still holds the folded halves of H from above; q9 gets the
@ same for H^2, and the two are packed into one register for the table.
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
|
||||
veor q9,q9,q14
|
||||
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
|
||||
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
|
||||
|
||||
bx lr
|
||||
|
||||
@ _gcm_gmult_v8: one multiply-and-reduce of Xi by the twisted H using the
@ hand-encoded polynomial multiply instructions.
@   In:  r0 = address of Xi (16 bytes read, then overwritten with the result),
@        r1 = address of the table; 32 bytes are read: q12 = twisted H and
@             q13 = the packed Karatsuba operands.
@   Touches q0-q3 and q9-q13 only; no stack use; r0 and r1 are not modified.
@ On builds where __ARMEB__ is not defined, Xi is byte-reversed per 64-bit
@ half after loading and again before storing.
.globl _gcm_gmult_v8
|
||||
.private_extern _gcm_gmult_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_v8
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_gmult_v8:
|
||||
vld1.64 {q9},[r0] @ load Xi
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q12,q13},[r1] @ load twisted H, ...
|
||||
@ q11 = reduction constant: 0xe1 bytes shifted left by 57 in each 64-bit lane.
vshl.u64 q11,q11,#57
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
@ q3 = Xi with its two 64-bit halves swapped.
vext.8 q3,q9,q9,#8
|
||||
|
||||
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
|
||||
veor q9,q9,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
|
||||
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
@ Swap the halves back into memory order before storing.
vext.8 q0,q0,q0,#8
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
bx lr
|
||||
|
||||
@ _gcm_ghash_v8: fold a run of 16-byte input blocks into Xi, two blocks per
@ loop iteration, using the hand-encoded polynomial multiply instructions.
@   In:  r0 = address of Xi (16 bytes read here, overwritten at Ldone_v8),
@        r1 = address of the table; 48 bytes are read: q12 = twisted H,
@             q13 = packed Karatsuba operands, q14 = H^2 (r1 advanced by 32),
@        r2 = input pointer,
@        r3 = length in bytes.
@   d8-d15 are pushed on entry and popped at Ldone_v8 because q4-q7 are used.
@   r12 is the post-increment applied to r2 by the pipelined loads.
@ The flags set by "subs r3,r3,#32" are consumed by the later moveq and blo;
@ nothing in between writes the flags.
@ NOTE(review): when r3 is below 32 the code goes straight to Lodd_tail_v8 and
@ processes the block already loaded, so a length of 0 would still consume
@ one block - presumably callers never pass 0; confirm.
.globl _gcm_ghash_v8
|
||||
.private_extern _gcm_ghash_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_v8
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_ghash_v8:
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
vld1.64 {q0},[r0] @ load [rotated] Xi
|
||||
@ "[rotated]" means that
|
||||
@ loaded value would have
|
||||
@ to be rotated in order to
|
||||
@ make it appear as in
|
||||
@ algorithm specification
|
||||
subs r3,r3,#32 @ see if r3 is 32 or larger
|
||||
mov r12,#16 @ r12 is used as post-
|
||||
@ increment for input pointer;
|
||||
@ as loop is modulo-scheduled
|
||||
@ r12 is zeroed just in time
|
||||
@ to preclude overstepping
|
||||
@ inp[len], which means that
|
||||
@ last block[s] are actually
|
||||
@ loaded twice, but last
|
||||
@ copy is not processed
|
||||
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q14},[r1]
|
||||
moveq r12,#0 @ is it time to zero r12?
|
||||
vext.8 q0,q0,q0,#8 @ rotate Xi
|
||||
vld1.64 {q8},[r2]! @ load [rotated] I[0]
|
||||
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q8,q8
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q3,q8,q8,#8 @ rotate I[0]
|
||||
blo Lodd_tail_v8 @ r3 was less than 32
|
||||
@ At least two blocks: prime the pipeline with the second block. Its
@ products with H (q4, q6) are started here and consumed in the loop.
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vext.8 q7,q9,q9,#8
|
||||
veor q3,q3,q0 @ I[i]^=Xi
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
b Loop_mod2x_v8
|
||||
|
||||
@ Loop_mod2x_v8: main loop of _gcm_ghash_v8, two 16-byte blocks per pass.
@   On entry: q3 = first block of the pair already XORed with Xi,
@             q4, q6 = partial products of the second block with H (q12),
@             q9 = Karatsuba-folded second block, q7 = second block rotated,
@             q11 = reduction constant, q12, q13, q14 = table entries,
@             r3 = bytes remaining minus 32, r12 = load post-increment.
@   Each pass multiplies q3 by H^2 (q14), adds the pending H products,
@   reduces, and meanwhile loads and pre-multiplies the next pair.
@   The flags from the subs at the top are used by movlo, moveq and the
@   closing bhs; nothing in between writes the flags.
.align 4
|
||||
Loop_mod2x_v8:
|
||||
vext.8 q10,q3,q3,#8
|
||||
subs r3,r3,#32 @ is there more data?
|
||||
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
|
||||
movlo r12,#0 @ is it time to zero r12?
|
||||
|
||||
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
|
||||
veor q10,q10,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
|
||||
veor q0,q0,q4 @ accumulate
|
||||
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
|
||||
|
||||
veor q2,q2,q6
|
||||
moveq r12,#0 @ is it time to zero r12?
|
||||
veor q1,q1,q5
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q8,q8
|
||||
#endif
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
vext.8 q7,q9,q9,#8
|
||||
vext.8 q3,q8,q8,#8
|
||||
veor q0,q1,q10
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q3,q3,q2 @ accumulate q3 early
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q3,q3,q10
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
veor q3,q3,q0
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
bhs Loop_mod2x_v8 @ there was at least 32 more bytes
|
||||
|
||||
@ Loop exit: the loop body already merged the reduced value into q3 for a
@ next pass that will not happen, so rebuild q0 (Xi), q3 (last loaded block)
@ and r3, then either finish or fall through to process one trailing block.
veor q2,q2,q10
|
||||
vext.8 q3,q8,q8,#8 @ re-construct q3
|
||||
adds r3,r3,#32 @ re-construct r3
|
||||
veor q0,q0,q2 @ re-construct q0
|
||||
beq Ldone_v8 @ is r3 zero?
|
||||
@ Lodd_tail_v8: process one final 16-byte block with a single multiply by H.
@   Reached by fall-through from the loop exit and directly from the function
@   entry when fewer than 32 bytes were supplied.
@   On entry: q0 = current Xi, q8 = the block as loaded, q3 = q8 rotated,
@             q11 = reduction constant, q12 = twisted H, q13 = packed
@             Karatsuba operands.
@   On exit:  q0 = reduced result; execution falls through into Ldone_v8.
Lodd_tail_v8:
|
||||
vext.8 q10,q0,q0,#8
|
||||
veor q3,q3,q0 @ inp^=Xi
|
||||
veor q9,q8,q10 @ q9 is rotated inp^Xi
|
||||
|
||||
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
|
||||
veor q9,q9,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
|
||||
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
@ Ldone_v8: common exit of _gcm_ghash_v8.
@   On entry: q0 = final Xi in the internal (rotated) form, r0 = address of Xi.
@   Byte-reverses q0 unless __ARMEB__ is defined, swaps its two 64-bit halves
@   back, stores the 16 bytes at r0, pops the d8-d15 saved at function entry
@   and returns.
Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q0,q0,q0,#8
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
bx lr
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
1518
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha1-armv4-large.S
Normal file
1518
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha1-armv4-large.S
Normal file
File diff suppressed because it is too large
Load Diff
2846
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha256-armv4.S
Normal file
2846
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha256-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
1899
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha512-armv4.S
Normal file
1899
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha512-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
1265
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
1265
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
376
contrib/boringssl-cmake/ios-arm/crypto/test/trampoline-armv4.S
Normal file
376
contrib/boringssl-cmake/ios-arm/crypto/test/trampoline-armv4.S
Normal file
@ -0,0 +1,376 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.syntax unified
|
||||
|
||||
|
||||
|
||||
|
||||
.text
|
||||
|
||||
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
@ the result of |func|. The |unwind| argument is unused.
|
||||
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
@ const uint32_t *argv, size_t argc,
|
||||
@ int unwind);
|
||||
|
||||
@ Entry registers, per the prototype comment above: r0 = func, r1 = state,
@ r2 = argv, r3 = argc. The |unwind| argument is on the stack and is never read.
.globl _abi_test_trampoline
|
||||
.private_extern _abi_test_trampoline
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
|
||||
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
|
||||
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
|
||||
@ Frame layout after this adjustment (offsets from sp):
@   [sp, #0..#27]  outgoing stack arguments for |func| (plus padding)
@   [sp, #28]      saved r0 = func
@   [sp, #32]      saved r1 = state
@   [sp, #36]      saved r2 = argv
@   [sp, #40]      saved r3 = argc
@   [sp, #44...]   saved r4-r11, lr, then d8-d15
sub sp, sp, #28
|
||||
|
||||
@ Every register in AAPCS is either non-volatile or a parameter (except
|
||||
@ r9 on iOS), so this code, by the actual call, loses all its scratch
|
||||
@ registers. First fill in stack parameters while there are registers
|
||||
@ to spare.
|
||||
@ NOTE(review): no upper bound on argc is checked here; the reserved area
@ only holds six stack arguments, so callers presumably pass argc <= 10.
cmp r3, #4
|
||||
bls Lstack_args_done
|
||||
mov r4, sp @ r4 is the output pointer.
|
||||
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
|
||||
add r2, r2, #16 @ Skip four arguments.
|
||||
Lstack_args_loop:
|
||||
@ Copy argv[4..argc-1] to the outgoing stack area, one word per iteration.
ldr r6, [r2], #4
|
||||
cmp r2, r5
|
||||
str r6, [r4], #4
|
||||
bne Lstack_args_loop
|
||||
|
||||
Lstack_args_done:
|
||||
@ Load registers from |r1|.
|
||||
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is volatile on iOS (see above), so it is not loaded from |state|.
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Load register parameters. This uses up our remaining registers, so we
|
||||
@ repurpose lr as scratch space.
|
||||
ldr r3, [sp, #40] @ Reload argc.
|
||||
ldr lr, [sp, #36] @ Load argv into lr.
|
||||
@ Dispatch on argc: enter the fall-through chain below at the highest
@ register argument present (argc >= 4 -> r3, 3 -> r2, 2 -> r1, 1 -> r0).
cmp r3, #3
|
||||
bhi Larg_r3
|
||||
beq Larg_r2
|
||||
cmp r3, #1
|
||||
bhi Larg_r1
|
||||
beq Larg_r0
|
||||
b Largs_done
|
||||
|
||||
Larg_r3:
|
||||
ldr r3, [lr, #12] @ argv[3]
|
||||
Larg_r2:
|
||||
ldr r2, [lr, #8] @ argv[2]
|
||||
Larg_r1:
|
||||
ldr r1, [lr, #4] @ argv[1]
|
||||
Larg_r0:
|
||||
ldr r0, [lr] @ argv[0]
|
||||
Largs_done:
|
||||
|
||||
@ With every other register in use, load the function pointer into lr
|
||||
@ and call the function.
|
||||
ldr lr, [sp, #28]
|
||||
blx lr
|
||||
|
||||
@ r1-r3 are free for use again. The trampoline only supports
|
||||
@ single-return functions. Pass r4-r11 to the caller.
|
||||
ldr r1, [sp, #32]
|
||||
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is volatile on iOS (see above), so it is not stored into |state|.
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Unwind the stack and restore registers.
|
||||
add sp, sp, #44 @ 44 = 28+16
|
||||
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
||||
@ r0 still holds the value returned by |func|.
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r0 overwrites r0 with zero and returns.
.globl _abi_test_clobber_r0
|
||||
.private_extern _abi_test_clobber_r0
|
||||
.align 4
|
||||
_abi_test_clobber_r0:
|
||||
mov r0, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r1 overwrites r1 with zero and returns.
.globl _abi_test_clobber_r1
|
||||
.private_extern _abi_test_clobber_r1
|
||||
.align 4
|
||||
_abi_test_clobber_r1:
|
||||
mov r1, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r2 overwrites r2 with zero and returns.
.globl _abi_test_clobber_r2
|
||||
.private_extern _abi_test_clobber_r2
|
||||
.align 4
|
||||
_abi_test_clobber_r2:
|
||||
mov r2, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r3 overwrites r3 with zero and returns.
.globl _abi_test_clobber_r3
|
||||
.private_extern _abi_test_clobber_r3
|
||||
.align 4
|
||||
_abi_test_clobber_r3:
|
||||
mov r3, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r4 overwrites r4 with zero and returns without restoring it.
.globl _abi_test_clobber_r4
|
||||
.private_extern _abi_test_clobber_r4
|
||||
.align 4
|
||||
_abi_test_clobber_r4:
|
||||
mov r4, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r5 overwrites r5 with zero and returns without restoring it.
.globl _abi_test_clobber_r5
|
||||
.private_extern _abi_test_clobber_r5
|
||||
.align 4
|
||||
_abi_test_clobber_r5:
|
||||
mov r5, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r6 overwrites r6 with zero and returns without restoring it.
.globl _abi_test_clobber_r6
|
||||
.private_extern _abi_test_clobber_r6
|
||||
.align 4
|
||||
_abi_test_clobber_r6:
|
||||
mov r6, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r7 overwrites r7 with zero and returns without restoring it.
.globl _abi_test_clobber_r7
|
||||
.private_extern _abi_test_clobber_r7
|
||||
.align 4
|
||||
_abi_test_clobber_r7:
|
||||
mov r7, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r8 overwrites r8 with zero and returns without restoring it.
.globl _abi_test_clobber_r8
|
||||
.private_extern _abi_test_clobber_r8
|
||||
.align 4
|
||||
_abi_test_clobber_r8:
|
||||
mov r8, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r9 overwrites r9 with zero and returns without restoring it.
.globl _abi_test_clobber_r9
|
||||
.private_extern _abi_test_clobber_r9
|
||||
.align 4
|
||||
_abi_test_clobber_r9:
|
||||
mov r9, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r10 overwrites r10 with zero and returns without restoring it.
.globl _abi_test_clobber_r10
|
||||
.private_extern _abi_test_clobber_r10
|
||||
.align 4
|
||||
_abi_test_clobber_r10:
|
||||
mov r10, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r11 overwrites r11 with zero and returns without restoring it.
.globl _abi_test_clobber_r11
|
||||
.private_extern _abi_test_clobber_r11
|
||||
.align 4
|
||||
_abi_test_clobber_r11:
|
||||
mov r11, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_r12 overwrites r12 with zero and returns.
.globl _abi_test_clobber_r12
|
||||
.private_extern _abi_test_clobber_r12
|
||||
.align 4
|
||||
_abi_test_clobber_r12:
|
||||
mov r12, #0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d0 zeroes d0 by writing both of its single-precision
@ halves (s0, s1). r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d0
|
||||
.private_extern _abi_test_clobber_d0
|
||||
.align 4
|
||||
_abi_test_clobber_d0:
|
||||
mov r0, #0
|
||||
vmov s0, r0
|
||||
vmov s1, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d1 zeroes d1 by writing both of its single-precision
@ halves (s2, s3). r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d1
|
||||
.private_extern _abi_test_clobber_d1
|
||||
.align 4
|
||||
_abi_test_clobber_d1:
|
||||
mov r0, #0
|
||||
vmov s2, r0
|
||||
vmov s3, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d2 zeroes d2 by writing both of its single-precision
@ halves (s4, s5). r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d2
|
||||
.private_extern _abi_test_clobber_d2
|
||||
.align 4
|
||||
_abi_test_clobber_d2:
|
||||
mov r0, #0
|
||||
vmov s4, r0
|
||||
vmov s5, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d3 zeroes d3 by writing both of its single-precision
@ halves (s6, s7). r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d3
|
||||
.private_extern _abi_test_clobber_d3
|
||||
.align 4
|
||||
_abi_test_clobber_d3:
|
||||
mov r0, #0
|
||||
vmov s6, r0
|
||||
vmov s7, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d4 zeroes d4 by writing both of its single-precision
@ halves (s8, s9). r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d4
|
||||
.private_extern _abi_test_clobber_d4
|
||||
.align 4
|
||||
_abi_test_clobber_d4:
|
||||
mov r0, #0
|
||||
vmov s8, r0
|
||||
vmov s9, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d5 zeroes d5 by writing both of its single-precision
@ halves (s10, s11). r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d5
|
||||
.private_extern _abi_test_clobber_d5
|
||||
.align 4
|
||||
_abi_test_clobber_d5:
|
||||
mov r0, #0
|
||||
vmov s10, r0
|
||||
vmov s11, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d6 zeroes d6 by writing both of its single-precision
@ halves (s12, s13). r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d6
|
||||
.private_extern _abi_test_clobber_d6
|
||||
.align 4
|
||||
_abi_test_clobber_d6:
|
||||
mov r0, #0
|
||||
vmov s12, r0
|
||||
vmov s13, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d7 zeroes d7 by writing both of its single-precision
@ halves (s14, s15). r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d7
|
||||
.private_extern _abi_test_clobber_d7
|
||||
.align 4
|
||||
_abi_test_clobber_d7:
|
||||
mov r0, #0
|
||||
vmov s14, r0
|
||||
vmov s15, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d8 zeroes d8 (halves s16, s17) and returns without
@ restoring it. r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d8
|
||||
.private_extern _abi_test_clobber_d8
|
||||
.align 4
|
||||
_abi_test_clobber_d8:
|
||||
mov r0, #0
|
||||
vmov s16, r0
|
||||
vmov s17, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d9 zeroes d9 (halves s18, s19) and returns without
@ restoring it. r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d9
|
||||
.private_extern _abi_test_clobber_d9
|
||||
.align 4
|
||||
_abi_test_clobber_d9:
|
||||
mov r0, #0
|
||||
vmov s18, r0
|
||||
vmov s19, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d10 zeroes d10 (halves s20, s21) and returns without
@ restoring it. r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d10
|
||||
.private_extern _abi_test_clobber_d10
|
||||
.align 4
|
||||
_abi_test_clobber_d10:
|
||||
mov r0, #0
|
||||
vmov s20, r0
|
||||
vmov s21, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d11 zeroes d11 (halves s22, s23) and returns without
@ restoring it. r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d11
|
||||
.private_extern _abi_test_clobber_d11
|
||||
.align 4
|
||||
_abi_test_clobber_d11:
|
||||
mov r0, #0
|
||||
vmov s22, r0
|
||||
vmov s23, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d12 zeroes d12 (halves s24, s25) and returns without
@ restoring it. r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d12
|
||||
.private_extern _abi_test_clobber_d12
|
||||
.align 4
|
||||
_abi_test_clobber_d12:
|
||||
mov r0, #0
|
||||
vmov s24, r0
|
||||
vmov s25, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d13 zeroes d13 (halves s26, s27) and returns without
@ restoring it. r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d13
|
||||
.private_extern _abi_test_clobber_d13
|
||||
.align 4
|
||||
_abi_test_clobber_d13:
|
||||
mov r0, #0
|
||||
vmov s26, r0
|
||||
vmov s27, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d14 zeroes d14 (halves s28, s29) and returns without
@ restoring it. r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d14
|
||||
.private_extern _abi_test_clobber_d14
|
||||
.align 4
|
||||
_abi_test_clobber_d14:
|
||||
mov r0, #0
|
||||
vmov s28, r0
|
||||
vmov s29, r0
|
||||
bx lr
|
||||
|
||||
|
||||
@ abi_test_clobber_d15 zeroes d15 (halves s30, s31) and returns without
@ restoring it. r0 is used as the zero source and is left as 0.
.globl _abi_test_clobber_d15
|
||||
.private_extern _abi_test_clobber_d15
|
||||
.align 4
|
||||
_abi_test_clobber_d15:
|
||||
mov r0, #0
|
||||
vmov s30, r0
|
||||
vmov s31, r0
|
||||
bx lr
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
1994
contrib/boringssl-cmake/linux-aarch64/crypto/chacha/chacha-armv8.S
Normal file
1994
contrib/boringssl-cmake/linux-aarch64/crypto/chacha/chacha-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,785 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
// Constants for the key-expansion code below. Three 16-byte rows:
//   row 0: starting round constant (0x01 in every 32-bit lane)
//   row 1: tbl byte-index mask used to rotate and splat a key word
//   row 2: round constant 0x1b, loaded once row 0 has been shifted left 8 times
.section .rodata
|
||||
.align 5
|
||||
.Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
// aes_hw_set_encrypt_key expands a user key into an AES encryption key schedule.
//
// In:   x0 = pointer to the user key bytes (read)
//       w1 = key length in bits; accepted values are 128, 192 and 256
//       x2 = pointer to the key schedule (written)
// Out:  x0 = 0 on success, -1 if x0 or x2 is NULL, -2 if w1 is rejected
//
// On success the round keys are stored from x2 upward and the round count
// (10, 12 or 14) is stored as a 32-bit word at byte offset 240 of the schedule.
// x2 is left pointing at that word and w12 holds the round count; both are
// relied upon by aes_hw_set_decrypt_key, which enters at .Lenc_key via bl.
// Uses x3, w12 and v0-v6 as scratch.
.globl aes_hw_set_encrypt_key
|
||||
.hidden aes_hw_set_encrypt_key
|
||||
.type aes_hw_set_encrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_encrypt_key:
|
||||
.Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
// x3 carries the pending return code until the arguments are validated.
mov x3,#-1
|
||||
cmp x0,#0
|
||||
b.eq .Lenc_key_abort
|
||||
cmp x2,#0
|
||||
b.eq .Lenc_key_abort
|
||||
mov x3,#-2
|
||||
cmp w1,#128
|
||||
b.lt .Lenc_key_abort
|
||||
cmp w1,#256
|
||||
b.gt .Lenc_key_abort
|
||||
// Reject anything that is not a multiple of 64 bits.
tst w1,#0x3f
|
||||
b.ne .Lenc_key_abort
|
||||
|
||||
adrp x3,.Lrcon
|
||||
add x3,x3,:lo12:.Lrcon
|
||||
// Flags from this compare are consumed by the b.lt/b.eq dispatch below;
// none of the instructions in between modify them.
cmp w1,#192
|
||||
|
||||
eor v0.16b,v0.16b,v0.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
mov w1,#8 // reuse w1
|
||||
ld1 {v1.4s,v2.4s},[x3],#32
|
||||
|
||||
b.lt .Loop128
|
||||
b.eq .L192
|
||||
b .L256
|
||||
|
||||
.align 4
|
||||
.Loop128:
|
||||
// 128-bit key: eight iterations, each storing the current round key (v3)
// and deriving the next one; v1 holds the round constant and is doubled
// every iteration.
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
b.ne .Loop128
|
||||
|
||||
// Switch to the third .Lrcon row (0x1b) for the last two expansion steps.
ld1 {v1.4s},[x3]
|
||||
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
st1 {v3.4s},[x2]
|
||||
// x2 is at offset 160 here; advance it to offset 240 (the rounds field).
add x2,x2,#0x50
|
||||
|
||||
mov w12,#10
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
.L192:
|
||||
// 192-bit key: the remaining 8 key bytes go into the low half of v4.
ld1 {v4.8b},[x0],#8
|
||||
movi v6.16b,#8 // borrow v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
sub v2.16b,v2.16b,v6.16b // adjust the mask
|
||||
|
||||
.Loop192:
|
||||
// Each of the eight iterations stores 24 bytes of schedule (8 + 16).
tbl v6.16b,{v4.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v4.8b},[x2],#8
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
|
||||
dup v5.4s,v3.s[3]
|
||||
eor v5.16b,v5.16b,v4.16b
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
ext v4.16b,v0.16b,v4.16b,#12
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
eor v4.16b,v4.16b,v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
b.ne .Loop192
|
||||
|
||||
mov w12,#12
|
||||
// x2 is at offset 208 here; advance it to offset 240 (the rounds field).
add x2,x2,#0x20
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
.L256:
|
||||
// 256-bit key: the second 16 key bytes go into v4; seven loop iterations.
ld1 {v4.16b},[x0]
|
||||
mov w1,#7
|
||||
mov w12,#14
|
||||
st1 {v3.4s},[x2],#16
|
||||
|
||||
.Loop256:
|
||||
tbl v6.16b,{v4.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v4.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
// Exit after the seventh iteration; x2 is then at offset 240.
b.eq .Ldone
|
||||
|
||||
dup v6.4s,v3.s[3] // just splat
|
||||
ext v5.16b,v0.16b,v4.16b,#12
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
|
||||
eor v4.16b,v4.16b,v6.16b
|
||||
b .Loop256
|
||||
|
||||
.Ldone:
|
||||
// Record the round count in the schedule and report success.
str w12,[x2]
|
||||
mov x3,#0
|
||||
|
||||
.Lenc_key_abort:
|
||||
mov x0,x3 // return value
|
||||
// Only x29 is reloaded; x30 was not modified by this routine.
ldr x29,[sp],#16
|
||||
ret
|
||||
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
|
||||
|
||||
// aes_hw_set_decrypt_key builds an AES decryption key schedule.
//
// Takes the same arguments as aes_hw_set_encrypt_key (x0 = user key,
// w1 = key bits, x2 = key schedule) and calls its body at .Lenc_key.
// If that returns non-zero, the code is returned unchanged in x0.
// Otherwise the round keys are reversed in place (first swapped with last,
// and so on inward) and every round key except the first and last is passed
// through aesimc. Returns 0 in x0 on success.
.globl aes_hw_set_decrypt_key
|
||||
.hidden aes_hw_set_decrypt_key
|
||||
.type aes_hw_set_decrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
bl .Lenc_key
|
||||
|
||||
cmp x0,#0
|
||||
b.ne .Ldec_key_abort
|
||||
|
||||
// On success .Lenc_key leaves x2 at schedule+240 and the round count in w12.
sub x2,x2,#240 // restore original x2
|
||||
mov x4,#-16
|
||||
add x0,x2,x12,lsl#4 // end of key schedule
|
||||
|
||||
// Swap the first and last round keys without transforming them.
ld1 {v0.4s},[x2]
|
||||
ld1 {v1.4s},[x0]
|
||||
st1 {v0.4s},[x0],x4
|
||||
st1 {v1.4s},[x2],#16
|
||||
|
||||
.Loop_imc:
|
||||
// Walk inward from both ends (x2 up, x0 down), swapping a pair of round
// keys and applying aesimc to each, until the pointers meet.
ld1 {v0.4s},[x2]
|
||||
ld1 {v1.4s},[x0]
|
||||
aesimc v0.16b,v0.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
st1 {v0.4s},[x0],x4
|
||||
st1 {v1.4s},[x2],#16
|
||||
cmp x0,x2
|
||||
b.hi .Loop_imc
|
||||
|
||||
// Middle round key: transform in place.
ld1 {v0.4s},[x2]
|
||||
aesimc v0.16b,v0.16b
|
||||
st1 {v0.4s},[x0]
|
||||
|
||||
eor x0,x0,x0 // return value
|
||||
.Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
|
||||
// aes_hw_encrypt encrypts one 16-byte block.
//
// In:  x0 = input block (16 bytes read)
//      x1 = output block (16 bytes written)
//      x2 = key schedule; the round count is read from byte offset 240
// Uses w3 and v0-v2 as scratch; x2 is advanced through the schedule.
.globl aes_hw_encrypt
|
||||
.hidden aes_hw_encrypt
|
||||
.type aes_hw_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
// The last two rounds are handled after the loop.
sub w3,w3,#2
|
||||
ld1 {v1.4s},[x2],#16
|
||||
|
||||
.Loop_enc:
|
||||
// Two rounds per iteration, alternating round keys between v0 and v1.
aese v2.16b,v0.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2],#16
|
||||
subs w3,w3,#2
|
||||
aese v2.16b,v1.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v1.4s},[x2],#16
|
||||
b.gt .Loop_enc
|
||||
|
||||
aese v2.16b,v0.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2]
|
||||
// Final round has no aesmc; the last round key is applied with eor.
aese v2.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
|
||||
st1 {v2.16b},[x1]
|
||||
ret
|
||||
.size aes_hw_encrypt,.-aes_hw_encrypt
|
||||
// aes_hw_decrypt decrypts one 16-byte block.
//
// In:  x0 = input block (16 bytes read)
//      x1 = output block (16 bytes written)
//      x2 = key schedule; the round count is read from byte offset 240
// Uses w3 and v0-v2 as scratch; x2 is advanced through the schedule.
// NOTE(review): presumably expects a schedule prepared by
// aes_hw_set_decrypt_key - confirm against callers.
.globl aes_hw_decrypt
|
||||
.hidden aes_hw_decrypt
|
||||
.type aes_hw_decrypt,%function
|
||||
.align 5
|
||||
aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
// The last two rounds are handled after the loop.
sub w3,w3,#2
|
||||
ld1 {v1.4s},[x2],#16
|
||||
|
||||
.Loop_dec:
|
||||
// Two rounds per iteration, alternating round keys between v0 and v1.
aesd v2.16b,v0.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2],#16
|
||||
subs w3,w3,#2
|
||||
aesd v2.16b,v1.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v1.4s},[x2],#16
|
||||
b.gt .Loop_dec
|
||||
|
||||
aesd v2.16b,v0.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2]
|
||||
// Final round has no aesimc; the last round key is applied with eor.
aesd v2.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
|
||||
st1 {v2.16b},[x1]
|
||||
ret
|
||||
.size aes_hw_decrypt,.-aes_hw_decrypt
|
||||
// aes_hw_cbc_encrypt performs AES-CBC encryption or decryption.
//
// In:  x0 = input pointer
//      x1 = output pointer
//      x2 = length in bytes; rounded down to a multiple of 16, and if it is
//           below 16 the function returns without reading or writing data
//      x3 = key schedule; the round count is read from byte offset 240
//      x4 = 16-byte IV, read on entry and overwritten with the chaining
//           value at .Lcbc_done
//      w5 = zero selects decryption, non-zero selects encryption
// Encryption processes one block at a time (separate paths for 10 rounds
// and for 12/14 rounds); decryption processes three blocks per iteration
// with a one- or two-block tail.
// Uses x6-x8, x12, x14, v0-v7 and v16-v23 as scratch.
.globl aes_hw_cbc_encrypt
|
||||
.hidden aes_hw_cbc_encrypt
|
||||
.type aes_hw_cbc_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
subs x2,x2,#16
|
||||
mov x8,#16
|
||||
b.lo .Lcbc_abort
|
||||
// x8 is the input post-increment: 0 when only one block remains, so the
// look-ahead load does not advance past the input.
csel x8,xzr,x8,eq
|
||||
|
||||
// Flags from this compare survive until "b.eq .Lcbc_dec" below.
cmp w5,#0 // en- or decrypting?
|
||||
ldr w5,[x3,#240]
|
||||
and x2,x2,#-16
|
||||
ld1 {v6.16b},[x4]
|
||||
ld1 {v0.16b},[x0],x8
|
||||
|
||||
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
|
||||
sub w5,w5,#6
|
||||
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
|
||||
sub w5,w5,#2
|
||||
ld1 {v18.4s,v19.4s},[x7],#32
|
||||
ld1 {v20.4s,v21.4s},[x7],#32
|
||||
ld1 {v22.4s,v23.4s},[x7],#32
|
||||
ld1 {v7.4s},[x7]
|
||||
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
b.eq .Lcbc_dec
|
||||
|
||||
// Encrypt path. w5 = rounds-8, so 2 means a 10-round (128-bit) schedule.
cmp w5,#2
|
||||
eor v0.16b,v0.16b,v6.16b
|
||||
eor v5.16b,v16.16b,v7.16b
|
||||
b.eq .Lcbc_enc128
|
||||
|
||||
ld1 {v2.4s,v3.4s},[x7]
|
||||
add x7,x3,#16
|
||||
add x6,x3,#16*4
|
||||
add x12,x3,#16*5
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
add x14,x3,#16*6
|
||||
add x3,x3,#16*7
|
||||
b .Lenter_cbc_enc
|
||||
|
||||
.align 4
|
||||
.Loop_cbc_enc:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
st1 {v6.16b},[x1],#16
|
||||
.Lenter_cbc_enc:
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v2.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.4s},[x6]
|
||||
// w5 == 4 means a 12-round (192-bit) schedule: skip the two extra rounds.
cmp w5,#4
|
||||
aese v0.16b,v3.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x12]
|
||||
b.eq .Lcbc_enc192
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.4s},[x14]
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x3]
|
||||
nop
|
||||
|
||||
.Lcbc_enc192:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
subs x2,x2,#16
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
csel x8,xzr,x8,eq
|
||||
aese v0.16b,v18.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v19.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.16b},[x0],x8
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
eor v16.16b,v16.16b,v5.16b
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v23.16b
|
||||
// v6 = ciphertext block just produced; it is stored on the next iteration
// or right after the loop.
eor v6.16b,v0.16b,v7.16b
|
||||
b.hs .Loop_cbc_enc
|
||||
|
||||
st1 {v6.16b},[x1],#16
|
||||
b .Lcbc_done
|
||||
|
||||
.align 5
|
||||
.Lcbc_enc128:
|
||||
// 10-round encrypt path: all round keys stay resident in registers.
ld1 {v2.4s,v3.4s},[x7]
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
b .Lenter_cbc_enc128
|
||||
.Loop_cbc_enc128:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
st1 {v6.16b},[x1],#16
|
||||
.Lenter_cbc_enc128:
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
subs x2,x2,#16
|
||||
aese v0.16b,v2.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
csel x8,xzr,x8,eq
|
||||
aese v0.16b,v3.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v18.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v19.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.16b},[x0],x8
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
eor v16.16b,v16.16b,v5.16b
|
||||
aese v0.16b,v23.16b
|
||||
eor v6.16b,v0.16b,v7.16b
|
||||
b.hs .Loop_cbc_enc128
|
||||
|
||||
st1 {v6.16b},[x1],#16
|
||||
b .Lcbc_done
|
||||
.align 5
|
||||
.Lcbc_dec:
|
||||
// Decrypt path: v0/v1/v18 hold up to three ciphertext blocks in flight,
// while v2/v3/v19 keep copies of the ciphertext for chaining.
ld1 {v18.16b},[x0],#16
|
||||
subs x2,x2,#32 // bias
|
||||
add w6,w5,#2
|
||||
orr v3.16b,v0.16b,v0.16b
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
orr v19.16b,v18.16b,v18.16b
|
||||
b.lo .Lcbc_dec_tail
|
||||
|
||||
orr v1.16b,v18.16b,v18.16b
|
||||
ld1 {v18.16b},[x0],#16
|
||||
orr v2.16b,v0.16b,v0.16b
|
||||
orr v3.16b,v1.16b,v1.16b
|
||||
orr v19.16b,v18.16b,v18.16b
|
||||
|
||||
.Loop3x_cbc_dec:
|
||||
aesd v0.16b,v16.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aesd v0.16b,v17.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Loop3x_cbc_dec
|
||||
|
||||
aesd v0.16b,v16.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v4.16b,v6.16b,v7.16b
|
||||
subs x2,x2,#0x30
|
||||
eor v5.16b,v2.16b,v7.16b
|
||||
csel x6,x2,x6,lo // x6, w6, is zero at this point
|
||||
aesd v0.16b,v17.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v17.16b,v3.16b,v7.16b
|
||||
add x0,x0,x6 // x0 is adjusted in such way that
|
||||
// at exit from the loop v1.16b-v18.16b
|
||||
// are loaded with last "words"
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
mov x7,x3
|
||||
aesd v0.16b,v20.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v20.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v20.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
aesd v0.16b,v21.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v21.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v21.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
aesd v0.16b,v22.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v22.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v22.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v19.16b},[x0],#16
|
||||
aesd v0.16b,v23.16b
|
||||
aesd v1.16b,v23.16b
|
||||
aesd v18.16b,v23.16b
|
||||
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
|
||||
add w6,w5,#2
|
||||
eor v4.16b,v4.16b,v0.16b
|
||||
eor v5.16b,v5.16b,v1.16b
|
||||
eor v18.16b,v18.16b,v17.16b
|
||||
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
|
||||
st1 {v4.16b},[x1],#16
|
||||
orr v0.16b,v2.16b,v2.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
orr v1.16b,v3.16b,v3.16b
|
||||
st1 {v18.16b},[x1],#16
|
||||
orr v18.16b,v19.16b,v19.16b
|
||||
b.hs .Loop3x_cbc_dec
|
||||
|
||||
// x2 == -0x30 here means no blocks are left over.
cmn x2,#0x30
|
||||
b.eq .Lcbc_done
|
||||
nop
|
||||
|
||||
.Lcbc_dec_tail:
|
||||
// Tail: one or two remaining blocks, carried in v1 and v18.
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Lcbc_dec_tail
|
||||
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
aesd v1.16b,v20.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v20.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
// x2 == -0x20 selects the single-block tail (.Lcbc_dec_one) below.
cmn x2,#0x20
|
||||
aesd v1.16b,v21.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v21.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v5.16b,v6.16b,v7.16b
|
||||
aesd v1.16b,v22.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v22.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v17.16b,v3.16b,v7.16b
|
||||
aesd v1.16b,v23.16b
|
||||
aesd v18.16b,v23.16b
|
||||
b.eq .Lcbc_dec_one
|
||||
eor v5.16b,v5.16b,v1.16b
|
||||
eor v17.16b,v17.16b,v18.16b
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
st1 {v17.16b},[x1],#16
|
||||
b .Lcbc_done
|
||||
|
||||
.Lcbc_dec_one:
|
||||
eor v5.16b,v5.16b,v18.16b
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
|
||||
.Lcbc_done:
|
||||
// Write the final chaining value back to the caller's IV buffer.
st1 {v6.16b},[x4]
|
||||
.Lcbc_abort:
|
||||
// Only x29 is reloaded; x30 was not modified by this routine.
ldr x29,[sp],#16
|
||||
ret
|
||||
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
|
||||
// aes_hw_ctr32_encrypt_blocks applies AES-CTR with a 32-bit counter.
//
// In:  x0 = input pointer
//      x1 = output pointer
//      x2 = number of 16-byte blocks
//      x3 = key schedule; the round count is read from byte offset 240
//      x4 = 16-byte counter block; the word at byte offset 12 is the counter
//           (byte-reversed into w8 unless __ARMEB__ is defined)
// Only the last 32-bit lane of the counter block is incremented, and the
// counter block at x4 is never written back.
// Three blocks are processed per main-loop iteration; one or two remaining
// blocks are handled at .Lctr32_tail.
// NOTE(review): x2 == 0 takes the tail path and still stores one block -
// presumably callers never pass zero; confirm.
// Uses x5-x10, x12, v0-v7 and v16-v23 as scratch.
.globl aes_hw_ctr32_encrypt_blocks
|
||||
.hidden aes_hw_ctr32_encrypt_blocks
|
||||
.type aes_hw_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
|
||||
ldr w8, [x4, #12]
|
||||
ld1 {v0.4s},[x4]
|
||||
|
||||
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
|
||||
sub w5,w5,#4
|
||||
mov x12,#16
|
||||
// Flags from this compare feed the csel and "b.ls .Lctr32_tail" below.
cmp x2,#2
|
||||
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
|
||||
sub w5,w5,#2
|
||||
ld1 {v20.4s,v21.4s},[x7],#32
|
||||
ld1 {v22.4s,v23.4s},[x7],#32
|
||||
ld1 {v7.4s},[x7]
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
// x12 = input stride for the tail: 0 when fewer than two blocks, so the
// second tail load re-reads the same block instead of running past the input.
csel x12,xzr,x12,lo
|
||||
#ifndef __ARMEB__
|
||||
rev w8, w8
|
||||
#endif
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
add w10, w8, #1
|
||||
orr v18.16b,v0.16b,v0.16b
|
||||
add w8, w8, #2
|
||||
orr v6.16b,v0.16b,v0.16b
|
||||
rev w10, w10
|
||||
mov v1.s[3],w10
|
||||
b.ls .Lctr32_tail
|
||||
rev w12, w8
|
||||
sub x2,x2,#3 // bias
|
||||
mov v18.s[3],w12
|
||||
b .Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
.Loop3x_ctr32:
|
||||
// v0, v1 and v18 hold three consecutive counter blocks.
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Loop3x_ctr32
|
||||
|
||||
// Move the in-flight state to v4/v5/v17 so that v0/v1/v18 can be reloaded
// with the next three counter blocks while the last rounds complete.
aese v0.16b,v16.16b
|
||||
aesmc v4.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v5.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
orr v0.16b,v6.16b,v6.16b
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
aese v4.16b,v17.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v17.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
ld1 {v19.16b},[x0],#16
|
||||
mov x7,x3
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v17.16b,v18.16b
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
add w9,w8,#1
|
||||
aese v4.16b,v20.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v20.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
eor v2.16b,v2.16b,v7.16b
|
||||
add w10,w8,#2
|
||||
aese v17.16b,v20.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
eor v3.16b,v3.16b,v7.16b
|
||||
add w8,w8,#3
|
||||
aese v4.16b,v21.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v21.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
eor v19.16b,v19.16b,v7.16b
|
||||
rev w9,w9
|
||||
aese v17.16b,v21.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v0.s[3], w9
|
||||
rev w10,w10
|
||||
aese v4.16b,v22.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v22.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
mov v1.s[3], w10
|
||||
rev w12,w8
|
||||
aese v17.16b,v22.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v18.s[3], w12
|
||||
subs x2,x2,#3
|
||||
aese v4.16b,v23.16b
|
||||
aese v5.16b,v23.16b
|
||||
aese v17.16b,v23.16b
|
||||
|
||||
eor v2.16b,v2.16b,v4.16b
|
||||
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
|
||||
st1 {v2.16b},[x1],#16
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
mov w6,w5
|
||||
st1 {v3.16b},[x1],#16
|
||||
eor v19.16b,v19.16b,v17.16b
|
||||
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
|
||||
st1 {v19.16b},[x1],#16
|
||||
b.hs .Loop3x_ctr32
|
||||
|
||||
// Undo the bias; x2 is now the number of leftover blocks (0, 1 or 2).
adds x2,x2,#3
|
||||
b.eq .Lctr32_done
|
||||
cmp x2,#1
|
||||
mov x12,#16
|
||||
csel x12,xzr,x12,eq
|
||||
|
||||
.Lctr32_tail:
|
||||
// Tail: encrypt two counter blocks (v0, v1); the second result is only
// stored when two blocks remain.
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Lctr32_tail
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],x12
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v20.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v3.16b},[x0]
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v21.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v7.16b
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v22.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v7.16b
|
||||
aese v0.16b,v23.16b
|
||||
aese v1.16b,v23.16b
|
||||
|
||||
cmp x2,#1
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
eor v3.16b,v3.16b,v1.16b
|
||||
st1 {v2.16b},[x1],#16
|
||||
b.eq .Lctr32_done
|
||||
st1 {v3.16b},[x1]
|
||||
|
||||
.Lctr32_done:
|
||||
// Only x29 is reloaded; x30 was not modified by this routine.
ldr x29,[sp],#16
|
||||
ret
|
||||
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
1436
contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/armv8-mont.S
Normal file
1436
contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/armv8-mont.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,346 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// gcm_init_neon: derive the "twisted H" GHASH key and store it.
//   x0 = destination; 16 bytes are written (Htable[0])
//   x1 = source H; 16 bytes are read
// Writes SIMD registers v3, v5 and v16-v19 only; no stack or general-purpose
// register is modified.
// NOTE(review): presumably called from C as gcm_init_neon(Htable, H) --
// confirm against the C prototype.
.globl gcm_init_neon
|
||||
.hidden gcm_init_neon
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
// Build the reduction constant: 0xe1 in every byte, shifted left by 57 in
// each 64-bit lane, leaves 0xc2 in the top byte of each lane.
movi v19.16b, #0xe1
|
||||
shl v19.2d, v19.2d, #57 // 0xc2.0
|
||||
ext v3.16b, v17.16b, v17.16b, #8
|
||||
ushr v18.2d, v19.2d, #63
|
||||
dup v17.4s, v17.s[1]
|
||||
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
|
||||
// v18 = top bit of each 64-bit lane of the swapped H; it is moved to the
// other lane below so the 1-bit shift carries across the lane boundary.
ushr v18.2d, v3.2d, #63
|
||||
sshr v17.4s, v17.4s, #31 // broadcast carry bit
|
||||
and v18.16b, v18.16b, v16.16b
|
||||
shl v3.2d, v3.2d, #1
|
||||
ext v18.16b, v18.16b, v18.16b, #8
|
||||
// Keep the constant only when the broadcast carry mask in v17 is all-ones.
and v16.16b, v16.16b, v17.16b
|
||||
orr v3.16b, v3.16b, v18.16b // H<<<=1
|
||||
eor v5.16b, v3.16b, v16.16b // twisted H
|
||||
st1 {v5.2d}, [x0] // store Htable[0]
|
||||
ret
|
||||
.size gcm_init_neon,.-gcm_init_neon
|
||||
|
||||
// gcm_gmult_neon: multiply the GHASH state Xi by the twisted H (one block).
//   x0 = Xi; 16 bytes are read here and rewritten by the shared tail of
//        gcm_ghash_neon
//   x1 = twisted H; two 8-byte halves are loaded into v5 and v6 (x1 is
//        advanced by 8)
// This routine only performs the setup. It sets x3 = 16 and branches to
// .Lgmult_neon inside gcm_ghash_neon, so the shared multiply/reduce body runs
// exactly once (its "subs x3, x3, #16" then reaches zero) and the result is
// stored and returned from there.
.globl gcm_gmult_neon
|
||||
.hidden gcm_gmult_neon
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, .Lmasks // load constants
|
||||
add x9, x9, :lo12:.Lmasks
|
||||
// v24 = {k48, k32}, v25 = {k16, k0} (the four quads at .Lmasks, in order).
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v3.16b, v3.16b // byteswap Xi
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
// Pretend there is exactly one 16-byte block so the shared loop exits after
// a single pass.
mov x3, #16
|
||||
b .Lgmult_neon
|
||||
.size gcm_gmult_neon,.-gcm_gmult_neon
|
||||
|
||||
// gcm_ghash_neon: fold a run of 16-byte blocks into the GHASH state Xi.
//   x0 = Xi; 16 bytes read on entry and rewritten on exit
//   x1 = twisted H; first 8 bytes -> v5, next 8 bytes -> v6 (x1 advanced by 8)
//   x2 = input pointer, advanced by 16 per block
//   x3 = remaining byte count, decremented by 16 per block
// The loop terminates only when x3 reaches exactly zero (subs/bne at the
// bottom), so x3 is expected to be a non-zero multiple of 16 on entry.
// Loop-invariant registers: v5/v6 = H halves, v7 = v5 ^ v6,
// v24 = {k48, k32}, v25 = {k16, k0}. v0 carries Xi between iterations.
// gcm_gmult_neon enters at .Lgmult_neon with v3 = byteswapped Xi and x3 = 16.
// Scratch registers written: x9, v0-v4, v16-v23.
.globl gcm_ghash_neon
|
||||
.hidden gcm_ghash_neon
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, .Lmasks // load constants
|
||||
add x9, x9, :lo12:.Lmasks
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v0.16b, v0.16b // byteswap Xi
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
.Loop_neon:
|
||||
ld1 {v3.16b}, [x2], #16 // load inp
|
||||
rev64 v3.16b, v3.16b // byteswap inp
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
|
||||
|
||||
.Lgmult_neon:
|
||||
// Split the input into v3 and v4. (The upper halves are unused,
|
||||
// so it is okay to leave them alone.)
|
||||
ins v4.d[0], v3.d[1]
|
||||
// Multiply 1 of 3: v5 (first H half) by the low half of v3; the product is
// accumulated in v0.
ext v16.8b, v5.8b, v5.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v0.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v0.8h, v5.8b, v0.8b // E = A*B1
|
||||
ext v17.8b, v5.8b, v5.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v5.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v5.8b, v5.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v0.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v0.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v0.8h, v5.8b, v0.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v0.16b // N = I + J
|
||||
pmull v19.8h, v5.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
// v20 = {t0.lo, t1.lo}, v21 = {t0.hi, t1.hi} (masked with v24 = {k48, k32});
// v22 = {t2.lo, t3.lo}, v23 = {t2.hi, t3.hi} (masked with v25 = {k16, k0}).
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v0.8h, v5.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v0.16b, v0.16b, v16.16b
|
||||
eor v0.16b, v0.16b, v18.16b
|
||||
// Multiply 2 of 3: v7 (= v5 ^ v6) by (v3 ^ v4); the product is accumulated
// in v1. Same instruction pattern as above with v7/v1 in place of v5/v0.
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
|
||||
ext v16.8b, v7.8b, v7.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v1.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v1.8h, v7.8b, v1.8b // E = A*B1
|
||||
ext v17.8b, v7.8b, v7.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v7.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v7.8b, v7.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v1.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v1.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v1.8h, v7.8b, v1.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v1.16b // N = I + J
|
||||
pmull v19.8h, v7.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v1.8h, v7.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v1.16b, v1.16b, v16.16b
|
||||
eor v1.16b, v1.16b, v18.16b
|
||||
// Multiply 3 of 3: v6 (second H half) by v4 (upper input half); the product
// is accumulated in v2. Same pattern again with v6/v4/v2.
ext v16.8b, v6.8b, v6.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v4.8b // F = A1*B
|
||||
ext v2.8b, v4.8b, v4.8b, #1 // B1
|
||||
pmull v2.8h, v6.8b, v2.8b // E = A*B1
|
||||
ext v17.8b, v6.8b, v6.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v4.8b // H = A2*B
|
||||
ext v19.8b, v4.8b, v4.8b, #2 // B2
|
||||
pmull v19.8h, v6.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v6.8b, v6.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v2.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v4.8b // J = A3*B
|
||||
ext v2.8b, v4.8b, v4.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v2.8h, v6.8b, v2.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v4.8b, v4.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v2.16b // N = I + J
|
||||
pmull v19.8h, v6.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v2.8h, v6.8b, v4.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v2.16b, v2.16b, v16.16b
|
||||
eor v2.16b, v2.16b, v18.16b
|
||||
// Combine the three partial products: v0 = Xl, v1 = Xm, v2 = Xh.
ext v16.16b, v0.16b, v2.16b, #8
|
||||
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
|
||||
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
|
||||
// This is a no-op due to the ins instruction below.
|
||||
// ins v2.d[0], v1.d[1]
|
||||
|
||||
// equivalent of reduction_avx from ghash-x86_64.pl
|
||||
shl v17.2d, v0.2d, #57 // 1st phase
|
||||
shl v18.2d, v0.2d, #62
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
shl v17.2d, v0.2d, #63
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
// Note Xm contains {Xl.d[1], Xh.d[0]}.
|
||||
eor v18.16b, v18.16b, v1.16b
|
||||
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
|
||||
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
|
||||
|
||||
ushr v18.2d, v0.2d, #1 // 2nd phase
|
||||
eor v2.16b, v2.16b,v0.16b
|
||||
eor v0.16b, v0.16b,v18.16b //
|
||||
ushr v18.2d, v18.2d, #6
|
||||
ushr v0.2d, v0.2d, #1 //
|
||||
eor v0.16b, v0.16b, v2.16b //
|
||||
eor v0.16b, v0.16b, v18.16b //
|
||||
|
||||
// One block consumed. Loop until the byte counter is exactly zero; v0 now
// holds the updated Xi for the next iteration.
subs x3, x3, #16
|
||||
bne .Loop_neon
|
||||
|
||||
rev64 v0.16b, v0.16b // byteswap Xi and write
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
st1 {v0.16b}, [x0]
|
||||
|
||||
ret
|
||||
.size gcm_ghash_neon,.-gcm_ghash_neon
|
||||
|
||||
// Read-only constants for the NEON GHASH code. The four quads are loaded as
// one 32-byte unit with "ld1 {v24.2d, v25.2d}", giving v24 = {k48, k32} and
// v25 = {k16, k0}; their order must therefore not change.
.section .rodata
|
||||
.align 4
|
||||
.Lmasks:
|
||||
.quad 0x0000ffffffffffff // k48
|
||||
.quad 0x00000000ffffffff // k32
|
||||
.quad 0x000000000000ffff // k16
|
||||
.quad 0x0000000000000000 // k0
|
||||
// NUL-terminated ASCII identification string:
// "GHASH for ARMv8, derived from ARMv4 version by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
@ -0,0 +1,252 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
// gcm_init_v8: build the GHASH key table used by gcm_gmult_v8/gcm_ghash_v8.
//   x0 = destination; 48 bytes are written in total:
//          Htable[0] = twisted H                          (v20)
//          Htable[1] = packed Karatsuba pre-processed halves (v21)
//          Htable[2] = H^2                                (v22)
//        x0 is advanced by 16 by the first store.
//   x1 = source H; 16 bytes are read
// Uses 64-bit polynomial multiply (pmull/pmull2 on .1q); writes v0-v3 and
// v16-v22 only.
// NOTE(review): presumably called from C as gcm_init_v8(Htable, H) --
// confirm against the C prototype.
.globl gcm_init_v8
|
||||
.hidden gcm_init_v8
|
||||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
ushr v18.2d,v19.2d,#63
|
||||
dup v17.4s,v17.s[1]
|
||||
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
|
||||
ushr v18.2d,v3.2d,#63
|
||||
sshr v17.4s,v17.4s,#31 //broadcast carry bit
|
||||
and v18.16b,v18.16b,v16.16b
|
||||
shl v3.2d,v3.2d,#1
|
||||
ext v18.16b,v18.16b,v18.16b,#8
|
||||
and v16.16b,v16.16b,v17.16b
|
||||
orr v3.16b,v3.16b,v18.16b //H<<<=1
|
||||
eor v20.16b,v3.16b,v16.16b //twisted H
|
||||
st1 {v20.2d},[x0],#16 //store Htable[0]
|
||||
|
||||
//calculate H^2
|
||||
// Square the twisted H with three carry-less multiplies (lo*lo, hi*hi and
// (lo^hi)*(lo^hi)), then reduce using the constant kept in v19.
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
|
||||
pmull v0.1q,v20.1d,v20.1d
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
pmull2 v2.1q,v20.2d,v20.2d
|
||||
pmull v1.1q,v16.1d,v16.1d
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v22.16b,v0.16b,v18.16b
|
||||
|
||||
// v16 still holds (H.lo^H.hi) from above; v17 gets the same for H^2. The
// ext below packs one 64-bit half of each into v21.
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
|
||||
|
||||
ret
|
||||
.size gcm_init_v8,.-gcm_init_v8
|
||||
// gcm_gmult_v8: multiply the GHASH state Xi by the twisted H (one block).
//   x0 = Xi; 16 bytes read, multiplied, reduced and written back in place
//   x1 = key table; 32 bytes are read into v20 (twisted H) and v21 (packed
//        Karatsuba value); x1 is not modified
// The rev64 byte swaps are skipped when __ARMEB__ is defined.
// Writes v0-v3 and v17-v21 only.
.globl gcm_gmult_v8
|
||||
.hidden gcm_gmult_v8
|
||||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
shl v19.2d,v19.2d,#57
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
.size gcm_gmult_v8,.-gcm_gmult_v8
|
||||
// gcm_ghash_v8: fold a run of 16-byte blocks into the GHASH state Xi.
//   x0 = Xi; 16 bytes read on entry and rewritten on exit
//   x1 = key table; 48 bytes are read: v20 = twisted H, v21 = packed
//        Karatsuba value, v22 = H^2 (x1 is advanced by 32)
//   x2 = input pointer
//   x3 = input length in bytes
// The main loop (.Loop_mod2x_v8) consumes two blocks per iteration using H^2
// and H; a single left-over block is handled at .Lodd_tail_v8. x12 is the
// post-increment applied to x2 and is zeroed near the end of the input (see
// the comment block below). The subs results at the top of the function and
// of the loop are consumed by later csel/b.lo/b.hs instructions, so no
// flag-setting instruction may be inserted between them.
// Writes x12, v0-v7 and v16-v22.
.globl gcm_ghash_v8
|
||||
.hidden gcm_ghash_v8
|
||||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
//to be rotated in order to
|
||||
//make it appear as in
|
||||
//algorithm specification
|
||||
subs x3,x3,#32 //see if x3 is 32 or larger
|
||||
mov x12,#16 //x12 is used as post-
|
||||
//increment for input pointer;
|
||||
//as loop is modulo-scheduled
|
||||
//x12 is zeroed just in time
|
||||
//to preclude overstepping
|
||||
//inp[len], which means that
|
||||
//last block[s] are actually
|
||||
//loaded twice, but last
|
||||
//copy is not processed
|
||||
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v22.2d},[x1]
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
|
||||
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
|
||||
b.lo .Lodd_tail_v8 //x3 was less than 32
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b .Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
.Loop_mod2x_v8:
|
||||
ext v18.16b,v3.16b,v3.16b,#8
|
||||
subs x3,x3,#32 //is there more data?
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
|
||||
csel x12,xzr,x12,lo //is it time to zero x12?
|
||||
|
||||
pmull v5.1q,v21.1d,v17.1d
|
||||
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
|
||||
eor v0.16b,v0.16b,v4.16b //accumulate
|
||||
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
|
||||
|
||||
eor v2.16b,v2.16b,v6.16b
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
eor v1.16b,v1.16b,v5.16b
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
#endif
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v3.16b,v3.16b,v18.16b
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
eor v3.16b,v3.16b,v0.16b
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
// Still uses the flags from the "subs x3,x3,#32" at the top of the loop.
b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
|
||||
|
||||
eor v2.16b,v2.16b,v18.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
|
||||
adds x3,x3,#32 //re-construct x3
|
||||
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
|
||||
b.eq .Ldone_v8 //is x3 zero?
|
||||
.Lodd_tail_v8:
|
||||
// Single remaining block: one multiplication by H (v20/v21) followed by the
// same two-phase reduction as above.
ext v18.16b,v0.16b,v0.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //inp^=Xi
|
||||
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
.Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
.size gcm_ghash_v8,.-gcm_ghash_v8
|
||||
// NUL-terminated ASCII identification string:
// "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
1239
contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
Normal file
1239
contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,761 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
// with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
// the result of |func|. The |unwind| argument is unused.
|
||||
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
// const uint64_t *argv, size_t argc,
|
||||
// uint64_t unwind);
|
||||
//
// Register use on entry: x0 = func, x1 = state, x2 = argv, x3 = argc.
// x9, x10 and x11 hold func, argv and the remaining argument count while
// x0-x7 are being filled. At most eight arguments are loaded (x0-x7); there
// is no count check after x7, so further argv entries are never read.
// |state| is read and written as 18 consecutive 8-byte slots (d8-d15, then
// x19-x28), followed by one slot that is zeroed only if x29 was not preserved
// by |func|.
.type abi_test_trampoline, %function
|
||||
.globl abi_test_trampoline
|
||||
.hidden abi_test_trampoline
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
.Labi_test_trampoline_begin:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
// Stack layout (low to high addresses)
|
||||
// x29,x30 (16 bytes)
|
||||
// d8-d15 (64 bytes)
|
||||
// x19-x28 (80 bytes)
|
||||
// x1 (8 bytes)
|
||||
// padding (8 bytes)
|
||||
// Total frame: 16 + 64 + 80 + 8 + 8 = 176 bytes.
stp x29, x30, [sp, #-176]!
|
||||
mov x29, sp
|
||||
|
||||
// Save callee-saved registers and |state|.
|
||||
stp d8, d9, [sp, #16]
|
||||
stp d10, d11, [sp, #32]
|
||||
stp d12, d13, [sp, #48]
|
||||
stp d14, d15, [sp, #64]
|
||||
stp x19, x20, [sp, #80]
|
||||
stp x21, x22, [sp, #96]
|
||||
stp x23, x24, [sp, #112]
|
||||
stp x25, x26, [sp, #128]
|
||||
stp x27, x28, [sp, #144]
|
||||
str x1, [sp, #160]
|
||||
|
||||
// Load registers from |state|, with the exception of x29. x29 is the
|
||||
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
|
||||
// mandate that x29 always point to a frame. iOS64 does so, which means
|
||||
// we cannot fill x29 with entropy without violating ABI rules
|
||||
// ourselves. x29 is tested separately below.
|
||||
ldp d8, d9, [x1], #16
|
||||
ldp d10, d11, [x1], #16
|
||||
ldp d12, d13, [x1], #16
|
||||
ldp d14, d15, [x1], #16
|
||||
ldp x19, x20, [x1], #16
|
||||
ldp x21, x22, [x1], #16
|
||||
ldp x23, x24, [x1], #16
|
||||
ldp x25, x26, [x1], #16
|
||||
ldp x27, x28, [x1], #16
|
||||
|
||||
// Move parameters into temporary registers.
|
||||
mov x9, x0
|
||||
mov x10, x2
|
||||
mov x11, x3
|
||||
|
||||
// Load parameters into registers.
|
||||
// x11 counts the arguments still to load; each step pulls the next 8-byte
// value from argv (x10) and stops as soon as the count reaches zero.
cbz x11, .Largs_done
|
||||
ldr x0, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x1, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x2, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x3, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x4, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x5, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x6, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x7, [x10], #8
|
||||
|
||||
.Largs_done:
|
||||
blr x9
|
||||
|
||||
// Reload |state| and store registers.
|
||||
// x0 is left untouched from here on, so |func|'s result is what is returned.
ldr x1, [sp, #160]
|
||||
stp d8, d9, [x1], #16
|
||||
stp d10, d11, [x1], #16
|
||||
stp d12, d13, [x1], #16
|
||||
stp d14, d15, [x1], #16
|
||||
stp x19, x20, [x1], #16
|
||||
stp x21, x22, [x1], #16
|
||||
stp x23, x24, [x1], #16
|
||||
stp x25, x26, [x1], #16
|
||||
stp x27, x28, [x1], #16
|
||||
|
||||
// |func| is required to preserve x29, the frame pointer. We cannot load
|
||||
// random values into x29 (see comment above), so compare it against the
|
||||
// expected value and zero the field of |state| if corrupted.
|
||||
// x29 was set equal to sp in the prologue and sp has not moved since.
mov x9, sp
|
||||
cmp x29, x9
|
||||
b.eq .Lx29_ok
|
||||
str xzr, [x1]
|
||||
|
||||
.Lx29_ok:
|
||||
// Restore callee-saved registers.
|
||||
ldp d8, d9, [sp, #16]
|
||||
ldp d10, d11, [sp, #32]
|
||||
ldp d12, d13, [sp, #48]
|
||||
ldp d14, d15, [sp, #64]
|
||||
ldp x19, x20, [sp, #80]
|
||||
ldp x21, x22, [sp, #96]
|
||||
ldp x23, x24, [sp, #112]
|
||||
ldp x25, x26, [sp, #128]
|
||||
ldp x27, x28, [sp, #144]
|
||||
|
||||
ldp x29, x30, [sp], #176
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size abi_test_trampoline,.-abi_test_trampoline
|
||||
// abi_test_clobber_x0: overwrite x0 with zero and return immediately.
// x0 is an argument/result register under AAPCS64, so the caller sees 0 as
// the return value. Nothing is saved or restored and no memory is touched.
        .globl  abi_test_clobber_x0
        .hidden abi_test_clobber_x0
        .type   abi_test_clobber_x0, %function
        .p2align 4
abi_test_clobber_x0:
        AARCH64_VALID_CALL_TARGET
        mov     x0, xzr
        ret
        .size   abi_test_clobber_x0, .-abi_test_clobber_x0
|
||||
// abi_test_clobber_x1: overwrite x1 with zero and return immediately.
// x1 is a caller-saved argument register under AAPCS64. Nothing is saved or
// restored and no memory is touched.
        .globl  abi_test_clobber_x1
        .hidden abi_test_clobber_x1
        .type   abi_test_clobber_x1, %function
        .p2align 4
abi_test_clobber_x1:
        AARCH64_VALID_CALL_TARGET
        mov     x1, xzr
        ret
        .size   abi_test_clobber_x1, .-abi_test_clobber_x1
|
||||
// abi_test_clobber_x2: overwrite x2 with zero and return immediately.
// x2 is a caller-saved argument register under AAPCS64. Nothing is saved or
// restored and no memory is touched.
        .globl  abi_test_clobber_x2
        .hidden abi_test_clobber_x2
        .type   abi_test_clobber_x2, %function
        .p2align 4
abi_test_clobber_x2:
        AARCH64_VALID_CALL_TARGET
        mov     x2, xzr
        ret
        .size   abi_test_clobber_x2, .-abi_test_clobber_x2
|
||||
// abi_test_clobber_x3: overwrite x3 with zero and return immediately.
// x3 is a caller-saved argument register under AAPCS64. Nothing is saved or
// restored and no memory is touched.
        .globl  abi_test_clobber_x3
        .hidden abi_test_clobber_x3
        .type   abi_test_clobber_x3, %function
        .p2align 4
abi_test_clobber_x3:
        AARCH64_VALID_CALL_TARGET
        mov     x3, xzr
        ret
        .size   abi_test_clobber_x3, .-abi_test_clobber_x3
|
||||
// abi_test_clobber_x4: overwrite x4 with zero and return immediately.
// x4 is a caller-saved argument register under AAPCS64. Nothing is saved or
// restored and no memory is touched.
        .globl  abi_test_clobber_x4
        .hidden abi_test_clobber_x4
        .type   abi_test_clobber_x4, %function
        .p2align 4
abi_test_clobber_x4:
        AARCH64_VALID_CALL_TARGET
        mov     x4, xzr
        ret
        .size   abi_test_clobber_x4, .-abi_test_clobber_x4
|
||||
// abi_test_clobber_x5: overwrite x5 with zero and return immediately.
// x5 is a caller-saved argument register under AAPCS64. Nothing is saved or
// restored and no memory is touched.
        .globl  abi_test_clobber_x5
        .hidden abi_test_clobber_x5
        .type   abi_test_clobber_x5, %function
        .p2align 4
abi_test_clobber_x5:
        AARCH64_VALID_CALL_TARGET
        mov     x5, xzr
        ret
        .size   abi_test_clobber_x5, .-abi_test_clobber_x5
|
||||
// abi_test_clobber_x6: overwrite x6 with zero and return immediately.
// x6 is a caller-saved argument register under AAPCS64. Nothing is saved or
// restored and no memory is touched.
        .globl  abi_test_clobber_x6
        .hidden abi_test_clobber_x6
        .type   abi_test_clobber_x6, %function
        .p2align 4
abi_test_clobber_x6:
        AARCH64_VALID_CALL_TARGET
        mov     x6, xzr
        ret
        .size   abi_test_clobber_x6, .-abi_test_clobber_x6
|
||||
// abi_test_clobber_x7: overwrite x7 with zero and return immediately.
// x7 is a caller-saved argument register under AAPCS64. Nothing is saved or
// restored and no memory is touched.
        .globl  abi_test_clobber_x7
        .hidden abi_test_clobber_x7
        .type   abi_test_clobber_x7, %function
        .p2align 4
abi_test_clobber_x7:
        AARCH64_VALID_CALL_TARGET
        mov     x7, xzr
        ret
        .size   abi_test_clobber_x7, .-abi_test_clobber_x7
|
||||
// abi_test_clobber_x8: overwrite x8 with zero and return immediately.
// x8 is caller-saved (volatile) under AAPCS64. Nothing is saved or restored
// and no memory is touched.
        .globl  abi_test_clobber_x8
        .hidden abi_test_clobber_x8
        .type   abi_test_clobber_x8, %function
        .p2align 4
abi_test_clobber_x8:
        AARCH64_VALID_CALL_TARGET
        mov     x8, xzr
        ret
        .size   abi_test_clobber_x8, .-abi_test_clobber_x8
|
||||
// abi_test_clobber_x9: overwrite x9 with zero and return immediately.
// x9 is caller-saved (volatile) under AAPCS64. Nothing is saved or restored
// and no memory is touched.
        .globl  abi_test_clobber_x9
        .hidden abi_test_clobber_x9
        .type   abi_test_clobber_x9, %function
        .p2align 4
abi_test_clobber_x9:
        AARCH64_VALID_CALL_TARGET
        mov     x9, xzr
        ret
        .size   abi_test_clobber_x9, .-abi_test_clobber_x9
|
||||
// abi_test_clobber_x10: overwrite x10 with zero and return immediately.
// x10 is caller-saved (volatile) under AAPCS64. Nothing is saved or restored
// and no memory is touched.
        .globl  abi_test_clobber_x10
        .hidden abi_test_clobber_x10
        .type   abi_test_clobber_x10, %function
        .p2align 4
abi_test_clobber_x10:
        AARCH64_VALID_CALL_TARGET
        mov     x10, xzr
        ret
        .size   abi_test_clobber_x10, .-abi_test_clobber_x10
|
||||
// abi_test_clobber_x11: overwrite x11 with zero and return immediately.
// x11 is caller-saved (volatile) under AAPCS64. Nothing is saved or restored
// and no memory is touched.
        .globl  abi_test_clobber_x11
        .hidden abi_test_clobber_x11
        .type   abi_test_clobber_x11, %function
        .p2align 4
abi_test_clobber_x11:
        AARCH64_VALID_CALL_TARGET
        mov     x11, xzr
        ret
        .size   abi_test_clobber_x11, .-abi_test_clobber_x11
|
||||
// abi_test_clobber_x12: overwrite x12 with zero and return immediately.
// x12 is caller-saved (volatile) under AAPCS64. Nothing is saved or restored
// and no memory is touched.
        .globl  abi_test_clobber_x12
        .hidden abi_test_clobber_x12
        .type   abi_test_clobber_x12, %function
        .p2align 4
abi_test_clobber_x12:
        AARCH64_VALID_CALL_TARGET
        mov     x12, xzr
        ret
        .size   abi_test_clobber_x12, .-abi_test_clobber_x12
|
||||
// abi_test_clobber_x13: overwrite x13 with zero and return immediately.
// x13 is caller-saved (volatile) under AAPCS64. Nothing is saved or restored
// and no memory is touched.
        .globl  abi_test_clobber_x13
        .hidden abi_test_clobber_x13
        .type   abi_test_clobber_x13, %function
        .p2align 4
abi_test_clobber_x13:
        AARCH64_VALID_CALL_TARGET
        mov     x13, xzr
        ret
        .size   abi_test_clobber_x13, .-abi_test_clobber_x13
|
||||
// abi_test_clobber_x14: overwrite x14 with zero and return immediately.
// x14 is caller-saved (volatile) under AAPCS64. Nothing is saved or restored
// and no memory is touched.
        .globl  abi_test_clobber_x14
        .hidden abi_test_clobber_x14
        .type   abi_test_clobber_x14, %function
        .p2align 4
abi_test_clobber_x14:
        AARCH64_VALID_CALL_TARGET
        mov     x14, xzr
        ret
        .size   abi_test_clobber_x14, .-abi_test_clobber_x14
|
||||
// abi_test_clobber_x15: overwrite x15 with zero and return immediately.
// x15 is caller-saved (volatile) under AAPCS64. Nothing is saved or restored
// and no memory is touched.
        .globl  abi_test_clobber_x15
        .hidden abi_test_clobber_x15
        .type   abi_test_clobber_x15, %function
        .p2align 4
abi_test_clobber_x15:
        AARCH64_VALID_CALL_TARGET
        mov     x15, xzr
        ret
        .size   abi_test_clobber_x15, .-abi_test_clobber_x15
|
||||
// abi_test_clobber_x16: overwrite x16 with zero and return immediately.
// x16 (IP0) is caller-saved under AAPCS64. Nothing is saved or restored and
// no memory is touched.
        .globl  abi_test_clobber_x16
        .hidden abi_test_clobber_x16
        .type   abi_test_clobber_x16, %function
        .p2align 4
abi_test_clobber_x16:
        AARCH64_VALID_CALL_TARGET
        mov     x16, xzr
        ret
        .size   abi_test_clobber_x16, .-abi_test_clobber_x16
|
||||
// abi_test_clobber_x17: overwrite x17 with zero and return immediately.
// x17 (IP1) is caller-saved under AAPCS64. Nothing is saved or restored and
// no memory is touched.
        .globl  abi_test_clobber_x17
        .hidden abi_test_clobber_x17
        .type   abi_test_clobber_x17, %function
        .p2align 4
abi_test_clobber_x17:
        AARCH64_VALID_CALL_TARGET
        mov     x17, xzr
        ret
        .size   abi_test_clobber_x17, .-abi_test_clobber_x17
|
||||
// abi_test_clobber_x19: overwrite x19 with zero and return immediately.
// x19 is callee-saved under AAPCS64 and is NOT restored here, so the caller's
// value is lost. NOTE(review): the name suggests this ABI violation is
// intentional, for use by the ABI test harness -- confirm with the caller.
        .globl  abi_test_clobber_x19
        .hidden abi_test_clobber_x19
        .type   abi_test_clobber_x19, %function
        .p2align 4
abi_test_clobber_x19:
        AARCH64_VALID_CALL_TARGET
        mov     x19, xzr
        ret
        .size   abi_test_clobber_x19, .-abi_test_clobber_x19
|
||||
// abi_test_clobber_x20: overwrite x20 with zero and return immediately.
// x20 is callee-saved under AAPCS64 and is NOT restored here, so the caller's
// value is lost. NOTE(review): the name suggests this ABI violation is
// intentional, for use by the ABI test harness -- confirm with the caller.
        .globl  abi_test_clobber_x20
        .hidden abi_test_clobber_x20
        .type   abi_test_clobber_x20, %function
        .p2align 4
abi_test_clobber_x20:
        AARCH64_VALID_CALL_TARGET
        mov     x20, xzr
        ret
        .size   abi_test_clobber_x20, .-abi_test_clobber_x20
|
||||
// abi_test_clobber_x21: overwrite x21 with zero and return immediately.
// x21 is callee-saved under AAPCS64 and is NOT restored here, so the caller's
// value is lost. NOTE(review): the name suggests this ABI violation is
// intentional, for use by the ABI test harness -- confirm with the caller.
        .globl  abi_test_clobber_x21
        .hidden abi_test_clobber_x21
        .type   abi_test_clobber_x21, %function
        .p2align 4
abi_test_clobber_x21:
        AARCH64_VALID_CALL_TARGET
        mov     x21, xzr
        ret
        .size   abi_test_clobber_x21, .-abi_test_clobber_x21
|
||||
// abi_test_clobber_x22: overwrite x22 with zero and return immediately.
// x22 is callee-saved under AAPCS64 and is NOT restored here, so the caller's
// value is lost. NOTE(review): the name suggests this ABI violation is
// intentional, for use by the ABI test harness -- confirm with the caller.
        .globl  abi_test_clobber_x22
        .hidden abi_test_clobber_x22
        .type   abi_test_clobber_x22, %function
        .p2align 4
abi_test_clobber_x22:
        AARCH64_VALID_CALL_TARGET
        mov     x22, xzr
        ret
        .size   abi_test_clobber_x22, .-abi_test_clobber_x22
|
||||
// abi_test_clobber_x23: writes zero to x23 and returns without restoring it.
// x23 is callee-saved under AAPCS64, so this deliberately breaks the calling
// convention (presumably so an ABI checker can be shown to detect it - confirm).
.type abi_test_clobber_x23, %function
|
||||
.globl abi_test_clobber_x23
|
||||
.hidden abi_test_clobber_x23
|
||||
.align 4
|
||||
abi_test_clobber_x23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x23,.-abi_test_clobber_x23
|
||||
// abi_test_clobber_x24: writes zero to x24 and returns without restoring it.
// x24 is callee-saved under AAPCS64, so this deliberately breaks the calling
// convention (presumably so an ABI checker can be shown to detect it - confirm).
.type abi_test_clobber_x24, %function
|
||||
.globl abi_test_clobber_x24
|
||||
.hidden abi_test_clobber_x24
|
||||
.align 4
|
||||
abi_test_clobber_x24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x24,.-abi_test_clobber_x24
|
||||
// abi_test_clobber_x25: writes zero to x25 and returns without restoring it.
// x25 is callee-saved under AAPCS64, so this deliberately breaks the calling
// convention (presumably so an ABI checker can be shown to detect it - confirm).
.type abi_test_clobber_x25, %function
|
||||
.globl abi_test_clobber_x25
|
||||
.hidden abi_test_clobber_x25
|
||||
.align 4
|
||||
abi_test_clobber_x25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x25,.-abi_test_clobber_x25
|
||||
// abi_test_clobber_x26: writes zero to x26 and returns without restoring it.
// x26 is callee-saved under AAPCS64, so this deliberately breaks the calling
// convention (presumably so an ABI checker can be shown to detect it - confirm).
.type abi_test_clobber_x26, %function
|
||||
.globl abi_test_clobber_x26
|
||||
.hidden abi_test_clobber_x26
|
||||
.align 4
|
||||
abi_test_clobber_x26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x26,.-abi_test_clobber_x26
|
||||
// abi_test_clobber_x27: writes zero to x27 and returns without restoring it.
// x27 is callee-saved under AAPCS64, so this deliberately breaks the calling
// convention (presumably so an ABI checker can be shown to detect it - confirm).
.type abi_test_clobber_x27, %function
|
||||
.globl abi_test_clobber_x27
|
||||
.hidden abi_test_clobber_x27
|
||||
.align 4
|
||||
abi_test_clobber_x27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x27,.-abi_test_clobber_x27
|
||||
// abi_test_clobber_x28: writes zero to x28 and returns without restoring it.
// x28 is callee-saved under AAPCS64, so this deliberately breaks the calling
// convention (presumably so an ABI checker can be shown to detect it - confirm).
.type abi_test_clobber_x28, %function
|
||||
.globl abi_test_clobber_x28
|
||||
.hidden abi_test_clobber_x28
|
||||
.align 4
|
||||
abi_test_clobber_x28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x28,.-abi_test_clobber_x28
|
||||
// abi_test_clobber_x29: writes zero to x29 and returns without restoring it.
// x29 is the frame pointer under AAPCS64, so the caller's frame pointer is lost
// (presumably so an ABI checker can be shown to detect it - confirm).
.type abi_test_clobber_x29, %function
|
||||
.globl abi_test_clobber_x29
|
||||
.hidden abi_test_clobber_x29
|
||||
.align 4
|
||||
abi_test_clobber_x29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x29,.-abi_test_clobber_x29
|
||||
// abi_test_clobber_d0: writes zero to d0 and returns.
// v0-v7 are caller-saved (argument/result registers) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d0, %function
|
||||
.globl abi_test_clobber_d0
|
||||
.hidden abi_test_clobber_d0
|
||||
.align 4
|
||||
abi_test_clobber_d0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d0, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d0,.-abi_test_clobber_d0
|
||||
// abi_test_clobber_d1: writes zero to d1 and returns.
// v0-v7 are caller-saved (argument/result registers) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d1, %function
|
||||
.globl abi_test_clobber_d1
|
||||
.hidden abi_test_clobber_d1
|
||||
.align 4
|
||||
abi_test_clobber_d1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d1, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d1,.-abi_test_clobber_d1
|
||||
// abi_test_clobber_d2: writes zero to d2 and returns.
// v0-v7 are caller-saved (argument/result registers) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d2, %function
|
||||
.globl abi_test_clobber_d2
|
||||
.hidden abi_test_clobber_d2
|
||||
.align 4
|
||||
abi_test_clobber_d2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d2, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d2,.-abi_test_clobber_d2
|
||||
// abi_test_clobber_d3: writes zero to d3 and returns.
// v0-v7 are caller-saved (argument/result registers) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d3, %function
|
||||
.globl abi_test_clobber_d3
|
||||
.hidden abi_test_clobber_d3
|
||||
.align 4
|
||||
abi_test_clobber_d3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d3, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d3,.-abi_test_clobber_d3
|
||||
// abi_test_clobber_d4: writes zero to d4 and returns.
// v0-v7 are caller-saved (argument/result registers) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d4, %function
|
||||
.globl abi_test_clobber_d4
|
||||
.hidden abi_test_clobber_d4
|
||||
.align 4
|
||||
abi_test_clobber_d4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d4, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d4,.-abi_test_clobber_d4
|
||||
// abi_test_clobber_d5: writes zero to d5 and returns.
// v0-v7 are caller-saved (argument/result registers) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d5, %function
|
||||
.globl abi_test_clobber_d5
|
||||
.hidden abi_test_clobber_d5
|
||||
.align 4
|
||||
abi_test_clobber_d5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d5, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d5,.-abi_test_clobber_d5
|
||||
// abi_test_clobber_d6: writes zero to d6 and returns.
// v0-v7 are caller-saved (argument/result registers) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d6, %function
|
||||
.globl abi_test_clobber_d6
|
||||
.hidden abi_test_clobber_d6
|
||||
.align 4
|
||||
abi_test_clobber_d6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d6, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d6,.-abi_test_clobber_d6
|
||||
// abi_test_clobber_d7: writes zero to d7 and returns.
// v0-v7 are caller-saved (argument/result registers) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d7, %function
|
||||
.globl abi_test_clobber_d7
|
||||
.hidden abi_test_clobber_d7
|
||||
.align 4
|
||||
abi_test_clobber_d7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d7, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d7,.-abi_test_clobber_d7
|
||||
// abi_test_clobber_d8: writes zero to d8 and returns without restoring it.
// The low 64 bits of v8-v15 are callee-saved under AAPCS64, so this deliberately
// breaks the calling convention (presumably for an ABI checker to detect - confirm).
.type abi_test_clobber_d8, %function
|
||||
.globl abi_test_clobber_d8
|
||||
.hidden abi_test_clobber_d8
|
||||
.align 4
|
||||
abi_test_clobber_d8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d8, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d8,.-abi_test_clobber_d8
|
||||
// abi_test_clobber_d9: writes zero to d9 and returns without restoring it.
// The low 64 bits of v8-v15 are callee-saved under AAPCS64, so this deliberately
// breaks the calling convention (presumably for an ABI checker to detect - confirm).
.type abi_test_clobber_d9, %function
|
||||
.globl abi_test_clobber_d9
|
||||
.hidden abi_test_clobber_d9
|
||||
.align 4
|
||||
abi_test_clobber_d9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d9, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d9,.-abi_test_clobber_d9
|
||||
// abi_test_clobber_d10: writes zero to d10 and returns without restoring it.
// The low 64 bits of v8-v15 are callee-saved under AAPCS64, so this deliberately
// breaks the calling convention (presumably for an ABI checker to detect - confirm).
.type abi_test_clobber_d10, %function
|
||||
.globl abi_test_clobber_d10
|
||||
.hidden abi_test_clobber_d10
|
||||
.align 4
|
||||
abi_test_clobber_d10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d10, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d10,.-abi_test_clobber_d10
|
||||
// abi_test_clobber_d11: writes zero to d11 and returns without restoring it.
// The low 64 bits of v8-v15 are callee-saved under AAPCS64, so this deliberately
// breaks the calling convention (presumably for an ABI checker to detect - confirm).
.type abi_test_clobber_d11, %function
|
||||
.globl abi_test_clobber_d11
|
||||
.hidden abi_test_clobber_d11
|
||||
.align 4
|
||||
abi_test_clobber_d11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d11, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d11,.-abi_test_clobber_d11
|
||||
// abi_test_clobber_d12: writes zero to d12 and returns without restoring it.
// The low 64 bits of v8-v15 are callee-saved under AAPCS64, so this deliberately
// breaks the calling convention (presumably for an ABI checker to detect - confirm).
.type abi_test_clobber_d12, %function
|
||||
.globl abi_test_clobber_d12
|
||||
.hidden abi_test_clobber_d12
|
||||
.align 4
|
||||
abi_test_clobber_d12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d12, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d12,.-abi_test_clobber_d12
|
||||
// abi_test_clobber_d13: writes zero to d13 and returns without restoring it.
// The low 64 bits of v8-v15 are callee-saved under AAPCS64, so this deliberately
// breaks the calling convention (presumably for an ABI checker to detect - confirm).
.type abi_test_clobber_d13, %function
|
||||
.globl abi_test_clobber_d13
|
||||
.hidden abi_test_clobber_d13
|
||||
.align 4
|
||||
abi_test_clobber_d13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d13, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d13,.-abi_test_clobber_d13
|
||||
// abi_test_clobber_d14: writes zero to d14 and returns without restoring it.
// The low 64 bits of v8-v15 are callee-saved under AAPCS64, so this deliberately
// breaks the calling convention (presumably for an ABI checker to detect - confirm).
.type abi_test_clobber_d14, %function
|
||||
.globl abi_test_clobber_d14
|
||||
.hidden abi_test_clobber_d14
|
||||
.align 4
|
||||
abi_test_clobber_d14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d14, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d14,.-abi_test_clobber_d14
|
||||
// abi_test_clobber_d15: writes zero to d15 and returns without restoring it.
// The low 64 bits of v8-v15 are callee-saved under AAPCS64, so this deliberately
// breaks the calling convention (presumably for an ABI checker to detect - confirm).
.type abi_test_clobber_d15, %function
|
||||
.globl abi_test_clobber_d15
|
||||
.hidden abi_test_clobber_d15
|
||||
.align 4
|
||||
abi_test_clobber_d15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d15, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
// abi_test_clobber_d16: writes zero to d16 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d16, %function
|
||||
.globl abi_test_clobber_d16
|
||||
.hidden abi_test_clobber_d16
|
||||
.align 4
|
||||
abi_test_clobber_d16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d16, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d16,.-abi_test_clobber_d16
|
||||
// abi_test_clobber_d17: writes zero to d17 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d17, %function
|
||||
.globl abi_test_clobber_d17
|
||||
.hidden abi_test_clobber_d17
|
||||
.align 4
|
||||
abi_test_clobber_d17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d17, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d17,.-abi_test_clobber_d17
|
||||
// abi_test_clobber_d18: writes zero to d18 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d18, %function
|
||||
.globl abi_test_clobber_d18
|
||||
.hidden abi_test_clobber_d18
|
||||
.align 4
|
||||
abi_test_clobber_d18:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d18, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d18,.-abi_test_clobber_d18
|
||||
// abi_test_clobber_d19: writes zero to d19 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d19, %function
|
||||
.globl abi_test_clobber_d19
|
||||
.hidden abi_test_clobber_d19
|
||||
.align 4
|
||||
abi_test_clobber_d19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d19, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d19,.-abi_test_clobber_d19
|
||||
// abi_test_clobber_d20: writes zero to d20 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d20, %function
|
||||
.globl abi_test_clobber_d20
|
||||
.hidden abi_test_clobber_d20
|
||||
.align 4
|
||||
abi_test_clobber_d20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d20, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d20,.-abi_test_clobber_d20
|
||||
// abi_test_clobber_d21: writes zero to d21 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d21, %function
|
||||
.globl abi_test_clobber_d21
|
||||
.hidden abi_test_clobber_d21
|
||||
.align 4
|
||||
abi_test_clobber_d21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d21, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d21,.-abi_test_clobber_d21
|
||||
// abi_test_clobber_d22: writes zero to d22 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d22, %function
|
||||
.globl abi_test_clobber_d22
|
||||
.hidden abi_test_clobber_d22
|
||||
.align 4
|
||||
abi_test_clobber_d22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d22, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d22,.-abi_test_clobber_d22
|
||||
// abi_test_clobber_d23: writes zero to d23 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d23, %function
|
||||
.globl abi_test_clobber_d23
|
||||
.hidden abi_test_clobber_d23
|
||||
.align 4
|
||||
abi_test_clobber_d23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d23,.-abi_test_clobber_d23
|
||||
// abi_test_clobber_d24: writes zero to d24 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d24, %function
|
||||
.globl abi_test_clobber_d24
|
||||
.hidden abi_test_clobber_d24
|
||||
.align 4
|
||||
abi_test_clobber_d24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d24,.-abi_test_clobber_d24
|
||||
// abi_test_clobber_d25: writes zero to d25 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d25, %function
|
||||
.globl abi_test_clobber_d25
|
||||
.hidden abi_test_clobber_d25
|
||||
.align 4
|
||||
abi_test_clobber_d25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d25,.-abi_test_clobber_d25
|
||||
// abi_test_clobber_d26: writes zero to d26 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d26, %function
|
||||
.globl abi_test_clobber_d26
|
||||
.hidden abi_test_clobber_d26
|
||||
.align 4
|
||||
abi_test_clobber_d26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d26,.-abi_test_clobber_d26
|
||||
// abi_test_clobber_d27: writes zero to d27 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d27, %function
|
||||
.globl abi_test_clobber_d27
|
||||
.hidden abi_test_clobber_d27
|
||||
.align 4
|
||||
abi_test_clobber_d27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d27,.-abi_test_clobber_d27
|
||||
// abi_test_clobber_d28: writes zero to d28 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d28, %function
|
||||
.globl abi_test_clobber_d28
|
||||
.hidden abi_test_clobber_d28
|
||||
.align 4
|
||||
abi_test_clobber_d28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d28,.-abi_test_clobber_d28
|
||||
// abi_test_clobber_d29: writes zero to d29 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d29, %function
|
||||
.globl abi_test_clobber_d29
|
||||
.hidden abi_test_clobber_d29
|
||||
.align 4
|
||||
abi_test_clobber_d29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d29,.-abi_test_clobber_d29
|
||||
// abi_test_clobber_d30: writes zero to d30 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d30, %function
|
||||
.globl abi_test_clobber_d30
|
||||
.hidden abi_test_clobber_d30
|
||||
.align 4
|
||||
abi_test_clobber_d30:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d30, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d30,.-abi_test_clobber_d30
|
||||
// abi_test_clobber_d31: writes zero to d31 and returns.
// v16-v31 are caller-saved (volatile) under AAPCS64, so this is a permitted clobber.
.type abi_test_clobber_d31, %function
|
||||
.globl abi_test_clobber_d31
|
||||
.hidden abi_test_clobber_d31
|
||||
.align 4
|
||||
abi_test_clobber_d31:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d31, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d31,.-abi_test_clobber_d31
|
||||
// abi_test_clobber_v8_upper: writes zero to the upper 64-bit lane (d[1]) of v8 and returns.
// Only the low 64 bits of v8-v15 are callee-saved under AAPCS64, so zeroing the
// upper half is a permitted clobber.
.type abi_test_clobber_v8_upper, %function
|
||||
.globl abi_test_clobber_v8_upper
|
||||
.hidden abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
abi_test_clobber_v8_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
|
||||
// abi_test_clobber_v9_upper: writes zero to the upper 64-bit lane (d[1]) of v9 and returns.
// Only the low 64 bits of v8-v15 are callee-saved under AAPCS64, so zeroing the
// upper half is a permitted clobber.
.type abi_test_clobber_v9_upper, %function
|
||||
.globl abi_test_clobber_v9_upper
|
||||
.hidden abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
abi_test_clobber_v9_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
|
||||
// abi_test_clobber_v10_upper: writes zero to the upper 64-bit lane (d[1]) of v10 and returns.
// Only the low 64 bits of v8-v15 are callee-saved under AAPCS64, so zeroing the
// upper half is a permitted clobber.
.type abi_test_clobber_v10_upper, %function
|
||||
.globl abi_test_clobber_v10_upper
|
||||
.hidden abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
abi_test_clobber_v10_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
|
||||
// abi_test_clobber_v11_upper: writes zero to the upper 64-bit lane (d[1]) of v11 and returns.
// Only the low 64 bits of v8-v15 are callee-saved under AAPCS64, so zeroing the
// upper half is a permitted clobber.
.type abi_test_clobber_v11_upper, %function
|
||||
.globl abi_test_clobber_v11_upper
|
||||
.hidden abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
abi_test_clobber_v11_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
|
||||
// abi_test_clobber_v12_upper: writes zero to the upper 64-bit lane (d[1]) of v12 and returns.
// Only the low 64 bits of v8-v15 are callee-saved under AAPCS64, so zeroing the
// upper half is a permitted clobber.
.type abi_test_clobber_v12_upper, %function
|
||||
.globl abi_test_clobber_v12_upper
|
||||
.hidden abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
abi_test_clobber_v12_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
|
||||
// abi_test_clobber_v13_upper: writes zero to the upper 64-bit lane (d[1]) of v13 and returns.
// Only the low 64 bits of v8-v15 are callee-saved under AAPCS64, so zeroing the
// upper half is a permitted clobber.
.type abi_test_clobber_v13_upper, %function
|
||||
.globl abi_test_clobber_v13_upper
|
||||
.hidden abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
abi_test_clobber_v13_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
|
||||
// abi_test_clobber_v14_upper: writes zero to the upper 64-bit lane (d[1]) of v14 and returns.
// Only the low 64 bits of v8-v15 are callee-saved under AAPCS64, so zeroing the
// upper half is a permitted clobber.
.type abi_test_clobber_v14_upper, %function
|
||||
.globl abi_test_clobber_v14_upper
|
||||
.hidden abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
abi_test_clobber_v14_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
|
||||
// abi_test_clobber_v15_upper: writes zero to the upper 64-bit lane (d[1]) of v15 and returns.
// Only the low 64 bits of v8-v15 are callee-saved under AAPCS64, so zeroing the
// upper half is a permitted clobber.
.type abi_test_clobber_v15_upper, %function
|
||||
.globl abi_test_clobber_v15_upper
|
||||
.hidden abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
abi_test_clobber_v15_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
1493
contrib/boringssl-cmake/linux-arm/crypto/chacha/chacha-armv4.S
Normal file
1493
contrib/boringssl-cmake/linux-arm/crypto/chacha/chacha-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,781 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
.arch armv7-a @ don't confuse not-so-latest binutils with armv8 :-)
|
||||
.fpu neon
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
.align 5
|
||||
@ .Lrcon: constant table read by the key-schedule code via "adr r3,.Lrcon".
@ Row 0 (0x01 splat) and row 1 (byte-permutation mask for vtbl) are loaded
@ together into q1/q2; row 2 (0x1b splat) is loaded into q1 later for the
@ final AES-128 rounds.
.Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
@ aes_hw_set_encrypt_key: expands a user key into an AES encryption key schedule.
@ AES instructions are emitted as raw .byte encodings; the mnemonic is in the
@ trailing comment of each such line.
@ In:   r0 = user key (must be non-NULL)
@       r1 = key length in bits (128, 192 or 256)
@       r2 = output key schedule (must be non-NULL)
@ Out:  r0 = 0 on success, -1 if r0 or r2 is NULL, -2 if the bit length is
@       below 128, above 256 or not a multiple of 64.
@ On success the round count (10/12/14) is stored at offset 240 of the
@ schedule, r2 is left pointing at that offset and r12 holds the round count;
@ aes_hw_set_decrypt_key enters at .Lenc_key and relies on both.
@ Writes r1, r3, r12, q0-q3 and q8-q10.
.globl aes_hw_set_encrypt_key
|
||||
.hidden aes_hw_set_encrypt_key
|
||||
.type aes_hw_set_encrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_encrypt_key:
|
||||
.Lenc_key:
|
||||
@ Argument validation: r3 carries the pending error code.
mov r3,#-1
|
||||
cmp r0,#0
|
||||
beq .Lenc_key_abort
|
||||
cmp r2,#0
|
||||
beq .Lenc_key_abort
|
||||
mov r3,#-2
|
||||
cmp r1,#128
|
||||
blt .Lenc_key_abort
|
||||
cmp r1,#256
|
||||
bgt .Lenc_key_abort
|
||||
tst r1,#0x3f
|
||||
bne .Lenc_key_abort
|
||||
|
||||
adr r3,.Lrcon
|
||||
@ The flags from this compare select the key size at the blt/beq below;
@ the instructions in between do not modify the flags.
cmp r1,#192
|
||||
|
||||
veor q0,q0,q0
|
||||
vld1.8 {q3},[r0]!
|
||||
mov r1,#8 @ reuse r1
|
||||
vld1.32 {q1,q2},[r3]!
|
||||
|
||||
blt .Loop128
|
||||
beq .L192
|
||||
b .L256
|
||||
|
||||
.align 4
|
||||
@ AES-128: eight loop iterations, then two unrolled rounds using the 0x1b row.
.Loop128:
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
bne .Loop128
|
||||
|
||||
vld1.32 {q1},[r3]
|
||||
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
veor q3,q3,q10
|
||||
vst1.32 {q3},[r2]
|
||||
@ r2 is at schedule+160 here; advance it to schedule+240 for the round count.
add r2,r2,#0x50
|
||||
|
||||
mov r12,#10
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
@ AES-192: loads the extra 8 key bytes into d16 and runs eight iterations.
.L192:
|
||||
vld1.8 {d16},[r0]!
|
||||
vmov.i8 q10,#8 @ borrow q10
|
||||
vst1.32 {q3},[r2]!
|
||||
vsub.i8 q2,q2,q10 @ adjust the mask
|
||||
|
||||
.Loop192:
|
||||
vtbl.8 d20,{q8},d4
|
||||
vtbl.8 d21,{q8},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {d16},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
|
||||
vdup.32 q9,d7[1]
|
||||
veor q9,q9,q8
|
||||
veor q10,q10,q1
|
||||
vext.8 q8,q0,q8,#12
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q8,q8,q9
|
||||
veor q3,q3,q10
|
||||
veor q8,q8,q10
|
||||
vst1.32 {q3},[r2]!
|
||||
bne .Loop192
|
||||
|
||||
mov r12,#12
|
||||
@ r2 is at schedule+208 here; advance it to schedule+240 for the round count.
add r2,r2,#0x20
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
@ AES-256: loads the second 16 key bytes into q8 and runs seven iterations;
@ the loop exits through the beq in its middle with r2 at schedule+240.
.L256:
|
||||
vld1.8 {q8},[r0]
|
||||
mov r1,#7
|
||||
mov r12,#14
|
||||
vst1.32 {q3},[r2]!
|
||||
|
||||
.Loop256:
|
||||
vtbl.8 d20,{q8},d4
|
||||
vtbl.8 d21,{q8},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q8},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
vst1.32 {q3},[r2]!
|
||||
beq .Ldone
|
||||
|
||||
vdup.32 q10,d7[1]
|
||||
vext.8 q9,q0,q8,#12
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q8,q8,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q8,q8,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q8,q8,q9
|
||||
|
||||
veor q8,q8,q10
|
||||
b .Loop256
|
||||
|
||||
@ Common exit: store the round count at schedule+240 and report success.
.Ldone:
|
||||
str r12,[r2]
|
||||
mov r3,#0
|
||||
|
||||
.Lenc_key_abort:
|
||||
mov r0,r3 @ return value
|
||||
|
||||
bx lr
|
||||
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
|
||||
|
||||
@ aes_hw_set_decrypt_key: builds a decryption key schedule.
@ It first branches to .Lenc_key (the body of aes_hw_set_encrypt_key) with the
@ same r0/r1/r2 arguments; a non-zero result is returned unchanged. Otherwise
@ the round keys are swapped end-for-end in place, with aesimc applied to every
@ key except the first and last, and 0 is returned in r0.
@ Saves r4 and lr on the stack; returns by popping into pc.
.globl aes_hw_set_decrypt_key
|
||||
.hidden aes_hw_set_decrypt_key
|
||||
.type aes_hw_set_decrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_decrypt_key:
|
||||
stmdb sp!,{r4,lr}
|
||||
bl .Lenc_key
|
||||
|
||||
cmp r0,#0
|
||||
bne .Ldec_key_abort
|
||||
|
||||
@ .Lenc_key leaves r2 at schedule+240 and the round count in r12.
sub r2,r2,#240 @ restore original r2
|
||||
mov r4,#-16
|
||||
add r0,r2,r12,lsl#4 @ end of key schedule
|
||||
|
||||
@ Swap the first and last round keys without transforming them.
vld1.32 {q0},[r2]
|
||||
vld1.32 {q1},[r0]
|
||||
vst1.32 {q0},[r0],r4
|
||||
vst1.32 {q1},[r2]!
|
||||
|
||||
@ Walk inward from both ends (r2 up, r0 down), swapping and applying aesimc.
.Loop_imc:
|
||||
vld1.32 {q0},[r2]
|
||||
vld1.32 {q1},[r0]
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
vst1.32 {q0},[r0],r4
|
||||
vst1.32 {q1},[r2]!
|
||||
cmp r0,r2
|
||||
bhi .Loop_imc
|
||||
|
||||
@ Middle round key: transformed in place.
vld1.32 {q0},[r2]
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
vst1.32 {q0},[r0]
|
||||
|
||||
eor r0,r0,r0 @ return value
|
||||
.Ldec_key_abort:
|
||||
ldmia sp!,{r4,pc}
|
||||
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
|
||||
@ aes_hw_encrypt: encrypts one 16-byte block.
@ In:   r0 = input block, r1 = output block, r2 = key schedule
@       (round count is read from [r2,#240]).
@ Writes r2, r3 and q0-q2; no stack usage; returns with bx lr.
.globl aes_hw_encrypt
|
||||
.hidden aes_hw_encrypt
|
||||
.type aes_hw_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_encrypt:
|
||||
ldr r3,[r2,#240]
|
||||
vld1.32 {q0},[r2]!
|
||||
vld1.8 {q2},[r0]
|
||||
sub r3,r3,#2
|
||||
vld1.32 {q1},[r2]!
|
||||
|
||||
@ Two rounds per iteration; q0/q1 alternate as the current round key.
.Loop_enc:
|
||||
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q0},[r2]!
|
||||
subs r3,r3,#2
|
||||
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q1},[r2]!
|
||||
bgt .Loop_enc
|
||||
|
||||
@ Last two rounds: the final round has no aesmc and ends with an XOR of the
@ last round key.
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
|
||||
veor q2,q2,q0
|
||||
|
||||
vst1.8 {q2},[r1]
|
||||
bx lr
|
||||
.size aes_hw_encrypt,.-aes_hw_encrypt
|
||||
@ aes_hw_decrypt: decrypts one 16-byte block.
@ In:   r0 = input block, r1 = output block, r2 = key schedule
@       (round count is read from [r2,#240]); presumably a schedule produced
@       by aes_hw_set_decrypt_key - confirm against callers.
@ Writes r2, r3 and q0-q2; no stack usage; returns with bx lr.
.globl aes_hw_decrypt
|
||||
.hidden aes_hw_decrypt
|
||||
.type aes_hw_decrypt,%function
|
||||
.align 5
|
||||
aes_hw_decrypt:
|
||||
ldr r3,[r2,#240]
|
||||
vld1.32 {q0},[r2]!
|
||||
vld1.8 {q2},[r0]
|
||||
sub r3,r3,#2
|
||||
vld1.32 {q1},[r2]!
|
||||
|
||||
@ Two rounds per iteration; q0/q1 alternate as the current round key.
.Loop_dec:
|
||||
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q0},[r2]!
|
||||
subs r3,r3,#2
|
||||
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q1},[r2]!
|
||||
bgt .Loop_dec
|
||||
|
||||
@ Last two rounds: the final round has no aesimc and ends with an XOR of the
@ last round key.
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
|
||||
veor q2,q2,q0
|
||||
|
||||
vst1.8 {q2},[r1]
|
||||
bx lr
|
||||
.size aes_hw_decrypt,.-aes_hw_decrypt
|
||||
@ aes_hw_cbc_encrypt: AES-CBC encryption or decryption of a buffer.
@ In:   r0 = input, r1 = output, r2 = length in bytes,
@       r3 = key schedule (round count is read from [r3,#240]),
@       [sp] = IV pointer (loaded into r4),
@       [sp,#4] = direction flag (loaded into r5; zero selects decryption).
@ A length below 16 returns at once without writing the output or the IV;
@ otherwise the length is rounded down to a multiple of 16.
@ On completion the next IV (q6) is written back through r4.
@ Saves and restores r4-r8, lr and d8-d15; returns by popping into pc.
.globl aes_hw_cbc_encrypt
|
||||
.hidden aes_hw_cbc_encrypt
|
||||
.type aes_hw_cbc_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_cbc_encrypt:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,lr}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load remaining args
|
||||
@ r8 is the input post-increment: 16, or 0 when exactly one block is left
@ so the pointer does not advance past the end of the input.
subs r2,r2,#16
|
||||
mov r8,#16
|
||||
blo .Lcbc_abort
|
||||
moveq r8,#0
|
||||
|
||||
@ The flags set here are consumed by "beq .Lcbc_dec" below; none of the
@ instructions in between modifies the flags.
cmp r5,#0 @ en- or decrypting?
|
||||
ldr r5,[r3,#240]
|
||||
and r2,r2,#-16
|
||||
vld1.8 {q6},[r4]
|
||||
vld1.8 {q0},[r0],r8
|
||||
|
||||
vld1.32 {q8,q9},[r3] @ load key schedule...
|
||||
sub r5,r5,#6
|
||||
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
|
||||
sub r5,r5,#2
|
||||
vld1.32 {q10,q11},[r7]!
|
||||
vld1.32 {q12,q13},[r7]!
|
||||
vld1.32 {q14,q15},[r7]!
|
||||
vld1.32 {q7},[r7]
|
||||
|
||||
add r7,r3,#32
|
||||
mov r6,r5
|
||||
beq .Lcbc_dec
|
||||
|
||||
@ Encryption path. r5 = rounds-8, so r5 == 2 identifies a 10-round (AES-128) key.
cmp r5,#2
|
||||
veor q0,q0,q6
|
||||
veor q5,q8,q7
|
||||
beq .Lcbc_enc128
|
||||
|
||||
vld1.32 {q2,q3},[r7]
|
||||
add r7,r3,#16
|
||||
add r6,r3,#16*4
|
||||
add r12,r3,#16*5
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
add r14,r3,#16*6
|
||||
add r3,r3,#16*7
|
||||
b .Lenter_cbc_enc
|
||||
|
||||
.align 4
|
||||
.Loop_cbc_enc:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vst1.8 {q6},[r1]!
|
||||
.Lenter_cbc_enc:
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q8},[r6]
|
||||
cmp r5,#4
|
||||
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r12]
|
||||
beq .Lcbc_enc192
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q8},[r14]
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r3]
|
||||
nop
|
||||
|
||||
.Lcbc_enc192:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
subs r2,r2,#16
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
moveq r8,#0
|
||||
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.8 {q8},[r0],r8
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
veor q8,q8,q5
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
veor q6,q0,q7
|
||||
bhs .Loop_cbc_enc
|
||||
|
||||
vst1.8 {q6},[r1]!
|
||||
b .Lcbc_done
|
||||
|
||||
.align 5
|
||||
@ Encryption with a 10-round key: all round keys stay resident in registers.
.Lcbc_enc128:
|
||||
vld1.32 {q2,q3},[r7]
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
b .Lenter_cbc_enc128
|
||||
.Loop_cbc_enc128:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vst1.8 {q6},[r1]!
|
||||
.Lenter_cbc_enc128:
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
subs r2,r2,#16
|
||||
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
moveq r8,#0
|
||||
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.8 {q8},[r0],r8
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
veor q8,q8,q5
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
veor q6,q0,q7
|
||||
bhs .Loop_cbc_enc128
|
||||
|
||||
vst1.8 {q6},[r1]!
|
||||
b .Lcbc_done
|
||||
.align 5
|
||||
@ Decryption path: three blocks per iteration in q0/q1/q10 while at least
@ three remain, then a one- or two-block tail.
.Lcbc_dec:
|
||||
vld1.8 {q10},[r0]!
|
||||
subs r2,r2,#32 @ bias
|
||||
add r6,r5,#2
|
||||
vorr q3,q0,q0
|
||||
vorr q1,q0,q0
|
||||
vorr q11,q10,q10
|
||||
blo .Lcbc_dec_tail
|
||||
|
||||
vorr q1,q10,q10
|
||||
vld1.8 {q10},[r0]!
|
||||
vorr q2,q0,q0
|
||||
vorr q3,q1,q1
|
||||
vorr q11,q10,q10
|
||||
|
||||
.Loop3x_cbc_dec:
|
||||
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt .Loop3x_cbc_dec
|
||||
|
||||
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q4,q6,q7
|
||||
subs r2,r2,#0x30
|
||||
veor q5,q2,q7
|
||||
movlo r6,r2 @ r6 is zero at this point
|
||||
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q9,q3,q7
|
||||
add r0,r0,r6 @ r0 is adjusted in such way that
|
||||
@ at exit from the loop q1-q10
|
||||
@ are loaded with last "words"
|
||||
vorr q6,q11,q11
|
||||
mov r7,r3
|
||||
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q2},[r0]!
|
||||
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q3},[r0]!
|
||||
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q11},[r0]!
|
||||
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
|
||||
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
|
||||
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
|
||||
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
|
||||
add r6,r5,#2
|
||||
veor q4,q4,q0
|
||||
veor q5,q5,q1
|
||||
veor q10,q10,q9
|
||||
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
|
||||
vst1.8 {q4},[r1]!
|
||||
vorr q0,q2,q2
|
||||
vst1.8 {q5},[r1]!
|
||||
vorr q1,q3,q3
|
||||
vst1.8 {q10},[r1]!
|
||||
vorr q10,q11,q11
|
||||
bhs .Loop3x_cbc_dec
|
||||
|
||||
@ r2 == -0x30 here means no blocks are left over.
cmn r2,#0x30
|
||||
beq .Lcbc_done
|
||||
nop
|
||||
|
||||
.Lcbc_dec_tail:
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt .Lcbc_dec_tail
|
||||
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
@ The flags set here are consumed by "beq .Lcbc_dec_one" below.
cmn r2,#0x20
|
||||
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q5,q6,q7
|
||||
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q9,q3,q7
|
||||
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
|
||||
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
|
||||
beq .Lcbc_dec_one
|
||||
veor q5,q5,q1
|
||||
veor q9,q9,q10
|
||||
vorr q6,q11,q11
|
||||
vst1.8 {q5},[r1]!
|
||||
vst1.8 {q9},[r1]!
|
||||
b .Lcbc_done
|
||||
|
||||
.Lcbc_dec_one:
|
||||
veor q5,q5,q10
|
||||
vorr q6,q11,q11
|
||||
vst1.8 {q5},[r1]!
|
||||
|
||||
@ Write the next IV back to the caller, then restore saved registers.
.Lcbc_done:
|
||||
vst1.8 {q6},[r4]
|
||||
.Lcbc_abort:
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,pc}
|
||||
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
|
||||
@ aes_hw_ctr32_encrypt_blocks
@ AES counter-mode routine built on the ARMv8 AES instructions (emitted as
@ raw .byte sequences). Register roles as used by the code below:
@   r0 = input pointer  (read 16 bytes at a time with vld1.8)
@   r1 = output pointer (written 16 bytes at a time with vst1.8)
@   r2 = number of 16-byte blocks still to process
@   r3 = key schedule; round keys are loaded from it and the word at offset
@        240 is read into r5 (presumably the round count - TODO confirm)
@   first stack argument = pointer to the 16-byte counter block
@ The last 32-bit word of the counter block is treated as a big-endian
@ counter: it is byte-swapped into r8 on little-endian builds, incremented
@ in r8 and swapped back before being inserted into each block.
@ q12-q15 and q7 hold the last five round keys for the whole call; q8/q9 are
@ reloaded through r7 as the earlier rounds are walked.
.globl aes_hw_ctr32_encrypt_blocks
|
||||
.hidden aes_hw_ctr32_encrypt_blocks
|
||||
.type aes_hw_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldr r4, [ip] @ load remaining arg
|
||||
ldr r5,[r3,#240]
|
||||
|
||||
@ r8 = last word of the counter block, q0 = the whole block
ldr r8, [r4, #12]
|
||||
vld1.32 {q0},[r4]
|
||||
|
||||
vld1.32 {q8,q9},[r3] @ load key schedule...
|
||||
sub r5,r5,#4
|
||||
mov r12,#16
|
||||
@ flags from this compare are consumed by movlo and bls further down
cmp r2,#2
|
||||
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
|
||||
sub r5,r5,#2
|
||||
vld1.32 {q12,q13},[r7]!
|
||||
vld1.32 {q14,q15},[r7]!
|
||||
vld1.32 {q7},[r7]
|
||||
add r7,r3,#32
|
||||
mov r6,r5
|
||||
movlo r12,#0
|
||||
#ifndef __ARMEB__
|
||||
rev r8, r8
|
||||
#endif
|
||||
@ q0 = block with counter, q1 = block with counter+1, q6 = spare copy of the
@ original block used to rebuild q0/q1/q10 after each 3-block pass
vorr q1,q0,q0
|
||||
add r10, r8, #1
|
||||
vorr q10,q0,q0
|
||||
add r8, r8, #2
|
||||
vorr q6,q0,q0
|
||||
rev r10, r10
|
||||
vmov.32 d3[1],r10
|
||||
@ two blocks or fewer: skip the 3-block loop entirely
bls .Lctr32_tail
|
||||
rev r12, r8
|
||||
sub r2,r2,#3 @ bias
|
||||
vmov.32 d21[1],r12
|
||||
b .Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
@ Main loop: three counter blocks (q0, q1, q10) are encrypted in parallel.
@ This inner part applies two round keys (q8, q9) per trip, r6 counting down.
.Loop3x_ctr32:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt .Loop3x_ctr32
|
||||
|
||||
@ Final rounds for the three blocks. The running state moves to q4, q5, q9
@ so that q0, q1, q10 can be refilled from q6 with the next three counter
@ values while the last round keys (q12-q15) are applied. The three input
@ blocks land in q2, q3, q11 and are pre-XORed with the last round key q7.
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
|
||||
vld1.8 {q2},[r0]!
|
||||
vorr q0,q6,q6
|
||||
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.8 {q3},[r0]!
|
||||
vorr q1,q6,q6
|
||||
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
vld1.8 {q11},[r0]!
|
||||
mov r7,r3
|
||||
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
|
||||
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
|
||||
vorr q10,q6,q6
|
||||
add r9,r8,#1
|
||||
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
veor q2,q2,q7
|
||||
add r10,r8,#2
|
||||
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
veor q3,q3,q7
|
||||
add r8,r8,#3
|
||||
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
veor q11,q11,q7
|
||||
rev r9,r9
|
||||
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
vmov.32 d1[1], r9
|
||||
rev r10,r10
|
||||
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
vmov.32 d3[1], r10
|
||||
rev r12,r8
|
||||
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
vmov.32 d21[1], r12
|
||||
@ flags set here decide the bhs at the bottom of the loop; the vector and
@ mov instructions in between do not touch them
subs r2,r2,#3
|
||||
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
|
||||
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
|
||||
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
|
||||
|
||||
veor q2,q2,q4
|
||||
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
|
||||
vst1.8 {q2},[r1]!
|
||||
veor q3,q3,q5
|
||||
mov r6,r5
|
||||
vst1.8 {q3},[r1]!
|
||||
veor q11,q11,q9
|
||||
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
|
||||
vst1.8 {q11},[r1]!
|
||||
bhs .Loop3x_ctr32
|
||||
|
||||
@ Undo the bias: r2 is now the number of blocks left (0, 1 or 2)
adds r2,r2,#3
|
||||
beq .Lctr32_done
|
||||
cmp r2,#1
|
||||
mov r12,#16
|
||||
moveq r12,#0
|
||||
|
||||
@ Tail: one or two blocks, using q0 and q1. r12 is the post-increment applied
@ to r0 after the first input load; it is 0 when a single block remains, so
@ the second load re-reads the same block instead of reading past it.
.Lctr32_tail:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt .Lctr32_tail
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.8 {q2},[r0],r12
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.8 {q3},[r0]
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
veor q2,q2,q7
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
veor q3,q3,q7
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
|
||||
|
||||
@ the first block is always stored; the second only when two blocks remained
cmp r2,#1
|
||||
veor q2,q2,q0
|
||||
veor q3,q3,q1
|
||||
vst1.8 {q2},[r1]!
|
||||
beq .Lctr32_done
|
||||
vst1.8 {q3},[r1]
|
||||
|
||||
@ Epilogue: restore d8-d15 and r4-r10, returning by popping the saved lr
@ straight into pc
.Lctr32_done:
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
|
||||
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
977
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/armv4-mont.S
Normal file
977
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/armv4-mont.S
Normal file
@ -0,0 +1,977 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
|
||||
.arch armv7-a
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__)
|
||||
.syntax unified
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.align 5
|
||||
@ Link-time constant: distance from .Lbn_mul_mont to OPENSSL_armcap_P.
@ bn_mul_mont adds it to its own run-time address (adr r0,.Lbn_mul_mont
@ followed by ldr r0,[r0,r2]) so the capability word can be read without an
@ absolute address.
.LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-.Lbn_mul_mont
|
||||
#endif
|
||||
|
||||
@ bn_mul_mont - Montgomery multiplication, integer path plus NEON dispatch.
@ Arguments, as named by the comments in the body:
@   r0 = rp (result), r1 = ap, r2 = bp, r3 = np,
@   [sp,#0] = pointer to n0, [sp,#4] = num (number of 32-bit words)
@ Returns r0 = 1 on completion, or r0 = 0 without doing any work when
@ num < 2.
@
@ Frame layout once tp has been allocated (r0 = address of tp[num-1]):
@   [sp .. r0+4]   tp[0..num]  (num+1 words of scratch)
@   [r0+2*4 ..]    the ten saved registers r4-r12,lr
@   [r0+12*4]      saved rp      [r0+13*4]  saved bp (reused as running bp)
@   [r0+14*4]      &n0, later overwritten with the n0 value
@   [r0+15*4]      num, later overwritten with the end-of-bp pointer
.globl bn_mul_mont
|
||||
.hidden bn_mul_mont
|
||||
.type bn_mul_mont,%function
|
||||
|
||||
.align 5
|
||||
bn_mul_mont:
|
||||
.Lbn_mul_mont:
|
||||
ldr ip,[sp,#4] @ load num
|
||||
stmdb sp!,{r0,r2} @ sp points at argument block
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
@ When num is a multiple of 8 and the capability word has ARMV7_NEON set,
@ undo the push above and branch to bn_mul8x_mont_neon with the caller's
@ registers and stack intact.
tst ip,#7
|
||||
bne .Lialu
|
||||
adr r0,.Lbn_mul_mont
|
||||
ldr r2,.LOPENSSL_armcap
|
||||
ldr r0,[r0,r2]
|
||||
#ifdef __APPLE__
|
||||
ldr r0,[r0]
|
||||
#endif
|
||||
tst r0,#ARMV7_NEON @ NEON available?
|
||||
ldmia sp, {r0,r2}
|
||||
beq .Lialu
|
||||
add sp,sp,#8
|
||||
b bn_mul8x_mont_neon
|
||||
.align 4
|
||||
.Lialu:
|
||||
#endif
|
||||
@ Integer-only path. num < 2 is rejected: drop the {r0,r2} push, return 0.
cmp ip,#2
|
||||
mov r0,ip @ load num
|
||||
#ifdef __thumb2__
|
||||
ittt lt
|
||||
#endif
|
||||
movlt r0,#0
|
||||
addlt sp,sp,#2*4
|
||||
blt .Labrt
|
||||
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
|
||||
|
||||
mov r0,r0,lsl#2 @ rescale r0 for byte count
|
||||
sub sp,sp,r0 @ alloca(4*num)
|
||||
sub sp,sp,#4 @ +extra dword
|
||||
sub r0,r0,#4 @ "num=num-1"
|
||||
add r4,r2,r0 @ &bp[num-1]
|
||||
|
||||
add r0,sp,r0 @ r0 to point at &tp[num-1]
|
||||
ldr r8,[r0,#14*4] @ &n0
|
||||
ldr r2,[r2] @ bp[0]
|
||||
ldr r5,[r1],#4 @ ap[0],ap++
|
||||
ldr r6,[r3],#4 @ np[0],np++
|
||||
ldr r8,[r8] @ *n0
|
||||
str r4,[r0,#15*4] @ save &bp[num]
|
||||
|
||||
umull r10,r11,r5,r2 @ ap[0]*bp[0]
|
||||
str r8,[r0,#14*4] @ save n0 value
|
||||
mul r8,r10,r8 @ "tp[0]"*n0
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
|
||||
mov r4,sp
|
||||
|
||||
@ First outer iteration (bp[0]): walk ap and np once, storing each result
@ word one slot down (tp[j-1]) until r4 reaches &tp[num-1].
.L1st:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
mov r10,r11
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne .L1st
|
||||
|
||||
adds r12,r12,r11
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
mov r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
mov r7,sp
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
@ Remaining outer iterations: one per further word of bp, repeated until the
@ running bp pointer equals the end pointer saved at [r0,#15*4].
.Louter:
|
||||
sub r7,r0,r7 @ "original" r0-1 value
|
||||
sub r1,r1,r7 @ "rewind" ap to &ap[1]
|
||||
ldr r2,[r4,#4]! @ *(++bp)
|
||||
sub r3,r3,r7 @ "rewind" np to &np[1]
|
||||
ldr r5,[r1,#-4] @ ap[0]
|
||||
ldr r10,[sp] @ tp[0]
|
||||
ldr r6,[r3,#-4] @ np[0]
|
||||
ldr r7,[sp,#4] @ tp[1]
|
||||
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
|
||||
str r4,[r0,#13*4] @ save bp
|
||||
mul r8,r10,r8
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
|
||||
mov r4,sp
|
||||
|
||||
@ Inner loop: same walk as .L1st, but the previous tp[j] (held in r7) is
@ added in as well.
.Linner:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
adds r10,r11,r7 @ +=tp[j]
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adc r11,r11,#0
|
||||
ldr r7,[r4,#8] @ tp[j+1]
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne .Linner
|
||||
|
||||
adds r12,r12,r11
|
||||
mov r14,#0
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
adc r14,r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adds r12,r12,r7
|
||||
ldr r7,[r0,#15*4] @ restore &bp[num]
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
cmp r4,r7
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
movne r7,sp
|
||||
bne .Louter
|
||||
|
||||
ldr r2,[r0,#12*4] @ pull rp
|
||||
mov r5,sp
|
||||
add r0,r0,#4 @ r0 to point at &tp[num]
|
||||
sub r5,r0,r5 @ "original" num value
|
||||
mov r4,sp @ "rewind" r4
|
||||
mov r1,r4 @ "borrow" r1
|
||||
sub r3,r3,r5 @ "rewind" r3 to &np[0]
|
||||
|
||||
@ Final reduction, step 1: rp[j] = tp[j] - np[j] with borrow propagated
@ through the carry flag (teq is used for the loop test because it leaves
@ the carry flag alone).
subs r7,r7,r7 @ "clear" carry flag
|
||||
.Lsub: ldr r7,[r4],#4
|
||||
ldr r6,[r3],#4
|
||||
sbcs r7,r7,r6 @ tp[j]-np[j]
|
||||
str r7,[r2],#4 @ rp[j]=
|
||||
teq r4,r0 @ preserve carry
|
||||
bne .Lsub
|
||||
sbcs r14,r14,#0 @ upmost carry
|
||||
mov r4,sp @ "rewind" r4
|
||||
sub r2,r2,r5 @ "rewind" r2
|
||||
|
||||
@ Final reduction, step 2: if the subtraction borrowed (carry clear), put
@ tp[j] back into rp[j]; otherwise keep the difference. Every tp word is
@ overwritten as it is read. Loads and stores happen on both outcomes.
.Lcopy: ldr r7,[r4] @ conditional copy
|
||||
ldr r5,[r2]
|
||||
str sp,[r4],#4 @ zap tp
|
||||
#ifdef __thumb2__
|
||||
it cc
|
||||
#endif
|
||||
movcc r5,r7
|
||||
str r5,[r2],#4
|
||||
teq r4,r0 @ preserve carry
|
||||
bne .Lcopy
|
||||
|
||||
@ Release tp, restore the saved registers, drop {r0,r2} and return 1
mov sp,r0
|
||||
add sp,sp,#4 @ skip over tp[num+1]
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
|
||||
add sp,sp,#2*4 @ skip over {r0,r2}
|
||||
mov r0,#1
|
||||
.Labrt:
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr @ bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size bn_mul_mont,.-bn_mul_mont
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
@ bn_mul8x_mont_neon - NEON Montgomery multiplication, reached only by the
@ branch in bn_mul_mont (num a multiple of 8, NEON present). No .globl is
@ emitted for it. On entry the registers and stack are exactly as the caller
@ of bn_mul_mont left them:
@   r0 = rp, r1 = ap, r2 = bp, r3 = np, [sp,#0] = &n0, [sp,#4] = num
@ r4 and r5 receive &n0 and num. ip remembers sp after the register saves so
@ the variable-size scratch frame can be dropped with a single mov at the end.
@ Multiplier words are split into 16-bit pieces (vzip.16 against a zeroed
@ register), which the comments below call "smashed"; q6-q13 are the 64-bit
@ accumulators.
.type bn_mul8x_mont_neon,%function
|
||||
.align 5
|
||||
bn_mul8x_mont_neon:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load rest of parameter block
|
||||
mov ip,sp
|
||||
|
||||
@ num > 8 takes the general path; num == 8 is handled right here
cmp r5,#8
|
||||
bhi .LNEON_8n
|
||||
|
||||
@ special case for r5==8, everything is in register bank...
|
||||
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
sub r7,sp,r5,lsl#4
|
||||
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
|
||||
and r7,r7,#-64
|
||||
vld1.32 {d30[0]}, [r4,:32]
|
||||
mov sp,r7 @ alloca
|
||||
vzip.16 d28,d8
|
||||
|
||||
vmull.u32 q6,d28,d0[0]
|
||||
vmull.u32 q7,d28,d0[1]
|
||||
vmull.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmull.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmull.u32 q10,d28,d2[0]
|
||||
vld1.32 {d4,d5,d6,d7}, [r3]!
|
||||
vmull.u32 q11,d28,d2[1]
|
||||
vmull.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmull.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
sub r9,r5,#1
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
b .LNEON_outer8
|
||||
|
||||
.align 4
|
||||
@ num == 8: one trip per remaining word of bp (r9 starts at num-1). The
@ accumulators are shifted down one register per trip (q6 <- q7 ... q13 <- 0)
@ and the carry out of the lowest one is kept in d10.
.LNEON_outer8:
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
vadd.u64 d12,d12,d10
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
subs r9,r9,#1
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
bne .LNEON_outer8
|
||||
|
||||
vadd.u64 d12,d12,d10
|
||||
mov r7,sp
|
||||
vshr.u64 d10,d12,#16
|
||||
mov r8,r5
|
||||
vadd.u64 d13,d13,d10
|
||||
add r6,sp,#96
|
||||
vshr.u64 d10,d13,#16
|
||||
vzip.16 d12,d13
|
||||
|
||||
b .LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
@ General path, num > 8: zero the accumulators and carve a 64-byte aligned
@ scratch frame of at least 128 + 16*num bytes below the current sp.
.LNEON_8n:
|
||||
veor q6,q6,q6
|
||||
sub r7,sp,#128
|
||||
veor q7,q7,q7
|
||||
sub r7,r7,r5,lsl#4
|
||||
veor q8,q8,q8
|
||||
and r7,r7,#-64
|
||||
veor q9,q9,q9
|
||||
mov sp,r7 @ alloca
|
||||
veor q10,q10,q10
|
||||
add r7,r7,#256
|
||||
veor q11,q11,q11
|
||||
sub r8,r5,#8
|
||||
veor q12,q12,q12
|
||||
veor q13,q13,q13
|
||||
|
||||
@ zero-fill the frame from sp+256 upward, 128 bytes per trip, (num-8)/8 trips
.LNEON_8n_init:
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
subs r8,r8,#8
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12,q13},[r7,:256]!
|
||||
bne .LNEON_8n_init
|
||||
|
||||
add r6,sp,#256
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
add r10,sp,#8
|
||||
vld1.32 {d30[0]},[r4,:32]
|
||||
mov r9,r5
|
||||
b .LNEON_8n_outer
|
||||
|
||||
.align 4
|
||||
@ Outer loop: consumes eight words of bp per trip (r9 counts num down by 8).
@ For each of the eight words the smashed b word and the smashed multiplier
@ derived from n0 (d30) are computed, applied to the first eight words of
@ a (d0-d3) and n (d4-d7), and parked in the scratch area at sp / r10 for
@ reuse by the inner loop.
.LNEON_8n_outer:
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
add r7,sp,#128
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
vadd.u64 d29,d29,d12
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vadd.u64 d12,d12,d13
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vadd.u64 d14,d14,d12
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]!
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
vshl.i64 d29,d15,#16
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vadd.u64 d29,d29,d14
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vadd.u64 d14,d14,d15
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vadd.u64 d16,d16,d14
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]!
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
vshl.i64 d29,d17,#16
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vadd.u64 d29,d29,d16
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vadd.u64 d16,d16,d17
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vadd.u64 d18,d18,d16
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]!
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
vshl.i64 d29,d19,#16
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vadd.u64 d29,d29,d18
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vadd.u64 d18,d18,d19
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vadd.u64 d20,d20,d18
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]!
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
vshl.i64 d29,d21,#16
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vadd.u64 d29,d29,d20
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vadd.u64 d20,d20,d21
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vadd.u64 d22,d22,d20
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]!
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
vshl.i64 d29,d23,#16
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vadd.u64 d29,d29,d22
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vadd.u64 d22,d22,d23
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vadd.u64 d24,d24,d22
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]!
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
vshl.i64 d29,d25,#16
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vadd.u64 d29,d29,d24
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vadd.u64 d24,d24,d25
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vadd.u64 d26,d26,d24
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]!
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
vshl.i64 d29,d27,#16
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vadd.u64 d29,d29,d26
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vadd.u64 d26,d26,d27
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
vadd.u64 d12,d12,d26
|
||||
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
|
||||
add r10,sp,#8 @ rewind
|
||||
sub r8,r5,#8
|
||||
b .LNEON_8n_inner
|
||||
|
||||
.align 4
|
||||
@ Inner loop: applies the eight parked b/m pairs to each further group of
@ eight words of a and n (r8 counts num-8 down by 8). Finished accumulators
@ are written out through r7 and the next ones are read in through r6. The
@ flags from the subs at the top steer every it/addne/subeq below as well as
@ the closing bne; the vector instructions in between leave them untouched.
.LNEON_8n_inner:
|
||||
subs r8,r8,#8
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vld1.64 {q13},[r6,:128]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vst1.64 {q6},[r7,:128]!
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vst1.64 {q7},[r7,:128]!
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vst1.64 {q8},[r7,:128]!
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vst1.64 {q9},[r7,:128]!
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vst1.64 {q10},[r7,:128]!
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vst1.64 {q11},[r7,:128]!
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vst1.64 {q12},[r7,:128]!
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
it eq
|
||||
subeq r1,r1,r5,lsl#2 @ rewind
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
add r10,sp,#8 @ rewind
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vst1.64 {q13},[r7,:128]!
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
|
||||
bne .LNEON_8n_inner
|
||||
@ End of one outer trip: flush the live accumulators to the frame, zero
@ q2/q3 for the later wipe, and reload the first eight accumulators from
@ sp+128 for the next trip.
add r6,sp,#128
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
veor q2,q2,q2 @ d4-d5
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
veor q3,q3,q3 @ d6-d7
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12},[r7,:128]
|
||||
|
||||
subs r9,r9,#8
|
||||
vld1.64 {q6,q7},[r6,:256]!
|
||||
vld1.64 {q8,q9},[r6,:256]!
|
||||
vld1.64 {q10,q11},[r6,:256]!
|
||||
vld1.64 {q12,q13},[r6,:256]!
|
||||
|
||||
itt ne
|
||||
subne r3,r3,r5,lsl#2 @ rewind
|
||||
bne .LNEON_8n_outer
|
||||
|
||||
@ All of bp consumed. Note that the four post-incremented stores below move
@ sp itself up by 128 bytes while zeroing the bottom of the frame.
add r7,sp,#128
|
||||
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
|
||||
vshr.u64 d10,d12,#16
|
||||
vst1.64 {q2,q3},[sp,:256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
mov r8,r5
|
||||
b .LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
@ Tail: turn the 64-bit accumulators into 32-bit result words. Carries are
@ passed along 16 bits at a time (vshr.u64 #16 then vadd.u64 into the next
@ half), the two halves are rejoined with vzip.16 and one word per
@ accumulator is stored through r7. Eight words per trip; r8 counts num down.
.LNEON_tail:
|
||||
vadd.u64 d12,d12,d10
|
||||
vshr.u64 d10,d12,#16
|
||||
vld1.64 {q8,q9}, [r6, :256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vld1.64 {q10,q11}, [r6, :256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vld1.64 {q12,q13}, [r6, :256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
.LNEON_tail_entry:
|
||||
vadd.u64 d14,d14,d10
|
||||
vst1.32 {d12[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d14,#16
|
||||
vadd.u64 d15,d15,d10
|
||||
vshr.u64 d10,d15,#16
|
||||
vzip.16 d14,d15
|
||||
vadd.u64 d16,d16,d10
|
||||
vst1.32 {d14[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d16,#16
|
||||
vadd.u64 d17,d17,d10
|
||||
vshr.u64 d10,d17,#16
|
||||
vzip.16 d16,d17
|
||||
vadd.u64 d18,d18,d10
|
||||
vst1.32 {d16[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d18,#16
|
||||
vadd.u64 d19,d19,d10
|
||||
vshr.u64 d10,d19,#16
|
||||
vzip.16 d18,d19
|
||||
vadd.u64 d20,d20,d10
|
||||
vst1.32 {d18[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d20,#16
|
||||
vadd.u64 d21,d21,d10
|
||||
vshr.u64 d10,d21,#16
|
||||
vzip.16 d20,d21
|
||||
vadd.u64 d22,d22,d10
|
||||
vst1.32 {d20[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d22,#16
|
||||
vadd.u64 d23,d23,d10
|
||||
vshr.u64 d10,d23,#16
|
||||
vzip.16 d22,d23
|
||||
vadd.u64 d24,d24,d10
|
||||
vst1.32 {d22[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d24,#16
|
||||
vadd.u64 d25,d25,d10
|
||||
vshr.u64 d10,d25,#16
|
||||
vzip.16 d24,d25
|
||||
vadd.u64 d26,d26,d10
|
||||
vst1.32 {d24[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d26,#16
|
||||
vadd.u64 d27,d27,d10
|
||||
vshr.u64 d10,d27,#16
|
||||
vzip.16 d26,d27
|
||||
vld1.64 {q6,q7}, [r6, :256]!
|
||||
subs r8,r8,#8
|
||||
vst1.32 {d26[0]}, [r7, :32]!
|
||||
bne .LNEON_tail
|
||||
|
||||
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
|
||||
sub r3,r3,r5,lsl#2 @ rewind r3
|
||||
subs r1,sp,#0 @ clear carry flag
|
||||
add r2,sp,r5,lsl#2
|
||||
|
||||
@ Final reduction, step 1: rp = tp - np, four words per trip, with the
@ borrow carried through the flags (teq leaves carry alone). r1 walks the
@ num result words that start at sp; r2 marks their end.
.LNEON_sub:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r3!, {r8,r9,r10,r11}
|
||||
sbcs r8, r4,r8
|
||||
sbcs r9, r5,r9
|
||||
sbcs r10,r6,r10
|
||||
sbcs r11,r7,r11
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne .LNEON_sub
|
||||
|
||||
ldr r10, [r1] @ load top-most bit
|
||||
mov r11,sp
|
||||
veor q0,q0,q0
|
||||
sub r11,r2,r11 @ this is num*4
|
||||
veor q1,q1,q1
|
||||
mov r1,sp
|
||||
sub r0,r0,r11 @ rewind r0
|
||||
mov r3,r2 @ second 3/4th of frame
|
||||
sbcs r10,r10,#0 @ result is carry flag
|
||||
|
||||
@ Final reduction, step 2: eight words per trip. Where the subtraction
@ borrowed (carry clear) the unreduced words are moved into rp, otherwise rp
@ keeps the difference; both cases do the same loads and stores. The scratch
@ frame is overwritten with zeros (q0/q1) along the way.
.LNEON_copy_n_zap:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
ldmia r1, {r4,r5,r6,r7}
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
sub r1,r1,#16
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r1,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne .LNEON_copy_n_zap
|
||||
|
||||
@ Drop the scratch frame (ip still holds sp from just after the register
@ saves), restore d8-d15 and r4-r11, and return to the caller of bn_mul_mont.
mov sp,ip
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
bx lr @ bx lr
|
||||
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
|
||||
#endif
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
1529
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/bsaes-armv7.S
Normal file
1529
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/bsaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,255 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
|
||||
@ instructions are in aesv8-armx.pl.)
|
||||
.arch armv7-a
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__) || defined(__clang__)
|
||||
.syntax unified
|
||||
#define ldrplb ldrbpl
|
||||
#define ldrneb ldrbne
|
||||
#endif
|
||||
#if defined(__thumb2__)
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
@ gcm_init_neon: derive the "twisted H" value used by the NEON GHASH code.
@ In:  r0 = destination, receives 16 bytes (one q register)
@      r1 = source H, 16 bytes read as two 64-bit halves (r1 is advanced by 8)
@ Out: [r0] = twisted H
@ Uses q3, q8, q9 and d26; no core registers other than r1 are modified.
@ NOTE(review): argument meaning is taken from the register usage below;
@ confirm against the C prototype.
.globl gcm_init_neon
|
||||
.hidden gcm_init_neon
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
vld1.64 d7,[r1]! @ load H
|
||||
vmov.i8 q8,#0xe1
|
||||
vld1.64 d6,[r1]
|
||||
vshl.i64 d17,#57
|
||||
vshr.u64 d16,#63 @ t0=0xc2....01
|
||||
vdup.8 q9,d7[7]
|
||||
vshr.u64 d26,d6,#63 @ bit shifted out of d6, ORed into d7 below
|
||||
vshr.s8 q9,#7 @ broadcast carry bit
|
||||
vshl.i64 q3,q3,#1
|
||||
vand q8,q8,q9 @ keep the t0 constant only when the carry bit was set
|
||||
vorr d7,d26 @ H<<<=1
|
||||
veor q3,q3,q8 @ twisted H
|
||||
vstmia r0,{q3} @ write the 16-byte result
|
||||
|
||||
bx lr @ return
|
||||
.size gcm_init_neon,.-gcm_init_neon
|
||||
|
||||
@ gcm_gmult_neon: multiply Xi by the twisted H once.
@ In:  r0 = Xi, 16 bytes, updated in place
@      r1 = twisted H, 16 bytes (as produced by gcm_init_neon)
@ This routine only sets up registers and then branches to .Lgmult_neon,
@ which lives inside gcm_ghash_neon. r3 is set to 16 so that the shared loop
@ there executes exactly one iteration before writing Xi back and returning.
.globl gcm_gmult_neon
|
||||
.hidden gcm_gmult_neon
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
vld1.64 d7,[r0]! @ load Xi
|
||||
vld1.64 d6,[r0]! @ r0 is now Xi+16; the shared tail rewinds it
|
||||
vmov.i64 d29,#0x0000ffffffffffff @ 48-bit mask used by the multiply
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff @ 32-bit mask used by the multiply
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff @ 16-bit mask used by the multiply
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
mov r3,#16 @ one 16-byte block: the shared loop runs once
|
||||
b .Lgmult_neon @ continue in gcm_ghash_neon with q3 = Xi
|
||||
.size gcm_gmult_neon,.-gcm_gmult_neon
|
||||
|
||||
@ gcm_ghash_neon: fold input blocks into Xi using NEON polynomial multiplies.
@ In:  r0 = Xi, 16 bytes, updated in place
@      r1 = twisted H, 16 bytes (as produced by gcm_init_neon)
@      r2 = input pointer, advanced by 16 per block
@      r3 = remaining length in bytes, decremented by 16 per block
@ The loop exits only when r3 reaches exactly zero, so r3 is assumed to be a
@ non-zero multiple of 16 on entry -- TODO confirm against the callers.
@ Register roles inside the loop:
@   q0 = running Xi, q3 = block being multiplied, d26/d27 = twisted H,
@   d28 = d26^d27, d29/d30/d31 = 48/32/16-bit masks, q8-q11 = temporaries,
@   q0/q1/q2 = the three Karatsuba partial products.
@ .Lgmult_neon is also the entry point used by gcm_gmult_neon.
.globl gcm_ghash_neon
|
||||
.hidden gcm_ghash_neon
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
gcm_ghash_neon:
|
||||
vld1.64 d1,[r0]! @ load Xi
|
||||
vld1.64 d0,[r0]! @ r0 is now Xi+16; rewound before the final store
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
|
||||
.Loop_neon:
|
||||
vld1.64 d7,[r2]! @ load inp
|
||||
vld1.64 d6,[r2]!
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
veor q3,q0 @ inp^=Xi
|
||||
.Lgmult_neon:
|
||||
@ First partial product: q0 = d26 * d6, assembled from 8-bit vmull.p8 pieces.
vext.8 d16, d26, d26, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d0, d6, d6, #1 @ B1
|
||||
vmull.p8 q0, d26, d0 @ E = A*B1
|
||||
vext.8 d18, d26, d26, #2 @ A2
|
||||
vmull.p8 q9, d18, d6 @ H = A2*B
|
||||
vext.8 d22, d6, d6, #2 @ B2
|
||||
vmull.p8 q11, d26, d22 @ G = A*B2
|
||||
vext.8 d20, d26, d26, #3 @ A3
|
||||
veor q8, q8, q0 @ L = E + F
|
||||
vmull.p8 q10, d20, d6 @ J = A3*B
|
||||
vext.8 d0, d6, d6, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q0, d26, d0 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d6, d6, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d26, d22 @ K = A*B4
|
||||
veor q10, q10, q0 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q0, d26, d6 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q0, q0, q8
|
||||
veor q0, q0, q10
|
||||
@ Second partial product: q1 = d28 * (d6 ^ d7), same sequence as above.
veor d6,d6,d7 @ Karatsuba pre-processing
|
||||
vext.8 d16, d28, d28, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d2, d6, d6, #1 @ B1
|
||||
vmull.p8 q1, d28, d2 @ E = A*B1
|
||||
vext.8 d18, d28, d28, #2 @ A2
|
||||
vmull.p8 q9, d18, d6 @ H = A2*B
|
||||
vext.8 d22, d6, d6, #2 @ B2
|
||||
vmull.p8 q11, d28, d22 @ G = A*B2
|
||||
vext.8 d20, d28, d28, #3 @ A3
|
||||
veor q8, q8, q1 @ L = E + F
|
||||
vmull.p8 q10, d20, d6 @ J = A3*B
|
||||
vext.8 d2, d6, d6, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q1, d28, d2 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d6, d6, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d28, d22 @ K = A*B4
|
||||
veor q10, q10, q1 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q1, d28, d6 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q1, q1, q8
|
||||
veor q1, q1, q10
|
||||
@ Third partial product: q2 = d27 * d7, same sequence as above.
vext.8 d16, d27, d27, #1 @ A1
|
||||
vmull.p8 q8, d16, d7 @ F = A1*B
|
||||
vext.8 d4, d7, d7, #1 @ B1
|
||||
vmull.p8 q2, d27, d4 @ E = A*B1
|
||||
vext.8 d18, d27, d27, #2 @ A2
|
||||
vmull.p8 q9, d18, d7 @ H = A2*B
|
||||
vext.8 d22, d7, d7, #2 @ B2
|
||||
vmull.p8 q11, d27, d22 @ G = A*B2
|
||||
vext.8 d20, d27, d27, #3 @ A3
|
||||
veor q8, q8, q2 @ L = E + F
|
||||
vmull.p8 q10, d20, d7 @ J = A3*B
|
||||
vext.8 d4, d7, d7, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q2, d27, d4 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d7, d7, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d27, d22 @ K = A*B4
|
||||
veor q10, q10, q2 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q2, d27, d7 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q2, q2, q8
|
||||
veor q2, q2, q10
|
||||
veor q1,q1,q0 @ Karatsuba post-processing
|
||||
veor q1,q1,q2
|
||||
veor d1,d1,d2
|
||||
veor d4,d4,d3 @ Xh|Xl - 256-bit result
|
||||
|
||||
@ equivalent of reduction_avx from ghash-x86_64.pl
|
||||
vshl.i64 q9,q0,#57 @ 1st phase
|
||||
vshl.i64 q10,q0,#62
|
||||
veor q10,q10,q9 @
|
||||
vshl.i64 q9,q0,#63
|
||||
veor q10, q10, q9 @
|
||||
veor d1,d1,d20 @
|
||||
veor d4,d4,d21
|
||||
|
||||
vshr.u64 q10,q0,#1 @ 2nd phase
|
||||
veor q2,q2,q0
|
||||
veor q0,q0,q10 @
|
||||
vshr.u64 q10,q10,#6
|
||||
vshr.u64 q0,q0,#1 @
|
||||
veor q0,q0,q2 @
|
||||
veor q0,q0,q10 @
|
||||
|
||||
subs r3,#16 @ one 16-byte block consumed
|
||||
bne .Loop_neon @ repeat until the length reaches zero
|
||||
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
sub r0,#16 @ undo the two post-increments from loading Xi
|
||||
vst1.64 d1,[r0]! @ write out Xi
|
||||
vst1.64 d0,[r0]
|
||||
|
||||
bx lr @ return
|
||||
.size gcm_ghash_neon,.-gcm_ghash_neon
|
||||
#endif
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
@ -0,0 +1,253 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
.fpu neon
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
@ gcm_init_v8: build the hash-key table used by gcm_gmult_v8/gcm_ghash_v8.
@ In:  r0 = Htable output, 48 bytes written (r0 is advanced by 16)
@      r1 = input H, 16 bytes
@ Out: Htable[0] = twisted H (q12)
@      Htable[1] = packed Karatsuba pre-processed halves (q13)
@      Htable[2] = H^2 (q14)
@ The polynomial multiplies are emitted as raw .byte encodings; the intended
@ instruction is given in the comment on each such line.
@ Uses q0-q3 and q8-q14 only, so no callee-saved VFP registers are touched.
.globl gcm_init_v8
|
||||
.hidden gcm_init_v8
|
||||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
gcm_init_v8:
|
||||
vld1.64 {q9},[r1] @ load input H
|
||||
vmov.i8 q11,#0xe1
|
||||
vshl.i64 q11,q11,#57 @ 0xc2.0
|
||||
vext.8 q3,q9,q9,#8
|
||||
vshr.u64 q10,q11,#63
|
||||
vdup.32 q9,d18[1]
|
||||
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
|
||||
vshr.u64 q10,q3,#63
|
||||
vshr.s32 q9,q9,#31 @ broadcast carry bit
|
||||
vand q10,q10,q8
|
||||
vshl.i64 q3,q3,#1
|
||||
vext.8 q10,q10,q10,#8
|
||||
vand q8,q8,q9
|
||||
vorr q3,q3,q10 @ H<<<=1
|
||||
veor q12,q3,q8 @ twisted H
|
||||
vst1.64 {q12},[r0]! @ store Htable[0]
|
||||
|
||||
@ calculate H^2
|
||||
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
|
||||
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
|
||||
veor q8,q8,q12
|
||||
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
|
||||
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q14,q0,q10 @ q14 = reduced H^2
|
||||
|
||||
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
|
||||
veor q9,q9,q14
|
||||
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
|
||||
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
|
||||
|
||||
bx lr @ return
|
||||
.size gcm_init_v8,.-gcm_init_v8
|
||||
@ gcm_gmult_v8: multiply Xi by H once using the pmull encodings below.
@ In:  r0 = Xi, 16 bytes, updated in place
@      r1 = Htable; the first 32 bytes are read (q12 = twisted H, q13 = the
@           packed Karatsuba value stored by gcm_init_v8)
@ Uses q0-q3 and q8-q13 only; r0 and r1 are left unchanged.
.globl gcm_gmult_v8
|
||||
.hidden gcm_gmult_v8
|
||||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
gcm_gmult_v8:
|
||||
vld1.64 {q9},[r0] @ load Xi
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q12,q13},[r1] @ load twisted H, ...
|
||||
vshl.u64 q11,q11,#57 @ q11 = reduction constant (0xc2.0, as in gcm_init_v8)
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vext.8 q3,q9,q9,#8
|
||||
|
||||
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
|
||||
veor q9,q9,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
|
||||
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q0,q0,q0,#8
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
bx lr @ return
|
||||
.size gcm_gmult_v8,.-gcm_gmult_v8
|
||||
@ gcm_ghash_v8: fold input blocks into Xi, two blocks per loop iteration.
@ In:  r0 = Xi, 16 bytes, updated in place
@      r1 = Htable; 48 bytes are read (q12 = twisted H, q13 = packed
@           Karatsuba value, q14 = H^2), r1 is advanced by 32
@      r2 = input pointer
@      r3 = length in bytes
@ r12 is the post-increment for the input pointer and is zeroed near the end
@ so that loads never step past inp[len] (see the comment on "mov r12").
@ d8-d15 are saved and restored because q4-q7 are used as accumulators.
@ NOTE(review): the length is presumably a non-zero multiple of 16; the code
@ handles an odd trailing block via .Lodd_tail_v8 -- confirm against callers.
.globl gcm_ghash_v8
|
||||
.hidden gcm_ghash_v8
|
||||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
gcm_ghash_v8:
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
vld1.64 {q0},[r0] @ load [rotated] Xi
|
||||
@ "[rotated]" means that
|
||||
@ loaded value would have
|
||||
@ to be rotated in order to
|
||||
@ make it appear as in
|
||||
@ algorithm specification
|
||||
subs r3,r3,#32 @ see if r3 is 32 or larger
|
||||
mov r12,#16 @ r12 is used as post-
|
||||
@ increment for input pointer;
|
||||
@ as loop is modulo-scheduled
|
||||
@ r12 is zeroed just in time
|
||||
@ to preclude overstepping
|
||||
@ inp[len], which means that
|
||||
@ last block[s] are actually
|
||||
@ loaded twice, but last
|
||||
@ copy is not processed
|
||||
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q14},[r1]
|
||||
moveq r12,#0 @ is it time to zero r12?
|
||||
vext.8 q0,q0,q0,#8 @ rotate Xi
|
||||
vld1.64 {q8},[r2]! @ load [rotated] I[0]
|
||||
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q8,q8
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q3,q8,q8,#8 @ rotate I[0]
|
||||
blo .Lodd_tail_v8 @ r3 was less than 32
|
||||
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vext.8 q7,q9,q9,#8
|
||||
veor q3,q3,q0 @ I[i]^=Xi
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
b .Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
.Loop_mod2x_v8:
|
||||
vext.8 q10,q3,q3,#8
|
||||
subs r3,r3,#32 @ is there more data?
|
||||
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
|
||||
movlo r12,#0 @ is it time to zero r12?
|
||||
|
||||
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
|
||||
veor q10,q10,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
|
||||
veor q0,q0,q4 @ accumulate
|
||||
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
|
||||
|
||||
veor q2,q2,q6
|
||||
moveq r12,#0 @ is it time to zero r12?
|
||||
veor q1,q1,q5
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q8,q8
|
||||
#endif
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
vext.8 q7,q9,q9,#8
|
||||
vext.8 q3,q8,q8,#8
|
||||
veor q0,q1,q10
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q3,q3,q2 @ accumulate q3 early
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q3,q3,q10
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
veor q3,q3,q0
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
|
||||
|
||||
veor q2,q2,q10
|
||||
vext.8 q3,q8,q8,#8 @ re-construct q3
|
||||
adds r3,r3,#32 @ re-construct r3
|
||||
veor q0,q0,q2 @ re-construct q0
|
||||
beq .Ldone_v8 @ is r3 zero?
|
||||
.Lodd_tail_v8:
|
||||
@ Single remaining block: multiply (inp ^ Xi) by H using q12/q13.
vext.8 q10,q0,q0,#8
|
||||
veor q3,q3,q0 @ inp^=Xi
|
||||
veor q9,q8,q10 @ q9 is rotated inp^Xi
|
||||
|
||||
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
|
||||
veor q9,q9,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
|
||||
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
.Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q0,q0,q0,#8
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
bx lr @ return
|
||||
.size gcm_ghash_v8,.-gcm_ghash_v8
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
File diff suppressed because it is too large
Load Diff
2839
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha256-armv4.S
Normal file
2839
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha256-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
1894
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha512-armv4.S
Normal file
1894
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha512-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
1236
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
1236
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
379
contrib/boringssl-cmake/linux-arm/crypto/test/trampoline-armv4.S
Normal file
379
contrib/boringssl-cmake/linux-arm/crypto/test/trampoline-armv4.S
Normal file
@ -0,0 +1,379 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.syntax unified
|
||||
|
||||
.arch armv7-a
|
||||
.fpu vfp
|
||||
|
||||
.text
|
||||
|
||||
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
@ the result of |func|. The |unwind| argument is unused.
|
||||
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
@ const uint32_t *argv, size_t argc,
|
||||
@ int unwind);
|
||||
@ Register arguments on entry: r0 = func, r1 = state, r2 = argv, r3 = argc.
@ Stack layout after the prologue and "sub sp, sp, #28":
@   [sp, #0..#27]  outgoing stack arguments for |func| (24 bytes + 4 padding)
@   [sp, #28]      saved r0 (func)
@   [sp, #32]      saved r1 (state)
@   [sp, #36]      saved r2 (argv)
@   [sp, #40]      saved r3 (argc)
@   [sp, #44...]   saved r4-r11 and lr, followed by saved d8-d15
@ NOTE(review): the copy loop below has no upper bound check, so argc is
@ assumed to be at most 10 (the space reserved here) -- confirm at call sites.
.type abi_test_trampoline, %function
|
||||
.globl abi_test_trampoline
|
||||
.hidden abi_test_trampoline
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
|
||||
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
|
||||
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
|
||||
sub sp, sp, #28
|
||||
|
||||
@ Every register in AAPCS is either non-volatile or a parameter (except
|
||||
@ r9 on iOS), so this code, by the actual call, loses all its scratch
|
||||
@ registers. First fill in stack parameters while there are registers
|
||||
@ to spare.
|
||||
cmp r3, #4
|
||||
bls .Lstack_args_done @ argc <= 4: everything fits in r0-r3
|
||||
mov r4, sp @ r4 is the output pointer.
|
||||
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
|
||||
add r2, r2, #16 @ Skip four arguments.
|
||||
.Lstack_args_loop:
|
||||
ldr r6, [r2], #4 @ copy argv[4..argc-1] to the outgoing stack area
|
||||
cmp r2, r5
|
||||
str r6, [r4], #4
|
||||
bne .Lstack_args_loop
|
||||
|
||||
.Lstack_args_done:
|
||||
@ Load registers from |r1|.
|
||||
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Load register parameters. This uses up our remaining registers, so we
|
||||
@ repurpose lr as scratch space.
|
||||
ldr r3, [sp, #40] @ Reload argc.
|
||||
ldr lr, [sp, #36] @ Load argv into lr.
|
||||
cmp r3, #3
|
||||
bhi .Larg_r3 @ argc >= 4: load r3, then fall through r2, r1, r0
|
||||
beq .Larg_r2 @ argc == 3
|
||||
cmp r3, #1
|
||||
bhi .Larg_r1 @ argc == 2
|
||||
beq .Larg_r0 @ argc == 1
|
||||
b .Largs_done @ argc == 0: no register arguments
|
||||
|
||||
.Larg_r3:
|
||||
ldr r3, [lr, #12] @ argv[3]
|
||||
.Larg_r2:
|
||||
ldr r2, [lr, #8] @ argv[2]
|
||||
.Larg_r1:
|
||||
ldr r1, [lr, #4] @ argv[1]
|
||||
.Larg_r0:
|
||||
ldr r0, [lr] @ argv[0]
|
||||
.Largs_done:
|
||||
|
||||
@ With every other register in use, load the function pointer into lr
|
||||
@ and call the function.
|
||||
ldr lr, [sp, #28]
|
||||
blx lr
|
||||
|
||||
@ r1-r3 are free for use again. The trampoline only supports
|
||||
@ single-return functions. Pass r4-r11 to the caller.
|
||||
ldr r1, [sp, #32] @ reload |state| from its saved slot
|
||||
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Unwind the stack and restore registers.
|
||||
add sp, sp, #44 @ 44 = 28+16
|
||||
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
||||
bx lr @ return; r0 still holds the result of |func|
|
||||
.size abi_test_trampoline,.-abi_test_trampoline
|
||||
@ abi_test_clobber_r0: overwrite r0 with zero and return. Takes no inputs.
.type abi_test_clobber_r0, %function
|
||||
.globl abi_test_clobber_r0
|
||||
.hidden abi_test_clobber_r0
|
||||
.align 4
|
||||
abi_test_clobber_r0:
|
||||
mov r0, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r0,.-abi_test_clobber_r0
|
||||
@ abi_test_clobber_r1: overwrite r1 with zero and return. Takes no inputs.
.type abi_test_clobber_r1, %function
|
||||
.globl abi_test_clobber_r1
|
||||
.hidden abi_test_clobber_r1
|
||||
.align 4
|
||||
abi_test_clobber_r1:
|
||||
mov r1, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r1,.-abi_test_clobber_r1
|
||||
@ abi_test_clobber_r2: overwrite r2 with zero and return. Takes no inputs.
.type abi_test_clobber_r2, %function
|
||||
.globl abi_test_clobber_r2
|
||||
.hidden abi_test_clobber_r2
|
||||
.align 4
|
||||
abi_test_clobber_r2:
|
||||
mov r2, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r2,.-abi_test_clobber_r2
|
||||
@ abi_test_clobber_r3: overwrite r3 with zero and return. Takes no inputs.
.type abi_test_clobber_r3, %function
|
||||
.globl abi_test_clobber_r3
|
||||
.hidden abi_test_clobber_r3
|
||||
.align 4
|
||||
abi_test_clobber_r3:
|
||||
mov r3, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r3,.-abi_test_clobber_r3
|
||||
@ abi_test_clobber_r4: overwrite r4 with zero and return without restoring
@ it. r4 is one of the registers abi_test_trampoline treats as callee-saved.
.type abi_test_clobber_r4, %function
|
||||
.globl abi_test_clobber_r4
|
||||
.hidden abi_test_clobber_r4
|
||||
.align 4
|
||||
abi_test_clobber_r4:
|
||||
mov r4, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r4,.-abi_test_clobber_r4
|
||||
@ abi_test_clobber_r5: overwrite r5 with zero and return without restoring
@ it. r5 is one of the registers abi_test_trampoline treats as callee-saved.
.type abi_test_clobber_r5, %function
|
||||
.globl abi_test_clobber_r5
|
||||
.hidden abi_test_clobber_r5
|
||||
.align 4
|
||||
abi_test_clobber_r5:
|
||||
mov r5, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r5,.-abi_test_clobber_r5
|
||||
@ abi_test_clobber_r6: overwrite r6 with zero and return without restoring
@ it. r6 is one of the registers abi_test_trampoline treats as callee-saved.
.type abi_test_clobber_r6, %function
|
||||
.globl abi_test_clobber_r6
|
||||
.hidden abi_test_clobber_r6
|
||||
.align 4
|
||||
abi_test_clobber_r6:
|
||||
mov r6, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r6,.-abi_test_clobber_r6
|
||||
@ abi_test_clobber_r7: overwrite r7 with zero and return without restoring
@ it. r7 is one of the registers abi_test_trampoline treats as callee-saved.
.type abi_test_clobber_r7, %function
|
||||
.globl abi_test_clobber_r7
|
||||
.hidden abi_test_clobber_r7
|
||||
.align 4
|
||||
abi_test_clobber_r7:
|
||||
mov r7, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r7,.-abi_test_clobber_r7
|
||||
@ abi_test_clobber_r8: overwrite r8 with zero and return without restoring
@ it. r8 is one of the registers abi_test_trampoline treats as callee-saved.
.type abi_test_clobber_r8, %function
|
||||
.globl abi_test_clobber_r8
|
||||
.hidden abi_test_clobber_r8
|
||||
.align 4
|
||||
abi_test_clobber_r8:
|
||||
mov r8, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r8,.-abi_test_clobber_r8
|
||||
@ abi_test_clobber_r9: overwrite r9 with zero and return without restoring
@ it. abi_test_trampoline treats r9 as callee-saved except when __APPLE__
@ is defined.
.type abi_test_clobber_r9, %function
|
||||
.globl abi_test_clobber_r9
|
||||
.hidden abi_test_clobber_r9
|
||||
.align 4
|
||||
abi_test_clobber_r9:
|
||||
mov r9, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r9,.-abi_test_clobber_r9
|
||||
@ abi_test_clobber_r10: overwrite r10 with zero and return without restoring
@ it. r10 is one of the registers abi_test_trampoline treats as callee-saved.
.type abi_test_clobber_r10, %function
|
||||
.globl abi_test_clobber_r10
|
||||
.hidden abi_test_clobber_r10
|
||||
.align 4
|
||||
abi_test_clobber_r10:
|
||||
mov r10, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r10,.-abi_test_clobber_r10
|
||||
@ abi_test_clobber_r11: overwrite r11 with zero and return without restoring
@ it. r11 is one of the registers abi_test_trampoline treats as callee-saved.
.type abi_test_clobber_r11, %function
|
||||
.globl abi_test_clobber_r11
|
||||
.hidden abi_test_clobber_r11
|
||||
.align 4
|
||||
abi_test_clobber_r11:
|
||||
mov r11, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r11,.-abi_test_clobber_r11
|
||||
@ abi_test_clobber_r12: overwrite r12 with zero and return. Takes no inputs.
.type abi_test_clobber_r12, %function
|
||||
.globl abi_test_clobber_r12
|
||||
.hidden abi_test_clobber_r12
|
||||
.align 4
|
||||
abi_test_clobber_r12:
|
||||
mov r12, #0 @ the clobber itself
|
||||
bx lr
|
||||
.size abi_test_clobber_r12,.-abi_test_clobber_r12
|
||||
@ abi_test_clobber_d0: zero d0 by writing zero to its two single-precision
@ halves s0 and s1. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d0, %function
|
||||
.globl abi_test_clobber_d0
|
||||
.hidden abi_test_clobber_d0
|
||||
.align 4
|
||||
abi_test_clobber_d0:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s0, r0
|
||||
vmov s1, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d0,.-abi_test_clobber_d0
|
||||
@ abi_test_clobber_d1: zero d1 by writing zero to its two single-precision
@ halves s2 and s3. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d1, %function
|
||||
.globl abi_test_clobber_d1
|
||||
.hidden abi_test_clobber_d1
|
||||
.align 4
|
||||
abi_test_clobber_d1:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s2, r0
|
||||
vmov s3, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d1,.-abi_test_clobber_d1
|
||||
@ abi_test_clobber_d2: zero d2 by writing zero to its two single-precision
@ halves s4 and s5. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d2, %function
|
||||
.globl abi_test_clobber_d2
|
||||
.hidden abi_test_clobber_d2
|
||||
.align 4
|
||||
abi_test_clobber_d2:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s4, r0
|
||||
vmov s5, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d2,.-abi_test_clobber_d2
|
||||
@ abi_test_clobber_d3: zero d3 by writing zero to its two single-precision
@ halves s6 and s7. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d3, %function
|
||||
.globl abi_test_clobber_d3
|
||||
.hidden abi_test_clobber_d3
|
||||
.align 4
|
||||
abi_test_clobber_d3:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s6, r0
|
||||
vmov s7, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d3,.-abi_test_clobber_d3
|
||||
@ abi_test_clobber_d4: zero d4 by writing zero to its two single-precision
@ halves s8 and s9. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d4, %function
|
||||
.globl abi_test_clobber_d4
|
||||
.hidden abi_test_clobber_d4
|
||||
.align 4
|
||||
abi_test_clobber_d4:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s8, r0
|
||||
vmov s9, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d4,.-abi_test_clobber_d4
|
||||
@ abi_test_clobber_d5: zero d5 by writing zero to its two single-precision
@ halves s10 and s11. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d5, %function
|
||||
.globl abi_test_clobber_d5
|
||||
.hidden abi_test_clobber_d5
|
||||
.align 4
|
||||
abi_test_clobber_d5:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s10, r0
|
||||
vmov s11, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d5,.-abi_test_clobber_d5
|
||||
@ abi_test_clobber_d6: zero d6 by writing zero to its two single-precision
@ halves s12 and s13. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d6, %function
|
||||
.globl abi_test_clobber_d6
|
||||
.hidden abi_test_clobber_d6
|
||||
.align 4
|
||||
abi_test_clobber_d6:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s12, r0
|
||||
vmov s13, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d6,.-abi_test_clobber_d6
|
||||
@ abi_test_clobber_d7: zero d7 by writing zero to its two single-precision
@ halves s14 and s15. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d7, %function
|
||||
.globl abi_test_clobber_d7
|
||||
.hidden abi_test_clobber_d7
|
||||
.align 4
|
||||
abi_test_clobber_d7:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s14, r0
|
||||
vmov s15, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d7,.-abi_test_clobber_d7
|
||||
@ abi_test_clobber_d8: zero d8 (halves s16 and s17) and return without
@ restoring it. d8 is one of the registers abi_test_trampoline treats as
@ callee-saved. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d8, %function
|
||||
.globl abi_test_clobber_d8
|
||||
.hidden abi_test_clobber_d8
|
||||
.align 4
|
||||
abi_test_clobber_d8:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s16, r0
|
||||
vmov s17, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d8,.-abi_test_clobber_d8
|
||||
@ abi_test_clobber_d9: zero d9 (halves s18 and s19) and return without
@ restoring it. d9 is one of the registers abi_test_trampoline treats as
@ callee-saved. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d9, %function
|
||||
.globl abi_test_clobber_d9
|
||||
.hidden abi_test_clobber_d9
|
||||
.align 4
|
||||
abi_test_clobber_d9:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s18, r0
|
||||
vmov s19, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d9,.-abi_test_clobber_d9
|
||||
@ abi_test_clobber_d10: zero d10 (halves s20 and s21) and return without
@ restoring it. d10 is one of the registers abi_test_trampoline treats as
@ callee-saved. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d10, %function
|
||||
.globl abi_test_clobber_d10
|
||||
.hidden abi_test_clobber_d10
|
||||
.align 4
|
||||
abi_test_clobber_d10:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s20, r0
|
||||
vmov s21, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d10,.-abi_test_clobber_d10
|
||||
@ abi_test_clobber_d11: zero d11 (halves s22 and s23) and return without
@ restoring it. d11 is one of the registers abi_test_trampoline treats as
@ callee-saved. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d11, %function
|
||||
.globl abi_test_clobber_d11
|
||||
.hidden abi_test_clobber_d11
|
||||
.align 4
|
||||
abi_test_clobber_d11:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s22, r0
|
||||
vmov s23, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d11,.-abi_test_clobber_d11
|
||||
@ abi_test_clobber_d12: zero d12 (halves s24 and s25) and return without
@ restoring it. d12 is one of the registers abi_test_trampoline treats as
@ callee-saved. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d12, %function
|
||||
.globl abi_test_clobber_d12
|
||||
.hidden abi_test_clobber_d12
|
||||
.align 4
|
||||
abi_test_clobber_d12:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s24, r0
|
||||
vmov s25, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d12,.-abi_test_clobber_d12
|
||||
@ abi_test_clobber_d13: zero d13 (halves s26 and s27) and return without
@ restoring it. d13 is one of the registers abi_test_trampoline treats as
@ callee-saved. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d13, %function
|
||||
.globl abi_test_clobber_d13
|
||||
.hidden abi_test_clobber_d13
|
||||
.align 4
|
||||
abi_test_clobber_d13:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s26, r0
|
||||
vmov s27, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d13,.-abi_test_clobber_d13
|
||||
@ abi_test_clobber_d14: zero d14 (halves s28 and s29) and return without
@ restoring it. d14 is one of the registers abi_test_trampoline treats as
@ callee-saved. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d14, %function
|
||||
.globl abi_test_clobber_d14
|
||||
.hidden abi_test_clobber_d14
|
||||
.align 4
|
||||
abi_test_clobber_d14:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s28, r0
|
||||
vmov s29, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d14,.-abi_test_clobber_d14
|
||||
@ abi_test_clobber_d15: zero d15 (halves s30 and s31) and return without
@ restoring it. d15 is one of the registers abi_test_trampoline treats as
@ callee-saved. The zero is staged in r0, so r0 is zeroed as well.
.type abi_test_clobber_d15, %function
|
||||
.globl abi_test_clobber_d15
|
||||
.hidden abi_test_clobber_d15
|
||||
.align 4
|
||||
abi_test_clobber_d15:
|
||||
mov r0, #0 @ zero source (side effect: r0 = 0)
|
||||
vmov s30, r0
|
||||
vmov s31, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
3670
contrib/boringssl-cmake/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
Normal file
3670
contrib/boringssl-cmake/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,587 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__)
|
||||
.machine "any"
|
||||
|
||||
.abiversion 2
|
||||
.text
|
||||
|
||||
// gcm_init_p8: global function (ELFv2, local entry offset 0).
// Reads one 16-byte value through r4, derives several vector values from
// it and writes them to a table addressed through r3 at offsets
// 0x00 .. 0xc0 (r8/r9/r10 hold the running offsets).
// NOTE(review): presumably r3 = precomputed GHASH key table and r4 = raw
// hash key H - confirm against the C prototype; it is not visible here.
// NOTE(review): the .long words are instructions emitted as raw opcodes.
// By encoding, 0x7D202699 appears to be lxvd2x (load v9 from 0(r4)),
// 0x7Dxx1F99 / 0x7Exx1F99 appear to be stxvd2x (stores relative to r3)
// and 0x1xxxxxC8 appear to be vpmsumd (carry-less multiply-sum) - confirm
// against the generating Perl script before relying on this.
// No stack frame is created; r0, r8-r10, r12 and v0-v19 are written.
.globl gcm_init_p8
|
||||
.type gcm_init_p8,@function
|
||||
.align 5
|
||||
gcm_init_p8:
|
||||
.localentry gcm_init_p8,0
|
||||
|
||||
// Set up table offsets in r8/r9/r10. "or 0,0,0" copies r0 onto itself, so
// the r0/r12 constants loaded here are not consumed by any mnemonic below.
li 0,-4096
|
||||
li 8,0x10
|
||||
li 12,-1
|
||||
li 9,0x20
|
||||
or 0,0,0
|
||||
li 10,0x30
|
||||
.long 0x7D202699
|
||||
|
||||
// Build a constant in v8 from splatted immediates, then combine it with
// the loaded value in v9: v9 is shifted left by one bit and conditionally
// XORed with v8 under a mask derived from the sign of its first byte.
// Result is kept in v3.
vspltisb 8,-16
|
||||
vspltisb 5,1
|
||||
vaddubm 8,8,8
|
||||
vxor 4,4,4
|
||||
vor 8,8,5
|
||||
vsldoi 8,8,4,15
|
||||
vsldoi 6,4,5,1
|
||||
vaddubm 8,8,8
|
||||
vspltisb 7,7
|
||||
vor 8,8,6
|
||||
vspltb 6,9,0
|
||||
vsl 9,9,5
|
||||
vsrab 6,6,7
|
||||
vand 6,6,8
|
||||
vxor 3,9,6
|
||||
|
||||
|
||||
// v9 = v3 with its 8-byte halves swapped; v11 / v10 are v9 shifted in
// from / out to the zero vector v4 by 8 bytes.
vsldoi 9,3,3,8
|
||||
vsldoi 8,4,8,8
|
||||
vsldoi 11,4,9,8
|
||||
vsldoi 10,9,4,8
|
||||
|
||||
|
||||
// First group of table stores (raw opcodes), interleaved with offset bumps.
.long 0x7D001F99
|
||||
.long 0x7D681F99
|
||||
li 8,0x40
|
||||
.long 0x7D291F99
|
||||
li 9,0x50
|
||||
.long 0x7D4A1F99
|
||||
li 10,0x60
|
||||
|
||||
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
|
||||
// Fold the middle product (v1) into the low (v0) and high (v2) parts,
// then two reduction steps using v7 / the raw-opcode multiply by v8.
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 16,0,6
|
||||
|
||||
|
||||
// Same half-swap / split as above, now for the value in v16.
vsldoi 17,16,16,8
|
||||
vsldoi 19,4,17,8
|
||||
vsldoi 18,17,4,8
|
||||
|
||||
|
||||
// Second group of table stores (offsets 0x40-0x60 set above).
.long 0x7E681F99
|
||||
li 8,0x70
|
||||
.long 0x7E291F99
|
||||
li 9,0x80
|
||||
.long 0x7E4A1F99
|
||||
li 10,0x90
|
||||
.long 0x10039CC8
|
||||
.long 0x11B09CC8
|
||||
.long 0x10238CC8
|
||||
.long 0x11D08CC8
|
||||
.long 0x104394C8
|
||||
.long 0x11F094C8
|
||||
|
||||
|
||||
.long 0x10E044C8
|
||||
.long 0x114D44C8
|
||||
|
||||
|
||||
// Two independent fold-and-reduce chains are interleaved from here on:
// one on v0/v1/v2 (temps v5-v7) and one on v13/v14/v15 (temps v9-v11).
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vsldoi 11,14,4,8
|
||||
vsldoi 9,4,14,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
vxor 13,13,11
|
||||
vxor 15,15,9
|
||||
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vsldoi 13,13,13,8
|
||||
vxor 0,0,7
|
||||
vxor 13,13,10
|
||||
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
vsldoi 9,13,13,8
|
||||
.long 0x100044C8
|
||||
.long 0x11AD44C8
|
||||
vxor 6,6,2
|
||||
vxor 9,9,15
|
||||
vxor 0,0,6
|
||||
vxor 13,13,9
|
||||
|
||||
|
||||
vsldoi 9,0,0,8
|
||||
vsldoi 17,13,13,8
|
||||
vsldoi 11,4,9,8
|
||||
vsldoi 10,9,4,8
|
||||
vsldoi 19,4,17,8
|
||||
vsldoi 18,17,4,8
|
||||
|
||||
|
||||
// Final two groups of table stores (offsets 0x70-0x90, then 0xa0-0xc0).
.long 0x7D681F99
|
||||
li 8,0xa0
|
||||
.long 0x7D291F99
|
||||
li 9,0xb0
|
||||
.long 0x7D4A1F99
|
||||
li 10,0xc0
|
||||
.long 0x7E681F99
|
||||
.long 0x7E291F99
|
||||
.long 0x7E4A1F99
|
||||
|
||||
|
||||
// "or 12,12,12" copies r12 onto itself; then return.
or 12,12,12
|
||||
blr
|
||||
// Data words following the return.
// NOTE(review): presumably a traceback-table style trailer - confirm.
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size gcm_init_p8,.-gcm_init_p8
|
||||
// gcm_gmult_p8: global function (ELFv2, local entry offset 0).
// Loads one 16-byte value through r3, loads four table entries through r4
// (offsets 0x00, 0x10, 0x20, 0x30), performs one multiply / fold / reduce
// sequence and stores the 16-byte result back through r3.
// NOTE(review): presumably r3 = GHASH accumulator Xi and r4 = key table
// produced by gcm_init_p8 - confirm against the C prototype.
// NOTE(review): the .long words are raw opcodes; by encoding
// 0x7C601E99 / 0x7Dxx2699 appear to be lxvd2x loads, 0x7C001F99 an
// stxvd2x store to 0(r3) and 0x1xxxxxC8 vpmsumd - confirm against the
// generating Perl script.
// No stack frame is created; r0, r8-r10, r12 and v0-v12 are written.
.globl gcm_gmult_p8
|
||||
.type gcm_gmult_p8,@function
|
||||
.align 5
|
||||
gcm_gmult_p8:
|
||||
.localentry gcm_gmult_p8,0
|
||||
|
||||
// Table offsets in r8/r9/r10; "or 0,0,0" leaves r0 unchanged.
lis 0,0xfff8
|
||||
li 8,0x10
|
||||
li 12,-1
|
||||
li 9,0x20
|
||||
or 0,0,0
|
||||
li 10,0x30
|
||||
.long 0x7C601E99
|
||||
|
||||
// Table loads interleaved with construction of a permute mask in v12:
// lvsl with a zero address yields bytes 0..15, which are then XORed with
// 0x07 in every byte. vperm with this mask reverses the bytes inside each
// 8-byte half of v3.
.long 0x7D682699
|
||||
lvsl 12,0,0
|
||||
.long 0x7D292699
|
||||
vspltisb 5,0x07
|
||||
.long 0x7D4A2699
|
||||
vxor 12,12,5
|
||||
.long 0x7D002699
|
||||
vperm 3,3,3,12
|
||||
// v4 = 0, used as the shift-in operand for vsldoi below.
vxor 4,4,4
|
||||
|
||||
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
|
||||
// Fold the middle product v1 into v0 (low) and v2 (high).
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
|
||||
// Two reduction steps; result accumulates in v0.
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 0,0,6
|
||||
|
||||
|
||||
// Undo the byte permutation and store the result (raw-opcode store).
vperm 0,0,0,12
|
||||
.long 0x7C001F99
|
||||
|
||||
|
||||
// "or 12,12,12" copies r12 onto itself; then return.
or 12,12,12
|
||||
blr
|
||||
// Data words following the return.
// NOTE(review): presumably a traceback-table style trailer - confirm.
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size gcm_gmult_p8,.-gcm_gmult_p8
|
||||
|
||||
// gcm_ghash_p8: global function (ELFv2, local entry offset 0).
// Registers as used by the visible code:
//   r3 - 16-byte value loaded at entry and stored back before each return
//   r4 - base of a table read at offsets 0x00 .. 0xc0
//   r5 - pointer that is advanced as 16-byte blocks are consumed
//   r6 - counter compared against 64 and decremented per block
// NOTE(review): presumably r3 = accumulator Xi, r4 = key table from
// gcm_init_p8, r5 = input, r6 = length in bytes (multiple of 16) -
// confirm against the C prototype; it is not visible here.
// NOTE(review): the .long words are raw opcodes; by encoding
// 0x7xxx2699 / 0x7xxx1E99 / 0x7xxx2E99 appear to be lxvd2x loads via
// r4 / r3 / r5, 0x7C001F99 an stxvd2x store to 0(r3), and 0x1xxxxxC8
// vpmsumd - confirm against the generating Perl script.
// Two code paths: when r6 < 64 a frameless path processes one or two
// blocks per step (.Loop_2x / .Lshort / .Leven); otherwise
// .Lgcm_ghash_p8_4x allocates a 256-byte frame, saves v20-v31 and
// processes four blocks per iteration.
.globl gcm_ghash_p8
|
||||
.type gcm_ghash_p8,@function
|
||||
.align 5
|
||||
gcm_ghash_p8:
|
||||
.localentry gcm_ghash_p8,0
|
||||
|
||||
// Table offsets in r8/r9/r10; "or 0,0,0" leaves r0 unchanged.
li 0,-4096
|
||||
li 8,0x10
|
||||
li 12,-1
|
||||
li 9,0x20
|
||||
or 0,0,0
|
||||
li 10,0x30
|
||||
.long 0x7C001E99
|
||||
|
||||
// Table loads interleaved with construction of the byte-permute mask in
// v12 (lvsl of address 0 gives bytes 0..15, XORed with 0x07 per byte).
.long 0x7D682699
|
||||
li 8,0x40
|
||||
lvsl 12,0,0
|
||||
.long 0x7D292699
|
||||
li 9,0x50
|
||||
vspltisb 5,0x07
|
||||
.long 0x7D4A2699
|
||||
li 10,0x60
|
||||
vxor 12,12,5
|
||||
.long 0x7D002699
|
||||
vperm 0,0,0,12
|
||||
// v4 = 0, used as the shift-in operand for vsldoi throughout.
vxor 4,4,4
|
||||
|
||||
|
||||
// r6 >= 64 (unsigned): take the four-blocks-per-iteration path.
cmpldi 6,64
|
||||
bge .Lgcm_ghash_p8_4x
|
||||
|
||||
|
||||
// Short path: fetch the first block, advance r5 by 16, count r6 down by
// 16 and XOR the block into the running value. If that was the only
// block (r6 became 0) go straight to the single multiply at .Lshort.
.long 0x7C602E99
|
||||
addi 5,5,16
|
||||
subic. 6,6,16
|
||||
vperm 3,3,3,12
|
||||
vxor 3,3,0
|
||||
beq .Lshort
|
||||
|
||||
|
||||
// More than one block: load three further table entries and set
// r9 = r5 + r6 as the end-of-input bound tested by cmpld below.
.long 0x7E682699
|
||||
li 8,16
|
||||
.long 0x7E292699
|
||||
add 9,5,6
|
||||
.long 0x7E4A2699
|
||||
|
||||
|
||||
|
||||
.align 5
|
||||
.Loop_2x:
|
||||
.long 0x7E002E99
|
||||
vperm 16,16,16,12
|
||||
|
||||
|
||||
// subic sets the carry from r6 - 32; subfe 0,0,0 turns that into an
// all-ones / zero mask in r0, which is ANDed with r6 and added to r5.
// This adjusts the pointer without a branch when fewer than 32 units
// remained.
subic 6,6,32
|
||||
.long 0x10039CC8
|
||||
.long 0x11B05CC8
|
||||
subfe 0,0,0
|
||||
.long 0x10238CC8
|
||||
.long 0x11D04CC8
|
||||
and 0,0,6
|
||||
.long 0x104394C8
|
||||
.long 0x11F054C8
|
||||
add 5,5,0
|
||||
|
||||
|
||||
// Combine the two partial products before folding and reducing.
vxor 0,0,13
|
||||
vxor 1,1,14
|
||||
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 2,2,15
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
// Next block is fetched from r8(r5) while the reduction finishes.
.long 0x7C682E99
|
||||
addi 5,5,32
|
||||
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vperm 3,3,3,12
|
||||
vxor 6,6,2
|
||||
vxor 3,3,6
|
||||
vxor 3,3,0
|
||||
// Loop while the end bound r9 is still above r5 (unsigned).
cmpld 9,5
|
||||
bgt .Loop_2x
|
||||
|
||||
|
||||
// r6 (low word) nonzero: the last reduction is already folded into v6,
// skip the extra multiply and finish at .Leven.
cmplwi 6,0
|
||||
bne .Leven
|
||||
|
||||
|
||||
// Single-block multiply / fold / reduce, same sequence as gcm_gmult_p8.
.Lshort:
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
|
||||
|
||||
// Common exit of the short path: final XOR, undo the byte permutation,
// store the result (raw-opcode store) and return.
.Leven:
|
||||
vxor 0,0,6
|
||||
vperm 0,0,0,12
|
||||
.long 0x7C001F99
|
||||
|
||||
|
||||
or 12,12,12
|
||||
blr
|
||||
// Data words following the return.
// NOTE(review): presumably a traceback-table style trailer - confirm.
.long 0
|
||||
.byte 0,12,0x14,0,0,0,4,0
|
||||
.long 0
|
||||
.align 5
|
||||
// Four-block path. Both labels mark the same address; only the .L form
// is referenced by the branch above.
.gcm_ghash_p8_4x:
|
||||
.Lgcm_ghash_p8_4x:
|
||||
// Allocate a 256-byte frame (stdu also stores the back chain) and save
// v20-v31 into it, alternating r10 / r11 as offsets that step by 32.
stdu 1,-256(1)
|
||||
li 10,63
|
||||
li 11,79
|
||||
stvx 20,10,1
|
||||
addi 10,10,32
|
||||
stvx 21,11,1
|
||||
addi 11,11,32
|
||||
stvx 22,10,1
|
||||
addi 10,10,32
|
||||
stvx 23,11,1
|
||||
addi 11,11,32
|
||||
stvx 24,10,1
|
||||
addi 10,10,32
|
||||
stvx 25,11,1
|
||||
addi 11,11,32
|
||||
stvx 26,10,1
|
||||
addi 10,10,32
|
||||
stvx 27,11,1
|
||||
addi 11,11,32
|
||||
stvx 28,10,1
|
||||
addi 10,10,32
|
||||
stvx 29,11,1
|
||||
addi 11,11,32
|
||||
stvx 30,10,1
|
||||
li 10,0x60
|
||||
stvx 31,11,1
|
||||
li 0,-1
|
||||
// The low word of r12 is stored at offset 252 of the new frame.
stw 12,252(1)
|
||||
or 0,0,0
|
||||
|
||||
|
||||
// v5 = lvsl of the constant offset held in r8; together with v6/v7 it
// builds the two permute masks v18 / v19 used to pair up input halves.
lvsl 5,0,8
|
||||
|
||||
|
||||
// Load the remaining table entries (offsets 0x50 .. 0xc0) into
// v17 and v23-v25 / v29-v31, resetting r8/r9/r10 to 0x10/0x20/0x30 after.
li 8,0x70
|
||||
.long 0x7E292699
|
||||
li 9,0x80
|
||||
vspltisb 6,8
|
||||
|
||||
|
||||
li 10,0x90
|
||||
.long 0x7EE82699
|
||||
li 8,0xa0
|
||||
.long 0x7F092699
|
||||
li 9,0xb0
|
||||
.long 0x7F2A2699
|
||||
li 10,0xc0
|
||||
.long 0x7FA82699
|
||||
li 8,0x10
|
||||
.long 0x7FC92699
|
||||
li 9,0x20
|
||||
.long 0x7FEA2699
|
||||
li 10,0x30
|
||||
|
||||
|
||||
vsldoi 7,4,6,8
|
||||
vaddubm 18,5,7
|
||||
vaddubm 19,6,18
|
||||
|
||||
|
||||
// r6 >>= 4: from here on r6 counts 16-byte blocks rather than bytes.
srdi 6,6,4
|
||||
|
||||
|
||||
// Prime the pipeline: fetch four blocks (offsets 0, r8, r9, r10 from
// r5), advance r5 by 64 and subtract 8 from the block count so that the
// blt / bge tests below see whether another full group of four remains.
.long 0x7C602E99
|
||||
.long 0x7E082E99
|
||||
subic. 6,6,8
|
||||
.long 0x7EC92E99
|
||||
.long 0x7F8A2E99
|
||||
addi 5,5,0x40
|
||||
vperm 3,3,3,12
|
||||
vperm 16,16,16,12
|
||||
vperm 22,22,22,12
|
||||
vperm 28,28,28,12
|
||||
|
||||
|
||||
// Fold the running value (v0) into the first block.
vxor 2,3,0
|
||||
|
||||
|
||||
.long 0x11B0BCC8
|
||||
.long 0x11D0C4C8
|
||||
.long 0x11F0CCC8
|
||||
|
||||
|
||||
vperm 11,17,9,18
|
||||
vperm 5,22,28,19
|
||||
vperm 10,17,9,19
|
||||
vperm 6,22,28,18
|
||||
.long 0x12B68CC8
|
||||
.long 0x12855CC8
|
||||
.long 0x137C4CC8
|
||||
.long 0x134654C8
|
||||
|
||||
|
||||
// Accumulate partial products for blocks 2-4 in v20 / v27 / v26.
vxor 21,21,14
|
||||
vxor 20,20,13
|
||||
vxor 27,27,21
|
||||
vxor 26,26,15
|
||||
|
||||
|
||||
// Fewer than four further blocks: finish in the tail code.
blt .Ltail_4x
|
||||
|
||||
|
||||
// Main loop: each pass fetches the next four blocks while multiplying,
// folding and reducing the previous four.
.Loop_4x:
|
||||
.long 0x7C602E99
|
||||
.long 0x7E082E99
|
||||
subic. 6,6,4
|
||||
.long 0x7EC92E99
|
||||
.long 0x7F8A2E99
|
||||
addi 5,5,0x40
|
||||
vperm 16,16,16,12
|
||||
vperm 22,22,22,12
|
||||
vperm 28,28,28,12
|
||||
vperm 3,3,3,12
|
||||
|
||||
|
||||
.long 0x1002ECC8
|
||||
.long 0x1022F4C8
|
||||
.long 0x1042FCC8
|
||||
.long 0x11B0BCC8
|
||||
.long 0x11D0C4C8
|
||||
.long 0x11F0CCC8
|
||||
|
||||
|
||||
vxor 0,0,20
|
||||
vxor 1,1,27
|
||||
vxor 2,2,26
|
||||
vperm 5,22,28,19
|
||||
vperm 6,22,28,18
|
||||
|
||||
|
||||
.long 0x10E044C8
|
||||
.long 0x12855CC8
|
||||
.long 0x134654C8
|
||||
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x12B68CC8
|
||||
.long 0x137C4CC8
|
||||
.long 0x100044C8
|
||||
|
||||
|
||||
vxor 20,20,13
|
||||
vxor 26,26,15
|
||||
vxor 2,2,3
|
||||
vxor 21,21,14
|
||||
vxor 2,2,6
|
||||
vxor 27,27,21
|
||||
vxor 2,2,0
|
||||
// Condition comes from the subic. at the top of the loop.
bge .Loop_4x
|
||||
|
||||
|
||||
// Drain: multiply / fold / reduce the group already in flight.
.Ltail_4x:
|
||||
.long 0x1002ECC8
|
||||
.long 0x1022F4C8
|
||||
.long 0x1042FCC8
|
||||
|
||||
|
||||
vxor 0,0,20
|
||||
vxor 1,1,27
|
||||
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 2,2,26
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 0,0,6
|
||||
|
||||
|
||||
// r6 += 4 gives the number of leftover blocks (0..3); zero means done.
addic. 6,6,4
|
||||
beq .Ldone_4x
|
||||
|
||||
|
||||
// Dispatch on 1, 2 or 3 leftover blocks. r6 is set to -4 so that the
// addic. above yields zero after the next pass through .Ltail_4x.
.long 0x7C602E99
|
||||
cmpldi 6,2
|
||||
li 6,-4
|
||||
blt .Lone
|
||||
.long 0x7E082E99
|
||||
beq .Ltwo
|
||||
|
||||
|
||||
// Three leftover blocks: use table entries v23-v25 in place of v29-v31.
.Lthree:
|
||||
.long 0x7EC92E99
|
||||
vperm 3,3,3,12
|
||||
vperm 16,16,16,12
|
||||
vperm 22,22,22,12
|
||||
|
||||
|
||||
vxor 2,3,0
|
||||
vor 29,23,23
|
||||
vor 30, 24, 24
|
||||
vor 31,25,25
|
||||
|
||||
|
||||
vperm 5,16,22,19
|
||||
vperm 6,16,22,18
|
||||
.long 0x12B08CC8
|
||||
.long 0x13764CC8
|
||||
.long 0x12855CC8
|
||||
.long 0x134654C8
|
||||
|
||||
|
||||
vxor 27,27,21
|
||||
b .Ltail_4x
|
||||
|
||||
|
||||
.align 4
|
||||
// Two leftover blocks: v29-v31 are rebuilt from v17.
.Ltwo:
|
||||
vperm 3,3,3,12
|
||||
vperm 16,16,16,12
|
||||
|
||||
|
||||
vxor 2,3,0
|
||||
vperm 5,4,16,19
|
||||
vperm 6,4,16,18
|
||||
|
||||
|
||||
vsldoi 29,4,17,8
|
||||
vor 30, 17, 17
|
||||
vsldoi 31,17,4,8
|
||||
|
||||
|
||||
.long 0x12855CC8
|
||||
.long 0x13704CC8
|
||||
.long 0x134654C8
|
||||
|
||||
|
||||
b .Ltail_4x
|
||||
|
||||
|
||||
.align 4
|
||||
// One leftover block: v29-v31 are rebuilt from v9 and the extra
// accumulators v20 / v27 / v26 are cleared.
.Lone:
|
||||
vperm 3,3,3,12
|
||||
|
||||
|
||||
vsldoi 29,4,9,8
|
||||
vor 30, 9, 9
|
||||
vsldoi 31,9,4,8
|
||||
|
||||
|
||||
vxor 2,3,0
|
||||
vxor 20,20,20
|
||||
vxor 27,27,27
|
||||
vxor 26,26,26
|
||||
|
||||
|
||||
b .Ltail_4x
|
||||
|
||||
|
||||
// Epilogue of the four-block path: undo the byte permutation, store the
// result (raw-opcode store), reload v20-v31 from the frame in the same
// order and offsets as they were saved, pop the 256-byte frame, return.
.Ldone_4x:
|
||||
vperm 0,0,0,12
|
||||
.long 0x7C001F99
|
||||
|
||||
|
||||
li 10,63
|
||||
li 11,79
|
||||
or 12,12,12
|
||||
lvx 20,10,1
|
||||
addi 10,10,32
|
||||
lvx 21,11,1
|
||||
addi 11,11,32
|
||||
lvx 22,10,1
|
||||
addi 10,10,32
|
||||
lvx 23,11,1
|
||||
addi 11,11,32
|
||||
lvx 24,10,1
|
||||
addi 10,10,32
|
||||
lvx 25,11,1
|
||||
addi 11,11,32
|
||||
lvx 26,10,1
|
||||
addi 10,10,32
|
||||
lvx 27,11,1
|
||||
addi 11,11,32
|
||||
lvx 28,10,1
|
||||
addi 10,10,32
|
||||
lvx 29,11,1
|
||||
addi 11,11,32
|
||||
lvx 30,10,1
|
||||
lvx 31,11,1
|
||||
addi 1,1,256
|
||||
blr
|
||||
// Data words following the return.
// NOTE(review): presumably a traceback-table style trailer - confirm.
.long 0
|
||||
.byte 0,12,0x04,0,0x80,0,4,0
|
||||
.long 0
|
||||
.size gcm_ghash_p8,.-gcm_ghash_p8
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM && __powerpc64__
|
||||
.section .note.GNU-stack,"",@progbits
|
1410
contrib/boringssl-cmake/linux-ppc64le/crypto/test/trampoline-ppc.S
Normal file
1410
contrib/boringssl-cmake/linux-ppc64le/crypto/test/trampoline-ppc.S
Normal file
File diff suppressed because it is too large
Load Diff
975
contrib/boringssl-cmake/linux-x86/crypto/chacha/chacha-x86.S
Normal file
975
contrib/boringssl-cmake/linux-x86/crypto/chacha/chacha-x86.S
Normal file
@ -0,0 +1,975 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
// ChaCha20_ctr32: 32-bit x86 entry point, AT&T syntax, arguments on the
// stack. After the four register pushes the arguments sit at
// 20/24/28/32/36(%esp).
// NOTE(review): presumably (out, in, len, key[8 dwords], counter[4
// dwords]) in that order - this matches how the slots are used below,
// but confirm against the C prototype.
// Behaviour visible in this block:
//   - returns at once when the third argument is zero;
//   - tests two bits of OPENSSL_ia32cap_P and, when both are set, jumps
//     to .Lssse3_shortcut, which is defined outside this function;
//   - otherwise runs a scalar loop that produces 64 bytes of keystream
//     per outer iteration, XORs it with the input and writes the output,
//     with a byte-wise tail for a final partial block.
// Saves and restores %ebp, %ebx, %esi, %edi; uses 132 bytes of stack
// locals on the scalar path.
.globl ChaCha20_ctr32
|
||||
.hidden ChaCha20_ctr32
|
||||
.type ChaCha20_ctr32,@function
|
||||
.align 16
|
||||
ChaCha20_ctr32:
|
||||
.L_ChaCha20_ctr32_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
// Nothing to do when the third argument (28(%esp)) is zero.
xorl %eax,%eax
|
||||
cmpl 28(%esp),%eax
|
||||
je .L000no_data
|
||||
// call/pop pair: leaves the run-time address of .Lpic_point in %eax so
// that OPENSSL_ia32cap_P can be addressed position-independently.
call .Lpic_point
|
||||
.Lpic_point:
|
||||
popl %eax
|
||||
leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
|
||||
// Stay on the scalar path unless bit 24 of the first capability word and
// bit 9 of the second are both set.
// NOTE(review): presumably the FXSR and SSSE3 feature bits - confirm.
testl $16777216,(%ebp)
|
||||
jz .L001x86
|
||||
testl $512,4(%ebp)
|
||||
jz .L001x86
|
||||
jmp .Lssse3_shortcut
|
||||
// Scalar path setup: %esi / %edi = fourth / fifth argument, then reserve
// 132 bytes. Eight dwords from (%esi) are copied to 80..108(%esp) and
// four dwords from (%edi) to 112..124(%esp).
.L001x86:
|
||||
movl 32(%esp),%esi
|
||||
movl 36(%esp),%edi
|
||||
subl $132,%esp
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
movl 8(%esi),%ecx
|
||||
movl 12(%esi),%edx
|
||||
movl %eax,80(%esp)
|
||||
movl %ebx,84(%esp)
|
||||
movl %ecx,88(%esp)
|
||||
movl %edx,92(%esp)
|
||||
movl 16(%esi),%eax
|
||||
movl 20(%esi),%ebx
|
||||
movl 24(%esi),%ecx
|
||||
movl 28(%esi),%edx
|
||||
movl %eax,96(%esp)
|
||||
movl %ebx,100(%esp)
|
||||
movl %ecx,104(%esp)
|
||||
movl %edx,108(%esp)
|
||||
movl (%edi),%eax
|
||||
movl 4(%edi),%ebx
|
||||
movl 8(%edi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
// The first dword from (%edi) is stored minus one because .L002entry
// adds one to it at the start of every block, including the first.
subl $1,%eax
|
||||
movl %eax,112(%esp)
|
||||
movl %ebx,116(%esp)
|
||||
movl %ecx,120(%esp)
|
||||
movl %edx,124(%esp)
|
||||
jmp .L002entry
|
||||
.align 16
|
||||
// Re-entry for each further 64-byte block: write the advanced pointers
// and remaining length back to the argument slots, which are at
// 152/156/160(%esp) after the 132-byte adjustment.
.L003outer_loop:
|
||||
movl %ebx,156(%esp)
|
||||
movl %eax,152(%esp)
|
||||
movl %ecx,160(%esp)
|
||||
// Build the working state: word 0 in %eax, the rest at 4..60(%esp).
// The four immediates are 0x61707865, 0x3320646e, 0x79622d32 and
// 0x6b206574, i.e. the ASCII text "expand 32-byte k" as little-endian
// dwords. Words 4-11 come from 80..108(%esp), words 12-15 from
// 112..124(%esp).
.L002entry:
|
||||
movl $1634760805,%eax
|
||||
movl $857760878,4(%esp)
|
||||
movl $2036477234,8(%esp)
|
||||
movl $1797285236,12(%esp)
|
||||
movl 84(%esp),%ebx
|
||||
movl 88(%esp),%ebp
|
||||
movl 104(%esp),%ecx
|
||||
movl 108(%esp),%esi
|
||||
movl 116(%esp),%edx
|
||||
movl 120(%esp),%edi
|
||||
movl %ebx,20(%esp)
|
||||
movl %ebp,24(%esp)
|
||||
movl %ecx,40(%esp)
|
||||
movl %esi,44(%esp)
|
||||
movl %edx,52(%esp)
|
||||
movl %edi,56(%esp)
|
||||
movl 92(%esp),%ebx
|
||||
movl 124(%esp),%edi
|
||||
movl 112(%esp),%edx
|
||||
movl 80(%esp),%ebp
|
||||
movl 96(%esp),%ecx
|
||||
movl 100(%esp),%esi
|
||||
// Increment the per-block counter word and save it for the next block.
addl $1,%edx
|
||||
movl %ebx,28(%esp)
|
||||
movl %edi,60(%esp)
|
||||
movl %edx,112(%esp)
|
||||
// Ten passes through .L004loop; the pass counter is parked at
// 128(%esp) because %ebx is reused as a working register.
movl $10,%ebx
|
||||
jmp .L004loop
|
||||
.align 16
|
||||
// Loop body: eight add / xor / rotate-left groups using rotate counts
// 16, 12, 8 and 7, with state words shuffled between registers and the
// stack slots 0..60(%esp).
// NOTE(review): presumably four column and four diagonal quarter-rounds
// (one ChaCha double round per pass, 20 rounds in total) - confirm.
.L004loop:
|
||||
addl %ebp,%eax
|
||||
movl %ebx,128(%esp)
|
||||
movl %ebp,%ebx
|
||||
xorl %eax,%edx
|
||||
roll $16,%edx
|
||||
addl %edx,%ecx
|
||||
xorl %ecx,%ebx
|
||||
movl 52(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 20(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,(%esp)
|
||||
roll $8,%edx
|
||||
movl 4(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,48(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
movl %ecx,32(%esp)
|
||||
roll $16,%edi
|
||||
movl %ebx,16(%esp)
|
||||
addl %edi,%esi
|
||||
movl 40(%esp),%ecx
|
||||
xorl %esi,%ebp
|
||||
movl 56(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 24(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,4(%esp)
|
||||
roll $8,%edi
|
||||
movl 8(%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,52(%esp)
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
movl %esi,36(%esp)
|
||||
roll $16,%edx
|
||||
movl %ebp,20(%esp)
|
||||
addl %edx,%ecx
|
||||
movl 44(%esp),%esi
|
||||
xorl %ecx,%ebx
|
||||
movl 60(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 28(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,8(%esp)
|
||||
roll $8,%edx
|
||||
movl 12(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,56(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
roll $16,%edi
|
||||
movl %ebx,24(%esp)
|
||||
addl %edi,%esi
|
||||
xorl %esi,%ebp
|
||||
roll $12,%ebp
|
||||
movl 20(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,12(%esp)
|
||||
roll $8,%edi
|
||||
movl (%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,%edx
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
roll $16,%edx
|
||||
movl %ebp,28(%esp)
|
||||
addl %edx,%ecx
|
||||
xorl %ecx,%ebx
|
||||
movl 48(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 24(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,(%esp)
|
||||
roll $8,%edx
|
||||
movl 4(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,60(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
movl %ecx,40(%esp)
|
||||
roll $16,%edi
|
||||
movl %ebx,20(%esp)
|
||||
addl %edi,%esi
|
||||
movl 32(%esp),%ecx
|
||||
xorl %esi,%ebp
|
||||
movl 52(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 28(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,4(%esp)
|
||||
roll $8,%edi
|
||||
movl 8(%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,48(%esp)
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
movl %esi,44(%esp)
|
||||
roll $16,%edx
|
||||
movl %ebp,24(%esp)
|
||||
addl %edx,%ecx
|
||||
movl 36(%esp),%esi
|
||||
xorl %ecx,%ebx
|
||||
movl 56(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 16(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,8(%esp)
|
||||
roll $8,%edx
|
||||
movl 12(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,52(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
roll $16,%edi
|
||||
movl %ebx,28(%esp)
|
||||
addl %edi,%esi
|
||||
xorl %esi,%ebp
|
||||
movl 48(%esp),%edx
|
||||
roll $12,%ebp
|
||||
// Reload the pass counter saved at the top of the loop body.
movl 128(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,12(%esp)
|
||||
roll $8,%edi
|
||||
movl (%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,56(%esp)
|
||||
xorl %esi,%ebp
|
||||
roll $7,%ebp
|
||||
decl %ebx
|
||||
jnz .L004loop
|
||||
// Rounds done. Start adding the initial state back into the working
// state, and pick the path: %ebx = remaining length from 160(%esp);
// below 64 (unsigned) goes to the partial-block tail.
movl 160(%esp),%ebx
|
||||
addl $1634760805,%eax
|
||||
addl 80(%esp),%ebp
|
||||
addl 96(%esp),%ecx
|
||||
addl 100(%esp),%esi
|
||||
cmpl $64,%ebx
|
||||
jb .L005tail
|
||||
// Full block: %ebx = input pointer (156(%esp)), %eax = output pointer
// (152(%esp)). Each keystream word is finished by adding the matching
// initial-state word, XORed with the input dword at the same offset and
// stored to the output. Word 0 is parked at (%esp) while %eax is used as
// the output pointer and is written last.
movl 156(%esp),%ebx
|
||||
addl 112(%esp),%edx
|
||||
addl 120(%esp),%edi
|
||||
xorl (%ebx),%eax
|
||||
xorl 16(%ebx),%ebp
|
||||
movl %eax,(%esp)
|
||||
movl 152(%esp),%eax
|
||||
xorl 32(%ebx),%ecx
|
||||
xorl 36(%ebx),%esi
|
||||
xorl 48(%ebx),%edx
|
||||
xorl 56(%ebx),%edi
|
||||
movl %ebp,16(%eax)
|
||||
movl %ecx,32(%eax)
|
||||
movl %esi,36(%eax)
|
||||
movl %edx,48(%eax)
|
||||
movl %edi,56(%eax)
|
||||
movl 4(%esp),%ebp
|
||||
movl 8(%esp),%ecx
|
||||
movl 12(%esp),%esi
|
||||
movl 20(%esp),%edx
|
||||
movl 24(%esp),%edi
|
||||
addl $857760878,%ebp
|
||||
addl $2036477234,%ecx
|
||||
addl $1797285236,%esi
|
||||
addl 84(%esp),%edx
|
||||
addl 88(%esp),%edi
|
||||
xorl 4(%ebx),%ebp
|
||||
xorl 8(%ebx),%ecx
|
||||
xorl 12(%ebx),%esi
|
||||
xorl 20(%ebx),%edx
|
||||
xorl 24(%ebx),%edi
|
||||
movl %ebp,4(%eax)
|
||||
movl %ecx,8(%eax)
|
||||
movl %esi,12(%eax)
|
||||
movl %edx,20(%eax)
|
||||
movl %edi,24(%eax)
|
||||
movl 28(%esp),%ebp
|
||||
movl 40(%esp),%ecx
|
||||
movl 44(%esp),%esi
|
||||
movl 52(%esp),%edx
|
||||
movl 60(%esp),%edi
|
||||
addl 92(%esp),%ebp
|
||||
addl 104(%esp),%ecx
|
||||
addl 108(%esp),%esi
|
||||
addl 116(%esp),%edx
|
||||
addl 124(%esp),%edi
|
||||
xorl 28(%ebx),%ebp
|
||||
xorl 40(%ebx),%ecx
|
||||
xorl 44(%ebx),%esi
|
||||
xorl 52(%ebx),%edx
|
||||
xorl 60(%ebx),%edi
|
||||
// Advance input by 64, store the last words, advance output by 64,
// subtract 64 from the remaining length and loop while it is nonzero.
leal 64(%ebx),%ebx
|
||||
movl %ebp,28(%eax)
|
||||
movl (%esp),%ebp
|
||||
movl %ecx,40(%eax)
|
||||
movl 160(%esp),%ecx
|
||||
movl %esi,44(%eax)
|
||||
movl %edx,52(%eax)
|
||||
movl %edi,60(%eax)
|
||||
movl %ebp,(%eax)
|
||||
leal 64(%eax),%eax
|
||||
subl $64,%ecx
|
||||
jnz .L003outer_loop
|
||||
jmp .L006done
|
||||
// Partial final block (%ebx = 1..63 bytes left): finish the keystream
// block in place at 0..60(%esp) by adding the initial-state words, then
// XOR it into the data one byte at a time.
.L005tail:
|
||||
addl 112(%esp),%edx
|
||||
addl 120(%esp),%edi
|
||||
movl %eax,(%esp)
|
||||
movl %ebp,16(%esp)
|
||||
movl %ecx,32(%esp)
|
||||
movl %esi,36(%esp)
|
||||
movl %edx,48(%esp)
|
||||
movl %edi,56(%esp)
|
||||
movl 4(%esp),%ebp
|
||||
movl 8(%esp),%ecx
|
||||
movl 12(%esp),%esi
|
||||
movl 20(%esp),%edx
|
||||
movl 24(%esp),%edi
|
||||
addl $857760878,%ebp
|
||||
addl $2036477234,%ecx
|
||||
addl $1797285236,%esi
|
||||
addl 84(%esp),%edx
|
||||
addl 88(%esp),%edi
|
||||
movl %ebp,4(%esp)
|
||||
movl %ecx,8(%esp)
|
||||
movl %esi,12(%esp)
|
||||
movl %edx,20(%esp)
|
||||
movl %edi,24(%esp)
|
||||
movl 28(%esp),%ebp
|
||||
movl 40(%esp),%ecx
|
||||
movl 44(%esp),%esi
|
||||
movl 52(%esp),%edx
|
||||
movl 60(%esp),%edi
|
||||
addl 92(%esp),%ebp
|
||||
addl 104(%esp),%ecx
|
||||
addl 108(%esp),%esi
|
||||
addl 116(%esp),%edx
|
||||
addl 124(%esp),%edi
|
||||
// From here %ebp = input pointer, %ecx = output pointer, %esi = byte
// index starting at zero; %eax / %edx are cleared for byte-sized use.
movl %ebp,28(%esp)
|
||||
movl 156(%esp),%ebp
|
||||
movl %ecx,40(%esp)
|
||||
movl 152(%esp),%ecx
|
||||
movl %esi,44(%esp)
|
||||
xorl %esi,%esi
|
||||
movl %edx,52(%esp)
|
||||
movl %edi,60(%esp)
|
||||
xorl %eax,%eax
|
||||
xorl %edx,%edx
|
||||
// out[i] = in[i] ^ keystream[i]; the store uses index minus one because
// %esi is incremented (via leal, which leaves flags alone) before it.
.L007tail_loop:
|
||||
movb (%esi,%ebp,1),%al
|
||||
movb (%esp,%esi,1),%dl
|
||||
leal 1(%esi),%esi
|
||||
xorb %dl,%al
|
||||
movb %al,-1(%ecx,%esi,1)
|
||||
decl %ebx
|
||||
jnz .L007tail_loop
|
||||
// Release the 132 bytes of locals (scalar path only).
.L006done:
|
||||
addl $132,%esp
|
||||
// Common exit: restore the saved registers in reverse push order.
.L000no_data:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
|
||||
.globl ChaCha20_ssse3
|
||||
.hidden ChaCha20_ssse3
|
||||
.type ChaCha20_ssse3,@function
|
||||
.align 16
|
||||
ChaCha20_ssse3:
|
||||
.L_ChaCha20_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
.Lssse3_shortcut:
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%ecx
|
||||
movl 32(%esp),%edx
|
||||
movl 36(%esp),%ebx
|
||||
movl %esp,%ebp
|
||||
subl $524,%esp
|
||||
andl $-64,%esp
|
||||
movl %ebp,512(%esp)
|
||||
leal .Lssse3_data-.Lpic_point(%eax),%eax
|
||||
movdqu (%ebx),%xmm3
|
||||
cmpl $256,%ecx
|
||||
jb .L0081x
|
||||
movl %edx,516(%esp)
|
||||
movl %ebx,520(%esp)
|
||||
subl $256,%ecx
|
||||
leal 384(%esp),%ebp
|
||||
movdqu (%edx),%xmm7
|
||||
pshufd $0,%xmm3,%xmm0
|
||||
pshufd $85,%xmm3,%xmm1
|
||||
pshufd $170,%xmm3,%xmm2
|
||||
pshufd $255,%xmm3,%xmm3
|
||||
paddd 48(%eax),%xmm0
|
||||
pshufd $0,%xmm7,%xmm4
|
||||
pshufd $85,%xmm7,%xmm5
|
||||
psubd 64(%eax),%xmm0
|
||||
pshufd $170,%xmm7,%xmm6
|
||||
pshufd $255,%xmm7,%xmm7
|
||||
movdqa %xmm0,64(%ebp)
|
||||
movdqa %xmm1,80(%ebp)
|
||||
movdqa %xmm2,96(%ebp)
|
||||
movdqa %xmm3,112(%ebp)
|
||||
movdqu 16(%edx),%xmm3
|
||||
movdqa %xmm4,-64(%ebp)
|
||||
movdqa %xmm5,-48(%ebp)
|
||||
movdqa %xmm6,-32(%ebp)
|
||||
movdqa %xmm7,-16(%ebp)
|
||||
movdqa 32(%eax),%xmm7
|
||||
leal 128(%esp),%ebx
|
||||
pshufd $0,%xmm3,%xmm0
|
||||
pshufd $85,%xmm3,%xmm1
|
||||
pshufd $170,%xmm3,%xmm2
|
||||
pshufd $255,%xmm3,%xmm3
|
||||
pshufd $0,%xmm7,%xmm4
|
||||
pshufd $85,%xmm7,%xmm5
|
||||
pshufd $170,%xmm7,%xmm6
|
||||
pshufd $255,%xmm7,%xmm7
|
||||
movdqa %xmm0,(%ebp)
|
||||
movdqa %xmm1,16(%ebp)
|
||||
movdqa %xmm2,32(%ebp)
|
||||
movdqa %xmm3,48(%ebp)
|
||||
movdqa %xmm4,-128(%ebp)
|
||||
movdqa %xmm5,-112(%ebp)
|
||||
movdqa %xmm6,-96(%ebp)
|
||||
movdqa %xmm7,-80(%ebp)
|
||||
leal 128(%esi),%esi
|
||||
leal 128(%edi),%edi
|
||||
jmp .L009outer_loop
|
||||
.align 16
|
||||
.L009outer_loop:
|
||||
movdqa -112(%ebp),%xmm1
|
||||
movdqa -96(%ebp),%xmm2
|
||||
movdqa -80(%ebp),%xmm3
|
||||
movdqa -48(%ebp),%xmm5
|
||||
movdqa -32(%ebp),%xmm6
|
||||
movdqa -16(%ebp),%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
movdqa %xmm2,-96(%ebx)
|
||||
movdqa %xmm3,-80(%ebx)
|
||||
movdqa %xmm5,-48(%ebx)
|
||||
movdqa %xmm6,-32(%ebx)
|
||||
movdqa %xmm7,-16(%ebx)
|
||||
movdqa 32(%ebp),%xmm2
|
||||
movdqa 48(%ebp),%xmm3
|
||||
movdqa 64(%ebp),%xmm4
|
||||
movdqa 80(%ebp),%xmm5
|
||||
movdqa 96(%ebp),%xmm6
|
||||
movdqa 112(%ebp),%xmm7
|
||||
paddd 64(%eax),%xmm4
|
||||
movdqa %xmm2,32(%ebx)
|
||||
movdqa %xmm3,48(%ebx)
|
||||
movdqa %xmm4,64(%ebx)
|
||||
movdqa %xmm5,80(%ebx)
|
||||
movdqa %xmm6,96(%ebx)
|
||||
movdqa %xmm7,112(%ebx)
|
||||
movdqa %xmm4,64(%ebp)
|
||||
movdqa -128(%ebp),%xmm0
|
||||
movdqa %xmm4,%xmm6
|
||||
movdqa -64(%ebp),%xmm3
|
||||
movdqa (%ebp),%xmm4
|
||||
movdqa 16(%ebp),%xmm5
|
||||
movl $10,%edx
|
||||
nop
|
||||
.align 16
|
||||
.L010loop:
|
||||
paddd %xmm3,%xmm0
|
||||
movdqa %xmm3,%xmm2
|
||||
pxor %xmm0,%xmm6
|
||||
pshufb (%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -48(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -112(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 80(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-128(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,64(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
movdqa %xmm4,(%ebx)
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-64(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa 32(%ebx),%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -32(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -96(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 96(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,80(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
movdqa %xmm5,16(%ebx)
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-48(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa 48(%ebx),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -16(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -80(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 112(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-96(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,96(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-32(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -48(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -128(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-80(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,%xmm6
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-16(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -32(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -112(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 64(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-128(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,112(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
movdqa %xmm4,32(%ebx)
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-48(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa (%ebx),%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -16(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -96(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 80(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,64(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
movdqa %xmm5,48(%ebx)
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-32(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa 16(%ebx),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -64(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -80(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 96(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-96(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,80(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-16(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -128(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 64(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-80(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,96(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
por %xmm1,%xmm3
|
||||
decl %edx
|
||||
jnz .L010loop
|
||||
movdqa %xmm3,-64(%ebx)
|
||||
movdqa %xmm4,(%ebx)
|
||||
movdqa %xmm5,16(%ebx)
|
||||
movdqa %xmm6,64(%ebx)
|
||||
movdqa %xmm7,96(%ebx)
|
||||
movdqa -112(%ebx),%xmm1
|
||||
movdqa -96(%ebx),%xmm2
|
||||
movdqa -80(%ebx),%xmm3
|
||||
paddd -128(%ebp),%xmm0
|
||||
paddd -112(%ebp),%xmm1
|
||||
paddd -96(%ebp),%xmm2
|
||||
paddd -80(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa -64(%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa -48(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa -32(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa -16(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd -64(%ebp),%xmm0
|
||||
paddd -48(%ebp),%xmm1
|
||||
paddd -32(%ebp),%xmm2
|
||||
paddd -16(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa (%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa 16(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa 32(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa 48(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd (%ebp),%xmm0
|
||||
paddd 16(%ebp),%xmm1
|
||||
paddd 32(%ebp),%xmm2
|
||||
paddd 48(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa 64(%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa 80(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa 96(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa 112(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd 64(%ebp),%xmm0
|
||||
paddd 80(%ebp),%xmm1
|
||||
paddd 96(%ebp),%xmm2
|
||||
paddd 112(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 208(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
pxor %xmm1,%xmm5
|
||||
pxor %xmm2,%xmm6
|
||||
pxor %xmm3,%xmm7
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 208(%edi),%edi
|
||||
subl $256,%ecx
|
||||
jnc .L009outer_loop
|
||||
addl $256,%ecx
|
||||
jz .L011done
|
||||
movl 520(%esp),%ebx
|
||||
leal -128(%esi),%esi
|
||||
movl 516(%esp),%edx
|
||||
leal -128(%edi),%edi
|
||||
movd 64(%ebp),%xmm2
|
||||
movdqu (%ebx),%xmm3
|
||||
paddd 96(%eax),%xmm2
|
||||
pand 112(%eax),%xmm3
|
||||
por %xmm2,%xmm3
|
||||
.L0081x:
|
||||
movdqa 32(%eax),%xmm0
|
||||
movdqu (%edx),%xmm1
|
||||
movdqu 16(%edx),%xmm2
|
||||
movdqa (%eax),%xmm6
|
||||
movdqa 16(%eax),%xmm7
|
||||
movl %ebp,48(%esp)
|
||||
movdqa %xmm0,(%esp)
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm2,32(%esp)
|
||||
movdqa %xmm3,48(%esp)
|
||||
movl $10,%edx
|
||||
jmp .L012loop1x
|
||||
.align 16
|
||||
.L013outer1x:
|
||||
movdqa 80(%eax),%xmm3
|
||||
movdqa (%esp),%xmm0
|
||||
movdqa 16(%esp),%xmm1
|
||||
movdqa 32(%esp),%xmm2
|
||||
paddd 48(%esp),%xmm3
|
||||
movl $10,%edx
|
||||
movdqa %xmm3,48(%esp)
|
||||
jmp .L012loop1x
|
||||
.align 16
|
||||
.L012loop1x:
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,222
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $20,%xmm1
|
||||
pslld $12,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,223
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $25,%xmm1
|
||||
pslld $7,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $57,%xmm1,%xmm1
|
||||
pshufd $147,%xmm3,%xmm3
|
||||
nop
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,222
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $20,%xmm1
|
||||
pslld $12,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,223
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $25,%xmm1
|
||||
pslld $7,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $147,%xmm1,%xmm1
|
||||
pshufd $57,%xmm3,%xmm3
|
||||
decl %edx
|
||||
jnz .L012loop1x
|
||||
paddd (%esp),%xmm0
|
||||
paddd 16(%esp),%xmm1
|
||||
paddd 32(%esp),%xmm2
|
||||
paddd 48(%esp),%xmm3
|
||||
cmpl $64,%ecx
|
||||
jb .L014tail
|
||||
movdqu (%esi),%xmm4
|
||||
movdqu 16(%esi),%xmm5
|
||||
pxor %xmm4,%xmm0
|
||||
movdqu 32(%esi),%xmm4
|
||||
pxor %xmm5,%xmm1
|
||||
movdqu 48(%esi),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
pxor %xmm5,%xmm3
|
||||
leal 64(%esi),%esi
|
||||
movdqu %xmm0,(%edi)
|
||||
movdqu %xmm1,16(%edi)
|
||||
movdqu %xmm2,32(%edi)
|
||||
movdqu %xmm3,48(%edi)
|
||||
leal 64(%edi),%edi
|
||||
subl $64,%ecx
|
||||
jnz .L013outer1x
|
||||
jmp .L011done
|
||||
.L014tail:
|
||||
movdqa %xmm0,(%esp)
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm2,32(%esp)
|
||||
movdqa %xmm3,48(%esp)
|
||||
xorl %eax,%eax
|
||||
xorl %edx,%edx
|
||||
xorl %ebp,%ebp
|
||||
.L015tail_loop:
|
||||
movb (%esp,%ebp,1),%al
|
||||
movb (%esi,%ebp,1),%dl
|
||||
leal 1(%ebp),%ebp
|
||||
xorb %dl,%al
|
||||
movb %al,-1(%edi,%ebp,1)
|
||||
decl %ecx
|
||||
jnz .L015tail_loop
|
||||
.L011done:
|
||||
movl 512(%esp),%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
|
||||
# Constant pool placed after ChaCha20_ssse3, aligned to 64 bytes.
# The +N figures are byte offsets from .Lssse3_data.  The usage notes are
# taken from the 1x code path, which addresses these rows through eax;
# that eax holds .Lssse3_data there is set up out of view -- TODO confirm.
.align	64
.Lssse3_data:
	# +0   pshufb control: rotates every 32-bit lane by 16 bits
	.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
	# +16  pshufb control: rotates every 32-bit lane left by 8 bits
	.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
	# +32  four words whose little-endian bytes are the ASCII text
	#      expand 32-byte k
	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574
	# +48  per-lane values 0..3
	.long	0,1,2,3
	# +64  per-lane value 4
	.long	4,4,4,4
	# +80  value 1 in lane 0 only (added to the saved fourth state row
	#      before each further 64-byte block in the 1x loop)
	.long	1,0,0,0
	# +96  value 4 in lane 0 only
	.long	4,0,0,0
	# +112 mask that clears lane 0 and keeps lanes 1..3
	.long	0,0xffffffff,0xffffffff,0xffffffff
.align	64
	# NUL-terminated attribution string embedded in the object file.
	.asciz	"ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
2513
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/aesni-x86.S
Normal file
2513
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/aesni-x86.S
Normal file
File diff suppressed because it is too large
Load Diff
997
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/bn-586.S
Normal file
997
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/bn-586.S
Normal file
@ -0,0 +1,997 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
# bn_mul_add_words -- multiply a word array by one word and accumulate.
#
# For each of the num 32-bit words, low to high:
#     t      = dst[i] + src[i] * w + carry      (64-bit intermediate)
#     dst[i] = low 32 bits of t
#     carry  = high 32 bits of t
# The final carry is returned in eax.
#
# Stack arguments on entry (cdecl, offsets before any push):
#     4(%esp)   dst  word array, read and updated in place
#     8(%esp)   src  word array, read only
#     12(%esp)  num  number of words
#     16(%esp)  w    32-bit multiplier
# Clobbers: ecx, edx, flags; mm0-mm7 on the SSE2 path (emms is executed
#           before returning).  ebx, esi, edi and ebp are preserved.
#
# Bit 26 of the first dword of OPENSSL_ia32cap_P selects the path
# (presumably the CPUID SSE2 feature flag -- confirm against the
# definition of the capability vector):
#     set   -> pmuludq based path, eight words per unrolled iteration
#     clear -> mull based scalar path, eight words per unrolled iteration
#
# Change relative to the generated original: with num == 0 the SSE2 path
# used to fall into its bottom-tested one-word loop, touch dst[0]/src[0]
# and then keep running because the counter wrapped below zero.  The
# scalar path already returned 0 for num == 0; the SSE2 path now does
# too.  The same guard belongs in the upstream perlasm generator.

# One scalar round for the word at byte offset \off.
# ebx = src, edi = dst, ebp = w, esi = carry in/out; eax and edx scratch.
.macro	MAW_SCALAR_WORD off
	movl	\off(%ebx),%eax
	mull	%ebp
	addl	%esi,%eax
	adcl	$0,%edx
	addl	\off(%edi),%eax
	adcl	$0,%edx
	movl	%eax,\off(%edi)
	movl	%edx,%esi
.endm

# Same round for the tail: also counts ecx down and leaves once it hits 0.
# The moves after decl do not touch flags, so jz still sees the decl result.
.macro	MAW_SCALAR_TAIL_WORD off
	movl	\off(%ebx),%eax
	mull	%ebp
	addl	%esi,%eax
	adcl	$0,%edx
	addl	\off(%edi),%eax
	adcl	$0,%edx
	decl	%ecx
	movl	%eax,\off(%edi)
	movl	%edx,%esi
	jz	.Lmaw_scalar_done
.endm

.globl	bn_mul_add_words
.hidden	bn_mul_add_words
.type	bn_mul_add_words,@function
.align	16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
	# call/pop yields the current address so that OPENSSL_ia32cap_P can
	# be reached position-independently.
	call	.Lmaw_pic_anchor
.Lmaw_pic_anchor:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lmaw_pic_anchor(%eax),%eax
	btl	$26,(%eax)
	jnc	.Lmaw_scalar

	# ---- SSE2 path: eax = dst, edx = src, ecx = words left,
	# ---- mm0 = w, mm1 = running carry (64-bit accumulator).
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	movl	12(%esp),%ecx
	movd	16(%esp),%mm0
	pxor	%mm1,%mm1
	testl	%ecx,%ecx		# num == 0: nothing to do, carry is 0
	jz	.Lmaw_sse2_done
	jmp	.Lmaw_sse2_dispatch
.align	16
.Lmaw_sse2_by8:
	# Eight words per iteration.  Products and dst loads are interleaved
	# with the carry chain; the instruction order is kept exactly as
	# generated.
	movd	(%eax),%mm3
	paddq	%mm3,%mm1
	movd	(%edx),%mm2
	pmuludq	%mm0,%mm2
	movd	4(%edx),%mm4
	pmuludq	%mm0,%mm4
	movd	8(%edx),%mm6
	pmuludq	%mm0,%mm6
	movd	12(%edx),%mm7
	pmuludq	%mm0,%mm7
	paddq	%mm2,%mm1
	movd	4(%eax),%mm3
	paddq	%mm4,%mm3
	movd	8(%eax),%mm5
	paddq	%mm6,%mm5
	movd	12(%eax),%mm4
	paddq	%mm4,%mm7
	movd	%mm1,(%eax)
	movd	16(%edx),%mm2
	pmuludq	%mm0,%mm2
	psrlq	$32,%mm1
	movd	20(%edx),%mm4
	pmuludq	%mm0,%mm4
	paddq	%mm3,%mm1
	movd	24(%edx),%mm6
	pmuludq	%mm0,%mm6
	movd	%mm1,4(%eax)
	psrlq	$32,%mm1
	movd	28(%edx),%mm3
	addl	$32,%edx
	pmuludq	%mm0,%mm3
	paddq	%mm5,%mm1
	movd	16(%eax),%mm5
	paddq	%mm5,%mm2
	movd	%mm1,8(%eax)
	psrlq	$32,%mm1
	paddq	%mm7,%mm1
	movd	20(%eax),%mm5
	paddq	%mm5,%mm4
	movd	%mm1,12(%eax)
	psrlq	$32,%mm1
	paddq	%mm2,%mm1
	movd	24(%eax),%mm5
	paddq	%mm5,%mm6
	movd	%mm1,16(%eax)
	psrlq	$32,%mm1
	paddq	%mm4,%mm1
	movd	28(%eax),%mm5
	paddq	%mm5,%mm3
	movd	%mm1,20(%eax)
	psrlq	$32,%mm1
	paddq	%mm6,%mm1
	movd	%mm1,24(%eax)
	psrlq	$32,%mm1
	paddq	%mm3,%mm1
	movd	%mm1,28(%eax)
	leal	32(%eax),%eax
	psrlq	$32,%mm1
	subl	$8,%ecx
	jz	.Lmaw_sse2_done
.Lmaw_sse2_dispatch:
	# Invariant: ecx > 0 here.  Eight or more words left -> unrolled loop.
	testl	$0xfffffff8,%ecx
	jnz	.Lmaw_sse2_by8
.align	4
.Lmaw_sse2_by1:
	# One word per iteration for the remaining 1..7 words.
	movd	(%edx),%mm2
	movd	(%eax),%mm3
	pmuludq	%mm0,%mm2
	leal	4(%edx),%edx
	paddq	%mm3,%mm1
	paddq	%mm2,%mm1
	movd	%mm1,(%eax)
	subl	$1,%ecx
	psrlq	$32,%mm1		# keep only the carry; flags untouched
	leal	4(%eax),%eax		# lea keeps the flags from subl
	jnz	.Lmaw_sse2_by1
.Lmaw_sse2_done:
	movd	%mm1,%eax		# return the final carry
	emms
	ret

.align	16
.Lmaw_scalar:
	# ---- Scalar path: edi = dst, ebx = src, ebp = w, esi = carry,
	# ---- ecx = words left in the current phase.
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	xorl	%esi,%esi
	movl	20(%esp),%edi
	movl	28(%esp),%ecx
	movl	24(%esp),%ebx
	andl	$0xfffffff8,%ecx	# whole groups of eight words
	movl	32(%esp),%ebp
	pushl	%ecx			# extra slot; popped again at the exit
	jz	.Lmaw_scalar_tail	# flags still come from the andl above
.align	16
.Lmaw_scalar_by8:
	MAW_SCALAR_WORD 0
	MAW_SCALAR_WORD 4
	MAW_SCALAR_WORD 8
	MAW_SCALAR_WORD 12
	MAW_SCALAR_WORD 16
	MAW_SCALAR_WORD 20
	MAW_SCALAR_WORD 24
	MAW_SCALAR_WORD 28

	subl	$8,%ecx
	leal	32(%ebx),%ebx
	leal	32(%edi),%edi
	jnz	.Lmaw_scalar_by8
.Lmaw_scalar_tail:
	movl	32(%esp),%ecx		# num again (five pushes since entry)
	andl	$7,%ecx
	jz	.Lmaw_scalar_done

	# 1..7 leftover words; each tail round exits as soon as ecx hits 0.
	MAW_SCALAR_TAIL_WORD 0
	MAW_SCALAR_TAIL_WORD 4
	MAW_SCALAR_TAIL_WORD 8
	MAW_SCALAR_TAIL_WORD 12
	MAW_SCALAR_TAIL_WORD 16
	MAW_SCALAR_TAIL_WORD 20
	MAW_SCALAR_WORD 24		# seventh word needs no further test
.Lmaw_scalar_done:
	movl	%esi,%eax		# return the final carry
	popl	%ecx
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_add_words,.-.L_bn_mul_add_words_begin

.purgem	MAW_SCALAR_WORD
.purgem	MAW_SCALAR_TAIL_WORD
|
||||
# bn_mul_words -- multiply a word array by one word.
#
# For each of the num 32-bit words, low to high:
#     t      = src[i] * w + carry               (64-bit intermediate)
#     dst[i] = low 32 bits of t
#     carry  = high 32 bits of t
# The final carry is returned in eax.
#
# Stack arguments on entry (cdecl, offsets before any push):
#     4(%esp)   dst  word array, written
#     8(%esp)   src  word array, read only
#     12(%esp)  num  number of words
#     16(%esp)  w    32-bit multiplier
# Clobbers: ecx, edx, flags; mm0-mm2 on the SSE2 path (emms is executed
#           before returning).  ebx, esi, edi and ebp are preserved.
#
# Bit 26 of the first dword of OPENSSL_ia32cap_P selects the path
# (presumably the CPUID SSE2 feature flag -- confirm against the
# definition of the capability vector).
#
# Change relative to the generated original: the SSE2 loop is
# bottom-tested, so num == 0 used to store to dst[0] and then keep going
# because the counter wrapped below zero.  The scalar path already
# returned 0 for num == 0; the SSE2 path now does too.  The same guard
# belongs in the upstream perlasm generator.

# One scalar round for the word at byte offset \off.
# ebx = src, edi = dst, ecx = w, esi = carry in/out; eax and edx scratch.
.macro	MW_SCALAR_WORD off
	movl	\off(%ebx),%eax
	mull	%ecx
	addl	%esi,%eax
	adcl	$0,%edx
	movl	%eax,\off(%edi)
	movl	%edx,%esi
.endm

.globl	bn_mul_words
.hidden	bn_mul_words
.type	bn_mul_words,@function
.align	16
bn_mul_words:
.L_bn_mul_words_begin:
	# call/pop yields the current address so that OPENSSL_ia32cap_P can
	# be reached position-independently.
	call	.Lmw_pic_anchor
.Lmw_pic_anchor:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lmw_pic_anchor(%eax),%eax
	btl	$26,(%eax)
	jnc	.Lmw_scalar

	# ---- SSE2 path: eax = dst, edx = src, ecx = words left,
	# ---- mm0 = w, mm1 = running carry.
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	movl	12(%esp),%ecx
	movd	16(%esp),%mm0
	pxor	%mm1,%mm1
	testl	%ecx,%ecx		# num == 0: nothing to do, carry is 0
	jz	.Lmw_sse2_done
.align	16
.Lmw_sse2_by1:
	movd	(%edx),%mm2
	pmuludq	%mm0,%mm2
	leal	4(%edx),%edx
	paddq	%mm2,%mm1
	movd	%mm1,(%eax)
	subl	$1,%ecx
	psrlq	$32,%mm1		# keep only the carry; flags untouched
	leal	4(%eax),%eax		# lea keeps the flags from subl
	jnz	.Lmw_sse2_by1
.Lmw_sse2_done:
	movd	%mm1,%eax		# return the final carry
	emms
	ret

.align	16
.Lmw_scalar:
	# ---- Scalar path: edi = dst, ebx = src, ecx = w, esi = carry,
	# ---- ebp = words left in the current phase.
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	xorl	%esi,%esi
	movl	20(%esp),%edi
	movl	24(%esp),%ebx
	movl	28(%esp),%ebp
	movl	32(%esp),%ecx
	andl	$0xfffffff8,%ebp	# whole groups of eight words
	jz	.Lmw_scalar_tail
.Lmw_scalar_by8:
	MW_SCALAR_WORD 0
	MW_SCALAR_WORD 4
	MW_SCALAR_WORD 8
	MW_SCALAR_WORD 12
	MW_SCALAR_WORD 16
	MW_SCALAR_WORD 20
	MW_SCALAR_WORD 24
	MW_SCALAR_WORD 28

	addl	$32,%ebx
	addl	$32,%edi
	subl	$8,%ebp
	jnz	.Lmw_scalar_by8
.Lmw_scalar_tail:
	movl	28(%esp),%ebp		# num again
	andl	$7,%ebp
	jz	.Lmw_scalar_done

	# 1..7 leftover words; leave as soon as the count reaches zero.
	MW_SCALAR_WORD 0
	decl	%ebp
	jz	.Lmw_scalar_done
	MW_SCALAR_WORD 4
	decl	%ebp
	jz	.Lmw_scalar_done
	MW_SCALAR_WORD 8
	decl	%ebp
	jz	.Lmw_scalar_done
	MW_SCALAR_WORD 12
	decl	%ebp
	jz	.Lmw_scalar_done
	MW_SCALAR_WORD 16
	decl	%ebp
	jz	.Lmw_scalar_done
	MW_SCALAR_WORD 20
	decl	%ebp
	jz	.Lmw_scalar_done
	MW_SCALAR_WORD 24		# seventh word needs no further test
.Lmw_scalar_done:
	movl	%esi,%eax		# return the final carry
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_words,.-.L_bn_mul_words_begin

.purgem	MW_SCALAR_WORD
|
||||
# bn_sqr_words -- square every word of an array.
#
# For each of the num 32-bit input words, the 64-bit square src[i]^2 is
# stored as two consecutive output words: dst[2i] = low half,
# dst[2i+1] = high half.  No value is deliberately returned in eax.
#
# Stack arguments on entry (cdecl, offsets before any push):
#     4(%esp)   dst  output array, 2*num words written
#     8(%esp)   src  input array, num words read
#     12(%esp)  num  number of input words
# Clobbers: eax, ecx, edx, flags; mm0 on the SSE2 path (emms is executed
#           before returning).  ebx, esi, edi and ebp are preserved.
#
# Bit 26 of the first dword of OPENSSL_ia32cap_P selects the path
# (presumably the CPUID SSE2 feature flag -- confirm against the
# definition of the capability vector).
#
# Change relative to the generated original: the SSE2 loop is
# bottom-tested, so num == 0 used to write dst[0..1] and then keep going
# because the counter wrapped below zero.  The scalar path already did
# nothing for num == 0; the SSE2 path now matches.  The same guard
# belongs in the upstream perlasm generator.

# One scalar round: square the word at \src(%edi) and store the low half
# at \lo(%esi) and the high half at \hi(%esi).
.macro	SQR_SCALAR_WORD src, lo, hi
	movl	\src(%edi),%eax
	mull	%eax
	movl	%eax,\lo(%esi)
	movl	%edx,\hi(%esi)
.endm

# Same round for the tail: also counts ebx down and leaves once it hits 0.
# The store after decl does not touch flags.
.macro	SQR_SCALAR_TAIL_WORD src, lo, hi
	movl	\src(%edi),%eax
	mull	%eax
	movl	%eax,\lo(%esi)
	decl	%ebx
	movl	%edx,\hi(%esi)
	jz	.Lsqr_scalar_done
.endm

.globl	bn_sqr_words
.hidden	bn_sqr_words
.type	bn_sqr_words,@function
.align	16
bn_sqr_words:
.L_bn_sqr_words_begin:
	# call/pop yields the current address so that OPENSSL_ia32cap_P can
	# be reached position-independently.
	call	.Lsqr_pic_anchor
.Lsqr_pic_anchor:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lsqr_pic_anchor(%eax),%eax
	btl	$26,(%eax)
	jnc	.Lsqr_scalar

	# ---- SSE2 path: eax = dst, edx = src, ecx = words left.
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	movl	12(%esp),%ecx
	testl	%ecx,%ecx		# num == 0: nothing to square
	jz	.Lsqr_sse2_done
.align	16
.Lsqr_sse2_by1:
	movd	(%edx),%mm0
	pmuludq	%mm0,%mm0
	leal	4(%edx),%edx
	movq	%mm0,(%eax)		# both halves in one 64-bit store
	subl	$1,%ecx
	leal	8(%eax),%eax		# lea keeps the flags from subl
	jnz	.Lsqr_sse2_by1
.Lsqr_sse2_done:
	emms
	ret

.align	16
.Lsqr_scalar:
	# ---- Scalar path: esi = dst, edi = src, ebx = words left in the
	# ---- current phase.
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%ebx
	andl	$0xfffffff8,%ebx	# whole groups of eight words
	jz	.Lsqr_scalar_tail
.Lsqr_scalar_by8:
	SQR_SCALAR_WORD 0, 0, 4
	SQR_SCALAR_WORD 4, 8, 12
	SQR_SCALAR_WORD 8, 16, 20
	SQR_SCALAR_WORD 12, 24, 28
	SQR_SCALAR_WORD 16, 32, 36
	SQR_SCALAR_WORD 20, 40, 44
	SQR_SCALAR_WORD 24, 48, 52
	SQR_SCALAR_WORD 28, 56, 60

	addl	$32,%edi
	addl	$64,%esi
	subl	$8,%ebx
	jnz	.Lsqr_scalar_by8
.Lsqr_scalar_tail:
	movl	28(%esp),%ebx		# num again
	andl	$7,%ebx
	jz	.Lsqr_scalar_done

	# 1..7 leftover words; each tail round exits as soon as ebx hits 0.
	SQR_SCALAR_TAIL_WORD 0, 0, 4
	SQR_SCALAR_TAIL_WORD 4, 8, 12
	SQR_SCALAR_TAIL_WORD 8, 16, 20
	SQR_SCALAR_TAIL_WORD 12, 24, 28
	SQR_SCALAR_TAIL_WORD 16, 32, 36
	SQR_SCALAR_TAIL_WORD 20, 40, 44
	SQR_SCALAR_WORD 24, 48, 52	# seventh word needs no further test
.Lsqr_scalar_done:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_sqr_words,.-.L_bn_sqr_words_begin

.purgem	SQR_SCALAR_WORD
.purgem	SQR_SCALAR_TAIL_WORD
|
||||
# bn_div_words -- divide a 64-bit value by a 32-bit word.
#
# Stack arguments on entry (cdecl):
#     4(%esp)   high 32 bits of the dividend
#     8(%esp)   low 32 bits of the dividend
#     12(%esp)  divisor
# Result:   eax = quotient.  The remainder is left in edx and not
#           otherwise used here.
# Clobbers: ecx, edx, flags.
#
# No operand checking is done.  Per the divl instruction definition a
# zero divisor, or a quotient that does not fit in 32 bits (high word
# not below the divisor), raises a divide fault; callers must rule both
# out.
.globl	bn_div_words
.hidden	bn_div_words
.type	bn_div_words,@function
.align	16
bn_div_words:
.L_bn_div_words_begin:
	movl	12(%esp),%ecx		# divisor
	movl	8(%esp),%eax		# dividend, low half
	movl	4(%esp),%edx		# dividend, high half
	divl	%ecx			# eax = edx:eax / ecx
	ret
.size	bn_div_words,.-.L_bn_div_words_begin
|
||||
# bn_add_words -- add two word arrays with carry propagation.
#
# For each of the num 32-bit words, low to high:
#     dst[i] = a[i] + b[i] + carry   (mod 2^32), carry = carry out
# The final carry (0 or 1) is returned in eax.
#
# Stack arguments on entry (cdecl, offsets before any push):
#     4(%esp)   dst  result array, written
#     8(%esp)   a    first operand array, read
#     12(%esp)  b    second operand array, read
#     16(%esp)  num  number of words
# Clobbers: ecx, edx, flags.  ebx, esi, edi and ebp are preserved.
#
# Register roles in the body: ebx = dst, esi = a, edi = b,
# ebp = words left in the current phase, eax = carry.

# One round for the word at byte offset \off.  The carry lives in eax as
# 0 or 1.  movl with an immediate zero (not xorl) is used to clear eax
# because it leaves CF from the preceding addl intact for adcl.
.macro	ADDW_WORD off
	movl	\off(%esi),%ecx
	movl	\off(%edi),%edx
	addl	%eax,%ecx		# a[i] + carry in
	movl	$0,%eax
	adcl	%eax,%eax		# eax = carry out of that add
	addl	%edx,%ecx		# ... + b[i]
	adcl	$0,%eax			# fold in the second carry out
	movl	%ecx,\off(%ebx)
.endm

# Same round for the tail: also counts ebp down and leaves once it hits 0.
# The store after decl does not touch flags.
.macro	ADDW_TAIL_WORD off
	movl	\off(%esi),%ecx
	movl	\off(%edi),%edx
	addl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	addl	%edx,%ecx
	adcl	$0,%eax
	decl	%ebp
	movl	%ecx,\off(%ebx)
	jz	.Laddw_done
.endm

.globl	bn_add_words
.hidden	bn_add_words
.type	bn_add_words,@function
.align	16
bn_add_words:
.L_bn_add_words_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	20(%esp),%ebx
	movl	24(%esp),%esi
	movl	28(%esp),%edi
	movl	32(%esp),%ebp
	xorl	%eax,%eax		# carry starts at 0
	andl	$0xfffffff8,%ebp	# whole groups of eight words
	jz	.Laddw_tail
.Laddw_by8:
	ADDW_WORD 0
	ADDW_WORD 4
	ADDW_WORD 8
	ADDW_WORD 12
	ADDW_WORD 16
	ADDW_WORD 20
	ADDW_WORD 24
	ADDW_WORD 28

	addl	$32,%esi
	addl	$32,%edi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.Laddw_by8
.Laddw_tail:
	movl	32(%esp),%ebp		# num again
	andl	$7,%ebp
	jz	.Laddw_done

	# 1..7 leftover words; each tail round exits as soon as ebp hits 0.
	ADDW_TAIL_WORD 0
	ADDW_TAIL_WORD 4
	ADDW_TAIL_WORD 8
	ADDW_TAIL_WORD 12
	ADDW_TAIL_WORD 16
	ADDW_TAIL_WORD 20
	ADDW_WORD 24			# seventh word needs no further test
.Laddw_done:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_add_words,.-.L_bn_add_words_begin

.purgem	ADDW_WORD
.purgem	ADDW_TAIL_WORD
|
||||
# bn_sub_words -- subtract two word arrays with borrow propagation.
#
# For each of the num 32-bit words, low to high:
#     dst[i] = a[i] - b[i] - borrow  (mod 2^32), borrow = borrow out
# The final borrow (0 or 1) is returned in eax.
#
# Stack arguments on entry (cdecl, offsets before any push):
#     4(%esp)   dst  result array, written
#     8(%esp)   a    minuend array, read
#     12(%esp)  b    subtrahend array, read
#     16(%esp)  num  number of words
# Clobbers: ecx, edx, flags.  ebx, esi, edi and ebp are preserved.
#
# Register roles in the body: ebx = dst, esi = a, edi = b,
# ebp = words left in the current phase, eax = borrow.

# One round for the word at byte offset \off.  The borrow lives in eax as
# 0 or 1.  After subl, CF is the borrow out, which adcl then collects.
# movl with an immediate zero (not xorl) is used to clear eax because it
# leaves CF intact.
.macro	SUBW_WORD off
	movl	\off(%esi),%ecx
	movl	\off(%edi),%edx
	subl	%eax,%ecx		# a[i] - borrow in
	movl	$0,%eax
	adcl	%eax,%eax		# eax = borrow out of that subtract
	subl	%edx,%ecx		# ... - b[i]
	adcl	$0,%eax			# fold in the second borrow out
	movl	%ecx,\off(%ebx)
.endm

# Same round for the tail: also counts ebp down and leaves once it hits 0.
# The store after decl does not touch flags.
.macro	SUBW_TAIL_WORD off
	movl	\off(%esi),%ecx
	movl	\off(%edi),%edx
	subl	%eax,%ecx
	movl	$0,%eax
	adcl	%eax,%eax
	subl	%edx,%ecx
	adcl	$0,%eax
	decl	%ebp
	movl	%ecx,\off(%ebx)
	jz	.Lsubw_done
.endm

.globl	bn_sub_words
.hidden	bn_sub_words
.type	bn_sub_words,@function
.align	16
bn_sub_words:
.L_bn_sub_words_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	20(%esp),%ebx
	movl	24(%esp),%esi
	movl	28(%esp),%edi
	movl	32(%esp),%ebp
	xorl	%eax,%eax		# borrow starts at 0
	andl	$0xfffffff8,%ebp	# whole groups of eight words
	jz	.Lsubw_tail
.Lsubw_by8:
	SUBW_WORD 0
	SUBW_WORD 4
	SUBW_WORD 8
	SUBW_WORD 12
	SUBW_WORD 16
	SUBW_WORD 20
	SUBW_WORD 24
	SUBW_WORD 28

	addl	$32,%esi
	addl	$32,%edi
	addl	$32,%ebx
	subl	$8,%ebp
	jnz	.Lsubw_by8
.Lsubw_tail:
	movl	32(%esp),%ebp		# num again
	andl	$7,%ebp
	jz	.Lsubw_done

	# 1..7 leftover words; each tail round exits as soon as ebp hits 0.
	SUBW_TAIL_WORD 0
	SUBW_TAIL_WORD 4
	SUBW_TAIL_WORD 8
	SUBW_TAIL_WORD 12
	SUBW_TAIL_WORD 16
	SUBW_TAIL_WORD 20
	SUBW_WORD 24			# seventh word needs no further test
.Lsubw_done:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_sub_words,.-.L_bn_sub_words_begin

.purgem	SUBW_WORD
.purgem	SUBW_TAIL_WORD
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
1266
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/co-586.S
Normal file
1266
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/co-586.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,294 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
# gcm_gmult_ssse3 -- update a 16-byte block in place using a 256-byte
# lookup table (presumably the GHASH field multiplication of the block by
# the hash key encoded in the table -- confirm against the C caller).
#
# Stack arguments on entry (cdecl, offsets before any push):
#     4(%esp)   block  16 bytes, read and written with unaligned moves
#     8(%esp)   table  16 rows of 16 bytes, read with movdqa and so
#                      required to be 16-byte aligned
# Clobbers: eax, flags, xmm0-xmm7.  xmm0-xmm6 are zeroed before the
#           return; xmm7 still holds the byte-reversal mask.
#           ebx, esi, edi and ebp are preserved.
#
# The table is only ever indexed with pshufb, never through a
# data-dependent address.

# Process \rows consecutive table rows, then fold xmm3 into xmm2.
#   xmm0 = low nibbles of the input bytes, xmm1 = high nibbles
#   xmm2 = accumulator, xmm3 = bits shifted out below the accumulator
#   esi  = current table row (advanced by 16 per row); eax = row counter
# The SSSE3 instructions are emitted as raw bytes, as in the generated
# original; the decoded form is given beside each one.
.macro	GMULT_SSSE3_ROWS loop_label, rows
	movl	$\rows,%eax
\loop_label:
	movdqa	(%esi),%xmm4
	leal	16(%esi),%esi
	# Shift the 256-bit pair xmm2:xmm3 right by one byte.
	movdqa	%xmm2,%xmm6
	.byte	0x66,0x0f,0x3a,0x0f,0xf3,0x01	# palignr $1,%xmm3,%xmm6
	movdqa	%xmm6,%xmm3
	psrldq	$1,%xmm2
	# Look the row up by low nibbles (xmm4) and by high nibbles (xmm5).
	movdqa	%xmm4,%xmm5
	.byte	0x66,0x0f,0x38,0x00,0xe0	# pshufb %xmm0,%xmm4
	.byte	0x66,0x0f,0x38,0x00,0xe9	# pshufb %xmm1,%xmm5
	pxor	%xmm5,%xmm2
	# Add xmm4 shifted right by four bits across all 128 bits; the four
	# bits that fall off the bottom go to the top of xmm3.
	movdqa	%xmm4,%xmm5
	psllq	$60,%xmm5
	movdqa	%xmm5,%xmm6
	pslldq	$8,%xmm6
	pxor	%xmm6,%xmm3
	psrldq	$8,%xmm5
	pxor	%xmm5,%xmm2
	psrlq	$4,%xmm4
	pxor	%xmm4,%xmm2
	subl	$1,%eax
	jnz	\loop_label
	# Fold xmm3 into xmm2 unshifted and at cumulative right shifts of
	# 1, 2 and 7 bits, then clear it (presumably the reduction by the
	# GHASH polynomial -- confirm).
	pxor	%xmm3,%xmm2
	psrlq	$1,%xmm3
	pxor	%xmm3,%xmm2
	psrlq	$1,%xmm3
	pxor	%xmm3,%xmm2
	psrlq	$5,%xmm3
	pxor	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
.endm

.globl	gcm_gmult_ssse3
.hidden	gcm_gmult_ssse3
.type	gcm_gmult_ssse3,@function
.align	16
gcm_gmult_ssse3:
.L_gcm_gmult_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi			# block
	movl	24(%esp),%esi			# table
	movdqu	(%edi),%xmm0
	# call/pop yields the current address for position-independent
	# access to the two constants.
	call	.Lgmult_pic_anchor
.Lgmult_pic_anchor:
	popl	%eax
	movdqa	.Lreverse_bytes-.Lgmult_pic_anchor(%eax),%xmm7
	movdqa	.Llow4_mask-.Lgmult_pic_anchor(%eax),%xmm2
	.byte	0x66,0x0f,0x38,0x00,0xc7	# pshufb %xmm7,%xmm0 (reverse bytes)
	# Split every byte: xmm1 = high nibbles, xmm0 = low nibbles.
	movdqa	%xmm2,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm2,%xmm0
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	# Sixteen rows in groups of 5, 5 and 6, with a fold after each group.
	GMULT_SSSE3_ROWS .Lgmult_rows_a, 5
	GMULT_SSSE3_ROWS .Lgmult_rows_b, 5
	GMULT_SSSE3_ROWS .Lgmult_rows_c, 6
	.byte	0x66,0x0f,0x38,0x00,0xd7	# pshufb %xmm7,%xmm2 (reverse back)
	movdqu	%xmm2,(%edi)
	# Wipe the working registers before returning.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin

.purgem	GMULT_SSSE3_ROWS
|
||||
# gcm_ghash_ssse3 -- absorb a run of 16-byte input blocks into a 16-byte
# state using a 256-byte lookup table.  Per block: the byte-reversed
# input is XORed into the state, and the state then goes through the same
# sixteen-row table pass as gcm_gmult_ssse3 (presumably one GHASH
# multiplication by the hash key -- confirm against the C caller).
#
# Stack arguments on entry (cdecl, offsets before any push):
#     4(%esp)   state  16 bytes, read and written with unaligned moves
#     8(%esp)   table  16 rows of 16 bytes, read with movdqa and so
#                      required to be 16-byte aligned
#     12(%esp)  input  read 16 bytes at a time with unaligned moves
#     16(%esp)  len    byte count; rounded down to a multiple of 16
# Clobbers: eax, ecx, edx, flags, xmm0-xmm7.  xmm0-xmm6 are zeroed
#           before the return.  ebx, esi, edi and ebp are preserved.
#
# NOTE(review): the block loop is bottom-tested, so a len below 16 still
# reads one block and the wrapped counter then keeps the loop running.
# Callers presumably always pass a non-zero multiple of 16 -- confirm.

# Process \rows consecutive table rows, then fold xmm3 into xmm2.
#   xmm0 = low nibbles of the state bytes, xmm1 = high nibbles
#   xmm2 = accumulator, xmm3 = bits shifted out below the accumulator
#   esi  = current table row (advanced by 16 per row); eax = row counter
# The SSSE3 instructions are emitted as raw bytes, as in the generated
# original; the decoded form is given beside each one.
.macro	GHASH_SSSE3_ROWS loop_label, rows
	movl	$\rows,%eax
\loop_label:
	movdqa	(%esi),%xmm4
	leal	16(%esi),%esi
	# Shift the 256-bit pair xmm2:xmm3 right by one byte.
	movdqa	%xmm2,%xmm6
	.byte	0x66,0x0f,0x3a,0x0f,0xf3,0x01	# palignr $1,%xmm3,%xmm6
	movdqa	%xmm6,%xmm3
	psrldq	$1,%xmm2
	# Look the row up by low nibbles (xmm4) and by high nibbles (xmm5).
	movdqa	%xmm4,%xmm5
	.byte	0x66,0x0f,0x38,0x00,0xe0	# pshufb %xmm0,%xmm4
	.byte	0x66,0x0f,0x38,0x00,0xe9	# pshufb %xmm1,%xmm5
	pxor	%xmm5,%xmm2
	# Add xmm4 shifted right by four bits across all 128 bits; the four
	# bits that fall off the bottom go to the top of xmm3.
	movdqa	%xmm4,%xmm5
	psllq	$60,%xmm5
	movdqa	%xmm5,%xmm6
	pslldq	$8,%xmm6
	pxor	%xmm6,%xmm3
	psrldq	$8,%xmm5
	pxor	%xmm5,%xmm2
	psrlq	$4,%xmm4
	pxor	%xmm4,%xmm2
	subl	$1,%eax
	jnz	\loop_label
	# Fold xmm3 into xmm2 unshifted and at cumulative right shifts of
	# 1, 2 and 7 bits, then clear it (presumably the reduction by the
	# GHASH polynomial -- confirm).
	pxor	%xmm3,%xmm2
	psrlq	$1,%xmm3
	pxor	%xmm3,%xmm2
	psrlq	$1,%xmm3
	pxor	%xmm3,%xmm2
	psrlq	$5,%xmm3
	pxor	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
.endm

.globl	gcm_ghash_ssse3
.hidden	gcm_ghash_ssse3
.type	gcm_ghash_ssse3,@function
.align	16
gcm_ghash_ssse3:
.L_gcm_ghash_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi			# state
	movl	24(%esp),%esi			# table
	movl	28(%esp),%edx			# input
	movl	32(%esp),%ecx			# len
	movdqu	(%edi),%xmm0
	# call/pop yields the current address for position-independent
	# access to the two constants; ebx keeps it for the whole loop.
	call	.Lghash_pic_anchor
.Lghash_pic_anchor:
	popl	%ebx
	movdqa	.Lreverse_bytes-.Lghash_pic_anchor(%ebx),%xmm7
	andl	$-16,%ecx			# whole blocks only
	.byte	0x66,0x0f,0x38,0x00,0xc7	# pshufb %xmm7,%xmm0 (reverse bytes)
	pxor	%xmm3,%xmm3
.Lghash_next_block:
	movdqa	.Llow4_mask-.Lghash_pic_anchor(%ebx),%xmm2
	movdqu	(%edx),%xmm1
	.byte	0x66,0x0f,0x38,0x00,0xcf	# pshufb %xmm7,%xmm1 (reverse bytes)
	pxor	%xmm1,%xmm0			# state ^= input block
	# Split every byte: xmm1 = high nibbles, xmm0 = low nibbles.
	movdqa	%xmm2,%xmm1
	pandn	%xmm0,%xmm1
	psrld	$4,%xmm1
	pand	%xmm2,%xmm0
	pxor	%xmm2,%xmm2
	# Sixteen rows in groups of 5, 5 and 6, with a fold after each group.
	GHASH_SSSE3_ROWS .Lghash_rows_a, 5
	GHASH_SSSE3_ROWS .Lghash_rows_b, 5
	GHASH_SSSE3_ROWS .Lghash_rows_c, 6
	movdqa	%xmm2,%xmm0			# new state
	leal	-256(%esi),%esi			# rewind to the first table row
	leal	16(%edx),%edx
	subl	$16,%ecx
	jnz	.Lghash_next_block
	.byte	0x66,0x0f,0x38,0x00,0xc7	# pshufb %xmm7,%xmm0 (reverse back)
	movdqu	%xmm0,(%edi)
	# Wipe the working registers before returning.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin

.purgem	GHASH_SSSE3_ROWS
|
||||
# Constants shared by gcm_gmult_ssse3 and gcm_ghash_ssse3.  Both are
# loaded with movdqa, hence the 16-byte alignment.
.align	16
.Lreverse_bytes:
	# pshufb control that reverses the order of all sixteen bytes
	.byte	15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
.align	16
.Llow4_mask:
	# 0x0f in every byte: selects the low nibble of each byte
	.long	0x0f0f0f0f,0x0f0f0f0f,0x0f0f0f0f,0x0f0f0f0f
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
330
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/ghash-x86.S
Normal file
330
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/ghash-x86.S
Normal file
@ -0,0 +1,330 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
# gcm_init_clmul(arg1, arg2) - i386 stack-argument function, AT&T syntax.
#   arg1 (4(%esp) -> %edx): destination; 48 bytes are written, at offsets
#                           0, 16 and 32.
#   arg2 (8(%esp) -> %eax): source; 16 bytes are read (unaligned load).
# Clobbers %eax, %ecx, %edx, %xmm0-%xmm5 and the flags.
# NOTE(review): presumably arg1 is the GHASH key table and arg2 the hash
# key H of the C prototype - confirm against the BoringSSL GCM headers.
# The .byte sequences are hand-encoded SSSE3 / PCLMULQDQ instructions; the
# decoded form is given next to each one.
.globl gcm_init_clmul
|
||||
.hidden gcm_init_clmul
|
||||
.type gcm_init_clmul,@function
|
||||
.align 16
|
||||
gcm_init_clmul:
|
||||
.L_gcm_init_clmul_begin:
|
||||
movl 4(%esp),%edx
|
||||
movl 8(%esp),%eax
|
||||
# Position-independent address of .Lbswap: call pushes the address of
# .L000pic, popl fetches it, leal adds the link-time distance to .Lbswap.
call .L000pic
|
||||
.L000pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L000pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm2
|
||||
# pshufd $78 swaps the two 64-bit halves of the register.
pshufd $78,%xmm2,%xmm2
|
||||
# pshufd $255 broadcasts the top 32-bit lane; it feeds the sign test below.
pshufd $255,%xmm2,%xmm4
|
||||
# Shift the whole 128-bit value left by one bit: each 64-bit lane is
# shifted, and the bit shifted out of the low lane is carried into the
# high lane via psrlq $63 / pslldq $8 / por.
movdqa %xmm2,%xmm3
|
||||
psllq $1,%xmm2
|
||||
pxor %xmm5,%xmm5
|
||||
psrlq $63,%xmm3
|
||||
# xmm5 = all-ones when the original top bit was set (0 > lane, signed).
pcmpgtd %xmm4,%xmm5
|
||||
pslldq $8,%xmm3
|
||||
por %xmm3,%xmm2
|
||||
# Conditionally XOR in the second 16-byte constant stored after .Lbswap.
pand 16(%ecx),%xmm5
|
||||
pxor %xmm5,%xmm2
|
||||
# Multiply xmm2 by itself (xmm0 starts as a copy of xmm2), using three
# carry-less multiplies: low halves, high halves, and the XOR of the two
# halves of each operand.
movdqa %xmm2,%xmm0
|
||||
movdqa %xmm0,%xmm1
|
||||
pshufd $78,%xmm0,%xmm3
|
||||
pshufd $78,%xmm2,%xmm4
|
||||
pxor %xmm0,%xmm3
|
||||
pxor %xmm2,%xmm4
|
||||
# pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,194,0
|
||||
# pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,202,17
|
||||
# pclmulqdq $0x00,%xmm4,%xmm3
.byte 102,15,58,68,220,0
|
||||
# Fold the middle product into the 256-bit result held in xmm1:xmm0.
xorps %xmm0,%xmm3
|
||||
xorps %xmm1,%xmm3
|
||||
movdqa %xmm3,%xmm4
|
||||
psrldq $8,%xmm3
|
||||
pslldq $8,%xmm4
|
||||
pxor %xmm3,%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
# NOTE(review): the shift/XOR sequence below looks like the reduction of
# the 256-bit product back to 128 bits for the GHASH field - confirm
# against the generating Perl script.
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
# Store the three table entries: xmm2 at offset 0, its square (xmm0) at
# offset 16, and at offset 32 the XOR-folded halves of both packed into
# one register (folded xmm2 in the low qword, folded xmm0 in the high).
pshufd $78,%xmm2,%xmm3
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
pxor %xmm2,%xmm3
|
||||
movdqu %xmm2,(%edx)
|
||||
pxor %xmm0,%xmm4
|
||||
movdqu %xmm0,16(%edx)
|
||||
# palignr $8,%xmm3,%xmm4
.byte 102,15,58,15,227,8
|
||||
movdqu %xmm4,32(%edx)
|
||||
ret
|
||||
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
|
||||
# gcm_gmult_clmul(arg1, arg2) - i386 stack-argument function, AT&T syntax.
#   arg1 (4(%esp) -> %eax): 16-byte value, read at entry and overwritten
#                           with the result at exit.
#   arg2 (8(%esp) -> %edx): table; the 16 bytes at offset 0 and at
#                           offset 32 are read.
# Clobbers %eax, %ecx, %edx, %xmm0-%xmm5 and the flags.
# NOTE(review): presumably arg1 is the running GHASH value and arg2 the
# table written by gcm_init_clmul - confirm against the C prototype.
.globl gcm_gmult_clmul
|
||||
.hidden gcm_gmult_clmul
|
||||
.type gcm_gmult_clmul,@function
|
||||
.align 16
|
||||
gcm_gmult_clmul:
|
||||
.L_gcm_gmult_clmul_begin:
|
||||
movl 4(%esp),%eax
|
||||
movl 8(%esp),%edx
|
||||
# Position-independent address of .Lbswap (call/pop idiom).
call .L001pic
|
||||
.L001pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L001pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
# xmm5 = the byte-reversal shuffle mask stored at .Lbswap.
movdqa (%ecx),%xmm5
|
||||
movups (%edx),%xmm2
|
||||
# pshufb %xmm5,%xmm0 - reverse the byte order of the input value.
.byte 102,15,56,0,197
|
||||
movups 32(%edx),%xmm4
|
||||
# Multiply xmm0 by the table entry in xmm2 with three carry-less
# multiplies; the middle one uses the pre-folded halves loaded from
# offset 32 of the table.
movdqa %xmm0,%xmm1
|
||||
pshufd $78,%xmm0,%xmm3
|
||||
pxor %xmm0,%xmm3
|
||||
# pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,194,0
|
||||
# pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,202,17
|
||||
# pclmulqdq $0x00,%xmm4,%xmm3
.byte 102,15,58,68,220,0
|
||||
# Fold the middle product into the 256-bit result held in xmm1:xmm0.
xorps %xmm0,%xmm3
|
||||
xorps %xmm1,%xmm3
|
||||
movdqa %xmm3,%xmm4
|
||||
psrldq $8,%xmm3
|
||||
pslldq $8,%xmm4
|
||||
pxor %xmm3,%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
# NOTE(review): the shift/XOR sequence below looks like the reduction of
# the 256-bit product back to 128 bits - confirm against the generating
# Perl script.
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
# pshufb %xmm5,%xmm0 - restore the original byte order, then store.
.byte 102,15,56,0,197
|
||||
movdqu %xmm0,(%eax)
|
||||
ret
|
||||
.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
|
||||
# gcm_ghash_clmul(arg1, arg2, arg3, arg4) - i386 stack-argument function.
#   arg1 (-> %eax): 16-byte value, read at entry and overwritten at exit.
#   arg2 (-> %edx): table; entries at offsets 0, 16 and 32 are read.
#   arg3 (-> %esi): input data, consumed in 16-byte blocks.
#   arg4 (-> %ebx): remaining byte count, decremented by 16 or 32.
# Saves and restores %ebp, %ebx, %esi, %edi; uses %xmm0-%xmm7.
# Control flow: a single block goes straight to .L003odd_tail; otherwise
# blocks are processed two at a time in .L005mod_loop / .L004even_tail,
# with one trailing block handled by .L003odd_tail.
# NOTE(review): the length appears to be assumed a non-zero multiple of
# 16 (there is no check) - confirm against the callers.
.globl gcm_ghash_clmul
|
||||
.hidden gcm_ghash_clmul
|
||||
.type gcm_ghash_clmul,@function
|
||||
.align 16
|
||||
gcm_ghash_clmul:
|
||||
.L_gcm_ghash_clmul_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
# Four pushes plus the return address put the first argument at 20(%esp).
movl 20(%esp),%eax
|
||||
movl 24(%esp),%edx
|
||||
movl 28(%esp),%esi
|
||||
movl 32(%esp),%ebx
|
||||
# Position-independent address of .Lbswap (call/pop idiom).
call .L002pic
|
||||
.L002pic:
|
||||
popl %ecx
|
||||
leal .Lbswap-.L002pic(%ecx),%ecx
|
||||
movdqu (%eax),%xmm0
|
||||
# xmm5 = the byte-reversal shuffle mask stored at .Lbswap.
movdqa (%ecx),%xmm5
|
||||
movdqu (%edx),%xmm2
|
||||
# pshufb %xmm5,%xmm0
.byte 102,15,56,0,197
|
||||
# Exactly one 16-byte block: use the single-block path.
subl $16,%ebx
|
||||
jz .L003odd_tail
|
||||
# Load and byte-reverse the first two input blocks.
movdqu (%esi),%xmm3
|
||||
movdqu 16(%esi),%xmm6
|
||||
# pshufb %xmm5,%xmm3
.byte 102,15,56,0,221
|
||||
# pshufb %xmm5,%xmm6
.byte 102,15,56,0,245
|
||||
movdqu 32(%edx),%xmm5
|
||||
# The first block is XORed into the accumulator; the second block is
# multiplied by the table entry at offset 0 (xmm2).
pxor %xmm3,%xmm0
|
||||
pshufd $78,%xmm6,%xmm3
|
||||
movdqa %xmm6,%xmm7
|
||||
pxor %xmm6,%xmm3
|
||||
leal 32(%esi),%esi
|
||||
# pclmulqdq $0x00,%xmm2,%xmm6
.byte 102,15,58,68,242,0
|
||||
# pclmulqdq $0x11,%xmm2,%xmm7
.byte 102,15,58,68,250,17
|
||||
# pclmulqdq $0x00,%xmm5,%xmm3
.byte 102,15,58,68,221,0
|
||||
# xmm2 = table entry at offset 16, used for the accumulator multiply.
movups 16(%edx),%xmm2
|
||||
nop
|
||||
# Fewer than two further blocks left: finish in .L004even_tail.
subl $32,%ebx
|
||||
jbe .L004even_tail
|
||||
jmp .L005mod_loop
|
||||
.align 32
|
||||
# Main loop: multiply the accumulator by the offset-16 table entry, merge
# the pending products (xmm6, xmm7, xmm3) of the previous second block,
# and interleave the load and multiply of the next two input blocks.
.L005mod_loop:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
nop
|
||||
# pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,194,0
|
||||
# pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,202,17
|
||||
# pclmulqdq $0x10,%xmm5,%xmm4 (low qword of xmm4, high qword of xmm5)
.byte 102,15,58,68,229,16
|
||||
movups (%edx),%xmm2
|
||||
xorps %xmm6,%xmm0
|
||||
# Reload the byte-reversal mask; xmm5 held the offset-32 table entry.
movdqa (%ecx),%xmm5
|
||||
xorps %xmm7,%xmm1
|
||||
movdqu (%esi),%xmm7
|
||||
pxor %xmm0,%xmm3
|
||||
movdqu 16(%esi),%xmm6
|
||||
pxor %xmm1,%xmm3
|
||||
# pshufb %xmm5,%xmm7
.byte 102,15,56,0,253
|
||||
pxor %xmm3,%xmm4
|
||||
movdqa %xmm4,%xmm3
|
||||
psrldq $8,%xmm4
|
||||
pslldq $8,%xmm3
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm3,%xmm0
|
||||
# pshufb %xmm5,%xmm6
.byte 102,15,56,0,245
|
||||
# XOR the next first block (xmm7) into the high half of the product.
pxor %xmm7,%xmm1
|
||||
movdqa %xmm6,%xmm7
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
# pclmulqdq $0x00,%xmm2,%xmm6
.byte 102,15,58,68,242,0
|
||||
movups 32(%edx),%xmm5
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
pshufd $78,%xmm7,%xmm3
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm7,%xmm3
|
||||
pxor %xmm4,%xmm1
|
||||
# pclmulqdq $0x11,%xmm2,%xmm7
.byte 102,15,58,68,250,17
|
||||
movups 16(%edx),%xmm2
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
# pclmulqdq $0x00,%xmm5,%xmm3
.byte 102,15,58,68,221,0
|
||||
leal 32(%esi),%esi
|
||||
# Unsigned test: keep looping while more than 32 bytes remained.
subl $32,%ebx
|
||||
ja .L005mod_loop
|
||||
# Finish the last pair: same multiply/merge as the loop body, without
# loading further input.
.L004even_tail:
|
||||
pshufd $78,%xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
# pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,194,0
|
||||
# pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,202,17
|
||||
# pclmulqdq $0x10,%xmm5,%xmm4
.byte 102,15,58,68,229,16
|
||||
movdqa (%ecx),%xmm5
|
||||
xorps %xmm6,%xmm0
|
||||
xorps %xmm7,%xmm1
|
||||
pxor %xmm0,%xmm3
|
||||
pxor %xmm1,%xmm3
|
||||
pxor %xmm3,%xmm4
|
||||
movdqa %xmm4,%xmm3
|
||||
psrldq $8,%xmm4
|
||||
pslldq $8,%xmm3
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm3,%xmm0
|
||||
# NOTE(review): shift/XOR sequence that looks like the 256-to-128-bit
# reduction - confirm against the generating Perl script.
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
# A zero count here means one 16-byte block is still pending (the count
# was pre-decremented by 16); any other value means all input is consumed.
testl %ebx,%ebx
|
||||
jnz .L006done
|
||||
movups (%edx),%xmm2
|
||||
# Single-block path: XOR one byte-reversed block into the accumulator and
# multiply by the table entry at offset 0 (xmm2).
.L003odd_tail:
|
||||
movdqu (%esi),%xmm3
|
||||
# pshufb %xmm5,%xmm3
.byte 102,15,56,0,221
|
||||
pxor %xmm3,%xmm0
|
||||
movdqa %xmm0,%xmm1
|
||||
pshufd $78,%xmm0,%xmm3
|
||||
pshufd $78,%xmm2,%xmm4
|
||||
pxor %xmm0,%xmm3
|
||||
pxor %xmm2,%xmm4
|
||||
# pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,194,0
|
||||
# pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,202,17
|
||||
# pclmulqdq $0x00,%xmm4,%xmm3
.byte 102,15,58,68,220,0
|
||||
xorps %xmm0,%xmm3
|
||||
xorps %xmm1,%xmm3
|
||||
movdqa %xmm3,%xmm4
|
||||
psrldq $8,%xmm3
|
||||
pslldq $8,%xmm4
|
||||
pxor %xmm3,%xmm1
|
||||
pxor %xmm4,%xmm0
|
||||
movdqa %xmm0,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
psllq $5,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
psllq $1,%xmm0
|
||||
pxor %xmm3,%xmm0
|
||||
psllq $57,%xmm0
|
||||
movdqa %xmm0,%xmm3
|
||||
pslldq $8,%xmm0
|
||||
psrldq $8,%xmm3
|
||||
pxor %xmm4,%xmm0
|
||||
pxor %xmm3,%xmm1
|
||||
movdqa %xmm0,%xmm4
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm4,%xmm1
|
||||
pxor %xmm0,%xmm4
|
||||
psrlq $5,%xmm0
|
||||
pxor %xmm4,%xmm0
|
||||
psrlq $1,%xmm0
|
||||
pxor %xmm1,%xmm0
|
||||
# Restore the original byte order, store the result through arg1, and
# restore the callee-saved registers.
.L006done:
|
||||
# pshufb %xmm5,%xmm0
.byte 102,15,56,0,197
|
||||
movdqu %xmm0,(%eax)
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
|
||||
# Read-only constants addressed relative to .Lbswap by the three clmul
# functions in this file.
.align 64
|
||||
.Lbswap:
|
||||
# Offset 0: byte indices 15 down to 0; loaded into xmm5 and used as the
# pshufb control mask (reverses the byte order of a 16-byte register).
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
||||
# Offset 16: lowest byte 1, highest byte 194 (0xC2); ANDed with the
# carry mask in gcm_init_clmul via "pand 16(%ecx)".
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
|
||||
# NUL-terminated ASCII string:
# "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
|
||||
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
|
||||
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
|
||||
.byte 0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
688
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/md5-586.S
Normal file
688
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/md5-586.S
Normal file
@ -0,0 +1,688 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
# md5_block_asm_data_order(arg1, arg2, arg3) - i386 stack-argument function.
#   arg1: pointer to four 32-bit state words, read at entry and updated
#         after every 64-byte block.
#   arg2: pointer to the input data, read as 32-bit words at offsets 0..60
#         and advanced by 64 per block.
#   arg3: block count; multiplied by 64 to locate the last block.
# Saves and restores %esi, %edi, %ebp, %ebx; clobbers %eax, %ecx, %edx and
# the flags. The working state lives in %eax, %ebx, %ecx, %edx; %edi is
# the scratch register for the round function and %ebp holds the current
# message word. The large decimal displacements in the leal instructions
# are the per-step additive constants.
# NOTE(review): the loop body runs before the end-of-data test, so callers
# presumably always pass a block count of at least one - confirm.
.globl md5_block_asm_data_order
|
||||
.hidden md5_block_asm_data_order
|
||||
.type md5_block_asm_data_order,@function
|
||||
.align 16
|
||||
md5_block_asm_data_order:
|
||||
.L_md5_block_asm_data_order_begin:
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
# Two pushes plus the return address put the first argument at 12(%esp).
movl 12(%esp),%edi
|
||||
movl 16(%esp),%esi
|
||||
movl 20(%esp),%ecx
|
||||
pushl %ebp
|
||||
# ecx = data + count*64 - 64, the address of the last block to process.
shll $6,%ecx
|
||||
pushl %ebx
|
||||
addl %esi,%ecx
|
||||
subl $64,%ecx
|
||||
movl (%edi),%eax
|
||||
# Keep the last-block address on the stack; it is reread at (%esp) below.
pushl %ecx
|
||||
movl 4(%edi),%ebx
|
||||
movl 8(%edi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
|
||||
# Start of the per-block loop.
.L000start:
|
||||
|
||||
|
||||
|
||||
|
||||
# Round 1, sixteen steps: message words in order 0..15, rotate counts
# 7, 12, 17, 22. Each step computes ((y ^ z) & x) ^ z in %edi, where
# x, y, z are the three state words other than the step destination.
movl %ecx,%edi
|
||||
movl (%esi),%ebp
|
||||
|
||||
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 3614090360(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 4(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 3905402710(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 8(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 606105819(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 12(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 3250441966(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 16(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 4118548399(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 20(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 1200080426(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 24(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 2821735955(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 28(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 4249261313(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 32(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 1770035416(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 36(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 2336552879(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 40(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 4294925233(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 44(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 2304563134(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 48(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
leal 1804603682(%eax,%ebp,1),%eax
|
||||
xorl %edx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $7,%eax
|
||||
movl 52(%esi),%ebp
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
leal 4254626195(%edx,%ebp,1),%edx
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $12,%edx
|
||||
movl 56(%esi),%ebp
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
leal 2792965006(%ecx,%ebp,1),%ecx
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $17,%ecx
|
||||
movl 60(%esi),%ebp
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
leal 1236535329(%ebx,%ebp,1),%ebx
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $22,%ebx
|
||||
movl 4(%esi),%ebp
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Round 2, sixteen steps: rotate counts 5, 9, 14, 20; the message word
# for the next step is loaded in the middle of each step. Each step
# computes ((x ^ y) & z) ^ y in %edi.
leal 4129170786(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 24(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
leal 3225465664(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 44(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
leal 643717713(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl (%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
leal 3921069994(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 20(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
leal 3593408605(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 40(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
leal 38016083(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 60(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
leal 3634488961(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl 16(%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
leal 3889429448(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 36(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
leal 568446438(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 56(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
leal 3275163606(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 12(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
leal 4107603335(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl 32(%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
leal 1163531501(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 52(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
leal 2850285829(%eax,%ebp,1),%eax
|
||||
xorl %ebx,%edi
|
||||
andl %edx,%edi
|
||||
movl 8(%esi),%ebp
|
||||
xorl %ecx,%edi
|
||||
addl %edi,%eax
|
||||
movl %ebx,%edi
|
||||
roll $5,%eax
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
leal 4243563512(%edx,%ebp,1),%edx
|
||||
xorl %eax,%edi
|
||||
andl %ecx,%edi
|
||||
movl 28(%esi),%ebp
|
||||
xorl %ebx,%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $9,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
leal 1735328473(%ecx,%ebp,1),%ecx
|
||||
xorl %edx,%edi
|
||||
andl %ebx,%edi
|
||||
movl 48(%esi),%ebp
|
||||
xorl %eax,%edi
|
||||
addl %edi,%ecx
|
||||
movl %edx,%edi
|
||||
roll $14,%ecx
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
leal 2368359562(%ebx,%ebp,1),%ebx
|
||||
xorl %ecx,%edi
|
||||
andl %eax,%edi
|
||||
movl 20(%esi),%ebp
|
||||
xorl %edx,%edi
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $20,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Round 3, sixteen steps: rotate counts 4, 11, 16, 23. Each step computes
# the XOR of the three other state words in %edi.
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 4294588738(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl 32(%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
|
||||
|
||||
leal 2272392833(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 44(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 1839030562(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 56(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
|
||||
|
||||
leal 4259657740(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl 4(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 2763975236(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl 16(%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
|
||||
|
||||
leal 1272893353(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 28(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 4139469664(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 40(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
|
||||
|
||||
leal 3200236656(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl 52(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 681279174(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl (%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
|
||||
|
||||
leal 3936430074(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 12(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 3572445317(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 24(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
|
||||
|
||||
leal 76029189(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl 36(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl %ecx,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
xorl %edx,%edi
|
||||
xorl %ebx,%edi
|
||||
leal 3654602809(%eax,%ebp,1),%eax
|
||||
addl %edi,%eax
|
||||
roll $4,%eax
|
||||
movl 48(%esi),%ebp
|
||||
movl %ebx,%edi
|
||||
|
||||
|
||||
leal 3873151461(%edx,%ebp,1),%edx
|
||||
addl %ebx,%eax
|
||||
xorl %ecx,%edi
|
||||
xorl %eax,%edi
|
||||
movl 60(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl %eax,%edi
|
||||
roll $11,%edx
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
xorl %ebx,%edi
|
||||
xorl %edx,%edi
|
||||
leal 530742520(%ecx,%ebp,1),%ecx
|
||||
addl %edi,%ecx
|
||||
roll $16,%ecx
|
||||
movl 8(%esi),%ebp
|
||||
movl %edx,%edi
|
||||
|
||||
|
||||
leal 3299628645(%ebx,%ebp,1),%ebx
|
||||
addl %edx,%ecx
|
||||
xorl %eax,%edi
|
||||
xorl %ecx,%edi
|
||||
movl (%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
# Preload all-ones so that the XOR below yields a bitwise complement.
movl $-1,%edi
|
||||
roll $23,%ebx
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Round 4, sixteen steps: rotate counts 6, 10, 15, 21. Each step computes
# (x | ~z) ^ y in %edi; the complement comes from XOR with the preloaded
# value -1.
xorl %edx,%edi
|
||||
orl %ebx,%edi
|
||||
leal 4096336452(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 28(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
orl %eax,%edi
|
||||
leal 1126891415(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 56(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
orl %edx,%edi
|
||||
leal 2878612391(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 20(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
orl %ecx,%edi
|
||||
leal 4237533241(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 48(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $21,%ebx
|
||||
xorl %edx,%edi
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
orl %ebx,%edi
|
||||
leal 1700485571(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 12(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
orl %eax,%edi
|
||||
leal 2399980690(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 40(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
orl %edx,%edi
|
||||
leal 4293915773(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 4(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
orl %ecx,%edi
|
||||
leal 2240044497(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 32(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $21,%ebx
|
||||
xorl %edx,%edi
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
orl %ebx,%edi
|
||||
leal 1873313359(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 60(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
orl %eax,%edi
|
||||
leal 4264355552(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 24(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
orl %edx,%edi
|
||||
leal 2734768916(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 52(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
orl %ecx,%edi
|
||||
leal 1309151649(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
movl 16(%esi),%ebp
|
||||
addl %edi,%ebx
|
||||
movl $-1,%edi
|
||||
roll $21,%ebx
|
||||
xorl %edx,%edi
|
||||
addl %ecx,%ebx
|
||||
|
||||
|
||||
orl %ebx,%edi
|
||||
leal 4149444226(%eax,%ebp,1),%eax
|
||||
xorl %ecx,%edi
|
||||
movl 44(%esi),%ebp
|
||||
addl %edi,%eax
|
||||
movl $-1,%edi
|
||||
roll $6,%eax
|
||||
xorl %ecx,%edi
|
||||
addl %ebx,%eax
|
||||
|
||||
|
||||
orl %eax,%edi
|
||||
leal 3174756917(%edx,%ebp,1),%edx
|
||||
xorl %ebx,%edi
|
||||
movl 8(%esi),%ebp
|
||||
addl %edi,%edx
|
||||
movl $-1,%edi
|
||||
roll $10,%edx
|
||||
xorl %ebx,%edi
|
||||
addl %eax,%edx
|
||||
|
||||
|
||||
orl %edx,%edi
|
||||
leal 718787259(%ecx,%ebp,1),%ecx
|
||||
xorl %eax,%edi
|
||||
movl 36(%esi),%ebp
|
||||
addl %edi,%ecx
|
||||
movl $-1,%edi
|
||||
roll $15,%ecx
|
||||
xorl %eax,%edi
|
||||
addl %edx,%ecx
|
||||
|
||||
|
||||
orl %ecx,%edi
|
||||
leal 3951481745(%ebx,%ebp,1),%ebx
|
||||
xorl %edx,%edi
|
||||
# Five pushes plus the return address put the first argument (the state
# pointer) at 24(%esp); it is reloaded here for the feed-forward.
movl 24(%esp),%ebp
|
||||
addl %edi,%ebx
|
||||
# Advance the data pointer to the next 64-byte block.
addl $64,%esi
|
||||
roll $21,%ebx
|
||||
movl (%ebp),%edi
|
||||
addl %ecx,%ebx
|
||||
# Add the previous state words to the working registers and store the
# sums back through the state pointer.
addl %edi,%eax
|
||||
movl 4(%ebp),%edi
|
||||
addl %edi,%ebx
|
||||
movl 8(%ebp),%edi
|
||||
addl %edi,%ecx
|
||||
movl 12(%ebp),%edi
|
||||
addl %edi,%edx
|
||||
movl %eax,(%ebp)
|
||||
movl %ebx,4(%ebp)
|
||||
# edi = the saved address of the last block (pushed in the prologue).
movl (%esp),%edi
|
||||
movl %ecx,8(%ebp)
|
||||
movl %edx,12(%ebp)
|
||||
# Unsigned compare: loop again while the last-block address is not below
# the advanced data pointer.
cmpl %esi,%edi
|
||||
jae .L000start
|
||||
# Discard the saved last-block address, then restore the saved registers
# in reverse push order.
popl %eax
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
3808
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/sha1-586.S
Normal file
3808
contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/sha1-586.S
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user