Merge branch 'master' into FawnD2-switch-upstream-for-arrow-submodule

This commit is contained in:
Alexey Milovidov 2020-12-20 09:38:46 +03:00
commit bf2df558d4
1580 changed files with 353924 additions and 33156 deletions

View File

@ -1,3 +1,5 @@
# See the example here: https://github.com/github/codeql-action
name: "CodeQL Scanning"
on:
@ -16,17 +18,14 @@ jobs:
fetch-depth: 2
submodules: 'recursive'
- run: git checkout HEAD^2
if: ${{ github.event_name == 'pull_request' }}
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
with:
languages: cpp
- run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-9 g++-9 && mkdir build
- run: cd build && CC=gcc-9 CXX=g++-9 cmake ..
- run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-10 g++-10 && mkdir build
- run: cd build && CC=gcc-10 CXX=g++-10 cmake ..
- run: cd build && ninja
- name: Perform CodeQL Analysis

11
.gitignore vendored
View File

@ -125,4 +125,15 @@ website/package-lock.json
# Toolchains
/cmake/toolchain/*
# ANTLR extension cache
.antlr
# ANTLR generated files
/src/Parsers/New/*.interp
/src/Parsers/New/*.tokens
/src/Parsers/New/ClickHouseParserBaseVisitor.*
# pytest-profiling
/prof
*.iml

25
.gitmodules vendored
View File

@ -125,9 +125,6 @@
[submodule "contrib/curl"]
path = contrib/curl
url = https://github.com/curl/curl.git
[submodule "contrib/openssl"]
path = contrib/openssl
url = https://github.com/ClickHouse-Extras/openssl.git
[submodule "contrib/icudata"]
path = contrib/icudata
url = https://github.com/ClickHouse-Extras/icudata.git
@ -143,9 +140,6 @@
[submodule "contrib/replxx"]
path = contrib/replxx
url = https://github.com/ClickHouse-Extras/replxx.git
[submodule "contrib/ryu"]
path = contrib/ryu
url = https://github.com/ClickHouse-Extras/ryu.git
[submodule "contrib/avro"]
path = contrib/avro
url = https://github.com/ClickHouse-Extras/avro.git
@ -158,7 +152,7 @@
url = https://github.com/ClickHouse-Extras/libcpuid.git
[submodule "contrib/openldap"]
path = contrib/openldap
url = https://github.com/openldap/openldap.git
url = https://github.com/ClickHouse-Extras/openldap.git
[submodule "contrib/AMQP-CPP"]
path = contrib/AMQP-CPP
url = https://github.com/ClickHouse-Extras/AMQP-CPP.git
@ -173,6 +167,9 @@
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt.git
[submodule "contrib/antlr4-runtime"]
path = contrib/antlr4-runtime
url = https://github.com/ClickHouse-Extras/antlr4-runtime.git
[submodule "contrib/sentry-native"]
path = contrib/sentry-native
url = https://github.com/ClickHouse-Extras/sentry-native.git
@ -184,7 +181,7 @@
url = https://github.com/kthohr/stats.git
[submodule "contrib/krb5"]
path = contrib/krb5
url = https://github.com/krb5/krb5
url = https://github.com/ClickHouse-Extras/krb5
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/cyrusimap/cyrus-sasl
@ -198,8 +195,7 @@
url = https://github.com/danlark1/miniselect
[submodule "contrib/rocksdb"]
path = contrib/rocksdb
url = https://github.com/facebook/rocksdb
branch = v6.14.5
url = https://github.com/ClickHouse-Extras/rocksdb.git
[submodule "contrib/xz"]
path = contrib/xz
url = https://github.com/xz-mirror/xz
@ -207,3 +203,12 @@
path = contrib/abseil-cpp
url = https://github.com/ClickHouse-Extras/abseil-cpp.git
branch = lts_2020_02_25
[submodule "contrib/dragonbox"]
path = contrib/dragonbox
url = https://github.com/ClickHouse-Extras/dragonbox.git
[submodule "contrib/fast_float"]
path = contrib/fast_float
url = https://github.com/fastfloat/fast_float
[submodule "contrib/boringssl"]
path = contrib/boringssl
url = https://github.com/ClickHouse-Extras/boringssl.git

View File

@ -1,3 +1,126 @@
### ClickHouse release 20.12
### ClickHouse release v20.12.3.3-stable, 2020-12-13
#### Backward Incompatible Change
* Enable `use_compact_format_in_distributed_parts_names` by default (see the documentation for the reference). [#16728](https://github.com/ClickHouse/ClickHouse/pull/16728) ([Azat Khuzhin](https://github.com/azat)).
* Accept user settings related to file formats (e.g. `format_csv_delimiter`) in the `SETTINGS` clause when creating a table that uses `File` engine, and use these settings in all `INSERT`s and `SELECT`s. The file format settings changed in the current user session, or in the `SETTINGS` clause of a DML query itself, no longer affect the query. [#16591](https://github.com/ClickHouse/ClickHouse/pull/16591) ([Alexander Kuzmenkov](https://github.com/akuzm)).
#### New Feature
* Add `*.xz` compression/decompression support. It enables using `*.xz` in the `file()` function. This closes [#8828](https://github.com/ClickHouse/ClickHouse/issues/8828). [#16578](https://github.com/ClickHouse/ClickHouse/pull/16578) ([Abi Palagashvili](https://github.com/fibersel)).
* Introduce the query `ALTER TABLE ... DROP|DETACH PART 'part_name'`. [#15511](https://github.com/ClickHouse/ClickHouse/pull/15511) ([nvartolomei](https://github.com/nvartolomei)).
* Added new ALTER UPDATE/DELETE IN PARTITION syntax. [#13403](https://github.com/ClickHouse/ClickHouse/pull/13403) ([Vladimir Chebotarev](https://github.com/excitoon)).
* Allow formatting named tuples as JSON objects when using JSON input/output formats, controlled by the `output_format_json_named_tuples_as_objects` setting, disabled by default. [#17175](https://github.com/ClickHouse/ClickHouse/pull/17175) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Add a possibility to input enum value as its id in TSV and CSV formats by default. [#16834](https://github.com/ClickHouse/ClickHouse/pull/16834) ([Kruglov Pavel](https://github.com/Avogar)).
* Add COLLATE support for Nullable, LowCardinality, Array and Tuple, where nested type is String. Also refactor the code associated with collations in ColumnString.cpp. [#16273](https://github.com/ClickHouse/ClickHouse/pull/16273) ([Kruglov Pavel](https://github.com/Avogar)).
* New `tcpPort` function returns TCP port listened by this server. [#17134](https://github.com/ClickHouse/ClickHouse/pull/17134) ([Ivan](https://github.com/abyss7)).
* Add new math functions: `acosh`, `asinh`, `atan2`, `atanh`, `cosh`, `hypot`, `log1p`, `sinh`. [#16636](https://github.com/ClickHouse/ClickHouse/pull/16636) ([Konstantin Malanchev](https://github.com/hombit)).
* Possibility to distribute the merges between different replicas. Introduces the `execute_merges_on_single_replica_time_threshold` mergetree setting. [#16424](https://github.com/ClickHouse/ClickHouse/pull/16424) ([filimonov](https://github.com/filimonov)).
* Add setting `aggregate_functions_null_for_empty` for SQL standard compatibility. This option will rewrite all aggregate functions in a query, adding -OrNull suffix to them. Implements [10273](https://github.com/ClickHouse/ClickHouse/issues/10273). [#16123](https://github.com/ClickHouse/ClickHouse/pull/16123) ([flynn](https://github.com/ucasFL)).
* Updated DateTime, DateTime64 parsing to accept string Date literal format. [#16040](https://github.com/ClickHouse/ClickHouse/pull/16040) ([Maksim Kita](https://github.com/kitaisreal)).
* Make it possible to change the path to history file in `clickhouse-client` using the `--history_file` parameter. [#15960](https://github.com/ClickHouse/ClickHouse/pull/15960) ([Maksim Kita](https://github.com/kitaisreal)).
#### Bug Fix
* Fix the issue when server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([Amos Bird](https://github.com/amosbird)).
* Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([tavplubix](https://github.com/tavplubix)).
* Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)).
* fix incorrect initialization of `max_compress_block_size` of MergeTreeWriterSettings with `min_compress_block_size`. [#17833](https://github.com/ClickHouse/ClickHouse/pull/17833) ([flynn](https://github.com/ucasFL)).
* Exception message about max table size to drop was displayed incorrectly. [#17764](https://github.com/ClickHouse/ClickHouse/pull/17764) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed possible segfault when there is not enough space when inserting into `Distributed` table. [#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([tavplubix](https://github.com/tavplubix)).
* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)).
* It might be determined incorrectly whether a cluster is circular- (cross-) replicated or not when executing `ON CLUSTER` query due to race condition when `pool_size` > 1. It's fixed. [#17640](https://github.com/ClickHouse/ClickHouse/pull/17640) ([tavplubix](https://github.com/tavplubix)).
* Exception `fmt::v7::format_error` can be logged in background for MergeTree tables. This fixes [#17613](https://github.com/ClickHouse/ClickHouse/issues/17613). [#17615](https://github.com/ClickHouse/ClickHouse/pull/17615) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* When clickhouse-client is used in interactive mode with multiline queries, single line comment was erroneously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)).
* Fix issue when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)).
* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)).
* Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* fix `toInt256(inf)` stack overflow. Int256 is an experimental feature. Closed [#17235](https://github.com/ClickHouse/ClickHouse/issues/17235). [#17257](https://github.com/ClickHouse/ClickHouse/pull/17257) ([flynn](https://github.com/ucasFL)).
* Fix possible `Unexpected packet Data received from client` error logged for Distributed queries with `LIMIT`. [#17254](https://github.com/ClickHouse/ClickHouse/pull/17254) ([Azat Khuzhin](https://github.com/azat)).
* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246). [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)).
* Fix possible wrong index analysis when the types of the index comparison are different. This fixes [#17122](https://github.com/ClickHouse/ClickHouse/issues/17122). [#17145](https://github.com/ClickHouse/ClickHouse/pull/17145) ([Amos Bird](https://github.com/amosbird)).
* Fix ColumnConst comparison which leads to crash. This fixes [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088). [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)).
* Multiple fixes for MaterializeMySQL (experimental feature). Fixes [#16923](https://github.com/ClickHouse/ClickHouse/issues/16923). Fixes [#15883](https://github.com/ClickHouse/ClickHouse/issues/15883). Fix MaterializeMySQL SYNC failure when MySQL `binlog_checksum` is modified. [#17091](https://github.com/ClickHouse/ClickHouse/pull/17091) ([Winter Zhang](https://github.com/zhang2014)).
* Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)).
* Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()` Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([tavplubix](https://github.com/tavplubix)).
* Fix unfinished implementation of function `fuzzBits`, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)).
* Fix LLVM's libunwind in the case when CFA register is RAX. This is the [bug](https://bugs.llvm.org/show_bug.cgi?id=48186) in [LLVM's libunwind](https://github.com/llvm/llvm-project/tree/master/libunwind). We already have workarounds for this bug. [#17046](https://github.com/ClickHouse/ClickHouse/pull/17046) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)).
* Fix `optimize_distributed_group_by_sharding_key` setting (that is disabled by default) for query with OFFSET only. [#16996](https://github.com/ClickHouse/ClickHouse/pull/16996) ([Azat Khuzhin](https://github.com/azat)).
* Fix for Merge tables over Distributed tables with JOIN. [#16993](https://github.com/ClickHouse/ClickHouse/pull/16993) ([Azat Khuzhin](https://github.com/azat)).
* Fixed wrong result in big integers (128, 256 bit) when casting from double. Big integers support is experimental. [#16986](https://github.com/ClickHouse/ClickHouse/pull/16986) ([Mike](https://github.com/myrrc)).
* Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` has a `WHERE` expression on the column being altered and the alter has not finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)).
* Blame info was not calculated correctly in `clickhouse-git-import`. [#16959](https://github.com/ClickHouse/ClickHouse/pull/16959) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix order by optimization with monotonous functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)).
* Fix optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. Fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix strange code in InterpreterShowAccessQuery. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([tavplubix](https://github.com/tavplubix)).
* Prevent clickhouse server crashes when using the function `timeSeriesGroupSum`. The function is removed from newer ClickHouse releases. [#16865](https://github.com/ClickHouse/ClickHouse/pull/16865) ([filimonov](https://github.com/filimonov)).
* Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)).
* If no memory can be allocated while writing table metadata on disk, broken metadata file can be written. [#16772](https://github.com/ClickHouse/ClickHouse/pull/16772) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix trivial query optimization with partition predicate. [#16767](https://github.com/ClickHouse/ClickHouse/pull/16767) ([Azat Khuzhin](https://github.com/azat)).
* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)).
* Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)).
* Fix remote query failure when using 'if' suffix aggregate function. Fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) Fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)).
* Fix inconsistent behavior caused by `select_sequential_consistency` for optimized trivial count query and system.tables. [#16309](https://github.com/ClickHouse/ClickHouse/pull/16309) ([Hao Chen](https://github.com/haoch)).
#### Improvement
* Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm. [#16895](https://github.com/ClickHouse/ClickHouse/pull/16895) ([Anton Popov](https://github.com/CurtizJ)).
* Enable compact format of directories for asynchronous sends in Distributed tables: `use_compact_format_in_distributed_parts_names` is set to 1 by default. [#16788](https://github.com/ClickHouse/ClickHouse/pull/16788) ([Azat Khuzhin](https://github.com/azat)).
* Abort multipart upload if no data was written to S3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)).
* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)).
* Mask password in data_path in the system.distribution_queue. [#16727](https://github.com/ClickHouse/ClickHouse/pull/16727) ([Azat Khuzhin](https://github.com/azat)).
* Throw error when use column transformer replaces non existing column. [#16183](https://github.com/ClickHouse/ClickHouse/pull/16183) ([hexiaoting](https://github.com/hexiaoting)).
* Turn off parallel parsing when there is not enough memory for all threads to work simultaneously. Also there could be exceptions like "Memory limit exceeded" when somebody tries to insert extremely huge rows (> min_chunk_bytes_for_parallel_parsing), because each piece to parse has to be independent set of strings (one or more). [#16721](https://github.com/ClickHouse/ClickHouse/pull/16721) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)).
* Correct grammar in error message in JSONEachRow, JSONCompactEachRow, and RegexpRow input formats. [#17205](https://github.com/ClickHouse/ClickHouse/pull/17205) ([nico piderman](https://github.com/sneako)).
* Set default `host` and `port` parameters for `SOURCE(CLICKHOUSE(...))` to current instance and set default `user` value to `'default'`. [#16997](https://github.com/ClickHouse/ClickHouse/pull/16997) ([vdimir](https://github.com/vdimir)).
* Throw an informative error message when doing `ATTACH/DETACH TABLE <DICTIONARY>`. Before this PR, `detach table <dict>` works but leads to an ill-formed in-memory metadata. [#16885](https://github.com/ClickHouse/ClickHouse/pull/16885) ([Amos Bird](https://github.com/amosbird)).
* Add cutToFirstSignificantSubdomainWithWWW(). [#16845](https://github.com/ClickHouse/ClickHouse/pull/16845) ([Azat Khuzhin](https://github.com/azat)).
* Server refused to startup with exception message if wrong config is given (`metric_log`.`collect_interval_milliseconds` is missing). [#16815](https://github.com/ClickHouse/ClickHouse/pull/16815) ([Ivan](https://github.com/abyss7)).
* Better exception message when configuration for distributed DDL is absent. This fixes [#5075](https://github.com/ClickHouse/ClickHouse/issues/5075). [#16769](https://github.com/ClickHouse/ClickHouse/pull/16769) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Usability improvement: better suggestions in syntax error message when `CODEC` expression is misplaced in `CREATE TABLE` query. This fixes [#12493](https://github.com/ClickHouse/ClickHouse/issues/12493). [#16768](https://github.com/ClickHouse/ClickHouse/pull/16768) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Remove empty directories for async INSERT at start of Distributed engine. [#16729](https://github.com/ClickHouse/ClickHouse/pull/16729) ([Azat Khuzhin](https://github.com/azat)).
* Workaround for using S3 with nginx server as proxy. Nginx currently does not accept urls with empty path like `http://domain.com?delete`, but vanilla aws-sdk-cpp produces this kind of urls. This commit uses patched aws-sdk-cpp version, which makes urls with "/" as path in these cases, like `http://domain.com/?delete`. [#16709](https://github.com/ClickHouse/ClickHouse/pull/16709) ([ianton-ru](https://github.com/ianton-ru)).
* Allow `reinterpretAs*` functions to work for integers and floats of the same size. Implements [16640](https://github.com/ClickHouse/ClickHouse/issues/16640). [#16657](https://github.com/ClickHouse/ClickHouse/pull/16657) ([flynn](https://github.com/ucasFL)).
* Now, `<auxiliary_zookeepers>` configuration can be changed in `config.xml` and reloaded without server startup. [#16627](https://github.com/ClickHouse/ClickHouse/pull/16627) ([Amos Bird](https://github.com/amosbird)).
* Support SNI in https connections to remote resources. This will allow to connect to Cloudflare servers that require SNI. This fixes [#10055](https://github.com/ClickHouse/ClickHouse/issues/10055). [#16252](https://github.com/ClickHouse/ClickHouse/pull/16252) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)).
* Fix possible stack overflow if a loop of materialized views is created. This closes [#15732](https://github.com/ClickHouse/ClickHouse/issues/15732). [#16048](https://github.com/ClickHouse/ClickHouse/pull/16048) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Simplify the implementation of background tasks processing for the MergeTree table engines family. There should be no visible changes for user. [#15983](https://github.com/ClickHouse/ClickHouse/pull/15983) ([alesapin](https://github.com/alesapin)).
* Improvement for MaterializeMySQL (experimental feature). Throw exception about right sync privileges when MySQL sync user has error privileges. [#15977](https://github.com/ClickHouse/ClickHouse/pull/15977) ([TCeason](https://github.com/TCeason)).
* Made `indexOf()` use BloomFilter. [#14977](https://github.com/ClickHouse/ClickHouse/pull/14977) ([achimbab](https://github.com/achimbab)).
#### Performance Improvement
* Use Floyd-Rivest algorithm, it is the best for the ClickHouse use case of partial sorting. Benchmarks are in https://github.com/danlark1/miniselect and [here](https://drive.google.com/drive/folders/1DHEaeXgZuX6AJ9eByeZ8iQVQv0ueP8XM). [#16825](https://github.com/ClickHouse/ClickHouse/pull/16825) ([Danila Kutenin](https://github.com/danlark1)).
* Now `ReplicatedMergeTree` tree engines family uses a separate thread pool for replicated fetches. Size of the pool limited by setting `background_fetches_pool_size` which can be tuned with a server restart. The default value of the setting is 3 and it means that the maximum amount of parallel fetches is equal to 3 (and it allows to utilize 10G network). Fixes #520. [#16390](https://github.com/ClickHouse/ClickHouse/pull/16390) ([alesapin](https://github.com/alesapin)).
* Fixed uncontrolled growth of the state of `quantileTDigest`. [#16680](https://github.com/ClickHouse/ClickHouse/pull/16680) ([hrissan](https://github.com/hrissan)).
* Add `VIEW` subquery description to `EXPLAIN`. Limit push down optimisation for `VIEW`. Add local replicas of `Distributed` to query plan. [#14936](https://github.com/ClickHouse/ClickHouse/pull/14936) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix optimize_read_in_order/optimize_aggregation_in_order with max_threads > 0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)).
* Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)).
* Now we can safely prune partitions with exact match. Useful case: Suppose table is partitioned by `intHash64(x) % 100` and the query has condition on `intHash64(x) % 100` verbatim, not on x. [#16253](https://github.com/ClickHouse/ClickHouse/pull/16253) ([Amos Bird](https://github.com/amosbird)).
#### Experimental Feature
* Add `EmbeddedRocksDB` table engine (can be used for dictionaries). [#15073](https://github.com/ClickHouse/ClickHouse/pull/15073) ([sundyli](https://github.com/sundy-li)).
#### Build/Testing/Packaging Improvement
* Improvements in test coverage building images. [#17233](https://github.com/ClickHouse/ClickHouse/pull/17233) ([alesapin](https://github.com/alesapin)).
* Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)).
* Fix UBSan report in Poco. This closes [#12719](https://github.com/ClickHouse/ClickHouse/issues/12719). [#16765](https://github.com/ClickHouse/ClickHouse/pull/16765) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Do not instrument 3rd-party libraries with UBSan. [#16764](https://github.com/ClickHouse/ClickHouse/pull/16764) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix UBSan report in cache dictionaries. This closes [#12641](https://github.com/ClickHouse/ClickHouse/issues/12641). [#16763](https://github.com/ClickHouse/ClickHouse/pull/16763) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix UBSan report when trying to convert infinite floating point number to integer. This closes [#14190](https://github.com/ClickHouse/ClickHouse/issues/14190). [#16677](https://github.com/ClickHouse/ClickHouse/pull/16677) ([alexey-milovidov](https://github.com/alexey-milovidov)).
## ClickHouse release 20.11
### ClickHouse release v20.11.3.3-stable, 2020-11-13
@ -15,7 +138,8 @@
* Restrict to use of non-comparable data types (like `AggregateFunction`) in keys (Sorting key, Primary key, Partition key, and so on). [#16601](https://github.com/ClickHouse/ClickHouse/pull/16601) ([alesapin](https://github.com/alesapin)).
* Remove `ANALYZE` and `AST` queries, and make the setting `enable_debug_queries` obsolete since now it is the part of full featured `EXPLAIN` query. [#16536](https://github.com/ClickHouse/ClickHouse/pull/16536) ([Ivan](https://github.com/abyss7)).
* Aggregate functions `boundingRatio`, `rankCorr`, `retention`, `timeSeriesGroupSum`, `timeSeriesGroupRateSum`, `windowFunnel` were erroneously made case-insensitive. Now their names are made case sensitive as designed. Only functions that are specified in SQL standard or made for compatibility with other DBMS or functions similar to those should be case-insensitive. [#16407](https://github.com/ClickHouse/ClickHouse/pull/16407) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Make `rankCorr` function return nan on insufficient data https://github.com/ClickHouse/ClickHouse/issues/16124. [#16135](https://github.com/ClickHouse/ClickHouse/pull/16135) ([hexiaoting](https://github.com/hexiaoting)).
* Make `rankCorr` function return nan on insufficient data [#16124](https://github.com/ClickHouse/ClickHouse/issues/16124). [#16135](https://github.com/ClickHouse/ClickHouse/pull/16135) ([hexiaoting](https://github.com/hexiaoting)).
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
#### New Feature
@ -32,7 +156,7 @@
* Now we can provide identifiers via query parameters. And these parameters can be used as table objects or columns. [#16594](https://github.com/ClickHouse/ClickHouse/pull/16594) ([Amos Bird](https://github.com/amosbird)).
* Added big integers (UInt256, Int128, Int256) and UUID data types support for MergeTree BloomFilter index. Big integers is an experimental feature. [#16642](https://github.com/ClickHouse/ClickHouse/pull/16642) ([Maksim Kita](https://github.com/kitaisreal)).
* Add `farmFingerprint64` function (non-cryptographic string hashing). [#16570](https://github.com/ClickHouse/ClickHouse/pull/16570) ([Jacob Hayes](https://github.com/JacobHayes)).
* Add `log_queries_min_query_duration_ms`, only queries slower then the value of this setting will go to `query_log`/`query_thread_log` (i.e. something like `slow_query_log` in mysql). [#16529](https://github.com/ClickHouse/ClickHouse/pull/16529) ([Azat Khuzhin](https://github.com/azat)).
* Add `log_queries_min_query_duration_ms`, only queries slower than the value of this setting will go to `query_log`/`query_thread_log` (i.e. something like `slow_query_log` in mysql). [#16529](https://github.com/ClickHouse/ClickHouse/pull/16529) ([Azat Khuzhin](https://github.com/azat)).
* Ability to create a docker image on the top of `Alpine`. Uses precompiled binary and glibc components from ubuntu 20.04. [#16479](https://github.com/ClickHouse/ClickHouse/pull/16479) ([filimonov](https://github.com/filimonov)).
* Added `toUUIDOrNull`, `toUUIDOrZero` cast functions. [#16337](https://github.com/ClickHouse/ClickHouse/pull/16337) ([Maksim Kita](https://github.com/kitaisreal)).
* Add `max_concurrent_queries_for_all_users` setting, see [#6636](https://github.com/ClickHouse/ClickHouse/issues/6636) for use cases. [#16154](https://github.com/ClickHouse/ClickHouse/pull/16154) ([nvartolomei](https://github.com/nvartolomei)).
@ -154,6 +278,7 @@
* Change default value of `format_regexp_escaping_rule` setting (it's related to `Regexp` format) to `Raw` (it means - read whole subpattern as a value) to make the behaviour more like what users expect. [#15426](https://github.com/ClickHouse/ClickHouse/pull/15426) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Add support for nested multiline comments `/* comment /* comment */ */` in SQL. This conforms to the SQL standard. [#14655](https://github.com/ClickHouse/ClickHouse/pull/14655) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Added MergeTree settings (`max_replicated_merges_with_ttl_in_queue` and `max_number_of_merges_with_ttl_in_pool`) to control the number of merges with TTL in the background pool and replicated queue. This change breaks compatibility with older versions only if you use delete TTL. Otherwise, replication will stay compatible. You can avoid incompatibility issues if you update all shard replicas at once or execute `SYSTEM STOP TTL MERGES` until you finish the update of all replicas. If you'll get an incompatible entry in the replication queue, first of all, execute `SYSTEM STOP TTL MERGES` and after `ALTER TABLE ... DETACH PARTITION ...` the partition where incompatible TTL merge was assigned. Attach it back on a single replica. [#14490](https://github.com/ClickHouse/ClickHouse/pull/14490) ([alesapin](https://github.com/alesapin)).
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
#### New Feature
@ -176,7 +301,7 @@
* Add `JSONStrings` format which outputs data in arrays of strings. [#14333](https://github.com/ClickHouse/ClickHouse/pull/14333) ([hcz](https://github.com/hczhcz)).
* Add support for "Raw" column format for `Regexp` format. It allows to simply extract subpatterns as a whole without any escaping rules. [#15363](https://github.com/ClickHouse/ClickHouse/pull/15363) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Allow configurable `NULL` representation for `TSV` output format. It is controlled by the setting `output_format_tsv_null_representation` which is `\N` by default. This closes [#9375](https://github.com/ClickHouse/ClickHouse/issues/9375). Note that the setting only controls output format and `\N` is the only supported `NULL` representation for `TSV` input format. [#14586](https://github.com/ClickHouse/ClickHouse/pull/14586) ([Kruglov Pavel](https://github.com/Avogar)).
* Support Decimal data type for `MaterializedMySQL`. `MaterializedMySQL` is an experimental feature. [#14535](https://github.com/ClickHouse/ClickHouse/pull/14535) ([Winter Zhang](https://github.com/zhang2014)).
* Support Decimal data type for `MaterializeMySQL`. `MaterializeMySQL` is an experimental feature. [#14535](https://github.com/ClickHouse/ClickHouse/pull/14535) ([Winter Zhang](https://github.com/zhang2014)).
* Add new feature: `SHOW DATABASES LIKE 'xxx'`. [#14521](https://github.com/ClickHouse/ClickHouse/pull/14521) ([hexiaoting](https://github.com/hexiaoting)).
* Added a script to import (arbitrary) git repository to ClickHouse as a sample dataset. [#14471](https://github.com/ClickHouse/ClickHouse/pull/14471) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Now insert statements can have asterisk (or variants) with column transformers in the column list. [#14453](https://github.com/ClickHouse/ClickHouse/pull/14453) ([Amos Bird](https://github.com/amosbird)).
@ -198,18 +323,18 @@
* Fix a very wrong code in TwoLevelStringHashTable implementation, which might lead to memory leak. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)).
* Fix segfault in some cases of wrong aggregation in lambdas. [#16082](https://github.com/ClickHouse/ClickHouse/pull/16082) ([Anton Popov](https://github.com/CurtizJ)).
* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)).
* `MaterializedMySQL` (experimental feature): Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)).
* `MaterializeMySQL` (experimental feature): Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)).
* Allow to use `direct` layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)).
* Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)).
* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)).
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
* `MaterializedMySQL` (experimental feature): Fix crash on create database failure. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
* `MaterializeMySQL` (experimental feature): Fix crash on create database failure. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) - Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)).
* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fixes [#12513](https://github.com/ClickHouse/ClickHouse/issues/12513): difference expressions with same alias when query is reanalyzed. [#15886](https://github.com/ClickHouse/ClickHouse/pull/15886) ([Winter Zhang](https://github.com/zhang2014)).
* Fix possible very rare deadlocks in RBAC implementation. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)).
* `MaterializedMySQL` (experimental feature): Fix `select count()` inaccuracy. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([tavplubix](https://github.com/tavplubix)).
* `MaterializeMySQL` (experimental feature): Fix `select count()` inaccuracy. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([tavplubix](https://github.com/tavplubix)).
* Fix some cases of queries, in which only virtual columns are selected. Previously `Not found column _nothing in block` exception may be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)).
* Fix drop of materialized view with inner table in Atomic database (hangs all subsequent DROP TABLE due to hang of the worker thread, due to recursive DROP TABLE for inner table of MV). [#15743](https://github.com/ClickHouse/ClickHouse/pull/15743) ([Azat Khuzhin](https://github.com/azat)).
* Possibility to move part to another disk/volume if the first attempt failed. [#15723](https://github.com/ClickHouse/ClickHouse/pull/15723) ([Pavel Kovalenko](https://github.com/Jokser)).
@ -241,37 +366,37 @@
* Fix hang of queries with a lot of subqueries to the same table of `MySQL` engine. Previously, if there were more than 16 subqueries to the same `MySQL` table in a query, it hung forever. [#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)).
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix 'Unknown identifier' in GROUP BY when query has JOIN over Merge table. [#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)).
* Fix instance crash when using `joinGet` with `LowCardinality` types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
* Fix instance crash when using `joinGet` with `LowCardinality` types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
* Adjust Decimal field size in MySQL column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
* Fixes `Data compressed with different methods` in `join_algorithm='auto'`. Keep LowCardinality as type for left table join key in `join_algorithm='partial_merge'`. [#15088](https://github.com/ClickHouse/ClickHouse/pull/15088) ([Artem Zuikov](https://github.com/4ertus2)).
* Update `jemalloc` to fix `percpu_arena` with affinity mask. [#15035](https://github.com/ClickHouse/ClickHouse/pull/15035) ([Azat Khuzhin](https://github.com/azat)). [#14957](https://github.com/ClickHouse/ClickHouse/pull/14957) ([Azat Khuzhin](https://github.com/azat)).
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes https://github.com/ClickHouse/ClickHouse/issues/14908. [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in Docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
* Fix crash in RIGHT or FULL JOIN with `join_algorithm='auto'` when the memory limit is exceeded and we should change HashJoin with MergeJoin. [#15002](https://github.com/ClickHouse/ClickHouse/pull/15002) ([Artem Zuikov](https://github.com/4ertus2)).
* Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)).
* Fix to make predicate push down work when subquery contains `finalizeAggregation` function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes https://github.com/ClickHouse/ClickHouse/issues/14923. [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* `MaterializedMySQL` (experimental feature): Fixed `.metadata.tmp File exists` error. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* `MaterializeMySQL` (experimental feature): Fixed `.metadata.tmp File exists` error. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
* Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix SIGSEGV for an attempt to INSERT into StorageFile with file descriptor. [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)).
* Fixed segfault in `cache` dictionary [#14837](https://github.com/ClickHouse/ClickHouse/issues/14837). [#14879](https://github.com/ClickHouse/ClickHouse/pull/14879) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* `MaterializedMySQL` (experimental feature): Fixed bug in parsing MySQL binlog events, which causes `Attempt to read after eof` and `Packet payload is not fully read` in `MaterializeMySQL` database engine. [#14852](https://github.com/ClickHouse/ClickHouse/pull/14852) ([Winter Zhang](https://github.com/zhang2014)).
* `MaterializeMySQL` (experimental feature): Fixed bug in parsing MySQL binlog events, which causes `Attempt to read after eof` and `Packet payload is not fully read` in `MaterializeMySQL` database engine. [#14852](https://github.com/ClickHouse/ClickHouse/pull/14852) ([Winter Zhang](https://github.com/zhang2014)).
* Fix rare error in `SELECT` queries when the queried column has `DEFAULT` expression which depends on the other column which also has `DEFAULT` and not present in select query and not exists on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)).
* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)).
* `Replace` column transformer should replace identifiers with cloned ASTs. This fixes https://github.com/ClickHouse/ClickHouse/issues/14695 . [#14734](https://github.com/ClickHouse/ClickHouse/pull/14734) ([Amos Bird](https://github.com/amosbird)).
* `Replace` column transformer should replace identifiers with cloned ASTs. This fixes [#14695](https://github.com/ClickHouse/ClickHouse/issues/14695). [#14734](https://github.com/ClickHouse/ClickHouse/pull/14734) ([Amos Bird](https://github.com/amosbird)).
* Fixed missed default database name in metadata of materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([tavplubix](https://github.com/tavplubix)).
* Fix bug when `ALTER UPDATE` mutation with `Nullable` column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)).
* Fix wrong Decimal multiplication result caused by wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)).
* Fix function `has` with `LowCardinality` of `Nullable`. [#14591](https://github.com/ClickHouse/ClickHouse/pull/14591) ([Mike](https://github.com/myrrc)).
* Cleanup data directory after Zookeeper exceptions during CreateQuery for StorageReplicatedMergeTree Engine. [#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix rare segfaults in functions with combinator `-Resample`, which could appear in result of overflow with very large parameters. [#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)).
* Fix a bug when converting `Nullable(String)` to Enum. Introduced by https://github.com/ClickHouse/ClickHouse/pull/12745. This fixes https://github.com/ClickHouse/ClickHouse/issues/14435. [#14530](https://github.com/ClickHouse/ClickHouse/pull/14530) ([Amos Bird](https://github.com/amosbird)).
* Fix a bug when converting `Nullable(String)` to Enum. Introduced by [#12745](https://github.com/ClickHouse/ClickHouse/pull/12745). This fixes [#14435](https://github.com/ClickHouse/ClickHouse/issues/14435). [#14530](https://github.com/ClickHouse/ClickHouse/pull/14530) ([Amos Bird](https://github.com/amosbird)).
* Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix `currentDatabase()` function cannot be used in `ON CLUSTER` ddl query. [#14211](https://github.com/ClickHouse/ClickHouse/pull/14211) ([Winter Zhang](https://github.com/zhang2014)).
* `MaterializedMySQL` (experimental feature): Fixed `Packet payload is not fully read` error in `MaterializeMySQL` database engine. [#14696](https://github.com/ClickHouse/ClickHouse/pull/14696) ([BohuTANG](https://github.com/BohuTANG)).
* `MaterializeMySQL` (experimental feature): Fixed `Packet payload is not fully read` error in `MaterializeMySQL` database engine. [#14696](https://github.com/ClickHouse/ClickHouse/pull/14696) ([BohuTANG](https://github.com/BohuTANG)).
#### Improvement
@ -306,7 +431,7 @@
* Add an option to skip access checks for `DiskS3`. `s3` disk is an experimental feature. [#14497](https://github.com/ClickHouse/ClickHouse/pull/14497) ([Pavel Kovalenko](https://github.com/Jokser)).
* Speed up server shutdown process if there are ongoing S3 requests. [#14496](https://github.com/ClickHouse/ClickHouse/pull/14496) ([Pavel Kovalenko](https://github.com/Jokser)).
* `SYSTEM RELOAD CONFIG` now throws an exception if failed to reload and continues using the previous users.xml. The background periodic reloading also continues using the previous users.xml if failed to reload. [#14492](https://github.com/ClickHouse/ClickHouse/pull/14492) ([Vitaly Baranov](https://github.com/vitlibar)).
* For INSERTs with inline data in VALUES format in the script mode of `clickhouse-client`, support semicolon as the data terminator, in addition to the new line. Closes https://github.com/ClickHouse/ClickHouse/issues/12288. [#13192](https://github.com/ClickHouse/ClickHouse/pull/13192) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* For INSERTs with inline data in VALUES format in the script mode of `clickhouse-client`, support semicolon as the data terminator, in addition to the new line. Closes [#12288](https://github.com/ClickHouse/ClickHouse/issues/12288). [#13192](https://github.com/ClickHouse/ClickHouse/pull/13192) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Support custom codecs in compact parts. [#12183](https://github.com/ClickHouse/ClickHouse/pull/12183) ([Anton Popov](https://github.com/CurtizJ)).
#### Performance Improvement
@ -318,7 +443,7 @@
* Improve performance of 256-bit types using (u)int64_t as base type for wide integers. Original wide integers use 8-bit types as base. [#14859](https://github.com/ClickHouse/ClickHouse/pull/14859) ([Artem Zuikov](https://github.com/4ertus2)).
* Explicitly use a temporary disk to store vertical merge temporary data. [#15639](https://github.com/ClickHouse/ClickHouse/pull/15639) ([Grigory Pervakov](https://github.com/GrigoryPervakov)).
* Use one S3 DeleteObjects request instead of multiple DeleteObject in a loop. No functionality changes, so covered by existing tests like integration/test_log_family_s3. [#15238](https://github.com/ClickHouse/ClickHouse/pull/15238) ([ianton-ru](https://github.com/ianton-ru)).
* Fix `DateTime <op> DateTime` mistakenly choosing the slow generic implementation. This fixes https://github.com/ClickHouse/ClickHouse/issues/15153. [#15178](https://github.com/ClickHouse/ClickHouse/pull/15178) ([Amos Bird](https://github.com/amosbird)).
* Fix `DateTime <op> DateTime` mistakenly choosing the slow generic implementation. This fixes [#15153](https://github.com/ClickHouse/ClickHouse/issues/15153). [#15178](https://github.com/ClickHouse/ClickHouse/pull/15178) ([Amos Bird](https://github.com/amosbird)).
* Improve performance of GROUP BY key of type `FixedString`. [#15034](https://github.com/ClickHouse/ClickHouse/pull/15034) ([Amos Bird](https://github.com/amosbird)).
* Only `mlock` code segment when starting clickhouse-server. In previous versions, all mapped regions were locked in memory, including debug info. Debug info is usually split into a separate file but if it isn't, it led to +2..3 GiB memory usage. [#14929](https://github.com/ClickHouse/ClickHouse/pull/14929) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* ClickHouse binary became smaller due to link time optimization.
@ -385,7 +510,7 @@
* Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)).
* Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)).
* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)).
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes https://github.com/ClickHouse/ClickHouse/issues/15628. [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`). Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)).
* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
@ -396,7 +521,7 @@
* Fixed too low default value of `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([tavplubix](https://github.com/tavplubix)).
* Fix error `Cannot add simple transform to empty Pipe` which happened while reading from `Buffer` table which has different structure than destination table. It was possible if destination table returned empty result for query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fixed bug with globs in S3 table function, region from URL was not applied to S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)).
* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes https://github.com/ClickHouse/ClickHouse/issues/15598. [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)).
#### Improvement
@ -420,11 +545,11 @@
* Fix `Missing columns` errors when selecting columns which are absent in data, but depend on other columns which are also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)).
* Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)).
* Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)).
* Fix bug where queries like SELECT toStartOfDay(today()) fail complaining about empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix bug where queries like `SELECT toStartOfDay(today())` fail complaining about empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)).
* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)).
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix instance crash when using joinGet with LowCardinality types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
@ -438,6 +563,10 @@
### ClickHouse release v20.9.2.20, 2020-09-22
#### Backward Incompatible Change
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
#### New Feature
* Added column transformers `EXCEPT`, `REPLACE`, `APPLY`, which can be applied to the list of selected columns (after `*` or `COLUMNS(...)`). For example, you can write `SELECT * EXCEPT(URL) REPLACE(number + 1 AS number)`. Another example: `select * apply(length) apply(max) from wide_string_table` to find out the maximum length of all string columns. [#14233](https://github.com/ClickHouse/ClickHouse/pull/14233) ([Amos Bird](https://github.com/amosbird)).
@ -449,10 +578,10 @@
* Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)).
* Fix wrong Decimal multiplication result caused by wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)).
* Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fixed inconsistent comparison with primary key of type `FixedString` on index analysis if they're compered with a string of less size. This fixes https://github.com/ClickHouse/ClickHouse/issues/14908. [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
* Fixed inconsistent comparison with primary key of type `FixedString` on index analysis if they're compared with a string of smaller size. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
* Fix bug which leads to wrong merges assignment if table has partitions with a single part. [#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)).
* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes https://github.com/ClickHouse/ClickHouse/issues/14923. [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
* Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)).
@ -495,7 +624,7 @@
#### Performance Improvement
* Optimize queries with LIMIT/LIMIT BY/ORDER BY for distributed with GROUP BY sharding_key (under optimize_skip_unused_shards and optimize_distributed_group_by_sharding_key). [#10373](https://github.com/ClickHouse/ClickHouse/pull/10373) ([Azat Khuzhin](https://github.com/azat)).
* Optimize queries with LIMIT/LIMIT BY/ORDER BY for distributed with GROUP BY sharding_key (under `optimize_skip_unused_shards` and `optimize_distributed_group_by_sharding_key`). [#10373](https://github.com/ClickHouse/ClickHouse/pull/10373) ([Azat Khuzhin](https://github.com/azat)).
* Creating sets for multiple `JOIN` and `IN` in parallel. It may slightly improve performance for queries with several different `IN subquery` expressions. [#14412](https://github.com/ClickHouse/ClickHouse/pull/14412) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Improve Kafka engine performance by providing independent thread for each consumer. Separate thread pool for streaming engines (like Kafka). [#13939](https://github.com/ClickHouse/ClickHouse/pull/13939) ([fastio](https://github.com/fastio)).
@ -573,15 +702,15 @@
* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)).
* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)).
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix instance crash when using joinGet with LowCardinality types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes https://github.com/ClickHouse/ClickHouse/issues/14908. [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
* If function `bar` was called with specifically crafter arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
* Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)).
* Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes https://github.com/ClickHouse/ClickHouse/issues/14923. [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)).
@ -621,6 +750,7 @@
* Now `OPTIMIZE FINAL` query doesn't recalculate TTL for parts that were added before TTL was created. Use `ALTER TABLE ... MATERIALIZE TTL` once to calculate them, after that `OPTIMIZE FINAL` will evaluate TTL's properly. This behavior never worked for replicated tables. [#14220](https://github.com/ClickHouse/ClickHouse/pull/14220) ([alesapin](https://github.com/alesapin)).
* Extend `parallel_distributed_insert_select` setting, adding an option to run `INSERT` into local table. The setting changes type from `Bool` to `UInt64`, so the values `false` and `true` are no longer supported. If you have these values in server configuration, the server will not start. Please replace them with `0` and `1`, respectively. [#14060](https://github.com/ClickHouse/ClickHouse/pull/14060) ([Azat Khuzhin](https://github.com/azat)).
* Remove support for the `ODBCDriver` input/output format. This was a deprecated format once used for communication with the ClickHouse ODBC driver, now long superseded by the `ODBCDriver2` format. Resolves [#13629](https://github.com/ClickHouse/ClickHouse/issues/13629). [#13847](https://github.com/ClickHouse/ClickHouse/pull/13847) ([hexiaoting](https://github.com/hexiaoting)).
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
#### New Feature
@ -640,16 +770,16 @@
* Fix visible data clobbering by progress bar in client in interactive mode. This fixes [#12562](https://github.com/ClickHouse/ClickHouse/issues/12562) and [#13369](https://github.com/ClickHouse/ClickHouse/issues/13369) and [#13584](https://github.com/ClickHouse/ClickHouse/issues/13584) and fixes [#12964](https://github.com/ClickHouse/ClickHouse/issues/12964). [#13691](https://github.com/ClickHouse/ClickHouse/pull/13691) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed incorrect sorting order of `LowCardinality` column when sorting by multiple columns. This fixes [#13958](https://github.com/ClickHouse/ClickHouse/issues/13958). [#14223](https://github.com/ClickHouse/ClickHouse/pull/14223) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafter parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafted parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix bug which can lead to wrong merges assignment if table has partitions with a single part. [#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)).
* Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent a rare possible query hang. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334). [#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) [#14334](https://github.com/ClickHouse/ClickHouse/pull/14334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix crash during `ALTER` query for table which was created `AS table_function`. Fixes [#14212](https://github.com/ClickHouse/ClickHouse/issues/14212). [#14326](https://github.com/ClickHouse/ClickHouse/pull/14326) ([alesapin](https://github.com/alesapin)).
* Fix exception during ALTER LIVE VIEW query with REFRESH command. Live view is an experimental feature. [#14320](https://github.com/ClickHouse/ClickHouse/pull/14320) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix QueryPlan lifetime (for EXPLAIN PIPELINE graph=1) for queries with nested interpreter. [#14315](https://github.com/ClickHouse/ClickHouse/pull/14315) ([Azat Khuzhin](https://github.com/azat)).
* Fix segfault in `clickhouse-odbc-bridge` during schema fetch from some external sources. This PR fixes https://github.com/ClickHouse/ClickHouse/issues/13861. [#14267](https://github.com/ClickHouse/ClickHouse/pull/14267) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash in mark inclusion search introduced in https://github.com/ClickHouse/ClickHouse/pull/12277. [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)).
* Fix segfault in `clickhouse-odbc-bridge` during schema fetch from some external sources. This PR fixes [#13861](https://github.com/ClickHouse/ClickHouse/issues/13861). [#14267](https://github.com/ClickHouse/ClickHouse/pull/14267) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash in mark inclusion search introduced in [#12277](https://github.com/ClickHouse/ClickHouse/pull/12277). [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)).
* Fix creation of tables with named tuples. This fixes [#13027](https://github.com/ClickHouse/ClickHouse/issues/13027). [#14143](https://github.com/ClickHouse/ClickHouse/pull/14143) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix formatting of minimal negative decimal numbers. This fixes https://github.com/ClickHouse/ClickHouse/issues/14111. [#14119](https://github.com/ClickHouse/ClickHouse/pull/14119) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Fix formatting of minimal negative decimal numbers. This fixes [#14111](https://github.com/ClickHouse/ClickHouse/issues/14111). [#14119](https://github.com/ClickHouse/ClickHouse/pull/14119) ([Alexander Kuzmenkov](https://github.com/akuzm)).
* Fix `DistributedFilesToInsert` metric (zeroed when it should not). [#14095](https://github.com/ClickHouse/ClickHouse/pull/14095) ([Azat Khuzhin](https://github.com/azat)).
* Fix `pointInPolygon` with const 2d array as polygon. [#14079](https://github.com/ClickHouse/ClickHouse/pull/14079) ([Alexey Ilyukhov](https://github.com/livace)).
* Fixed wrong mount point in extra info for `Poco::Exception: no space left on device`. [#14050](https://github.com/ClickHouse/ClickHouse/pull/14050) ([tavplubix](https://github.com/tavplubix)).
@ -678,10 +808,10 @@
* Fix wrong code in function `netloc`. This fixes [#13335](https://github.com/ClickHouse/ClickHouse/issues/13335). [#13446](https://github.com/ClickHouse/ClickHouse/pull/13446) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix possible race in `StorageMemory`. [#13416](https://github.com/ClickHouse/ClickHouse/pull/13416) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix missing or excessive headers in `TSV/CSVWithNames` formats in HTTP protocol. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504). [#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)).
* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes https://github.com/ClickHouse/ClickHouse/issues/5779, https://github.com/ClickHouse/ClickHouse/issues/12527. [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix access to `redis` dictionary after connection was dropped once. It may happen with `cache` and `direct` dictionary layouts. [#13082](https://github.com/ClickHouse/ClickHouse/pull/13082) ([Anton Popov](https://github.com/CurtizJ)).
* Removed wrong auth access check when using ClickHouseDictionarySource to query remote tables. [#12756](https://github.com/ClickHouse/ClickHouse/pull/12756) ([sundyli](https://github.com/sundy-li)).
* Properly distinguish subqueries in some cases for common subexpression elimination. https://github.com/ClickHouse/ClickHouse/issues/8333. [#8367](https://github.com/ClickHouse/ClickHouse/pull/8367) ([Amos Bird](https://github.com/amosbird)).
* Properly distinguish subqueries in some cases for common subexpression elimination. [#8333](https://github.com/ClickHouse/ClickHouse/issues/8333). [#8367](https://github.com/ClickHouse/ClickHouse/pull/8367) ([Amos Bird](https://github.com/amosbird)).
#### Improvement
@ -749,7 +879,7 @@
* Updating LDAP user authentication suite to check that it works with RBAC. [#13656](https://github.com/ClickHouse/ClickHouse/pull/13656) ([vzakaznikov](https://github.com/vzakaznikov)).
* Removed `-DENABLE_CURL_CLIENT` for `contrib/aws`. [#13628](https://github.com/ClickHouse/ClickHouse/pull/13628) ([Vladimir Chebotarev](https://github.com/excitoon)).
* Increasing health-check timeouts for ClickHouse nodes and adding support to dump docker-compose logs if unhealthy containers found. [#13612](https://github.com/ClickHouse/ClickHouse/pull/13612) ([vzakaznikov](https://github.com/vzakaznikov)).
* Make sure https://github.com/ClickHouse/ClickHouse/issues/10977 is invalid. [#13539](https://github.com/ClickHouse/ClickHouse/pull/13539) ([Amos Bird](https://github.com/amosbird)).
* Make sure [#10977](https://github.com/ClickHouse/ClickHouse/issues/10977) is invalid. [#13539](https://github.com/ClickHouse/ClickHouse/pull/13539) ([Amos Bird](https://github.com/amosbird)).
* Skip PR's from robot-clickhouse. [#13489](https://github.com/ClickHouse/ClickHouse/pull/13489) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Move Dockerfiles from integration tests to `docker/test` directory. docker_compose files are available in `runner` docker container. Docker images are built in CI and not in integration tests. [#13448](https://github.com/ClickHouse/ClickHouse/pull/13448) ([Ilya Yatsishin](https://github.com/qoega)).
@ -765,6 +895,7 @@
* The function `groupArrayMoving*` was not working for distributed queries. Its result was calculated within incorrect data type (without promotion to the largest type). The function `groupArrayMovingAvg` was returning integer number that was inconsistent with the `avg` function. This fixes [#12568](https://github.com/ClickHouse/ClickHouse/issues/12568). [#12622](https://github.com/ClickHouse/ClickHouse/pull/12622) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Add sanity check for MergeTree settings. If the settings are incorrect, the server will refuse to start or to create a table, printing detailed explanation to the user. [#13153](https://github.com/ClickHouse/ClickHouse/pull/13153) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Protect from the cases when user may set `background_pool_size` to value lower than `number_of_free_entries_in_pool_to_execute_mutation` or `number_of_free_entries_in_pool_to_lower_max_size_of_merge`. In these cases ALTERs won't work or the maximum size of merge will be too limited. It will throw exception explaining what to do. This closes [#10897](https://github.com/ClickHouse/ClickHouse/issues/10897). [#12728](https://github.com/ClickHouse/ClickHouse/pull/12728) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
#### New Feature
@ -780,7 +911,7 @@
* Add `FROM_UNIXTIME` function for compatibility with MySQL, related to [#12149](https://github.com/ClickHouse/ClickHouse/issues/12149). [#12484](https://github.com/ClickHouse/ClickHouse/pull/12484) ([flynn](https://github.com/ucasFL)).
* Allow Nullable types as keys in MergeTree tables if `allow_nullable_key` table setting is enabled. Closes [#5319](https://github.com/ClickHouse/ClickHouse/issues/5319). [#12433](https://github.com/ClickHouse/ClickHouse/pull/12433) ([Amos Bird](https://github.com/amosbird)).
* Integration with [COS](https://intl.cloud.tencent.com/product/cos). [#12386](https://github.com/ClickHouse/ClickHouse/pull/12386) ([fastio](https://github.com/fastio)).
* Add mapAdd and mapSubtract functions for adding/subtracting key-mapped values. [#11735](https://github.com/ClickHouse/ClickHouse/pull/11735) ([Ildus Kurbangaliev](https://github.com/ildus)).
* Add `mapAdd` and `mapSubtract` functions for adding/subtracting key-mapped values. [#11735](https://github.com/ClickHouse/ClickHouse/pull/11735) ([Ildus Kurbangaliev](https://github.com/ildus)).
#### Bug Fix
@ -951,6 +1082,10 @@
### ClickHouse release v20.6.3.28-stable
#### Backward Incompatible Change
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
#### New Feature
* Added an initial implementation of `EXPLAIN` query. Syntax: `EXPLAIN SELECT ...`. This fixes [#1118](https://github.com/ClickHouse/ClickHouse/issues/1118). [#11873](https://github.com/ClickHouse/ClickHouse/pull/11873) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
@ -1059,7 +1194,7 @@
* Improved performance of 'ORDER BY' and 'GROUP BY' by prefix of sorting key (enabled with `optimize_aggregation_in_order` setting, disabled by default). [#11696](https://github.com/ClickHouse/ClickHouse/pull/11696) ([Anton Popov](https://github.com/CurtizJ)).
* Removed injective functions inside `uniq*()` if `set optimize_injective_functions_inside_uniq=1`. [#12337](https://github.com/ClickHouse/ClickHouse/pull/12337) ([Ruslan Kamalov](https://github.com/kamalov-ruslan)).
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
* Implemented single part uploads for DiskS3 (experimental feature). [#12026](https://github.com/ClickHouse/ClickHouse/pull/12026) ([Vladimir Chebotarev](https://github.com/excitoon)).
#### Experimental Feature
@ -1121,7 +1256,7 @@
#### Performance Improvement
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
#### Build/Testing/Packaging Improvement
@ -1139,6 +1274,7 @@
* Update `zstd` to 1.4.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. This change is backward compatible but we list it here in changelog in case you will wonder about these messages. [#10663](https://github.com/ClickHouse/ClickHouse/pull/10663) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Added a check for meaningless codecs and a setting `allow_suspicious_codecs` to control this check. This closes [#4966](https://github.com/ClickHouse/ClickHouse/issues/4966). [#10645](https://github.com/ClickHouse/ClickHouse/pull/10645) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Several Kafka settings changed their defaults. See [#11388](https://github.com/ClickHouse/ClickHouse/pull/11388).
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
#### New Feature
@ -1200,7 +1336,7 @@
* Fix wrong result of comparison of FixedString with constant String. This fixes [#11393](https://github.com/ClickHouse/ClickHouse/issues/11393). This bug appeared in version 20.4. [#11828](https://github.com/ClickHouse/ClickHouse/pull/11828) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix wrong result for `if` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)).
* Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fixed `Scalar doesn't exist` exception when using `WITH <scalar subquery> ...` in `SELECT ... FROM merge_tree_table ...` https://github.com/ClickHouse/ClickHouse/issues/11621. [#11767](https://github.com/ClickHouse/ClickHouse/pull/11767) ([Amos Bird](https://github.com/amosbird)).
* Fixed `Scalar doesn't exist` exception when using `WITH <scalar subquery> ...` in `SELECT ... FROM merge_tree_table ...` [#11621](https://github.com/ClickHouse/ClickHouse/issues/11621). [#11767](https://github.com/ClickHouse/ClickHouse/pull/11767) ([Amos Bird](https://github.com/amosbird)).
* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which succeeded while an error was expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)).
* Now replicated fetches will be cancelled during metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)).
* Parse metadata stored in zookeeper before checking for equality. [#11739](https://github.com/ClickHouse/ClickHouse/pull/11739) ([Azat Khuzhin](https://github.com/azat)).
@ -1251,8 +1387,8 @@
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix crash when `SET DEFAULT ROLE` is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash while reading malformed data in `Protobuf` format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash when `SET DEFAULT ROLE` is called with wrong arguments. This fixes [#10586](https://github.com/ClickHouse/ClickHouse/issues/10586). [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash while reading malformed data in `Protobuf` format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fixed a bug when `cache` dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
@ -1318,7 +1454,7 @@
* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)).
* Fix SELECT of column ALIAS whose default expression type differs from the column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)).
* Implemented comparison between DateTime64 and String values (just like for DateTime). [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)).
* Fix index corruption, which may accur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
* Fix index corruption, which may occur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
* Disable GROUP BY sharding_key optimization by default (`optimize_distributed_group_by_sharding_key` had been introduced and turned off by default, due to trickery of sharding_key analyzing, simple example is `if` in sharding key) and fix it for WITH ROLLUP/CUBE/TOTALS. [#10516](https://github.com/ClickHouse/ClickHouse/pull/10516) ([Azat Khuzhin](https://github.com/azat)).
* Fixes: [#10263](https://github.com/ClickHouse/ClickHouse/issues/10263) (after that PR dist send via INSERT had been postponing on each INSERT) Fixes: [#8756](https://github.com/ClickHouse/ClickHouse/issues/8756) (that PR breaks distributed sends with all of the following conditions met (unlikely setup for now I guess): `internal_replication == false`, multiple local shards (activates the hardlinking code) and `distributed_storage_policy` (makes `link(2)` fail with `EXDEV`)). [#10486](https://github.com/ClickHouse/ClickHouse/pull/10486) ([Azat Khuzhin](https://github.com/azat)).
* Fixed error with "max_rows_to_sort" limit. [#10268](https://github.com/ClickHouse/ClickHouse/pull/10268) ([alexey-milovidov](https://github.com/alexey-milovidov)).
@ -1475,7 +1611,7 @@
* Lower memory usage in tests. [#10617](https://github.com/ClickHouse/ClickHouse/pull/10617) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixing hard coded timeouts in new live view tests. [#10604](https://github.com/ClickHouse/ClickHouse/pull/10604) ([vzakaznikov](https://github.com/vzakaznikov)).
* Increasing timeout when opening a client in tests/queries/0_stateless/helpers/client.py. [#10599](https://github.com/ClickHouse/ClickHouse/pull/10599) ([vzakaznikov](https://github.com/vzakaznikov)).
* Enable ThinLTO for clang builds, continuation of https://github.com/ClickHouse/ClickHouse/pull/10435. [#10585](https://github.com/ClickHouse/ClickHouse/pull/10585) ([Amos Bird](https://github.com/amosbird)).
* Enable ThinLTO for clang builds, continuation of [#10435](https://github.com/ClickHouse/ClickHouse/pull/10435). [#10585](https://github.com/ClickHouse/ClickHouse/pull/10585) ([Amos Bird](https://github.com/amosbird)).
* Adding fuzzers and preparing for oss-fuzz integration. [#10546](https://github.com/ClickHouse/ClickHouse/pull/10546) ([kyprizel](https://github.com/kyprizel)).
* Fix FreeBSD build. [#10150](https://github.com/ClickHouse/ClickHouse/pull/10150) ([Ivan](https://github.com/abyss7)).
* Add new build for query tests using pytest framework. [#10039](https://github.com/ClickHouse/ClickHouse/pull/10039) ([Ivan](https://github.com/abyss7)).
@ -1550,7 +1686,7 @@
#### Performance Improvement
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
#### Build/Testing/Packaging Improvement
@ -1604,7 +1740,7 @@
* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)).
* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
* Fix pointInPolygon with nan as point. Fixes [#11375](https://github.com/ClickHouse/ClickHouse/issues/11375). [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
* Fix potential uninitialized memory read in MergeTree shutdown if table was not created successfully. [#11420](https://github.com/ClickHouse/ClickHouse/pull/11420) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)).
* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
@ -1620,8 +1756,8 @@
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes [#10586](https://github.com/ClickHouse/ClickHouse/issues/10586). [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash while reading malformed data in Protobuf format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
@ -1666,7 +1802,7 @@ No changes compared to v20.4.3.16-stable.
* Now constraints are updated if the column participating in `CONSTRAINT` expression was renamed. Fixes [#10844](https://github.com/ClickHouse/ClickHouse/issues/10844). [#10847](https://github.com/ClickHouse/ClickHouse/pull/10847) ([alesapin](https://github.com/alesapin)).
* Fixed potential read of uninitialized memory in cache-dictionary. [#10834](https://github.com/ClickHouse/ClickHouse/pull/10834) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed columns order after `Block::sortColumns()`. [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)).
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984] (https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed `UBSan` and `MSan` report in `DateLUT`. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed incorrect type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew)).
* Fixed `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` without exception should be finished if exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
@ -1694,15 +1830,15 @@ No changes compared to v20.4.3.16-stable.
#### New Feature
* Add support for secured connection from ClickHouse to Zookeeper [#10184](https://github.com/ClickHouse/ClickHouse/pull/10184) ([Konstantin Lebedev](https://github.com/xzkostyan))
* Support custom HTTP handlers. See ISSUES-5436 for description. [#7572](https://github.com/ClickHouse/ClickHouse/pull/7572) ([Winter Zhang](https://github.com/zhang2014))
* Support custom HTTP handlers. See [#5436](https://github.com/ClickHouse/ClickHouse/issues/5436) for description. [#7572](https://github.com/ClickHouse/ClickHouse/pull/7572) ([Winter Zhang](https://github.com/zhang2014))
* Add MessagePack Input/Output format. [#9889](https://github.com/ClickHouse/ClickHouse/pull/9889) ([Kruglov Pavel](https://github.com/Avogar))
* Add Regexp input format. [#9196](https://github.com/ClickHouse/ClickHouse/pull/9196) ([Kruglov Pavel](https://github.com/Avogar))
* Added output format `Markdown` for embedding tables in markdown documents. [#10317](https://github.com/ClickHouse/ClickHouse/pull/10317) ([Kruglov Pavel](https://github.com/Avogar))
* Added support for custom settings section in dictionaries. Also fixes issue [#2829](https://github.com/ClickHouse/ClickHouse/issues/2829). [#10137](https://github.com/ClickHouse/ClickHouse/pull/10137) ([Artem Streltsov](https://github.com/kekekekule))
* Added custom settings support in DDL-queries for CREATE DICTIONARY [#10465](https://github.com/ClickHouse/ClickHouse/pull/10465) ([Artem Streltsov](https://github.com/kekekekule))
* Added custom settings support in DDL-queries for `CREATE DICTIONARY` [#10465](https://github.com/ClickHouse/ClickHouse/pull/10465) ([Artem Streltsov](https://github.com/kekekekule))
* Add simple server-wide memory profiler that will collect allocation contexts when server memory usage becomes higher than the next allocation threshold. [#10444](https://github.com/ClickHouse/ClickHouse/pull/10444) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Add setting `always_fetch_merged_part` which restricts a replica from merging parts by itself, so that it always prefers downloading them from other replicas. [#10379](https://github.com/ClickHouse/ClickHouse/pull/10379) ([alesapin](https://github.com/alesapin))
* Add function JSONExtractKeysAndValuesRaw which extracts raw data from JSON objects [#10378](https://github.com/ClickHouse/ClickHouse/pull/10378) ([hcz](https://github.com/hczhcz))
* Add function `JSONExtractKeysAndValuesRaw` which extracts raw data from JSON objects [#10378](https://github.com/ClickHouse/ClickHouse/pull/10378) ([hcz](https://github.com/hczhcz))
* Add memory usage from OS to `system.asynchronous_metrics`. [#10361](https://github.com/ClickHouse/ClickHouse/pull/10361) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Added generic variants for functions `least` and `greatest`. Now they work with arbitrary number of arguments of arbitrary types. This fixes [#4767](https://github.com/ClickHouse/ClickHouse/issues/4767) [#10318](https://github.com/ClickHouse/ClickHouse/pull/10318) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Now ClickHouse controls timeouts of dictionary sources on its side. Two new settings were added to the cache dictionary configuration: `strict_max_lifetime_seconds`, which is `max_lifetime` by default, and `query_wait_timeout_milliseconds`, which is one minute by default. The first setting is also useful with the `allow_read_expired_keys` setting (to forbid reading very expired keys). [#10337](https://github.com/ClickHouse/ClickHouse/pull/10337) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
@ -1715,7 +1851,7 @@ No changes compared to v20.4.3.16-stable.
* Add ability to query Distributed over Distributed (w/o `distributed_group_by_no_merge`) ... [#9923](https://github.com/ClickHouse/ClickHouse/pull/9923) ([Azat Khuzhin](https://github.com/azat))
* Add function `arrayReduceInRanges` which aggregates array elements in given ranges. [#9598](https://github.com/ClickHouse/ClickHouse/pull/9598) ([hcz](https://github.com/hczhcz))
* Add Dictionary Status on prometheus exporter. [#9622](https://github.com/ClickHouse/ClickHouse/pull/9622) ([Guillaume Tassery](https://github.com/YiuRULE))
* Add function arrayAUC [#8698](https://github.com/ClickHouse/ClickHouse/pull/8698) ([taiyang-li](https://github.com/taiyang-li))
* Add function `arrayAUC` [#8698](https://github.com/ClickHouse/ClickHouse/pull/8698) ([taiyang-li](https://github.com/taiyang-li))
* Support `DROP VIEW` statement for better TPC-H compatibility. [#9831](https://github.com/ClickHouse/ClickHouse/pull/9831) ([Amos Bird](https://github.com/amosbird))
* Add 'strict_order' option to windowFunnel() [#9773](https://github.com/ClickHouse/ClickHouse/pull/9773) ([achimbab](https://github.com/achimbab))
* Support `DATE` and `TIMESTAMP` SQL operators, e.g. `SELECT date '2001-01-01'` [#9691](https://github.com/ClickHouse/ClickHouse/pull/9691) ([Artem Zuikov](https://github.com/4ertus2))
@ -1919,7 +2055,7 @@ No changes compared to v20.4.3.16-stable.
* Move integration tests docker files to docker/ directory. [#10335](https://github.com/ClickHouse/ClickHouse/pull/10335) ([Ilya Yatsishin](https://github.com/qoega))
* Allow to use `clang-10` in CI. It ensures that [#10238](https://github.com/ClickHouse/ClickHouse/issues/10238) is fixed. [#10384](https://github.com/ClickHouse/ClickHouse/pull/10384) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Update OpenSSL to upstream master. Fixed the issue when TLS connections may fail with the message `OpenSSL SSL_read: error:14094438:SSL routines:ssl3_read_bytes:tlsv1 alert internal error` and `SSL Exception: error:2400006E:random number generator::error retrieving entropy`. The issue was present in version 20.1. [#8956](https://github.com/ClickHouse/ClickHouse/pull/8956) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Fix clang-10 build. https://github.com/ClickHouse/ClickHouse/issues/10238 [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird))
* Fix clang-10 build. [#10238](https://github.com/ClickHouse/ClickHouse/issues/10238) [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird))
* Add performance test for [Parallel INSERT for materialized view](https://github.com/ClickHouse/ClickHouse/pull/10052). [#10345](https://github.com/ClickHouse/ClickHouse/pull/10345) ([vxider](https://github.com/Vxider))
* Fix flaky test `test_settings_constraints_distributed.test_insert_clamps_settings`. [#10346](https://github.com/ClickHouse/ClickHouse/pull/10346) ([Vitaly Baranov](https://github.com/vitlibar))
* Add util to test results upload in CI ClickHouse [#10330](https://github.com/ClickHouse/ClickHouse/pull/10330) ([Ilya Yatsishin](https://github.com/qoega))
@ -2093,7 +2229,7 @@ No changes compared to v20.4.3.16-stable.
#### Performance Improvement
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
### ClickHouse release v20.3.12.112-lts 2020-06-25
@ -2135,7 +2271,7 @@ No changes compared to v20.4.3.16-stable.
* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)).
* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
* Fix pointInPolygon with nan as point. Fixes [#11375](https://github.com/ClickHouse/ClickHouse/issues/11375). [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
* Fix crash in JOIN over LowCardinality(T) and Nullable(T). [#11380](https://github.com/ClickHouse/ClickHouse/issues/11380). [#11414](https://github.com/ClickHouse/ClickHouse/pull/11414) ([Artem Zuikov](https://github.com/4ertus2)).
* Fix error code for wrong `USING` key. [#11373](https://github.com/ClickHouse/ClickHouse/issues/11373). [#11404](https://github.com/ClickHouse/ClickHouse/pull/11404) ([Artem Zuikov](https://github.com/4ertus2)).
* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)).
@ -2152,7 +2288,7 @@ No changes compared to v20.4.3.16-stable.
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash while reading malformed data in Protobuf format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
@ -2183,7 +2319,7 @@ No changes compared to v20.4.3.16-stable.
* Fixed `SIGSEGV` in `StringHashTable` if such a key does not exist. [#10870](https://github.com/ClickHouse/ClickHouse/pull/10870) ([Azat Khuzhin](https://github.com/azat)).
* Fixed bug in `ReplicatedMergeTree` which might cause some `ALTER` or `OPTIMIZE` query to hang waiting for some replica after it becomes inactive. [#10849](https://github.com/ClickHouse/ClickHouse/pull/10849) ([tavplubix](https://github.com/tavplubix)).
* Fixed columns order after `Block::sortColumns()`. [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)).
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984] (https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed `UBSan` and `MSan` report in `DateLUT`. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed incorrect type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew))
* Fixed `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` without exception should be finished if exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
@ -2202,7 +2338,7 @@ No changes compared to v20.4.3.16-stable.
* Fixed incorrect scalar results inside inner query of `MATERIALIZED VIEW` in case if this query contained dependent table. [#10603](https://github.com/ClickHouse/ClickHouse/pull/10603) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fixed `SELECT` of column `ALIAS` whose default expression type differs from the column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)).
* Implemented comparison between DateTime64 and String values. [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)).
* Fixed index corruption, which may accur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
* Fixed index corruption, which may occur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
* Fixed the situation, when mutation finished all parts, but hung up in `is_done=0`. [#10526](https://github.com/ClickHouse/ClickHouse/pull/10526) ([alesapin](https://github.com/alesapin)).
* Fixed overflow at beginning of unix epoch for timezones with fractional offset from `UTC`. This fixes [#9335](https://github.com/ClickHouse/ClickHouse/issues/9335). [#10513](https://github.com/ClickHouse/ClickHouse/pull/10513) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fixed improper shutdown of `Distributed` storage. [#10491](https://github.com/ClickHouse/ClickHouse/pull/10491) ([Azat Khuzhin](https://github.com/azat)).
@ -2212,14 +2348,14 @@ No changes compared to v20.4.3.16-stable.
#### Build/Testing/Packaging Improvement
* Fix UBSan report in LZ4 library. [#10631](https://github.com/ClickHouse/ClickHouse/pull/10631) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix clang-10 build. https://github.com/ClickHouse/ClickHouse/issues/10238. [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird)).
* Fix clang-10 build. [#10238](https://github.com/ClickHouse/ClickHouse/issues/10238). [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird)).
* Added failing tests about `max_rows_to_sort` setting. [#10268](https://github.com/ClickHouse/ClickHouse/pull/10268) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Added some improvements in printing diagnostic info in input formats. Fixes [#10204](https://github.com/ClickHouse/ClickHouse/issues/10204). [#10418](https://github.com/ClickHouse/ClickHouse/pull/10418) ([tavplubix](https://github.com/tavplubix)).
* Added CA certificates to clickhouse-server docker image. [#10476](https://github.com/ClickHouse/ClickHouse/pull/10476) ([filimonov](https://github.com/filimonov)).
#### Bug fix
* #10551. [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)).
* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)).
### ClickHouse release v20.3.8.53, 2020-04-23
@ -2411,7 +2547,7 @@ No changes compared to v20.4.3.16-stable.
* Fixed the behaviour of `match` and `extract` functions when haystack has zero bytes. The behaviour was wrong when haystack was constant. This fixes [#9160](https://github.com/ClickHouse/ClickHouse/issues/9160) [#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#9345](https://github.com/ClickHouse/ClickHouse/pull/9345) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Avoid throwing from destructor in Apache Avro 3rd-party library. [#9066](https://github.com/ClickHouse/ClickHouse/pull/9066) ([Andrew Onyshchuk](https://github.com/oandrew))
* Don't commit a batch polled from `Kafka` partially as it can lead to holes in data. [#8876](https://github.com/ClickHouse/ClickHouse/pull/8876) ([filimonov](https://github.com/filimonov))
* Fix `joinGet` with nullable return types. https://github.com/ClickHouse/ClickHouse/issues/8919 [#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) ([Amos Bird](https://github.com/amosbird))
* Fix `joinGet` with nullable return types. [#8919](https://github.com/ClickHouse/ClickHouse/issues/8919) [#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) ([Amos Bird](https://github.com/amosbird))
* Fix data incompatibility when compressed with `T64` codec. [#9016](https://github.com/ClickHouse/ClickHouse/pull/9016) ([Artem Zuikov](https://github.com/4ertus2)) Fix data type ids in `T64` compression codec that lead to wrong (de)compression in affected versions. [#9033](https://github.com/ClickHouse/ClickHouse/pull/9033) ([Artem Zuikov](https://github.com/4ertus2))
* Add setting `enable_early_constant_folding` and disable it in some cases that lead to errors. [#9010](https://github.com/ClickHouse/ClickHouse/pull/9010) ([Artem Zuikov](https://github.com/4ertus2))
* Fix pushdown predicate optimizer with VIEW and enable the test [#9011](https://github.com/ClickHouse/ClickHouse/pull/9011) ([Winter Zhang](https://github.com/zhang2014))
@ -2613,7 +2749,7 @@ No changes compared to v20.4.3.16-stable.
* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)).
* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
* Fix pointInPolygon with nan as point. Fixes [#11375](https://github.com/ClickHouse/ClickHouse/issues/11375). [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)).
* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
@ -2623,7 +2759,7 @@ No changes compared to v20.4.3.16-stable.
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix crash while reading malformed data in Protobuf format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)).
* Remove logging from mutation finalization task if nothing was finalized. [#11109](https://github.com/ClickHouse/ClickHouse/pull/11109) ([alesapin](https://github.com/alesapin)).
@ -2901,7 +3037,7 @@ No changes compared to v20.4.3.16-stable.
* Several improvements ClickHouse grammar in `.g4` file. [#8294](https://github.com/ClickHouse/ClickHouse/pull/8294) ([taiyang-li](https://github.com/taiyang-li))
* Fix bug that leads to crashes in `JOIN`s with tables with engine `Join`. This fixes [#7556](https://github.com/ClickHouse/ClickHouse/issues/7556) [#8254](https://github.com/ClickHouse/ClickHouse/issues/8254) [#7915](https://github.com/ClickHouse/ClickHouse/issues/7915) [#8100](https://github.com/ClickHouse/ClickHouse/issues/8100). [#8298](https://github.com/ClickHouse/ClickHouse/pull/8298) ([Artem Zuikov](https://github.com/4ertus2))
* Fix redundant dictionaries reload on `CREATE DATABASE`. [#7916](https://github.com/ClickHouse/ClickHouse/pull/7916) ([Azat Khuzhin](https://github.com/azat))
* Limit maximum number of streams for read from `StorageFile` and `StorageHDFS`. Fixes https://github.com/ClickHouse/ClickHouse/issues/7650. [#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
* Limit maximum number of streams for read from `StorageFile` and `StorageHDFS`. Fixes [#7650](https://github.com/ClickHouse/ClickHouse/issues/7650). [#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
* Fix bug in `ALTER ... MODIFY ... CODEC` query, when the user specifies both default expression and codec. Fixes [#8593](https://github.com/ClickHouse/ClickHouse/issues/8593). [#8614](https://github.com/ClickHouse/ClickHouse/pull/8614) ([alesapin](https://github.com/alesapin))
* Fix error in background merge of columns with `SimpleAggregateFunction(LowCardinality)` type. [#8613](https://github.com/ClickHouse/ClickHouse/pull/8613) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
* Fixed type check in function `toDateTime64`. [#8375](https://github.com/ClickHouse/ClickHouse/pull/8375) ([Vasily Nemkov](https://github.com/Enmk))
@ -2985,7 +3121,7 @@ No changes compared to v20.4.3.16-stable.
* Added check for extra parts of `MergeTree` at different disks, in order to not allow to miss data parts at undefined disks. [#8118](https://github.com/ClickHouse/ClickHouse/pull/8118) ([Vladimir Chebotarev](https://github.com/excitoon))
* Enable SSL support for Mac client and server. [#8297](https://github.com/ClickHouse/ClickHouse/pull/8297) ([Ivan](https://github.com/abyss7))
* Now ClickHouse can work as MySQL federated server (see https://dev.mysql.com/doc/refman/5.7/en/federated-create-server.html). [#7717](https://github.com/ClickHouse/ClickHouse/pull/7717) ([Maxim Fedotov](https://github.com/MaxFedotov))
* `clickhouse-client` now only enable `bracketed-paste` when multiquery is on and multiline is off. This fixes (#7757)[https://github.com/ClickHouse/ClickHouse/issues/7757]. [#7761](https://github.com/ClickHouse/ClickHouse/pull/7761) ([Amos Bird](https://github.com/amosbird))
* `clickhouse-client` now only enable `bracketed-paste` when multiquery is on and multiline is off. This fixes [#7757](https://github.com/ClickHouse/ClickHouse/issues/7757). [#7761](https://github.com/ClickHouse/ClickHouse/pull/7761) ([Amos Bird](https://github.com/amosbird))
* Support `Array(Decimal)` in `if` function. [#7721](https://github.com/ClickHouse/ClickHouse/pull/7721) ([Artem Zuikov](https://github.com/4ertus2))
* Support Decimals in `arrayDifference`, `arrayCumSum` and `arrayCumSumNegative` functions. [#7724](https://github.com/ClickHouse/ClickHouse/pull/7724) ([Artem Zuikov](https://github.com/4ertus2))
* Added `lifetime` column to `system.dictionaries` table. [#6820](https://github.com/ClickHouse/ClickHouse/issues/6820) [#7727](https://github.com/ClickHouse/ClickHouse/pull/7727) ([kekekekule](https://github.com/kekekekule))

View File

@ -112,6 +112,12 @@ if (ENABLE_FUZZING)
set (FUZZER "libfuzzer")
endif()
# Global libraries
# See:
# - default_libs.cmake
# - sanitize.cmake
add_library(global-libs INTERFACE)
include (cmake/fuzzer.cmake)
include (cmake/sanitize.cmake)
@ -223,16 +229,16 @@ if (ARCH_NATIVE)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
endif ()
if (UNBUNDLED AND (COMPILER_GCC OR COMPILER_CLANG))
# to make numeric_limits<__int128> works for unbundled build
set (_CXX_STANDARD "-std=gnu++2a")
if (COMPILER_GCC OR COMPILER_CLANG)
# to make numeric_limits<__int128> work with GCC
set (_CXX_STANDARD "gnu++2a")
else()
set (_CXX_STANDARD "-std=c++2a")
set (_CXX_STANDARD "c++2a")
endif()
# cmake < 3.12 doesn't support 20. We'll set CMAKE_CXX_FLAGS for now
# set (CMAKE_CXX_STANDARD 20)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_CXX_STANDARD}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=${_CXX_STANDARD}")
set (CMAKE_CXX_EXTENSIONS 0) # https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html#prop_tgt:CXX_EXTENSIONS
set (CMAKE_CXX_STANDARD_REQUIRED ON)
@ -257,6 +263,8 @@ if (WITH_COVERAGE AND COMPILER_GCC)
set(WITHOUT_COVERAGE "-fno-profile-arcs -fno-test-coverage")
endif()
set(COMPILER_FLAGS "${COMPILER_FLAGS}")
set (CMAKE_BUILD_COLOR_MAKEFILE ON)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}")
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_CXX_FLAGS_ADD}")
@ -455,6 +463,7 @@ include (cmake/find/s3.cmake)
include (cmake/find/base64.cmake)
include (cmake/find/parquet.cmake)
include (cmake/find/simdjson.cmake)
include (cmake/find/fast_float.cmake)
include (cmake/find/rapidjson.cmake)
include (cmake/find/fastops.cmake)
include (cmake/find/odbc.cmake)
@ -510,8 +519,11 @@ macro (add_executable target)
get_target_property (type ${target} TYPE)
if (${type} STREQUAL EXECUTABLE)
# operator::new/delete for executables (MemoryTracker stuff)
target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES})
# disabled for TSAN and gcc since libtsan.a provides overrides too
if (TARGET clickhouse_new_delete)
# operator::new/delete for executables (MemoryTracker stuff)
target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES})
endif()
endif()
endmacro()
@ -522,8 +534,8 @@ include_directories(${ConfigIncludePath})
include (cmake/warnings.cmake)
add_subdirectory (base)
add_subdirectory (programs)
add_subdirectory (src)
add_subdirectory (programs)
add_subdirectory (tests)
add_subdirectory (utils)

View File

@ -14,3 +14,6 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [SF Bay Area ClickHouse Virtual Office Hours (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/274273549/) on 20 January 2021.

View File

@ -6,6 +6,7 @@ set (SRCS
demangle.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp
getPageSize.cpp
getThreadId.cpp
JSON.cpp
LineReader.cpp

View File

@ -127,7 +127,7 @@ String LineReader::readLine(const String & first_prompt, const String & second_p
}
#endif
line += (line.empty() ? "" : " ") + input;
line += (line.empty() ? "" : "\n") + input;
if (!need_next_line)
break;

View File

@ -1,4 +1,5 @@
#include <common/ReadlineLineReader.h>
#include <common/errnoToString.h>
#include <ext/scope_guard.h>
#include <errno.h>
@ -69,7 +70,7 @@ ReadlineLineReader::ReadlineLineReader(
{
int res = read_history(history_file_path.c_str());
if (res)
std::cerr << "Cannot read history from file " + history_file_path + ": "+ strerror(errno) << std::endl;
std::cerr << "Cannot read history from file " + history_file_path + ": "+ errnoToString(errno) << std::endl;
}
/// Added '.' to the default list. Because it is used to separate database and table.
@ -107,7 +108,7 @@ ReadlineLineReader::ReadlineLineReader(
};
if (signal(SIGINT, clear_prompt_or_exit) == SIG_ERR)
throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + strerror(errno));
throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + errnoToString(errno));
rl_variable_bind("completion-ignore-case", "on");
// TODO: it doesn't work

View File

@ -47,7 +47,7 @@ ReplxxLineReader::ReplxxLineReader(
{
if (!rx.history_load(history_file_path))
{
rx.print("Loading history failed: %s\n", strerror(errno));
rx.print("Loading history failed: %s\n", errnoToString(errno).c_str());
}
if (flock(history_file_fd, LOCK_UN))
@ -58,6 +58,8 @@ ReplxxLineReader::ReplxxLineReader(
}
}
rx.install_window_change_handler();
auto callback = [&suggest] (const String & context, size_t context_size)
{
if (auto range = suggest.getCompletions(context, context_size))
@ -86,7 +88,7 @@ ReplxxLineReader::ReplxxLineReader(
ReplxxLineReader::~ReplxxLineReader()
{
if (close(history_file_fd))
rx.print("Close of history file failed: %s\n", strerror(errno));
rx.print("Close of history file failed: %s\n", errnoToString(errno).c_str());
}
LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
@ -111,7 +113,7 @@ void ReplxxLineReader::addToHistory(const String & line)
// and that is why flock() is added here.
bool locked = false;
if (flock(history_file_fd, LOCK_EX))
rx.print("Lock of history file failed: %s\n", strerror(errno));
rx.print("Lock of history file failed: %s\n", errnoToString(errno).c_str());
else
locked = true;
@ -119,10 +121,10 @@ void ReplxxLineReader::addToHistory(const String & line)
// flush changes to the disk
if (!rx.history_save(history_file_path))
rx.print("Saving history failed: %s\n", strerror(errno));
rx.print("Saving history failed: %s\n", errnoToString(errno).c_str());
if (locked && 0 != flock(history_file_fd, LOCK_UN))
rx.print("Unlock of history file failed: %s\n", strerror(errno));
rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
}
void ReplxxLineReader::enableBracketedPaste()

View File

@ -76,12 +76,8 @@
# define NO_SANITIZE_THREAD
#endif
#if defined __GNUC__ && !defined __clang__
# define OPTIMIZE(x) __attribute__((__optimize__(x)))
#else
# define OPTIMIZE(x)
#endif
/// A macro for suppressing warnings about unused variables or function results.
/// Useful for structured bindings which have no standard way to declare this.
#define UNUSED(...) (void)(__VA_ARGS__)
/// Accepts any number of arguments of any type and does nothing with them.
/// Call it to silence warnings about unused variables or unused function results.
template <typename... Ts>
constexpr void UNUSED(Ts &&...)
{
}

View File

@ -1,100 +1,29 @@
#include <stdexcept>
#include "common/getMemoryAmount.h"
#include "common/getPageSize.h"
// http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system
/*
* Author: David Robert Nadeau
* Site: http://NadeauSoftware.com/
* License: Creative Commons Attribution 3.0 Unported License
* http://creativecommons.org/licenses/by/3.0/deed.en_US
*/
#if defined(WIN32) || defined(_WIN32)
#include <Windows.h>
#else
#include <unistd.h>
#include <sys/types.h>
#include <sys/param.h>
#if defined(BSD)
#include <sys/sysctl.h>
#endif
#endif
/**
* Returns the size of physical memory (RAM) in bytes.
* Returns 0 on unsupported platform
*/
/** Returns the size of physical memory (RAM) in bytes.
* Returns 0 on unsupported platform
*/
uint64_t getMemoryAmountOrZero()
{
#if defined(_WIN32) && (defined(__CYGWIN__) || defined(__CYGWIN32__))
/* Cygwin under Windows. ------------------------------------ */
/* New 64-bit MEMORYSTATUSEX isn't available. Use old 32.bit */
MEMORYSTATUS status;
status.dwLength = sizeof(status);
GlobalMemoryStatus(&status);
return status.dwTotalPhys;
int64_t num_pages = sysconf(_SC_PHYS_PAGES);
if (num_pages <= 0)
return 0;
#elif defined(WIN32) || defined(_WIN32)
/* Windows. ------------------------------------------------- */
/* Use new 64-bit MEMORYSTATUSEX, not old 32-bit MEMORYSTATUS */
MEMORYSTATUSEX status;
status.dwLength = sizeof(status);
GlobalMemoryStatusEx(&status);
return status.ullTotalPhys;
int64_t page_size = getPageSize();
if (page_size <= 0)
return 0;
#else
/* UNIX variants. ------------------------------------------- */
/* Prefer sysctl() over sysconf() except sysctl() HW_REALMEM and HW_PHYSMEM */
#if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64))
int mib[2];
mib[0] = CTL_HW;
#if defined(HW_MEMSIZE)
mib[1] = HW_MEMSIZE; /* OSX. --------------------- */
#elif defined(HW_PHYSMEM64)
mib[1] = HW_PHYSMEM64; /* NetBSD, OpenBSD. --------- */
#endif
uint64_t size = 0; /* 64-bit */
size_t len = sizeof(size);
if (sysctl(mib, 2, &size, &len, nullptr, 0) == 0)
return size;
return 0; /* Failed? */
#elif defined(_SC_AIX_REALMEM)
/* AIX. ----------------------------------------------------- */
return sysconf(_SC_AIX_REALMEM) * 1024;
#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
/* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */
return uint64_t(sysconf(_SC_PHYS_PAGES))
*uint64_t(sysconf(_SC_PAGESIZE));
#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGE_SIZE)
/* Legacy. -------------------------------------------------- */
return uint64_t(sysconf(_SC_PHYS_PAGES))
* uint64_t(sysconf(_SC_PAGE_SIZE));
#elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM))
/* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */
int mib[2];
mib[0] = CTL_HW;
#if defined(HW_REALMEM)
mib[1] = HW_REALMEM; /* FreeBSD. ----------------- */
#elif defined(HW_PYSMEM)
mib[1] = HW_PHYSMEM; /* Others. ------------------ */
#endif
unsigned int size = 0; /* 32-bit */
size_t len = sizeof(size);
if (sysctl(mib, 2, &size, &len, nullptr, 0) == 0)
return size;
return 0; /* Failed? */
#endif /* sysctl and sysconf variants */
#endif
return num_pages * page_size;
}

View File

@ -0,0 +1,8 @@
#include "common/getPageSize.h"
#include <unistd.h>
/// Queries the memory page size with sysconf(_SC_PAGESIZE).
/// The value is passed through as-is; no error checking is done here.
Int64 getPageSize()
{
    const Int64 page_size = sysconf(_SC_PAGESIZE);
    return page_size;
}

View File

@ -0,0 +1,6 @@
#pragma once
#include "common/types.h"
/// Get memory page size
Int64 getPageSize();

View File

@ -1,6 +1,7 @@
// https://stackoverflow.com/questions/1413445/reading-a-password-from-stdcin
#include <common/setTerminalEcho.h>
#include <common/errnoToString.h>
#include <stdexcept>
#include <cstring>
#include <string>
@ -31,7 +32,7 @@ void setTerminalEcho(bool enable)
#else
struct termios tty;
if (tcgetattr(STDIN_FILENO, &tty))
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + strerror(errno));
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + errnoToString(errno));
if (!enable)
tty.c_lflag &= ~ECHO;
else
@ -39,6 +40,6 @@ void setTerminalEcho(bool enable)
auto ret = tcsetattr(STDIN_FILENO, TCSANOW, &tty);
if (ret)
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + strerror(errno));
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + errnoToString(errno));
#endif
}

View File

@ -8,7 +8,7 @@ using Int16 = int16_t;
using Int32 = int32_t;
using Int64 = int64_t;
#if __cplusplus <= 201703L
#ifndef __cpp_char8_t
using char8_t = unsigned char;
#endif

View File

@ -58,8 +58,7 @@ public:
using signed_base_type = int64_t;
// ctors
integer() = default;
constexpr integer() noexcept;
template <typename T>
constexpr integer(T rhs) noexcept;
template <typename T>

View File

@ -916,6 +916,11 @@ public:
// Members
template <size_t Bits, typename Signed>
constexpr integer<Bits, Signed>::integer() noexcept
: items{}
{}
template <size_t Bits, typename Signed>
template <typename T>
constexpr integer<Bits, Signed>::integer(T rhs) noexcept

View File

@ -5,7 +5,6 @@ LIBRARY()
ADDINCL(
GLOBAL clickhouse/base
GLOBAL contrib/libs/cctz/include
)
CFLAGS (GLOBAL -DARCADIA_BUILD)
@ -24,7 +23,7 @@ ELSEIF (OS_LINUX)
ENDIF ()
PEERDIR(
contrib/libs/cctz/src
contrib/libs/cctz
contrib/libs/cxxsupp/libcxx-filesystem
contrib/libs/poco/Net
contrib/libs/poco/Util
@ -48,6 +47,7 @@ SRCS(
errnoToString.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp
getPageSize.cpp
getResource.cpp
getThreadId.cpp
mremap.cpp

View File

@ -4,7 +4,6 @@ LIBRARY()
ADDINCL(
GLOBAL clickhouse/base
GLOBAL contrib/libs/cctz/include
)
CFLAGS (GLOBAL -DARCADIA_BUILD)
@ -23,7 +22,7 @@ ELSEIF (OS_LINUX)
ENDIF ()
PEERDIR(
contrib/libs/cctz/src
contrib/libs/cctz
contrib/libs/cxxsupp/libcxx-filesystem
contrib/libs/poco/Net
contrib/libs/poco/Util

View File

@ -761,14 +761,14 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
static KillingErrorHandler killing_error_handler;
Poco::ErrorHandler::set(&killing_error_handler);
signal_pipe.setNonBlocking();
signal_pipe.setNonBlockingWrite();
signal_pipe.tryIncreaseSize(1 << 20);
signal_listener = std::make_unique<SignalListener>(*this);
signal_listener_thread.start(*signal_listener);
#if defined(__ELF__) && !defined(__FreeBSD__)
String build_id_hex = DB::SymbolIndex::instance().getBuildIDHex();
String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
if (build_id_hex.empty())
build_id_info = "no build id";
else

View File

@ -6,10 +6,12 @@
#include <common/defines.h>
#include <common/getFQDNOrHostName.h>
#include <common/getMemoryAmount.h>
#include <common/logger_useful.h>
#include <Common/SymbolIndex.h>
#include <Common/StackTrace.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#if !defined(ARCADIA_BUILD)
# include "Common/config_version.h"
@ -28,14 +30,13 @@ namespace
bool initialized = false;
bool anonymize = false;
std::string server_data_path;
void setExtras()
{
if (!anonymize)
{
sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str()));
}
sentry_set_tag("version", VERSION_STRING);
sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH));
sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE));
@ -44,6 +45,15 @@ void setExtras()
sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR));
sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR));
sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH));
sentry_set_extra("version_official", sentry_value_new_string(VERSION_OFFICIAL));
/// Sentry does not support 64-bit integers.
sentry_set_extra("total_ram", sentry_value_new_string(formatReadableSizeWithBinarySuffix(getMemoryAmountOrZero()).c_str()));
sentry_set_extra("physical_cpu_cores", sentry_value_new_int32(getNumberOfPhysicalCPUCores()));
if (!server_data_path.empty())
sentry_set_extra("disk_free_space", sentry_value_new_string(formatReadableSizeWithBinarySuffix(
Poco::File(server_data_path).freeSpace()).c_str()));
}
void sentry_logger(sentry_level_e level, const char * message, va_list args, void *)
@ -98,6 +108,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)
}
if (enabled)
{
server_data_path = config.getString("path", "");
const std::filesystem::path & default_tmp_path = std::filesystem::path(config.getString("tmp_path", Poco::Path::temp())) / "sentry";
const std::string & endpoint
= config.getString("send_crash_reports.endpoint");
@ -168,7 +179,7 @@ void SentryWriter::onFault(int sig, const std::string & error_message, const Sta
sentry_set_extra("signal_number", sentry_value_new_int32(sig));
#if defined(__ELF__) && !defined(__FreeBSD__)
const String & build_id_hex = DB::SymbolIndex::instance().getBuildIDHex();
const String & build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
sentry_set_tag("build_id", build_id_hex.c_str());
#endif

View File

@ -104,6 +104,11 @@ void Connection::connect(const char* db,
if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Enables auto-reconnect.
bool reconnect = true;
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Sets a particular SSL key and certificate if they are specified
if (mysql_ssl_set(driver.get(), ifNotEmpty(ssl_key), ifNotEmpty(ssl_cert), ifNotEmpty(ssl_ca), nullptr, nullptr))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
@ -115,11 +120,6 @@ void Connection::connect(const char* db,
if (mysql_set_character_set(driver.get(), "UTF8"))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Enables auto-reconnect.
bool reconnect = true;
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
is_connected = true;
}

View File

@ -26,6 +26,7 @@ void Pool::Entry::incrementRefCount()
mysql_thread_init();
}
void Pool::Entry::decrementRefCount()
{
if (!data)
@ -150,28 +151,39 @@ Pool::Entry Pool::tryGet()
initialize();
/// Searching for connection which was established but wasn't used.
for (auto & connection : connections)
/// Try to pick an idle connection from already allocated
for (auto connection_it = connections.cbegin(); connection_it != connections.cend();)
{
if (connection->ref_count == 0)
Connection * connection_ptr = *connection_it;
/// Fixme: There is a race condition here b/c we do not synchronize with Pool::Entry's copy-assignment operator
if (connection_ptr->ref_count == 0)
{
Entry res(connection, this);
return res.tryForceConnected() ? res : Entry();
Entry res(connection_ptr, this);
if (res.tryForceConnected()) /// Tries to reestablish connection as well
return res;
auto & logger = Poco::Util::Application::instance().logger();
logger.information("Idle connection to mysql server cannot be recovered, dropping it.");
/// This one is disconnected, cannot be reestablished and so needs to be disposed of.
connection_it = connections.erase(connection_it);
::delete connection_ptr; /// TODO: Manual memory management is awkward (matches allocConnection() method)
}
else
++connection_it;
}
/// Throws if pool is overflowed.
if (connections.size() >= max_connections)
throw Poco::Exception("mysqlxx::Pool is full");
/// Allocates new connection.
Connection * conn = allocConnection(true);
if (conn)
return Entry(conn, this);
Connection * connection_ptr = allocConnection(true);
if (connection_ptr)
return {connection_ptr, this};
return Entry();
return {};
}
void Pool::removeConnection(Connection* connection)
{
std::lock_guard<std::mutex> lock(mutex);
@ -199,11 +211,9 @@ void Pool::Entry::forceConnected() const
throw Poco::RuntimeException("Tried to access NULL database connection.");
Poco::Util::Application & app = Poco::Util::Application::instance();
if (data->conn.ping())
return;
bool first = true;
do
while (!tryForceConnected())
{
if (first)
first = false;
@ -225,7 +235,26 @@ void Pool::Entry::forceConnected() const
pool->rw_timeout,
pool->enable_local_infile);
}
while (!data->conn.ping());
}
/// Pings the server and reports whether the connection is usable.
/// The server-side connection id is sampled before and after the ping; if it differs,
/// the connection has been reestablished and that fact is written to the application log.
bool Pool::Entry::tryForceConnected() const
{
    auto * const driver = data->conn.getDriver();
    const auto id_before_ping = mysql_thread_id(driver);

    /// ping() also attempts to reestablish a lost connection
    if (!data->conn.ping())
        return false;

    const auto id_after_ping = mysql_thread_id(driver);
    if (id_after_ping != id_before_ping)
    {
        Poco::Util::Application::instance().logger().information(
            "Connection to mysql server has been reestablished. Connection id changed: %lu -> %lu",
            id_before_ping, id_after_ping);
    }

    return true;
}

View File

@ -127,10 +127,7 @@ public:
void forceConnected() const;
/// Tries to connect to the database. Returns false if the connection attempt fails.
bool tryForceConnected() const
{
return data->conn.ping();
}
bool tryForceConnected() const;
void incrementRefCount();
void decrementRefCount();

View File

@ -22,4 +22,12 @@ ResultBase::~ResultBase()
mysql_free_result(res);
}
/// Returns the name of the result column at zero-based position n.
/// Throws Exception when n is not less than num_fields.
std::string ResultBase::getFieldName(size_t n) const
{
    const bool position_exists = n < num_fields;
    if (!position_exists)
        throw Exception(std::string("Unknown column position ") + std::to_string(n));

    return fields[n].name;
}
}

View File

@ -31,6 +31,8 @@ public:
MYSQL_RES * getRes() { return res; }
const Query * getQuery() const { return query; }
std::string getFieldName(size_t n) const;
virtual ~ResultBase();
protected:

View File

@ -1,2 +1,5 @@
add_executable (mysqlxx_test mysqlxx_test.cpp)
target_link_libraries (mysqlxx_test PRIVATE mysqlxx)
add_executable (mysqlxx_pool_test mysqlxx_pool_test.cpp)
target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx)

View File

@ -0,0 +1,98 @@
#include <mysqlxx/mysqlxx.h>
#include <chrono>
#include <iostream>
#include <sstream>
#include <thread>
namespace
{

/// Tries to take a connection from the pool, making up to three attempts with a one second pause
/// after each failed one. Returns the first non-null entry; throws Poco::Exception when all
/// attempts have failed.
mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool)
{
    constexpr size_t max_tries = 3;
    const auto retry_pause = std::chrono::seconds(1);

    mysqlxx::Pool::Entry entry;

    size_t attempt = 0;
    while (attempt < max_tries)
    {
        ++attempt;

        try
        {
            entry = connections_pool.tryGet();
            if (!entry.isNull())
                return entry;
        }
        catch (const Poco::Exception & e)
        {
            /// A full pool is reported on a line of its own first;
            /// the generic failure line below then repeats the same text.
            if (e.displayText().find("mysqlxx::Pool is full") != std::string::npos)
                std::cerr << e.displayText() << std::endl;

            std::cerr << "Connection to " << connections_pool.getDescription() << " failed: " << e.displayText() << std::endl;
        }

        std::clog << "Connection to all replicas failed " << attempt << " times" << std::endl;
        std::this_thread::sleep_for(retry_pause);
    }

    std::ostringstream failure_text;
    failure_text << "Connections to all replicas failed: " << connections_pool.getDescription();
    throw Poco::Exception(failure_text.str());
}

}
/// Manual check of mysqlxx::Pool: loops until the iteration counter wraps around to zero. Each iteration
/// takes a connection, runs a fixed query with pauses between the stages and reports progress on std::clog.
/// A Poco::Exception raised during an iteration is printed and the loop carries on.
int main(int, char **)
{
    const auto stage_pause = std::chrono::seconds(5);
    const auto iteration_pause = std::chrono::seconds(3);

    const char * mysql_host = "localhost";
    const std::string query_text = "SHOW DATABASES";

    mysqlxx::Pool pool("", mysql_host, "default", "10203040", 3306);

    for (size_t iteration = 1; iteration != 0; ++iteration)
    {
        std::clog << "Iteration: " << iteration << std::endl;

        try
        {
            std::clog << "Acquiring DB connection ...";
            mysqlxx::Pool::Entry connection = getWithFailover(pool);
            std::clog << "ok" << std::endl;

            std::clog << "Preparing query (5s sleep) ...";
            std::this_thread::sleep_for(stage_pause);
            mysqlxx::Query query = connection->query();
            query << query_text;
            std::clog << "ok" << std::endl;

            std::clog << "Querying result (5s sleep) ...";
            std::this_thread::sleep_for(stage_pause);
            mysqlxx::UseQueryResult result = query.use();
            std::clog << "ok" << std::endl;

            std::clog << "Fetching result data (5s sleep) ...";
            std::this_thread::sleep_for(stage_pause);
            size_t rows_read = 0;
            while (result.fetch())
                ++rows_read;
            std::clog << "ok" << std::endl;

            std::clog << "Read " << rows_read << " rows." << std::endl;
        }
        catch (const Poco::Exception & e)
        {
            std::cerr << "Iteration FAILED:\n" << e.displayText() << std::endl;
        }

        std::clog << "====================" << std::endl;
        std::this_thread::sleep_for(iteration_pause);
    }
}

View File

@ -14,10 +14,6 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
# Minimal supported SDK version
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
# Global libraries
add_library(global-libs INTERFACE)
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
# Just make sure we have pthreads at all.
set(THREADS_PREFER_PTHREAD_FLAG ON)

View File

@ -0,0 +1,6 @@
# Locate the bundled fast_float library.
# Exports FAST_FLOAT_LIBRARY (target name) and FAST_FLOAT_INCLUDE_DIR (header root);
# configuration is aborted if the submodule has not been checked out.
set(FAST_FLOAT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/")
set(FAST_FLOAT_LIBRARY fast_float)

# FAST_FLOAT_INCLUDE_DIR already ends with a slash, so the header path is appended directly.
if (NOT EXISTS "${FAST_FLOAT_INCLUDE_DIR}fast_float/fast_float.h")
    message (FATAL_ERROR "submodule contrib/fast_float is missing. to fix try run: \n git submodule update --init --recursive")
endif ()

View File

@ -1,4 +1,11 @@
option(ENABLE_GRPC "Use gRPC" ${ENABLE_LIBRARIES})
# disable grpc due to conflicts of abseil (required by grpc) dynamic annotations with libtsan.a
if (SANITIZE STREQUAL "thread" AND COMPILER_GCC)
set(ENABLE_GRPC_DEFAULT OFF)
else()
set(ENABLE_GRPC_DEFAULT ${ENABLE_LIBRARIES})
endif()
option(ENABLE_GRPC "Use gRPC" ${ENABLE_GRPC_DEFAULT})
if(NOT ENABLE_GRPC)
if(USE_INTERNAL_GRPC_LIBRARY)

View File

@ -11,9 +11,9 @@ endif()
option(USE_INTERNAL_SSL_LIBRARY "Set to FALSE to use system *ssl library instead of bundled" ${NOT_UNBUNDLED})
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/openssl/README")
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boringssl/README.md")
if(USE_INTERNAL_SSL_LIBRARY)
message(WARNING "submodule contrib/openssl is missing. to fix try run: \n git submodule update --init --recursive")
message(WARNING "submodule contrib/boringssl is missing. to fix try run: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ssl library")
endif()
set(USE_INTERNAL_SSL_LIBRARY 0)
@ -52,12 +52,12 @@ endif ()
if (NOT OPENSSL_FOUND AND NOT MISSING_INTERNAL_SSL_LIBRARY)
set (USE_INTERNAL_SSL_LIBRARY 1)
set (OPENSSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl")
set (OPENSSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/boringssl")
if (ARCH_AMD64)
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include" "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake/linux_x86_64/include")
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include")
elseif (ARCH_AARCH64)
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include" "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake/linux_aarch64/include")
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include")
endif ()
set (OPENSSL_CRYPTO_LIBRARY crypto)
set (OPENSSL_SSL_LIBRARY ssl)

View File

@ -17,10 +17,6 @@ message(STATUS "Default libraries: ${DEFAULT_LIBS}")
set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
# Global libraries
add_library(global-libs INTERFACE)
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
# Just make sure we have pthreads at all.
set(THREADS_PREFER_PTHREAD_FLAG ON)

View File

@ -35,6 +35,15 @@ if (NOT PARALLEL_LINK_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
endif ()
endif ()
# ThinLTO provides its own parallel linking
# But use 2 parallel jobs, since:
# - this is what llvm does
# - and I've verified that lld-11 does not use all available CPU time (in peak) while linking one binary
if (ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2)
message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
set (PARALLEL_LINK_JOBS 2)
endif()
if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR})
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK})

View File

@ -12,10 +12,10 @@ else ()
endif ()
if (OS_ANDROID)
# pthread and rt are included in libc
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -ldl")
# pthread and rt are included in libc
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -ldl")
else ()
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl")
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl")
endif ()
message(STATUS "Default libraries: ${DEFAULT_LIBS}")
@ -31,10 +31,6 @@ if (ARCH_AMD64 AND NOT_UNBUNDLED)
set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
endif ()
# Global libraries
add_library(global-libs INTERFACE)
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
# Just make sure we have pthreads at all.
set(THREADS_PREFER_PTHREAD_FLAG ON)

View File

@ -1,18 +1,34 @@
# Possible values: `address` (ASan), `memory` (MSan), `thread` (TSan), `undefined` (UBSan), and "" (no sanitizing)
# Possible values:
# - `address` (ASan)
# - `memory` (MSan)
# - `thread` (TSan)
# - `undefined` (UBSan)
# - "" (no sanitizing)
option (SANITIZE "Enable one of the code sanitizers" "")
set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
# When gcc is run with -nodefaultlibs it does not add the sanitizer libraries
# on its own, even with -static-libasan and similar flags.
# This macro therefore puts the given library (e.g. asan, tsan, ubsan) on the
# link line of the global-libs interface target, wrapped in -Wl,-static / -Wl,-Bdynamic.
macro(add_explicit_sanitizer_library sanitizer_runtime)
    target_link_libraries(global-libs INTERFACE "-Wl,-static -l${sanitizer_runtime} -Wl,-Bdynamic")
endmacro()
if (SANITIZE)
if (SANITIZE STREQUAL "address")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_FLAGS}")
endif()
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan")
endif ()
if (COMPILER_GCC)
add_explicit_sanitizer_library(asan)
endif()
elseif (SANITIZE STREQUAL "memory")
# MemorySanitizer flags are set according to the official documentation:
@ -41,9 +57,10 @@ if (SANITIZE)
if (COMPILER_CLANG)
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt")
else()
message (WARNING "TSAN suppressions was not passed to the compiler (since the compiler is not clang)")
message (WARNING "Use the following command to pass them manually:")
message (WARNING " export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"")
set (MESSAGE "TSAN suppressions was not passed to the compiler (since the compiler is not clang)\n")
set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n")
set (MESSAGE "${MESSAGE} export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"")
message (WARNING "${MESSAGE}")
endif()
@ -55,16 +72,32 @@ if (SANITIZE)
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan")
endif ()
if (COMPILER_GCC)
add_explicit_sanitizer_library(tsan)
endif()
elseif (SANITIZE STREQUAL "undefined")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
set (UBSAN_FLAGS "-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
if (COMPILER_CLANG)
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
else()
set (MESSAGE "UBSAN suppressions was not passed to the compiler (since the compiler is not clang)\n")
set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n")
set (MESSAGE "${MESSAGE} export UBSAN_OPTIONS=\"$UBSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt\"")
message (WARNING "${MESSAGE}")
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
endif()
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan")
endif ()
if (COMPILER_GCC)
add_explicit_sanitizer_library(ubsan)
endif()
# llvm-tblgen, that is used during LLVM build, doesn't work with UBSan.
set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "")

View File

@ -24,7 +24,7 @@ option (WEVERYTHING "Enable -Weverything option with some exceptions." ON)
# Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size.
# Only in release build because debug has too large stack frames.
if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang"))
add_warning(frame-larger-than=32768)
add_warning(frame-larger-than=65536)
endif ()
if (COMPILER_CLANG)

2
contrib/AMQP-CPP vendored

@ -1 +1 @@
Subproject commit d63e1f016582e9faaaf279aa24513087a07bc6e7
Subproject commit 03781aaff0f10ef41f902b8cf865fe0067180c10

View File

@ -21,10 +21,12 @@ endif()
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
add_subdirectory (antlr4-runtime-cmake)
add_subdirectory (boost-cmake)
add_subdirectory (cctz-cmake)
add_subdirectory (consistent-hashing-sumbur)
add_subdirectory (consistent-hashing)
add_subdirectory (dragonbox-cmake)
add_subdirectory (FastMemcpy)
add_subdirectory (hyperscan-cmake)
add_subdirectory (jemalloc-cmake)
@ -34,7 +36,6 @@ add_subdirectory (libmetrohash)
add_subdirectory (lz4-cmake)
add_subdirectory (murmurhash)
add_subdirectory (replxx-cmake)
add_subdirectory (ryu-cmake)
add_subdirectory (unixodbc-cmake)
add_subdirectory (xz)
@ -98,10 +99,10 @@ if (USE_INTERNAL_H3_LIBRARY)
endif ()
if (USE_INTERNAL_SSL_LIBRARY)
add_subdirectory (openssl-cmake)
add_subdirectory (boringssl-cmake)
add_library(OpenSSL::Crypto ALIAS ${OPENSSL_CRYPTO_LIBRARY})
add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY})
add_library(OpenSSL::Crypto ALIAS crypto)
add_library(OpenSSL::SSL ALIAS ssl)
endif ()
if (USE_INTERNAL_LDAP_LIBRARY)
@ -209,6 +210,14 @@ if (USE_EMBEDDED_COMPILER AND USE_INTERNAL_LLVM_LIBRARY)
set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "")
set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "")
set (LLVM_TARGETS_TO_BUILD "X86;AArch64" CACHE STRING "")
# Yes it is set globally, but this is not enough, since llvm will add -std=c++11 after default
# And c++2a cannot be used, due to ambiguous operator !=
if (COMPILER_GCC OR COMPILER_CLANG)
set (_CXX_STANDARD "gnu++17")
else()
set (_CXX_STANDARD "c++17")
endif()
set (LLVM_CXX_STD ${_CXX_STANDARD} CACHE STRING "" FORCE)
add_subdirectory (llvm/llvm)
target_include_directories(LLVMSupport SYSTEM BEFORE PRIVATE ${ZLIB_INCLUDE_DIR})
endif ()
@ -290,3 +299,6 @@ endif()
if (USE_INTERNAL_ROCKSDB_LIBRARY)
add_subdirectory(rocksdb-cmake)
endif()
add_subdirectory(fast_float)

1
contrib/antlr4-runtime vendored Submodule

@ -0,0 +1 @@
Subproject commit a2fa7b76e2ee16d2ad955e9214a90bbf79da66fc

View File

@ -0,0 +1,156 @@
# Root of the contrib/antlr4-runtime submodule. Every entry of SRCS below and the
# include directory of the antlr4-runtime target are taken from this directory.
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/antlr4-runtime)
set (SRCS
${LIBRARY_DIR}/ANTLRErrorListener.cpp
${LIBRARY_DIR}/ANTLRErrorStrategy.cpp
${LIBRARY_DIR}/ANTLRFileStream.cpp
${LIBRARY_DIR}/ANTLRInputStream.cpp
${LIBRARY_DIR}/atn/AbstractPredicateTransition.cpp
${LIBRARY_DIR}/atn/ActionTransition.cpp
${LIBRARY_DIR}/atn/AmbiguityInfo.cpp
${LIBRARY_DIR}/atn/ArrayPredictionContext.cpp
${LIBRARY_DIR}/atn/ATN.cpp
${LIBRARY_DIR}/atn/ATNConfig.cpp
${LIBRARY_DIR}/atn/ATNConfigSet.cpp
${LIBRARY_DIR}/atn/ATNDeserializationOptions.cpp
${LIBRARY_DIR}/atn/ATNDeserializer.cpp
${LIBRARY_DIR}/atn/ATNSerializer.cpp
${LIBRARY_DIR}/atn/ATNSimulator.cpp
${LIBRARY_DIR}/atn/ATNState.cpp
${LIBRARY_DIR}/atn/AtomTransition.cpp
${LIBRARY_DIR}/atn/BasicBlockStartState.cpp
${LIBRARY_DIR}/atn/BasicState.cpp
${LIBRARY_DIR}/atn/BlockEndState.cpp
${LIBRARY_DIR}/atn/BlockStartState.cpp
${LIBRARY_DIR}/atn/ContextSensitivityInfo.cpp
${LIBRARY_DIR}/atn/DecisionEventInfo.cpp
${LIBRARY_DIR}/atn/DecisionInfo.cpp
${LIBRARY_DIR}/atn/DecisionState.cpp
${LIBRARY_DIR}/atn/EmptyPredictionContext.cpp
${LIBRARY_DIR}/atn/EpsilonTransition.cpp
${LIBRARY_DIR}/atn/ErrorInfo.cpp
${LIBRARY_DIR}/atn/LexerAction.cpp
${LIBRARY_DIR}/atn/LexerActionExecutor.cpp
${LIBRARY_DIR}/atn/LexerATNConfig.cpp
${LIBRARY_DIR}/atn/LexerATNSimulator.cpp
${LIBRARY_DIR}/atn/LexerChannelAction.cpp
${LIBRARY_DIR}/atn/LexerCustomAction.cpp
${LIBRARY_DIR}/atn/LexerIndexedCustomAction.cpp
${LIBRARY_DIR}/atn/LexerModeAction.cpp
${LIBRARY_DIR}/atn/LexerMoreAction.cpp
${LIBRARY_DIR}/atn/LexerPopModeAction.cpp
${LIBRARY_DIR}/atn/LexerPushModeAction.cpp
${LIBRARY_DIR}/atn/LexerSkipAction.cpp
${LIBRARY_DIR}/atn/LexerTypeAction.cpp
${LIBRARY_DIR}/atn/LL1Analyzer.cpp
${LIBRARY_DIR}/atn/LookaheadEventInfo.cpp
${LIBRARY_DIR}/atn/LoopEndState.cpp
${LIBRARY_DIR}/atn/NotSetTransition.cpp
${LIBRARY_DIR}/atn/OrderedATNConfigSet.cpp
${LIBRARY_DIR}/atn/ParseInfo.cpp
${LIBRARY_DIR}/atn/ParserATNSimulator.cpp
${LIBRARY_DIR}/atn/PlusBlockStartState.cpp
${LIBRARY_DIR}/atn/PlusLoopbackState.cpp
${LIBRARY_DIR}/atn/PrecedencePredicateTransition.cpp
${LIBRARY_DIR}/atn/PredicateEvalInfo.cpp
${LIBRARY_DIR}/atn/PredicateTransition.cpp
${LIBRARY_DIR}/atn/PredictionContext.cpp
${LIBRARY_DIR}/atn/PredictionMode.cpp
${LIBRARY_DIR}/atn/ProfilingATNSimulator.cpp
${LIBRARY_DIR}/atn/RangeTransition.cpp
${LIBRARY_DIR}/atn/RuleStartState.cpp
${LIBRARY_DIR}/atn/RuleStopState.cpp
${LIBRARY_DIR}/atn/RuleTransition.cpp
${LIBRARY_DIR}/atn/SemanticContext.cpp
${LIBRARY_DIR}/atn/SetTransition.cpp
${LIBRARY_DIR}/atn/SingletonPredictionContext.cpp
${LIBRARY_DIR}/atn/StarBlockStartState.cpp
${LIBRARY_DIR}/atn/StarLoopbackState.cpp
${LIBRARY_DIR}/atn/StarLoopEntryState.cpp
${LIBRARY_DIR}/atn/TokensStartState.cpp
${LIBRARY_DIR}/atn/Transition.cpp
${LIBRARY_DIR}/atn/WildcardTransition.cpp
${LIBRARY_DIR}/BailErrorStrategy.cpp
${LIBRARY_DIR}/BaseErrorListener.cpp
${LIBRARY_DIR}/BufferedTokenStream.cpp
${LIBRARY_DIR}/CharStream.cpp
${LIBRARY_DIR}/CommonToken.cpp
${LIBRARY_DIR}/CommonTokenFactory.cpp
${LIBRARY_DIR}/CommonTokenStream.cpp
${LIBRARY_DIR}/ConsoleErrorListener.cpp
${LIBRARY_DIR}/DefaultErrorStrategy.cpp
${LIBRARY_DIR}/dfa/DFA.cpp
${LIBRARY_DIR}/dfa/DFASerializer.cpp
${LIBRARY_DIR}/dfa/DFAState.cpp
${LIBRARY_DIR}/dfa/LexerDFASerializer.cpp
${LIBRARY_DIR}/DiagnosticErrorListener.cpp
${LIBRARY_DIR}/Exceptions.cpp
${LIBRARY_DIR}/FailedPredicateException.cpp
${LIBRARY_DIR}/InputMismatchException.cpp
${LIBRARY_DIR}/InterpreterRuleContext.cpp
${LIBRARY_DIR}/IntStream.cpp
${LIBRARY_DIR}/Lexer.cpp
${LIBRARY_DIR}/LexerInterpreter.cpp
${LIBRARY_DIR}/LexerNoViableAltException.cpp
${LIBRARY_DIR}/ListTokenSource.cpp
${LIBRARY_DIR}/misc/InterpreterDataReader.cpp
${LIBRARY_DIR}/misc/Interval.cpp
${LIBRARY_DIR}/misc/IntervalSet.cpp
${LIBRARY_DIR}/misc/MurmurHash.cpp
${LIBRARY_DIR}/misc/Predicate.cpp
${LIBRARY_DIR}/NoViableAltException.cpp
${LIBRARY_DIR}/Parser.cpp
${LIBRARY_DIR}/ParserInterpreter.cpp
${LIBRARY_DIR}/ParserRuleContext.cpp
${LIBRARY_DIR}/ProxyErrorListener.cpp
${LIBRARY_DIR}/RecognitionException.cpp
${LIBRARY_DIR}/Recognizer.cpp
${LIBRARY_DIR}/RuleContext.cpp
${LIBRARY_DIR}/RuleContextWithAltNum.cpp
${LIBRARY_DIR}/RuntimeMetaData.cpp
${LIBRARY_DIR}/support/Any.cpp
${LIBRARY_DIR}/support/Arrays.cpp
${LIBRARY_DIR}/support/CPPUtils.cpp
${LIBRARY_DIR}/support/guid.cpp
${LIBRARY_DIR}/support/StringUtils.cpp
${LIBRARY_DIR}/Token.cpp
${LIBRARY_DIR}/TokenSource.cpp
${LIBRARY_DIR}/TokenStream.cpp
${LIBRARY_DIR}/TokenStreamRewriter.cpp
${LIBRARY_DIR}/tree/ErrorNode.cpp
${LIBRARY_DIR}/tree/ErrorNodeImpl.cpp
${LIBRARY_DIR}/tree/IterativeParseTreeWalker.cpp
${LIBRARY_DIR}/tree/ParseTree.cpp
${LIBRARY_DIR}/tree/ParseTreeListener.cpp
${LIBRARY_DIR}/tree/ParseTreeVisitor.cpp
${LIBRARY_DIR}/tree/ParseTreeWalker.cpp
${LIBRARY_DIR}/tree/pattern/Chunk.cpp
${LIBRARY_DIR}/tree/pattern/ParseTreeMatch.cpp
${LIBRARY_DIR}/tree/pattern/ParseTreePattern.cpp
${LIBRARY_DIR}/tree/pattern/ParseTreePatternMatcher.cpp
${LIBRARY_DIR}/tree/pattern/RuleTagToken.cpp
${LIBRARY_DIR}/tree/pattern/TagChunk.cpp
${LIBRARY_DIR}/tree/pattern/TextChunk.cpp
${LIBRARY_DIR}/tree/pattern/TokenTagToken.cpp
${LIBRARY_DIR}/tree/TerminalNode.cpp
${LIBRARY_DIR}/tree/TerminalNodeImpl.cpp
${LIBRARY_DIR}/tree/Trees.cpp
${LIBRARY_DIR}/tree/xpath/XPath.cpp
${LIBRARY_DIR}/tree/xpath/XPathElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathLexer.cpp
${LIBRARY_DIR}/tree/xpath/XPathLexerErrorListener.cpp
${LIBRARY_DIR}/tree/xpath/XPathRuleAnywhereElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathRuleElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathTokenAnywhereElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathTokenElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathWildcardAnywhereElement.cpp
${LIBRARY_DIR}/tree/xpath/XPathWildcardElement.cpp
${LIBRARY_DIR}/UnbufferedCharStream.cpp
${LIBRARY_DIR}/UnbufferedTokenStream.cpp
${LIBRARY_DIR}/Vocabulary.cpp
${LIBRARY_DIR}/WritableToken.cpp
)
# Build the runtime from the sources collected in SRCS. No STATIC/SHARED keyword is
# given, so the library type follows the CMake default (controlled by BUILD_SHARED_LIBS).
add_library (antlr4-runtime ${SRCS})
# The include directory is the same directory the sources are taken from.
# PUBLIC propagates it to targets that link antlr4-runtime; SYSTEM marks it as a
# system include directory.
target_include_directories (antlr4-runtime SYSTEM PUBLIC ${LIBRARY_DIR})

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit a04e72c0464f0c31d3384f18f0c0db36a05538e0
Subproject commit 0b98b443aa7bb77d65efd7b23b3b8c8a0ab5f1f3

1
contrib/boringssl vendored Submodule

@ -0,0 +1 @@
Subproject commit 8b2bf912ba04823cfe9e7e8f5bb60cb7f6252449

View File

@ -0,0 +1,661 @@
# Copyright (c) 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This file is created by generate_build_files.py and edited accordingly.
cmake_minimum_required(VERSION 3.0)
project(BoringSSL LANGUAGES C CXX)
set(BORINGSSL_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/boringssl)
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CLANG 1)
endif()
if(CMAKE_COMPILER_IS_GNUCXX OR CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fvisibility=hidden -fno-common -fno-exceptions -fno-rtti")
if(APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -fno-common")
if((CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.8.99") OR CLANG)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
endif()
endif()
# pthread_rwlock_t requires a feature flag.
if(NOT WIN32)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_XOPEN_SOURCE=700")
endif()
if(WIN32)
add_definitions(-D_HAS_EXCEPTIONS=0)
add_definitions(-DWIN32_LEAN_AND_MEAN)
add_definitions(-DNOMINMAX)
# Allow use of fopen.
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
# VS 2017 and higher supports STL-only warning suppressions.
# A bug in CMake < 3.13.0 may cause the space in this value to
# cause issues when building with NASM. In that case, update CMake.
add_definitions("-D_STL_EXTRA_DISABLED_WARNINGS=4774 4987")
endif()
add_definitions(-DBORINGSSL_IMPLEMENTATION)
# CMake's iOS support uses Apple's multiple-architecture toolchain. It takes an
# architecture list from CMAKE_OSX_ARCHITECTURES, leaves CMAKE_SYSTEM_PROCESSOR
# alone, and expects all architecture-specific logic to be conditioned within
# the source files rather than the build. This does not work for our assembly
# files, so we fix CMAKE_SYSTEM_PROCESSOR and only support single-architecture
# builds.
if(NOT OPENSSL_NO_ASM AND CMAKE_OSX_ARCHITECTURES)
list(LENGTH CMAKE_OSX_ARCHITECTURES NUM_ARCHES)
if(NOT ${NUM_ARCHES} EQUAL 1)
message(FATAL_ERROR "Universal binaries not supported.")
endif()
list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR)
endif()
# Map CMAKE_SYSTEM_PROCESSOR onto the ARCH name that is later used to pick the
# CRYPTO_<os>_${ARCH}_SOURCES assembly list. With OPENSSL_NO_ASM set, the processor
# is not inspected at all and ARCH becomes "generic" (no assembly list with that
# name is defined in this part of the file).
#
# CMAKE_SYSTEM_PROCESSOR is quoted in every condition: if it is empty, an unquoted
# expansion leaves if() without its left operand and CMake stops with a syntax
# error instead of the "Unknown processor" message below.
if(OPENSSL_NO_ASM)
  add_definitions(-DOPENSSL_NO_ASM)
  set(ARCH "generic")
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
  set(ARCH "x86_64")
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "amd64")
  set(ARCH "x86_64")
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64")
  # cmake reports AMD64 on Windows, but we might be building for 32-bit.
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(ARCH "x86_64")
  else()
    set(ARCH "x86")
  endif()
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86")
  set(ARCH "x86")
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386")
  set(ARCH "x86")
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i686")
  set(ARCH "x86")
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64")
  set(ARCH "aarch64")
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64")
  set(ARCH "aarch64")
# Apple A12 Bionic chipset which is added in iPhone XS/XS Max/XR uses arm64e architecture.
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64e")
  set(ARCH "aarch64")
# Any other name starting with "arm" is 32-bit ARM; the 64-bit spellings were
# handled above. The pattern is "^arm" and not "^arm*": in a regex the "*" applies
# to the "m" only, so "^arm*" would accept every name that starts with "ar".
elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm")
  set(ARCH "arm")
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "mips")
  # Just to avoid the unknown processor error.
  set(ARCH "generic")
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "ppc64le")
  set(ARCH "ppc64le")
else()
  message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
endif()
if(NOT OPENSSL_NO_ASM)
if(UNIX)
enable_language(ASM)
# Clang's integrated assembler does not support debug symbols.
if(NOT CMAKE_ASM_COMPILER_ID MATCHES "Clang")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,-g")
endif()
# CMake does not add -isysroot and -arch flags to assembly.
if(APPLE)
if(CMAKE_OSX_SYSROOT)
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -isysroot \"${CMAKE_OSX_SYSROOT}\"")
endif()
foreach(arch ${CMAKE_OSX_ARCHITECTURES})
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -arch ${arch}")
endforeach()
endif()
else()
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -gcv8")
enable_language(ASM_NASM)
endif()
endif()
if(BUILD_SHARED_LIBS)
add_definitions(-DBORINGSSL_SHARED_LIBRARY)
# Enable position-independent code globally. This is needed because
# some library targets are OBJECT libraries.
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
endif()
include_directories(${BORINGSSL_SOURCE_DIR}/include)
set(
CRYPTO_ios_aarch64_SOURCES
ios-aarch64/crypto/chacha/chacha-armv8.S
ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
ios-aarch64/crypto/fipsmodule/armv8-mont.S
ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
ios-aarch64/crypto/fipsmodule/sha1-armv8.S
ios-aarch64/crypto/fipsmodule/sha256-armv8.S
ios-aarch64/crypto/fipsmodule/sha512-armv8.S
ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
ios-aarch64/crypto/test/trampoline-armv8.S
)
set(
CRYPTO_ios_arm_SOURCES
ios-arm/crypto/chacha/chacha-armv4.S
ios-arm/crypto/fipsmodule/aesv8-armx32.S
ios-arm/crypto/fipsmodule/armv4-mont.S
ios-arm/crypto/fipsmodule/bsaes-armv7.S
ios-arm/crypto/fipsmodule/ghash-armv4.S
ios-arm/crypto/fipsmodule/ghashv8-armx32.S
ios-arm/crypto/fipsmodule/sha1-armv4-large.S
ios-arm/crypto/fipsmodule/sha256-armv4.S
ios-arm/crypto/fipsmodule/sha512-armv4.S
ios-arm/crypto/fipsmodule/vpaes-armv7.S
ios-arm/crypto/test/trampoline-armv4.S
)
set(
CRYPTO_linux_aarch64_SOURCES
linux-aarch64/crypto/chacha/chacha-armv8.S
linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
linux-aarch64/crypto/fipsmodule/armv8-mont.S
linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
linux-aarch64/crypto/fipsmodule/sha1-armv8.S
linux-aarch64/crypto/fipsmodule/sha256-armv8.S
linux-aarch64/crypto/fipsmodule/sha512-armv8.S
linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
linux-aarch64/crypto/test/trampoline-armv8.S
)
set(
CRYPTO_linux_arm_SOURCES
linux-arm/crypto/chacha/chacha-armv4.S
linux-arm/crypto/fipsmodule/aesv8-armx32.S
linux-arm/crypto/fipsmodule/armv4-mont.S
linux-arm/crypto/fipsmodule/bsaes-armv7.S
linux-arm/crypto/fipsmodule/ghash-armv4.S
linux-arm/crypto/fipsmodule/ghashv8-armx32.S
linux-arm/crypto/fipsmodule/sha1-armv4-large.S
linux-arm/crypto/fipsmodule/sha256-armv4.S
linux-arm/crypto/fipsmodule/sha512-armv4.S
linux-arm/crypto/fipsmodule/vpaes-armv7.S
linux-arm/crypto/test/trampoline-armv4.S
${BORINGSSL_SOURCE_DIR}/crypto/curve25519/asm/x25519-asm-arm.S
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm_asm.S
)
set(
CRYPTO_linux_ppc64le_SOURCES
linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
linux-ppc64le/crypto/test/trampoline-ppc.S
)
set(
CRYPTO_linux_x86_SOURCES
linux-x86/crypto/chacha/chacha-x86.S
linux-x86/crypto/fipsmodule/aesni-x86.S
linux-x86/crypto/fipsmodule/bn-586.S
linux-x86/crypto/fipsmodule/co-586.S
linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S
linux-x86/crypto/fipsmodule/ghash-x86.S
linux-x86/crypto/fipsmodule/md5-586.S
linux-x86/crypto/fipsmodule/sha1-586.S
linux-x86/crypto/fipsmodule/sha256-586.S
linux-x86/crypto/fipsmodule/sha512-586.S
linux-x86/crypto/fipsmodule/vpaes-x86.S
linux-x86/crypto/fipsmodule/x86-mont.S
linux-x86/crypto/test/trampoline-x86.S
)
set(
CRYPTO_linux_x86_64_SOURCES
linux-x86_64/crypto/chacha/chacha-x86_64.S
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
linux-x86_64/crypto/fipsmodule/md5-x86_64.S
linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
linux-x86_64/crypto/fipsmodule/sha256-x86_64.S
linux-x86_64/crypto/fipsmodule/sha512-x86_64.S
linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
linux-x86_64/crypto/fipsmodule/x86_64-mont.S
linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
linux-x86_64/crypto/test/trampoline-x86_64.S
${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S
)
set(
CRYPTO_mac_x86_SOURCES
mac-x86/crypto/chacha/chacha-x86.S
mac-x86/crypto/fipsmodule/aesni-x86.S
mac-x86/crypto/fipsmodule/bn-586.S
mac-x86/crypto/fipsmodule/co-586.S
mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S
mac-x86/crypto/fipsmodule/ghash-x86.S
mac-x86/crypto/fipsmodule/md5-586.S
mac-x86/crypto/fipsmodule/sha1-586.S
mac-x86/crypto/fipsmodule/sha256-586.S
mac-x86/crypto/fipsmodule/sha512-586.S
mac-x86/crypto/fipsmodule/vpaes-x86.S
mac-x86/crypto/fipsmodule/x86-mont.S
mac-x86/crypto/test/trampoline-x86.S
)
set(
CRYPTO_mac_x86_64_SOURCES
mac-x86_64/crypto/chacha/chacha-x86_64.S
mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
mac-x86_64/crypto/fipsmodule/ghash-x86_64.S
mac-x86_64/crypto/fipsmodule/md5-x86_64.S
mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S
mac-x86_64/crypto/fipsmodule/rsaz-avx2.S
mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
mac-x86_64/crypto/fipsmodule/sha256-x86_64.S
mac-x86_64/crypto/fipsmodule/sha512-x86_64.S
mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
mac-x86_64/crypto/fipsmodule/x86_64-mont.S
mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
mac-x86_64/crypto/test/trampoline-x86_64.S
)
set(
CRYPTO_win_x86_SOURCES
win-x86/crypto/chacha/chacha-x86.asm
win-x86/crypto/fipsmodule/aesni-x86.asm
win-x86/crypto/fipsmodule/bn-586.asm
win-x86/crypto/fipsmodule/co-586.asm
win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm
win-x86/crypto/fipsmodule/ghash-x86.asm
win-x86/crypto/fipsmodule/md5-586.asm
win-x86/crypto/fipsmodule/sha1-586.asm
win-x86/crypto/fipsmodule/sha256-586.asm
win-x86/crypto/fipsmodule/sha512-586.asm
win-x86/crypto/fipsmodule/vpaes-x86.asm
win-x86/crypto/fipsmodule/x86-mont.asm
win-x86/crypto/test/trampoline-x86.asm
)
set(
CRYPTO_win_x86_64_SOURCES
win-x86_64/crypto/chacha/chacha-x86_64.asm
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
win-x86_64/crypto/fipsmodule/md5-x86_64.asm
win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm
win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm
win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm
win-x86_64/crypto/fipsmodule/rsaz-avx2.asm
win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
win-x86_64/crypto/fipsmodule/sha256-x86_64.asm
win-x86_64/crypto/fipsmodule/sha512-x86_64.asm
win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
win-x86_64/crypto/fipsmodule/x86_64-mont.asm
win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
win-x86_64/crypto/test/trampoline-x86_64.asm
)
# Choose the pre-generated assembly list for the target platform:
#   Apple + aarch64 -> ios-aarch64, Apple + arm -> ios-arm, any other Apple -> mac-${ARCH},
#   other UNIX -> linux-${ARCH}, Windows -> win-${ARCH}.
# If none of the platform checks matches, CRYPTO_ARCH_SOURCES is not set here.
if(APPLE)
  if(${ARCH} STREQUAL "aarch64")
    set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_aarch64_SOURCES})
  elseif(${ARCH} STREQUAL "arm")
    set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_arm_SOURCES})
  else()
    set(CRYPTO_ARCH_SOURCES ${CRYPTO_mac_${ARCH}_SOURCES})
  endif()
elseif(UNIX)
  set(CRYPTO_ARCH_SOURCES ${CRYPTO_linux_${ARCH}_SOURCES})
elseif(WIN32)
  set(CRYPTO_ARCH_SOURCES ${CRYPTO_win_${ARCH}_SOURCES})
endif()
add_library(
crypto
${CRYPTO_ARCH_SOURCES}
err_data.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bitstr.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bool.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_d2i_fp.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_dup.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_enum.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_gentm.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_i2d_fp.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_int.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_mbstr.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utctm.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utf8.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_lib.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_par.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn_pack.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_enum.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_int.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_string.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_dec.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_enc.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_fre.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_new.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_typ.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_utl.c
${BORINGSSL_SOURCE_DIR}/crypto/asn1/time_support.c
${BORINGSSL_SOURCE_DIR}/crypto/base64/base64.c
${BORINGSSL_SOURCE_DIR}/crypto/bio/bio.c
${BORINGSSL_SOURCE_DIR}/crypto/bio/bio_mem.c
${BORINGSSL_SOURCE_DIR}/crypto/bio/connect.c
${BORINGSSL_SOURCE_DIR}/crypto/bio/fd.c
${BORINGSSL_SOURCE_DIR}/crypto/bio/file.c
${BORINGSSL_SOURCE_DIR}/crypto/bio/hexdump.c
${BORINGSSL_SOURCE_DIR}/crypto/bio/pair.c
${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c
${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c
${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c
${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c
${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/asn1_compat.c
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/ber.c
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbb.c
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbs.c
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/unicode.c
${BORINGSSL_SOURCE_DIR}/crypto/chacha/chacha.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/cipher_extra.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/derive_key.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesccm.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesctrhmac.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesgcmsiv.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_chacha20poly1305.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_null.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc2.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc4.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_tls.c
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/tls_cbc.c
${BORINGSSL_SOURCE_DIR}/crypto/cmac/cmac.c
${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c
${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c
${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c
${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c
${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c
${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c
${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c
${BORINGSSL_SOURCE_DIR}/crypto/crypto.c
${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c
${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c
${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c
${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c
${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c
${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c
${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c
${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c
${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c
${BORINGSSL_SOURCE_DIR}/crypto/err/err.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/digestsign.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/evp.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_ctx.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_dsa_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/pbkdf.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/print.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/scrypt.c
${BORINGSSL_SOURCE_DIR}/crypto/evp/sign.c
${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c
${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c
${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c
${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c
${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c
${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c
${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c
${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c
${BORINGSSL_SOURCE_DIR}/crypto/mem.c
${BORINGSSL_SOURCE_DIR}/crypto/obj/obj.c
${BORINGSSL_SOURCE_DIR}/crypto/obj/obj_xref.c
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_all.c
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_info.c
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_lib.c
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_oth.c
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pk8.c
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pkey.c
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_x509.c
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_xaux.c
${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7.c
${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7_x509.c
${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/p5_pbev2.c
${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8.c
${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8_x509.c
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305.c
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm.c
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_vec.c
${BORINGSSL_SOURCE_DIR}/crypto/pool/pool.c
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c
${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c
${BORINGSSL_SOURCE_DIR}/crypto/refcount_c11.c
${BORINGSSL_SOURCE_DIR}/crypto/refcount_lock.c
${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_asn1.c
${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_print.c
${BORINGSSL_SOURCE_DIR}/crypto/siphash/siphash.c
${BORINGSSL_SOURCE_DIR}/crypto/stack/stack.c
${BORINGSSL_SOURCE_DIR}/crypto/thread.c
${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c
${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c
${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c
${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c
${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c
${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509a.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_att.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_cmp.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_d2.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_def.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_ext.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_lu.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_obj.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_r2x.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_req.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_set.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_trs.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_txt.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_v3.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vfy.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vpm.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509cset.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509name.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509rset.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509spki.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_algor.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_all.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_attrib.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_crl.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_exten.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_info.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_name.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pkey.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pubkey.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_req.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_sig.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_spki.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_val.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509.c
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509a.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_cache.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_data.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_lib.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_map.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_node.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_tree.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akey.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akeya.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_alt.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bcons.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bitst.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_conf.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_cpols.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_crld.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_enum.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_extku.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_genn.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ia5.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_info.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_int.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_lib.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ncons.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ocsp.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c
)
# "ssl" library target: the TLS/DTLS protocol implementation, built from the
# C++ sources under ${BORINGSSL_SOURCE_DIR}/ssl plus two C files from the
# decrepit/ directory (listed last). No STATIC/SHARED keyword is given, so the
# library type follows CMake's default (controlled by BUILD_SHARED_LIBS).
add_library(
ssl
${BORINGSSL_SOURCE_DIR}/ssl/bio_ssl.cc
${BORINGSSL_SOURCE_DIR}/ssl/d1_both.cc
${BORINGSSL_SOURCE_DIR}/ssl/d1_lib.cc
${BORINGSSL_SOURCE_DIR}/ssl/d1_pkt.cc
${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc
${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc
${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc
${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc
${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc
${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc
${BORINGSSL_SOURCE_DIR}/ssl/handshake_server.cc
${BORINGSSL_SOURCE_DIR}/ssl/s3_both.cc
${BORINGSSL_SOURCE_DIR}/ssl/s3_lib.cc
${BORINGSSL_SOURCE_DIR}/ssl/s3_pkt.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_aead_ctx.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_asn1.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_buffer.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_cert.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_cipher.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_file.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_key_share.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_lib.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_privkey.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_session.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_stat.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_transcript.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc
${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc
${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc
${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc
${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc
${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc
${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc
${BORINGSSL_SOURCE_DIR}/ssl/tls13_server.cc
${BORINGSSL_SOURCE_DIR}/ssl/tls_method.cc
${BORINGSSL_SOURCE_DIR}/ssl/tls_record.cc
# Sources from the decrepit/ tree that are compiled into this target as well.
${BORINGSSL_SOURCE_DIR}/decrepit/ssl/ssl_decrepit.c
${BORINGSSL_SOURCE_DIR}/decrepit/cfb/cfb.c
)
# "bssl" executable target, built from the sources under
# ${BORINGSSL_SOURCE_DIR}/tool. It is linked against the ssl library by a
# separate target_link_libraries() call.
add_executable(
bssl
${BORINGSSL_SOURCE_DIR}/tool/args.cc
${BORINGSSL_SOURCE_DIR}/tool/ciphers.cc
${BORINGSSL_SOURCE_DIR}/tool/client.cc
${BORINGSSL_SOURCE_DIR}/tool/const.cc
${BORINGSSL_SOURCE_DIR}/tool/digest.cc
${BORINGSSL_SOURCE_DIR}/tool/fd.cc
${BORINGSSL_SOURCE_DIR}/tool/file.cc
${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc
${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc
${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc
${BORINGSSL_SOURCE_DIR}/tool/rand.cc
${BORINGSSL_SOURCE_DIR}/tool/server.cc
${BORINGSSL_SOURCE_DIR}/tool/sign.cc
${BORINGSSL_SOURCE_DIR}/tool/speed.cc
${BORINGSSL_SOURCE_DIR}/tool/tool.cc
${BORINGSSL_SOURCE_DIR}/tool/transport_common.cc
)
# Dependency chain: bssl -> ssl -> crypto. The keyword-less signature of
# target_link_libraries() is used, so the dependencies are transitive.
target_link_libraries(ssl crypto)
target_link_libraries(bssl ssl)
# Everywhere except Windows and Android, crypto is linked against pthread.
if(NOT WIN32 AND NOT ANDROID)
target_link_libraries(crypto pthread)
endif()
# On Windows the bssl tool additionally links the ws2_32 (Winsock) library.
if(WIN32)
target_link_libraries(bssl ws2_32)
endif()
# Export the BoringSSL public headers to consumers of crypto and ssl. SYSTEM
# marks the directory as a system include path for those consumers.
target_include_directories(crypto SYSTEM PUBLIC ${BORINGSSL_SOURCE_DIR}/include)
target_include_directories(ssl SYSTEM PUBLIC ${BORINGSSL_SOURCE_DIR}/include)
# Disable the gnu-anonymous-struct warning for crypto's own sources only
# (PRIVATE: the flag is not propagated to targets that link crypto).
target_compile_options(crypto PRIVATE -Wno-gnu-anonymous-struct)

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,782 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
// Read-only constant table used by the key-expansion code below. It holds
// three 16-byte rows:
//   row 0: 0x01 splatted - initial round constant, loaded into v1 and doubled
//          with "shl ...,#1" after every expansion step;
//   row 1: tbl index vector loaded into v2 ("rotate-n-splat" of the last word);
//   row 2: 0x1b splatted - reloaded into v1 for the final two AES-128 steps.
.section __TEXT,__const
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
// aes_hw_set_encrypt_key: expand a user AES key into an encryption key
// schedule using the ARMv8 AES instructions.
//
// Register usage, as read from the code below:
//   x0 - pointer to the user key (a zero pointer is rejected)
//   w1 - key length in bits; only 128, 192 and 256 pass the checks
//   x2 - pointer to the output key schedule (a zero pointer is rejected)
// Return value in x0:
//    0 - success
//   -1 - x0 or x2 was zero
//   -2 - bit length below 128, above 256, or not a multiple of 64
// On success the round count (10, 12 or 14) is stored at byte offset 240 of
// the schedule; it is also left in w12 and x2 is left pointing at that
// offset, which aes_hw_set_decrypt_key relies on when it calls Lenc_key.
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
// Argument validation. x3 carries the pending error code to the exit path.
mov x3,#-1
cmp x0,#0
b.eq Lenc_key_abort
cmp x2,#0
b.eq Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt Lenc_key_abort
cmp w1,#256
b.gt Lenc_key_abort
tst w1,#0x3f
b.ne Lenc_key_abort
// x3 now becomes the address of the Lrcon table.
adrp x3,Lrcon@PAGE
add x3,x3,Lrcon@PAGEOFF
// The flags from this compare select the key size at the three branches
// below; none of the instructions in between modify the flags.
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32
b.lt Loop128
b.eq L192
b L256
.align 4
// 128-bit key: eight looped expansion steps...
Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne Loop128
// ...followed by two unrolled steps that use the third Lrcon row (0x1b).
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
// Advance x2 from the last round key (offset 160) to offset 240.
add x2,x2,#0x50
mov w12,#10
b Ldone
.align 4
// 192-bit key: the extra 8 key bytes are kept in v4; w1 still holds 8, so
// Loop192 runs eight times and stores 24 bytes per iteration.
L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne Loop192
mov w12,#12
// Advance x2 from offset 208 to offset 240.
add x2,x2,#0x20
b Ldone
.align 4
// 256-bit key: second key half in v4, seven iterations. The loop exits from
// its middle (b.eq Ldone) once the last round key has been stored, at which
// point x2 already points at offset 240.
L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b Loop256
// Common success exit: store the round count at [x2] and clear the status.
Ldone:
str w12,[x2]
mov x3,#0
Lenc_key_abort:
mov x0,x3 // return value
// Only x29 is reloaded; x30 is untouched by this function body, so the
// saved copy is simply discarded together with the frame.
ldr x29,[sp],#16
ret
// aes_hw_set_decrypt_key: build a decryption key schedule.
//
// Takes the same arguments as aes_hw_set_encrypt_key (x0 = user key,
// w1 = bits, x2 = key schedule) and first runs the encryption key expansion
// via Lenc_key. A non-zero status from that call is returned unchanged.
// Otherwise the schedule is converted in place: the round keys are reversed
// in order, and every key except the first and the last is passed through
// aesimc. Returns 0 in x0 on success.
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
.align 5
_aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl Lenc_key
cmp x0,#0
b.ne Ldec_key_abort
// Lenc_key leaves x2 at schedule+240 and the round count in w12.
sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
// Swap the first and last round keys without transforming them.
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
// Walk inwards from both ends, swapping pairs and applying aesimc to each.
Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi Loop_imc
// The two pointers have met: transform the middle round key in place.
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
// aes_hw_encrypt: encrypt a single 16-byte block.
//   x0 - input block
//   x1 - output block
//   x2 - key schedule; the round count is read from byte offset 240
// Round keys are streamed through v0/v1, two rounds per loop iteration.
// No stack frame is used and nothing is returned.
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
.align 5
_aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
// w3 = rounds - 2: the last two rounds are handled after the loop.
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
// Final round: no aesmc, then XOR with the last round key.
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
// aes_hw_decrypt: decrypt a single 16-byte block.
//   x0 - input block
//   x1 - output block
//   x2 - key schedule; the round count is read from byte offset 240
// Mirrors aes_hw_encrypt with aesd/aesimc in place of aese/aesmc.
// No stack frame is used and nothing is returned.
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
.align 5
_aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
// w3 = rounds - 2: the last two rounds are handled after the loop.
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
// Final round: no aesimc, then XOR with the last round key.
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
// aes_hw_cbc_encrypt: AES-CBC encryption or decryption of whole blocks.
//
// Register usage, as read from the code below:
//   x0 - input pointer
//   x1 - output pointer
//   x2 - length in bytes
//   x3 - key schedule; the round count is read from byte offset 240
//   x4 - 16-byte IV, read on entry and overwritten at Lcbc_done
//   w5 - zero selects decryption, non-zero selects encryption
// A length below 16 returns immediately without touching the output or the
// IV. Any trailing partial block is ignored (the biased length is masked
// with -16).
// Encryption processes one block at a time, with separate paths for 128-bit
// keys and for 192/256-bit keys. Decryption processes three blocks per
// iteration, with a tail for the remaining one or two blocks.
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
.align 5
_aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo Lcbc_abort
// x8 is the input stride: 16, or 0 when exactly one block remains, so that
// the look-ahead loads never read past the last input block.
csel x8,xzr,x8,eq
// The flags set here stay live until "b.eq Lcbc_dec" below; the
// instructions in between do not modify them.
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
// The last seven round keys stay resident in v18-v23 and v7.
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
b.eq Lcbc_dec
// Encryption. w5 = rounds - 8, so 2 identifies a 128-bit key.
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
// v5 = rndkey[0] ^ last round key, used to pre-mix the next input block.
eor v5.16b,v16.16b,v7.16b
b.eq Lcbc_enc128
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b Lenter_cbc_enc
.align 4
// 192/256-bit encryption loop: one block per iteration.
Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
// w5 == 4 means 12 rounds (192-bit key): skip the two extra rounds.
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
// Load the next plaintext block while the current one finishes.
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
// v6 = finished ciphertext block (also the running IV).
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc
st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
// 128-bit encryption path: all round keys fit in registers.
Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b Lenter_cbc_enc128
Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
// Decryption. Three blocks are kept in flight in v0, v1 and v18; copies of
// the corresponding ciphertext are kept in v2, v3 and v19 for the CBC
// chaining XOR.
Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
// Fewer than three blocks in total: go straight to the tail.
b.lo Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
// Pre-mix each chaining value with the last round key so that a single
// eor after the final aesd yields the plaintext.
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
// v6 = last ciphertext block of this group, i.e. the next chaining value.
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
// Flags still come from "subs x2,x2,#0x30" above.
b.hs Loop3x_cbc_dec
// x2 == -0x30 here means the input was consumed exactly.
cmn x2,#0x30
b.eq Lcbc_done
nop
// Tail: one or two remaining blocks, processed in v1 and v18.
Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
// x2 == -0x20 selects the single-block exit at "b.eq Lcbc_dec_one" below.
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq Lcbc_dec_one
// Two blocks left.
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b Lcbc_done
// One block left.
Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
// Write the updated chaining value back to the caller's IV buffer.
Lcbc_done:
st1 {v6.16b},[x4]
Lcbc_abort:
// Only x29 is reloaded; the saved x30 is discarded with the frame.
ldr x29,[sp],#16
ret
// aes_hw_ctr32_encrypt_blocks: AES-CTR keystream generation and XOR, with a
// 32-bit counter.
//
// Register usage, as read from the code below:
//   x0 - input pointer
//   x1 - output pointer
//   x2 - number of 16-byte blocks
//   x3 - key schedule; the round count is read from byte offset 240
//   x4 - 16-byte counter block; it is only read, never written back
// Only the last 32-bit word of the counter block is incremented. It is
// handled as a big-endian value (byte-swapped with rev unless __ARMEB__ is
// defined) and wraps modulo 2^32 without carrying into the other words.
// Three blocks are processed per main-loop iteration; Lctr32_tail handles a
// remainder of one or two blocks.
// NOTE(review): no path for a block count of zero is visible here; the
// tail appears to assume at least one block - confirm with callers.
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
.align 5
_aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
// w8 = counter word (last 4 bytes of the counter block).
ldr w8, [x4, #12]
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
// These flags are consumed by the csel and by "b.ls Lctr32_tail" below.
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
// The last five round keys stay resident in v20-v23 and v7.
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
// x12 = input stride used in the tail: 0 when fewer than two blocks.
csel x12,xzr,x12,lo
#ifndef __ARMEB__
rev w8, w8
#endif
// v6 keeps the pristine counter block as a template; v0, v1 and v18 are the
// three working counter blocks.
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
// Two blocks or fewer: skip the 3x loop.
b.ls Lctr32_tail
rev w12, w8
sub x2,x2,#3 // bias
mov v18.s[3],w12
b Loop3x_ctr32
.align 4
Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_ctr32
// Finish the current three blocks in v4, v5 and v17 while v0, v1 and v18
// are re-seeded from the template for the next iteration.
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
// Pre-mix the input with the last round key so that one eor after the final
// aese produces the output block.
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
eor v19.16b,v19.16b,v7.16b
rev w9,w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
// Flags still come from "subs x2,x2,#3" above.
b.hs Loop3x_ctr32
// Undo the bias: x2 becomes the number of blocks still to do (0, 1 or 2).
adds x2,x2,#3
b.eq Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
// Tail: up to two blocks in v0 and v1. With a single block left, x12 is 0,
// so the second load below re-reads the same input block and its result is
// not stored.
Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq Lctr32_done
st1 {v3.16b},[x1]
Lctr32_done:
// Only x29 is reloaded; the saved x30 is discarded with the frame.
ldr x29,[sp],#16
ret
#endif
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,343 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// gcm_init_neon: precompute the "twisted" hash key used by the NEON GHASH
// routines in this file.
//   x0 - output table; only the first 16 bytes (Htable[0]) are written
//   x1 - 16-byte hash key H
// H is rotated left by one bit and conditionally XORed with the 0xc2...01
// constant, depending on the broadcast carry bit. No stack frame is used and
// nothing is returned.
.globl _gcm_init_neon
.private_extern _gcm_init_neon
.align 4
_gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
// gcm_gmult_neon: multiply the GHASH accumulator Xi by the hash key.
//   x0 - 16-byte Xi
//   x1 - table produced by gcm_init_neon; the twisted H is read as two
//        64-bit halves into v5 and v6
// This entry point only sets up registers (Xi byte-swapped into v3, mask
// constants from Lmasks in v24/v25, Karatsuba term v5^v6 in v7) and then
// jumps into the shared multiply body at Lgmult_neon, which is defined
// inside gcm_ghash_neon.
// NOTE(review): x3 is set to 16, presumably so that the shared loop treats
// this as a single 16-byte block and exits after one pass - the loop exit is
// outside this block, confirm there.
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
.align 4
_gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, Lmasks@PAGE // load constants
add x9, x9, Lmasks@PAGEOFF
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
mov x3, #16
b Lgmult_neon
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
.align 4
_gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, Lmasks@PAGE // load constants
add x9, x9, Lmasks@PAGEOFF
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
subs x3, x3, #16
bne Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
// Read-only constant data for the NEON GHASH code in this file.
.section __TEXT,__const
.align 4
// Lmasks: four 64-bit masks. Per the comments in the multiplication code, $kN
// is a mask with the bottom N bits set; k48/k32/k16 are applied to the high
// halves of the partial products t0/t1/t2, and the all-zero k0 entry matches
// the clearing of the high half of t3.
// NOTE(review): the instructions that load this table are outside this
// excerpt (presumably into v24/v25) - confirm against the function prologue.
Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
// NUL-terminated ASCII identification string:
// "GHASH for ARMv8, derived from ARMv4 version by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
// Realign after the byte string. The directive is emitted twice by the
// generator; the repeat has no further effect.
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

View File

@ -0,0 +1,249 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// gcm_init_v8: precompute the GHASH key table used by gcm_gmult_v8 and
// gcm_ghash_v8.
//   x0 - output table (Htable); three 16-byte entries are written
//   x1 - input hash key H (16 bytes)
// Htable[0] receives the "twisted" H, Htable[1] the packed Karatsuba
// pre-processed values for H and H^2, and Htable[2] receives H^2.
// NOTE(review): the C prototype is not visible in this file - confirm the
// argument types against the caller.
.globl _gcm_init_v8
.private_extern _gcm_init_v8
.align 4
_gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8 //swap the 64-bit halves of H
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63 //top bit of each 64-bit half
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b //keep the constant only if the carry bit was set
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d //low half squared
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d //high half squared
pmull v1.1q,v16.1d,v16.1d //(lo+hi) squared
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b //v22 = reduced H^2
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
// gcm_gmult_v8: multiply Xi by the hash key and reduce, in place.
//   x0 - Xi (16 bytes); read, then overwritten with the result
//   x1 - key table; the first two 16-byte entries are loaded (twisted H and
//        the packed Karatsuba pre-processed value)
// The multiplication is one Karatsuba step (three pmull/pmull2) followed by
// a two-phase reduction using the 0xc2.0 constant built in v19.
// NOTE(review): the C prototype is not visible in this file - confirm the
// argument types against the caller.
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
.align 4
_gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
// Byte-reverse each 64-bit half of Xi unless built for big-endian.
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8 //swap the 64-bit halves of Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
// Undo the load-time byte reversal and half swap before storing.
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
// gcm_ghash_v8: fold a run of input blocks into the GHASH accumulator Xi.
//   x0 - Xi (16 bytes); read, then overwritten with the result
//   x1 - key table: twisted H, packed Karatsuba value, H^2 (three entries)
//   x2 - input pointer
//   x3 - input length in bytes
// Loop_mod2x_v8 consumes two 16-byte blocks per iteration using H^2 and H;
// Lodd_tail_v8 handles a single remaining block using H only.
// NOTE(review): the first input block is loaded unconditionally and no
// length check is visible, so the length is presumably a non-zero multiple
// of 16 - confirm against callers.
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
.align 4
_gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
// Byte-reverse each 64-bit half unless built for big-endian.
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
// The flags tested here still come from the subs above; the intervening
// instructions do not set flags.
b.lo Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b Loop_mod2x_v8
.align 4
// Main loop: two blocks per iteration. The products for the second block
// (v4, v5, v6 - by H) are accumulated into the products for the first block
// (v0, v1, v2 - by H^2) so that a single reduction serves both.
Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
// Flags tested here are those of the subs at the top of the loop.
b.hs Loop_mod2x_v8 //there was at least 32 more bytes
// Loop exit: finish the reduction that was folded into v3 above, and rebuild
// the values the odd-tail code expects.
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq Ldone_v8 //is x3 zero?
// Single remaining block: Xi = (Xi ^ block) · H, then reduce.
Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
// Common exit: undo the load-time byte reversal and rotation, store Xi.
Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
// NUL-terminated ASCII identification string:
// "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
// Realign after the byte string. The directive is emitted twice by the
// generator; the repeat has no further effect.
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,758 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
// with |argv|, then saves the callee-saved registers into |state|. It returns
// the result of |func|. The |unwind| argument is unused.
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
// const uint64_t *argv, size_t argc,
// uint64_t unwind);
//
// Register use on entry: x0 = |func|, x1 = |state|, x2 = |argv|, x3 = |argc|.
// At most eight values are read from |argv| (into x0-x7); entries beyond the
// eighth are not loaded. |state| is read and written as d8-d15 followed by
// x19-x28, then one more 8-byte field that is zeroed if |func| changed x29.
.globl _abi_test_trampoline
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
// x19-x28 (80 bytes)
// x1 (8 bytes)
// padding (8 bytes)
stp x29, x30, [sp, #-176]!
mov x29, sp
// Save callee-saved registers and |state|.
stp d8, d9, [sp, #16]
stp d10, d11, [sp, #32]
stp d12, d13, [sp, #48]
stp d14, d15, [sp, #64]
stp x19, x20, [sp, #80]
stp x21, x22, [sp, #96]
stp x23, x24, [sp, #112]
stp x25, x26, [sp, #128]
stp x27, x28, [sp, #144]
str x1, [sp, #160]
// Load registers from |state|, with the exception of x29. x29 is the
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
// mandate that x29 always point to a frame. iOS64 does so, which means
// we cannot fill x29 with entropy without violating ABI rules
// ourselves. x29 is tested separately below.
ldp d8, d9, [x1], #16
ldp d10, d11, [x1], #16
ldp d12, d13, [x1], #16
ldp d14, d15, [x1], #16
ldp x19, x20, [x1], #16
ldp x21, x22, [x1], #16
ldp x23, x24, [x1], #16
ldp x25, x26, [x1], #16
ldp x27, x28, [x1], #16
// Move parameters into temporary registers.
// x9 = |func|, x10 = |argv| cursor, x11 = remaining |argc|.
mov x9, x0
mov x10, x2
mov x11, x3
// Load parameters into registers.
// One argument is loaded per step; the chain exits as soon as the remaining
// count reaches zero, and falls through after the eighth argument.
cbz x11, Largs_done
ldr x0, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x1, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x2, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x3, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x4, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x5, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x6, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x7, [x10], #8
Largs_done:
// Call |func|. Its return value stays in x0 and becomes our return value.
blr x9
// Reload |state| and store registers.
ldr x1, [sp, #160]
stp d8, d9, [x1], #16
stp d10, d11, [x1], #16
stp d12, d13, [x1], #16
stp d14, d15, [x1], #16
stp x19, x20, [x1], #16
stp x21, x22, [x1], #16
stp x23, x24, [x1], #16
stp x25, x26, [x1], #16
stp x27, x28, [x1], #16
// |func| is required to preserve x29, the frame pointer. We cannot load
// random values into x29 (see comment above), so compare it against the
// expected value and zero the field of |state| if corrupted.
// x29 was set equal to sp in the prologue, so sp is the expected value.
mov x9, sp
cmp x29, x9
b.eq Lx29_ok
str xzr, [x1]
Lx29_ok:
// Restore callee-saved registers.
ldp d8, d9, [sp, #16]
ldp d10, d11, [sp, #32]
ldp d12, d13, [sp, #48]
ldp d14, d15, [sp, #64]
ldp x19, x20, [sp, #80]
ldp x21, x22, [sp, #96]
ldp x23, x24, [sp, #112]
ldp x25, x26, [sp, #128]
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret
// abi_test_clobber_xN: one function per general-purpose register. Each one
// writes zero to xN and returns, touching nothing else. Functions exist for
// x0-x17 and x19-x29; none is generated for x18.
// NOTE(review): x18 is presumably omitted because the platform reserves it -
// confirm against the generator script.
.globl _abi_test_clobber_x0
.private_extern _abi_test_clobber_x0
.align 4
_abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr
ret
.globl _abi_test_clobber_x1
.private_extern _abi_test_clobber_x1
.align 4
_abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr
ret
.globl _abi_test_clobber_x2
.private_extern _abi_test_clobber_x2
.align 4
_abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr
ret
.globl _abi_test_clobber_x3
.private_extern _abi_test_clobber_x3
.align 4
_abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr
ret
.globl _abi_test_clobber_x4
.private_extern _abi_test_clobber_x4
.align 4
_abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr
ret
.globl _abi_test_clobber_x5
.private_extern _abi_test_clobber_x5
.align 4
_abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr
ret
.globl _abi_test_clobber_x6
.private_extern _abi_test_clobber_x6
.align 4
_abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr
ret
.globl _abi_test_clobber_x7
.private_extern _abi_test_clobber_x7
.align 4
_abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr
ret
.globl _abi_test_clobber_x8
.private_extern _abi_test_clobber_x8
.align 4
_abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr
ret
.globl _abi_test_clobber_x9
.private_extern _abi_test_clobber_x9
.align 4
_abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr
ret
.globl _abi_test_clobber_x10
.private_extern _abi_test_clobber_x10
.align 4
_abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr
ret
.globl _abi_test_clobber_x11
.private_extern _abi_test_clobber_x11
.align 4
_abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr
ret
.globl _abi_test_clobber_x12
.private_extern _abi_test_clobber_x12
.align 4
_abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr
ret
.globl _abi_test_clobber_x13
.private_extern _abi_test_clobber_x13
.align 4
_abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr
ret
.globl _abi_test_clobber_x14
.private_extern _abi_test_clobber_x14
.align 4
_abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr
ret
.globl _abi_test_clobber_x15
.private_extern _abi_test_clobber_x15
.align 4
_abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr
ret
.globl _abi_test_clobber_x16
.private_extern _abi_test_clobber_x16
.align 4
_abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr
ret
.globl _abi_test_clobber_x17
.private_extern _abi_test_clobber_x17
.align 4
_abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr
ret
// x18 is skipped; the sequence continues with x19.
.globl _abi_test_clobber_x19
.private_extern _abi_test_clobber_x19
.align 4
_abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr
ret
.globl _abi_test_clobber_x20
.private_extern _abi_test_clobber_x20
.align 4
_abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr
ret
.globl _abi_test_clobber_x21
.private_extern _abi_test_clobber_x21
.align 4
_abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr
ret
.globl _abi_test_clobber_x22
.private_extern _abi_test_clobber_x22
.align 4
_abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr
ret
.globl _abi_test_clobber_x23
.private_extern _abi_test_clobber_x23
.align 4
_abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr
ret
.globl _abi_test_clobber_x24
.private_extern _abi_test_clobber_x24
.align 4
_abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr
ret
.globl _abi_test_clobber_x25
.private_extern _abi_test_clobber_x25
.align 4
_abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr
ret
.globl _abi_test_clobber_x26
.private_extern _abi_test_clobber_x26
.align 4
_abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr
ret
.globl _abi_test_clobber_x27
.private_extern _abi_test_clobber_x27
.align 4
_abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr
ret
.globl _abi_test_clobber_x28
.private_extern _abi_test_clobber_x28
.align 4
_abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr
ret
.globl _abi_test_clobber_x29
.private_extern _abi_test_clobber_x29
.align 4
_abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr
ret
// abi_test_clobber_dN: one function per floating-point/SIMD register d0-d31.
// Each one moves the zero register into dN with fmov and returns.
.globl _abi_test_clobber_d0
.private_extern _abi_test_clobber_d0
.align 4
_abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr
ret
.globl _abi_test_clobber_d1
.private_extern _abi_test_clobber_d1
.align 4
_abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr
ret
.globl _abi_test_clobber_d2
.private_extern _abi_test_clobber_d2
.align 4
_abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr
ret
.globl _abi_test_clobber_d3
.private_extern _abi_test_clobber_d3
.align 4
_abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr
ret
.globl _abi_test_clobber_d4
.private_extern _abi_test_clobber_d4
.align 4
_abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr
ret
.globl _abi_test_clobber_d5
.private_extern _abi_test_clobber_d5
.align 4
_abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr
ret
.globl _abi_test_clobber_d6
.private_extern _abi_test_clobber_d6
.align 4
_abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr
ret
.globl _abi_test_clobber_d7
.private_extern _abi_test_clobber_d7
.align 4
_abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr
ret
// d8-d15 are the SIMD registers that abi_test_trampoline saves and restores.
.globl _abi_test_clobber_d8
.private_extern _abi_test_clobber_d8
.align 4
_abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr
ret
.globl _abi_test_clobber_d9
.private_extern _abi_test_clobber_d9
.align 4
_abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr
ret
.globl _abi_test_clobber_d10
.private_extern _abi_test_clobber_d10
.align 4
_abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr
ret
.globl _abi_test_clobber_d11
.private_extern _abi_test_clobber_d11
.align 4
_abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr
ret
.globl _abi_test_clobber_d12
.private_extern _abi_test_clobber_d12
.align 4
_abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr
ret
.globl _abi_test_clobber_d13
.private_extern _abi_test_clobber_d13
.align 4
_abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr
ret
.globl _abi_test_clobber_d14
.private_extern _abi_test_clobber_d14
.align 4
_abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr
ret
.globl _abi_test_clobber_d15
.private_extern _abi_test_clobber_d15
.align 4
_abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr
ret
.globl _abi_test_clobber_d16
.private_extern _abi_test_clobber_d16
.align 4
_abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr
ret
.globl _abi_test_clobber_d17
.private_extern _abi_test_clobber_d17
.align 4
_abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr
ret
.globl _abi_test_clobber_d18
.private_extern _abi_test_clobber_d18
.align 4
_abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr
ret
.globl _abi_test_clobber_d19
.private_extern _abi_test_clobber_d19
.align 4
_abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr
ret
.globl _abi_test_clobber_d20
.private_extern _abi_test_clobber_d20
.align 4
_abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr
ret
.globl _abi_test_clobber_d21
.private_extern _abi_test_clobber_d21
.align 4
_abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr
ret
.globl _abi_test_clobber_d22
.private_extern _abi_test_clobber_d22
.align 4
_abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr
ret
.globl _abi_test_clobber_d23
.private_extern _abi_test_clobber_d23
.align 4
_abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr
ret
.globl _abi_test_clobber_d24
.private_extern _abi_test_clobber_d24
.align 4
_abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr
ret
.globl _abi_test_clobber_d25
.private_extern _abi_test_clobber_d25
.align 4
_abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr
ret
.globl _abi_test_clobber_d26
.private_extern _abi_test_clobber_d26
.align 4
_abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr
ret
.globl _abi_test_clobber_d27
.private_extern _abi_test_clobber_d27
.align 4
_abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr
ret
.globl _abi_test_clobber_d28
.private_extern _abi_test_clobber_d28
.align 4
_abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr
ret
.globl _abi_test_clobber_d29
.private_extern _abi_test_clobber_d29
.align 4
_abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr
ret
.globl _abi_test_clobber_d30
.private_extern _abi_test_clobber_d30
.align 4
_abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr
ret
.globl _abi_test_clobber_d31
.private_extern _abi_test_clobber_d31
.align 4
_abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr
ret
// abi_test_clobber_vN_upper: one function per SIMD register v8-v15. Each one
// writes zero to the upper 64-bit lane (d[1]) of vN and returns; the
// instruction names only that lane as its destination.
// NOTE(review): these presumably exist because only the low 64 bits of
// v8-v15 are callee-saved, so clobbering the upper half must not be reported
// as an ABI violation - confirm against the test harness.
.globl _abi_test_clobber_v8_upper
.private_extern _abi_test_clobber_v8_upper
.align 4
_abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr
ret
.globl _abi_test_clobber_v9_upper
.private_extern _abi_test_clobber_v9_upper
.align 4
_abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr
ret
.globl _abi_test_clobber_v10_upper
.private_extern _abi_test_clobber_v10_upper
.align 4
_abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr
ret
.globl _abi_test_clobber_v11_upper
.private_extern _abi_test_clobber_v11_upper
.align 4
_abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr
ret
.globl _abi_test_clobber_v12_upper
.private_extern _abi_test_clobber_v12_upper
.align 4
_abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr
ret
.globl _abi_test_clobber_v13_upper
.private_extern _abi_test_clobber_v13_upper
.align 4
_abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr
ret
.globl _abi_test_clobber_v14_upper
.private_extern _abi_test_clobber_v14_upper
.align 4
_abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr
ret
.globl _abi_test_clobber_v15_upper
.private_extern _abi_test_clobber_v15_upper
.align 4
_abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr
ret
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,790 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.code 32
#undef __thumb2__
@ Lrcon: constants for the key-expansion code, three 16-byte rows.
@   row 0 - 0x01 in every 32-bit lane; loaded into q1 as the first round constant
@   row 1 - byte-index mask loaded into q2 and used with vtbl (rotate-n-splat)
@   row 2 - 0x1b in every 32-bit lane; reloaded into q1 after the 128-bit loop
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
@ aes_hw_set_encrypt_key: expand a user key into an encryption key schedule.
@   r0 - user key bytes
@   r1 - key length in bits
@   r2 - output key schedule; the round count is stored at byte offset 240
@ Returns in r0: 0 on success, -1 if r0 or r2 is zero, -2 if the bit length
@ is below 128, above 256, or not a multiple of 64.
@ Lenc_key is a second entry point used by aes_hw_set_decrypt_key. On success
@ it leaves r12 = round count and r2 = start of schedule + 240.
@ The AES instructions are emitted as raw .byte encodings; the intended
@ mnemonic is given in the trailing comment on each such line.
@ NOTE(review): the C prototype is not visible in this file - confirm the
@ argument types against the caller.
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_encrypt_key
#endif
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
@ Argument validation. r3 holds the pending return code.
mov r3,#-1
cmp r0,#0
beq Lenc_key_abort
cmp r2,#0
beq Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt Lenc_key_abort
cmp r1,#256
bgt Lenc_key_abort
tst r1,#0x3f
bne Lenc_key_abort
adr r3,Lrcon
@ The flags from this compare select the key size below; the instructions in
@ between do not set flags.
cmp r1,#192
veor q0,q0,q0
vld1.8 {q3},[r0]!
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!
blt Loop128
beq L192
b L256
.align 4
@ 128-bit key: eight loop iterations, then two more unrolled rounds after the
@ round constant is reloaded from the table.
Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne Loop128
@ Load the third row of Lrcon (0x1b) as the next round constant.
vld1.32 {q1},[r3]
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
@ Final round key is stored without post-increment; r2 is then advanced so
@ that it ends at start of schedule + 240.
vst1.32 {q3},[r2]
add r2,r2,#0x50
mov r12,#10
b Ldone
.align 4
@ 192-bit key: load the remaining 8 key bytes, then eight loop iterations
@ (r1 is still 8), each storing 24 bytes of schedule.
L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vdup.32 q9,d7[1]
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne Loop192
mov r12,#12
add r2,r2,#0x20
b Ldone
.align 4
@ 256-bit key: load the second 16 key bytes, then seven loop iterations. The
@ loop exits from its middle, after the last round key has been stored.
L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!
Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq Ldone
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
veor q8,q8,q10
b Loop256
@ Success: store the round count after the round keys and return 0.
Ldone:
str r12,[r2]
mov r3,#0
Lenc_key_abort:
mov r0,r3 @ return value
bx lr
@ aes_hw_set_decrypt_key: build a decryption key schedule.
@ Takes the same arguments as aes_hw_set_encrypt_key (they are passed through
@ unchanged to Lenc_key). If key expansion fails, its error code is returned
@ as is. Otherwise the round keys are reversed in place: the first and last
@ keys are swapped unmodified, and every other key is passed through aesimc
@ while being swapped. Returns 0 in r0 on success.
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_decrypt_key
#endif
.align 5
_aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl Lenc_key
cmp r0,#0
bne Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16 @ post-decrement step for the pointer walking down from the end
add r0,r2,r12,lsl#4 @ end of key schedule
@ Swap the first and last round keys without transforming them.
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
@ Walk inwards from both ends, applying aesimc to each key and swapping the
@ pair, while the upper pointer is still above the lower one.
Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi Loop_imc
@ Transform the remaining middle round key.
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
Ldec_key_abort:
ldmia sp!,{r4,pc}
@ aes_hw_encrypt: encrypt one 16-byte block.
@   r0 - input block
@   r1 - output block
@   r2 - key schedule; the round count is read from byte offset 240
@ Two rounds are processed per loop iteration, with the next round keys
@ loaded alternately into q0 and q1. The last two rounds are done after the
@ loop, ending with an XOR of the final round key.
@ NOTE(review): the C prototype is not visible in this file - confirm the
@ argument types against the caller.
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_encrypt
#endif
.align 5
_aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2 @ r3 counts the rounds handled by the loop
vld1.32 {q1},[r2]!
Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
@ aes_hw_decrypt: decrypt one 16-byte block.
@   r0 - input block
@   r1 - output block
@   r2 - key schedule; the round count is read from byte offset 240
@ Same structure as aes_hw_encrypt, using aesd/aesimc in place of
@ aese/aesmc: two rounds per loop iteration, the last two rounds after the
@ loop, ending with an XOR of the final round key.
@ NOTE(review): the schedule is presumably one produced by
@ aes_hw_set_decrypt_key - confirm against the caller.
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
#ifdef __thumb2__
.thumb_func _aes_hw_decrypt
#endif
.align 5
_aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2 @ r3 counts the rounds handled by the loop
vld1.32 {q1},[r2]!
Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
@ aes_hw_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
@   r0 = input, r1 = output, r2 = length in bytes,
@   r3 = key schedule (round count at byte offset 240),
@   two further arguments are read from the caller stack:
@   r4 = pointer to the 16-byte IV, r5 = direction (0 selects decryption).
@ A length below 16 returns at once without touching the output or the IV.
@ Bytes beyond the last whole block are ignored. On the normal exit path
@ the IV buffer is overwritten with the chaining value in q6.
@ d8-d15 are saved and restored around the body.
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_cbc_encrypt
#endif
.align 5
_aes_hw_cbc_encrypt:
@ ip keeps the entry stack pointer so the stack arguments can be read
@ after the register pushes.
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
@ r2 = length - 16; r8 = post-increment step for input loads. With exactly
@ one block the step becomes 0 so r0 is not advanced by the first load.
subs r2,r2,#16
mov r8,#16
blo Lcbc_abort
moveq r8,#0
@ The flags set by this compare are consumed by "beq Lcbc_dec" further
@ down; no instruction in between updates the flags.
cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240]
and r2,r2,#-16
@ q6 = IV, q0 = first input block.
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
@ r5 = rounds - 8 from here on (2 for a 10-round key, 4 for 12 rounds).
sub r5,r5,#2
@ q10-q15 and q7 stay resident with the last seven round keys; q7 is the
@ final one.
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
beq Lcbc_dec
@ ---- Encryption ----
cmp r5,#2
@ q0 = plaintext ^ IV. q5 = rndkey[0] ^ last round key: XORing q5 into the
@ next plaintext lets the not-yet-finalised state left in q0 act as the
@ chaining input of the following block.
veor q0,q0,q6
veor q5,q8,q7
beq Lcbc_enc128
@ Longer keys: q2/q3 hold the keys at offsets 32 and 48; r7, r6, r12, r14
@ and r3 point at the keys at offsets 16, 64, 80, 96 and 112.
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b Lenter_cbc_enc
.align 4
@ One block per iteration; the store of the previous ciphertext (q6) is
@ overlapped with the first rounds of the current block.
Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
@ r5 == 4 (12-round key): skip the two extra rounds below.
beq Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
@ Count down the remaining length. When it reaches zero the input step is
@ cleared so the look-ahead load below no longer advances r0.
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
@ q8 = next plaintext block, pre-XORed with q5 two instructions later.
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
@ q6 = finished ciphertext block (state ^ last round key).
veor q6,q0,q7
@ The carry tested here still comes from the subs above.
bhs Loop_cbc_enc
vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
@ Encryption with a 10-round key: every round key stays in registers
@ (q8, q9, q2, q3, q10-q15, q7).
Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b Lenter_cbc_enc128
Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc128
vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
@ ---- Decryption ----
@ Three blocks are processed per iteration in q0, q1 and q10; fewer than
@ three remaining blocks go through Lcbc_dec_tail.
Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
@ r6 = round counter for the key-streaming loops (rounds - 6).
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo Lcbc_dec_tail
vorr q1,q10,q10
vld1.8 {q10},[r0]!
@ q2, q3 and q11 keep untouched copies of the three ciphertext blocks;
@ they are needed as chaining values after the block cipher has run.
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10
Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
@ q4, q5 and q9 = chaining value ^ last round key for the three blocks,
@ so a single XOR after the final AESD yields each plaintext.
veor q4,q6,q7
subs r2,r2,#0x30
veor q5,q2,q7
movlo r6,r2 @ r6, r6, is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
@ q6 = last ciphertext block of this group, the next chaining value.
vorr q6,q11,q11
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
@ The carry tested here comes from "subs r2,r2,#0x30" above.
bhs Loop3x_cbc_dec
@ r2 == -0x30 means nothing is left over after the last group of three.
cmn r2,#0x30
beq Lcbc_done
nop
@ Tail: one or two blocks remain, carried in q1 and q10.
Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
@ Sets Z when r2 == -0x20, which selects the single-block exit below.
cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq Lcbc_dec_one
@ Two blocks: outputs come from q1 and q10.
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b Lcbc_done
@ One block: the output comes from q10 only.
Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
@ Write the chaining value back to the IV buffer.
Lcbc_done:
vst1.8 {q6},[r4]
@ Early-exit target: skips the IV write-back.
Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
@ aes_hw_ctr32_encrypt_blocks: CTR mode with a 32-bit counter word.
@   r0 = input, r1 = output, r2 = number of 16-byte blocks,
@   r3 = key schedule (round count at byte offset 240),
@   one further argument is read from the caller stack:
@   r4 = pointer to the 16-byte counter block.
@ Only the 32-bit word at byte offset 12 of the counter block is
@ incremented (kept in r8 and inserted into lane 3 of each state vector).
@ Nothing is stored back through r4. d8-d15 are saved and restored.
@ NOTE(review): the first byte reversal of r8 is skipped on big-endian
@ builds while the later rev instructions are unconditional -- confirm the
@ intended big-endian behaviour.
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func _aes_hw_ctr32_encrypt_blocks
#endif
.align 5
_aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
ldr r5,[r3,#240]
@ r8 = counter word, q0 = full counter block.
ldr r8, [r4, #12]
vld1.32 {q0},[r4]
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
@ r12 = input step used by the tail; cleared below when fewer than two
@ blocks were requested.
mov r12,#16
@ The flags from this compare feed both "movlo" and "bls Lctr32_tail"
@ below; nothing in between updates the flags.
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
@ r5 = rounds - 6: round counter for the key-streaming loops.
sub r5,r5,#2
@ q12-q15 and q7 stay resident with the last five round keys; q7 is the
@ final one.
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8
#endif
@ q0, q1, q10 = counter blocks for counter+0, +1, +2; q6 keeps the
@ original block as a template for later counters.
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
@ Two blocks or fewer: handled entirely by the tail.
bls Lctr32_tail
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b Loop3x_ctr32
.align 4
@ Main loop: three counter blocks encrypted in parallel per iteration.
Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_ctr32
@ From here the three states move to q4, q5 and q9, while q0, q1 and q10
@ are re-seeded from the template in q6 for the next iteration.
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]!
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
@ The three input blocks (q2, q3, q11) are pre-XORed with the last round
@ key, so one more XOR with the state gives the output.
veor q2,q2,q7
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
@ Insert the next three counter values into lane 3 of q0, q1 and q10.
vmov.32 d1[1], r9
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
@ The carry from this subtraction decides the "bhs" at the loop end.
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
veor q2,q2,q4
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs Loop3x_ctr32
@ Undo the bias: r2 = number of blocks still to do (0, 1 or 2).
adds r2,r2,#3
beq Lctr32_done
@ With a single block left the input step is 0, so the second input load
@ in the tail re-reads the same block instead of the one after it.
cmp r2,#1
mov r12,#16
moveq r12,#0
@ Tail: one or two blocks, carried in q0 and q1.
Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
@ The first output block is always stored; the second only when more
@ than one block remained.
cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq Lctr32_done
vst1.8 {q3},[r1]
Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#endif
#endif // !OPENSSL_NO_ASM

View File

@ -0,0 +1,982 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
@ Literal word holding the distance from Lbn_mul_mont to the
@ OPENSSL_armcap_P slot. bn_mul_mont adds it to its own address to reach
@ the capability data without an absolute address.
#if __ARM_MAX_ARCH__>=7
.align 5
LOPENSSL_armcap:
.word OPENSSL_armcap_P-Lbn_mul_mont
#endif
@ bn_mul_mont: word-by-word Montgomery multiplication, integer-only path,
@ with a dispatch to bn_mul8x_mont_neon when NEON is usable.
@   r0 = rp (result), r1 = ap, r2 = bp, r3 = np (modulus),
@   [sp] = pointer to n0, [sp,#4] = num (length in 32-bit words).
@ Returns r0 = 1 from the integer path, or r0 = 0 without writing rp when
@ num < 2.
@ The NEON routine is chosen when num is a multiple of 8 and the
@ capability word has the ARMV7_NEON bit set.
@ Frame used by the integer path, relative to r0 = &tp[num-1]:
@   [r0,#12*4] saved rp, [r0,#13*4] current bp pointer (saved r2 slot),
@   [r0,#14*4] pointer to n0, replaced by the n0 value,
@   [r0,#15*4] end marker for bp (overwrites the num stack argument).
@ Note that the last two are the stack argument slots of the caller.
.globl _bn_mul_mont
.private_extern _bn_mul_mont
#ifdef __thumb2__
.thumb_func _bn_mul_mont
#endif
.align 5
_bn_mul_mont:
Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
@ num not a multiple of 8: stay on the integer path.
tst ip,#7
bne Lialu
@ Locate the capability word relative to this function.
adr r0,Lbn_mul_mont
ldr r2,LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
@ Reload r0 and r2, which were used as scratch above.
ldmia sp, {r0,r2}
beq Lialu
@ Drop the two pushed words so the NEON routine sees the entry stack.
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
@ num < 2: undo the push and return 0.
movlt r0,#0
addlt sp,sp,#2*4
blt Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
@ First pass (bp[0]): tp = (ap*bp[0] + np*m) shifted down by one word.
@ r10:r11 carry the ap product chain, r12:r14 the np product chain.
L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
@ One iteration per remaining word of bp.
Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
@ Same as L1st, but the previous tp[j] is added in as well.
Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
@ Stop once the bp pointer has reached the saved end marker.
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
@ subs of a register from itself leaves C = 1, which the sbcs chain below
@ treats as "no borrow".
subs r7,r7,r7 @ "clear" carry flag
@ rp = tp - np, word by word with borrow.
Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
@ If the subtraction borrowed (carry clear), tp is copied over rp;
@ otherwise the value already in rp is stored back unchanged. The choice
@ is made with a conditional move, not a branch. Each tp word is
@ overwritten with the value of sp as it is consumed.
Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
@ bn_mul8x_mont_neon: NEON Montgomery multiplication, reached by a branch
@ from bn_mul_mont with r0-r3 as passed by the caller (rp, ap, bp, np) and
@ the two stack arguments still at the entry stack pointer.
@ After the loads below: r4 = pointer to n0, r5 = num.
@ Each 32-bit multiplier word is split into two 16-bit halves, one per
@ 32-bit lane, by vzip.16 against a zeroed register; the existing comments
@ call such a value "smashed". q6-q13 hold eight accumulator columns.
@ r4-r11 and d8-d15 are saved; ip remembers the stack pointer after the
@ saves so the frame can be dropped with a single move at the end.
#if __ARM_MAX_ARCH__>=7
#ifdef __thumb2__
.thumb_func bn_mul8x_mont_neon
#endif
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
@ num > 8 takes the general path; otherwise num is handled as 8.
cmp r5,#8
bhi LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
@ Reserve 16*num bytes below sp, rounded down to a 64-byte boundary.
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
@ d30 = n0 word.
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
@ First pass uses vmull (no accumulate): columns = a[0..7] * b[0].
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
@ d29 = low word of column 0 times n0: the Montgomery multiplier for
@ this pass, smashed by the vzip.16 below.
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
@ d4-d7 = n[0..7].
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
@ r9 = num - 1: remaining passes, one per further word of b.
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
@ The vmov chain shifts every column down by one position; the old
@ column 0 goes to q5, where its carry is extracted into d10.
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b LNEON_outer8
@ LNEON_outer8: remaining passes of the num == 8 case, one word of b per
@ iteration (r9 counts down from num - 1). Same steps as the first pass,
@ except that products accumulate (vmlal) into the shifted columns and the
@ carry in d10 from the previous pass is added to the new column 0.
.align 4
LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
@ The flags set here are consumed by "bne LNEON_outer8" at the bottom;
@ the NEON instructions in between leave them alone.
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne LNEON_outer8
@ All passes done: start the carry propagation for column 0 and set up
@ the shared tail (r7 = output pointer at sp, r8 = word count,
@ r6 = sp + 96).
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b LNEON_tail_entry
@ LNEON_8n: set-up for num > 8 (bn_mul_mont only dispatches here for
@ multiples of 8). Clears the eight accumulator columns q6-q13, reserves
@ 128 + 16*num bytes below sp rounded down to a 64-byte boundary, and
@ zero-fills the frame from sp + 256 upwards.
.align 4
LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
@ Each iteration writes 128 zero bytes; r8 counts num - 8 down in steps
@ of 8.
LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne LNEON_8n_init
@ r6 = read pointer into the column storage, d0-d3 = a[0..7],
@ r10 = stash pointer for smashed words (sp + 8), d30 = n0 word,
@ r9 = words of b still to process.
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b LNEON_8n_outer
@ LNEON_8n_outer: one iteration consumes eight words of b.
@ For each word in turn:
@   - the smashed b word multiplies a[0..7] (d0-d3) into the columns,
@   - the Montgomery multiplier is derived from the lowest column and n0
@     (vshl/vadd/vmul with d30) and multiplies n[0..7] (d4-d7),
@   - the carry of the finished lowest column is added to the next column
@     and a fresh column is loaded through r6.
@ The role of "lowest column" rotates through q6..q13 from step to step.
@ The smashed b and multiplier words are stashed on the stack (first b
@ word at sp, everything else through r10 starting at sp + 8) so that
@ LNEON_8n_inner can replay them on the remaining words of a and n.
.align 4
LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
@ r7 = write pointer for finished columns, used by the inner loop.
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
@ Load the next eight words of a for the inner loop.
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
@ r8 = num - 8: words of a and n left for the inner loop.
sub r8,r5,#8
b LNEON_8n_inner
@ LNEON_8n_inner: for every further group of eight words of a (d0-d3) and
@ n (d4-d7), replay the eight stashed pairs of smashed b and multiplier
@ words (read back through r10) into the columns. Each finished column is
@ written out through r7 and the next stored column is read through r6.
@ The flags set by the subs at the top stay live for the whole iteration:
@ they drive every "it ne / addne", the "it eq / subeq" rewind of r1 and
@ the closing bne. No instruction in between updates the flags.
.align 4
LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
@ Last inner iteration only: move r1 back by num words so the load below
@ fetches a[0..7] again for the next outer iteration.
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne LNEON_8n_inner
@ End of one outer iteration: flush the live columns q6-q12 through r7,
@ then reload q6-q13 from sp + 128 as the starting columns of the next
@ one. q2 and q3 are zeroed for the frame wipe further down.
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
@ r9 = words of b still to process; the flags are used by the itt block.
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne LNEON_8n_outer
@ All of b consumed. The four stores below zero the first 128 bytes of
@ the frame and advance sp by 128 through the post-increments. They are
@ interleaved with the start of the carry propagation for column 0.
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b LNEON_tail_entry
@ LNEON_tail: carry propagation shared by both NEON paths.
@ Each column register holds two 64-bit partial sums. Per column, the
@ running carry in d10 is added to the low half, that result shifted right
@ by 16 is added to the high half, and the high half shifted right by 16
@ becomes the next carry. vzip.16 then packs the two low 16-bit pieces
@ into one 32-bit result word, which is stored through r7.
@ Eight words are produced per iteration; r8 counts the words left and
@ r6 streams in the next stored columns.
.align 4
LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
@ Entry point used after the multiplication loops, which have already
@ carried out the steps above for column 0.
LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
@ r1 = sp. Subtracting zero cannot borrow, so C is left set, which the
@ sbcs chain in LNEON_sub treats as "no borrow".
subs r1,sp,#0 @ clear carry flag
@ r2 = sp + 4*num: end of the packed result words.
add r2,sp,r5,lsl#2
@ LNEON_sub: final subtraction rp = tp - np, four words per iteration,
@ with the borrow chained through the carry flag. tp is read through r1
@ (starting at sp), np through r3, and the result written through r0.
LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
@ LNEON_copy_n_zap: eight words per iteration. When the subtraction
@ borrowed (carry clear) the tp words replace the rp words; otherwise the
@ rp words are stored back unchanged. The choice is made with conditional
@ moves, not branches. The frame is overwritten with zeros from q0/q1 as
@ the loop advances: tp itself through r1, the area above it through r3.
LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
@ Fetch the second group of four tp words, then step r1 back to the start
@ of this 32-byte chunk so the wipe below covers all eight words.
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_copy_n_zap
@ Drop the frame using the stack pointer remembered in ip and restore the
@ saved registers.
@ NOTE(review): r0 is not assigned a return value on this path; at this
@ point it holds the rp pointer advanced past the result -- confirm that
@ callers only test it for non-zero.
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm _OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol _OPENSSL_armcap_P
.long 0
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,258 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
@ gcm_init_neon: precompute the twisted hash key used by the NEON GHASH code.
@ In:  r1 = pointer to the 16-byte hash key H (read as two 64-bit halves)
@      r0 = pointer to 16 bytes of output
@ Out: the twisted H is stored through r0; nothing is returned in registers.
@ NOTE(review): the C-level prototype is not visible in this file - confirm
@ the argument types against the caller.
.globl _gcm_init_neon
.private_extern _gcm_init_neon
#ifdef __thumb2__
.thumb_func _gcm_init_neon
#endif
.align 4
_gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
@ Replicate byte lane 7 of d7 into every lane of q9; the arithmetic shift by
@ 7 below turns its most significant bit into an all-ones or all-zeros mask.
vdup.8 q9,d7[7]
@ Keep the bit that the 1-bit left shift pushes out of d6 so that it can be
@ OR-ed into d7 afterwards.
vshr.u64 d26,d6,#63
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
@ The constant in q8 survives only when the carry mask is set.
vand q8,q8,q9
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
@ gcm_gmult_neon: multiply the 16-byte value Xi by the twisted H, one block.
@ In:  r0 = pointer to Xi, r1 = pointer to the twisted H.
@ This entry point only loads the operands and the d29-d31 masks, sets
@ r3 = 16 and branches to Lgmult_neon inside _gcm_ghash_neon. That shared
@ code does the multiplication, writes Xi back through r0 and returns to
@ the caller of this routine.
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
#ifdef __thumb2__
.thumb_func _gcm_gmult_neon
#endif
.align 4
_gcm_gmult_neon:
@ Both loads post-increment r0, leaving it 16 bytes past Xi; the shared
@ epilogue subtracts 16 again before storing the result.
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
@ d29, d30 and d31 are the masks that the shared body applies to the high
@ halves of its partial products.
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
@ The shared loop subtracts 16 from r3 and repeats while it is nonzero, so
@ r3 = 16 yields exactly one pass.
mov r3,#16
b Lgmult_neon
@ gcm_ghash_neon: fold a buffer into the GHASH accumulator Xi.
@ In:  r0 = pointer to the 16-byte Xi (updated in place)
@      r1 = pointer to the twisted H
@      r2 = pointer to the input data
@      r3 = input length in bytes
@ The loop subtracts 16 from r3 per block and exits only when r3 reaches
@ exactly zero, so r3 must be a nonzero multiple of 16.
@ Lgmult_neon is also entered from _gcm_gmult_neon with r3 = 16.
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
#ifdef __thumb2__
.thumb_func _gcm_ghash_neon
#endif
.align 4
_gcm_ghash_neon:
@ Both loads post-increment r0, leaving it 16 bytes past Xi until the
@ epilogue steps it back.
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
@ Per-block loop: each pass consumes 16 bytes from r2.
Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
@ Shared multiply-and-reduce body. The operand is q3 (d6, d7).
Lgmult_neon:
@ First 64x64 carry-less product, d26 * d6 -> q0, assembled from 8-bit
@ polynomial multiplies (vmull.p8) of byte-rotated copies of the operands.
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
@ Second product, d28 * (d6 ^ d7) -> q1: the Karatsuba middle term. d6 is
@ overwritten with d6 ^ d7 here; d7 is still intact for the third product.
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
@ Third product, d27 * d7 -> q2.
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
@ Combine the three partial products into q0 (low) and q2 (high).
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
@ r3 counts the remaining bytes. The branch tests for nonzero only, so a
@ length that is not a multiple of 16 would never hit the exit condition.
subs r3,#16
bne Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
@ r0 was advanced by 16 when Xi was loaded; step back before storing.
sub r0,#16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

View File

@ -0,0 +1,256 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.code 32
#undef __thumb2__
@ gcm_init_v8: build the key table used by the PMULL-based GHASH routines.
@ In:  r1 = pointer to the 16-byte hash key H
@      r0 = pointer to the output table; 48 bytes are written
@ Out: table entry 0 = twisted H (q12), entry 1 = packed Karatsuba
@      pre-processed halves (q13), entry 2 = H^2 (q14).
@ The .byte sequences are pre-encoded pmull/pmull2 instructions; the
@ mnemonic each one stands for is given in its trailing comment.
.globl _gcm_init_v8
.private_extern _gcm_init_v8
#ifdef __thumb2__
.thumb_func _gcm_init_v8
#endif
.align 4
_gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
@ Two-phase reduction using the constant held in q11.
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
bx lr
@ gcm_gmult_v8: multiply the 16-byte value Xi by H, one block.
@ In:  r0 = pointer to Xi (read, then overwritten with the product)
@      r1 = pointer to the key table; the first 32 bytes are read into
@           q12 (twisted H) and q13
@ The .byte sequences are pre-encoded pmull/pmull2 instructions named in
@ their trailing comments.
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
#ifdef __thumb2__
.thumb_func _gcm_gmult_v8
#endif
.align 4
_gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
@ Undo the byte reversal and half-swap applied on load before storing.
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
bx lr
@ gcm_ghash_v8: fold a buffer into the GHASH accumulator Xi using PMULL.
@ In:  r0 = pointer to the 16-byte Xi (updated in place)
@      r1 = pointer to the key table; 48 bytes are read into q12, q13, q14
@      r2 = pointer to the input data
@      r3 = input length in bytes
@ The main loop handles 32 bytes per pass using H and H^2; a remaining
@ single block is handled at Lodd_tail_v8.
@ d8-d15 are saved and restored here because q4-q7 are used as scratch.
@ The .byte sequences are pre-encoded pmull/pmull2 instructions named in
@ their trailing comments.
@ NOTE(review): the code appears to assume r3 is a nonzero multiple of 16;
@ confirm against the caller.
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
#ifdef __thumb2__
.thumb_func _gcm_ghash_v8
#endif
.align 4
_gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
@ The flags set here are consumed by the moveq and blo further down; the
@ vector instructions in between do not modify them.
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b Loop_mod2x_v8
.align 4
@ Two blocks per pass: the block in q3 is multiplied by H^2 (q14) and
@ accumulated with the H (q12) products of the following block, which were
@ started at the end of the previous pass.
Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
@ This branch still uses the flags from the subs at the top of the loop.
bhs Loop_mod2x_v8 @ there was at least 32 more bytes
@ Loop exit: undo the speculative work done for a pass that will not run.
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq Ldone_v8 @ is r3 zero?
@ One remaining block: plain multiplication by H, as in _gcm_gmult_v8.
Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,376 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.syntax unified
.text
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ with |argv|, then saves the callee-saved registers into |state|. It returns
@ the result of |func|. The |unwind| argument is unused.
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
@ const uint32_t *argv, size_t argc,
@ int unwind);
@ On entry r0 = func, r1 = state, r2 = argv, r3 = argc. The value |func|
@ leaves in r0 is not touched after the call and so becomes the return value.
.globl _abi_test_trampoline
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
sub sp, sp, #28
@ Stack layout from here until the final unwind (offsets from sp):
@   [sp, #0] .. [sp, #27]  outgoing stack arguments for |func|
@   [sp, #28]              saved r0 = func
@   [sp, #32]              saved r1 = state
@   [sp, #36]              saved r2 = argv
@   [sp, #40]              saved r3 = argc
@   [sp, #44] and up       saved r4-r11 and lr, followed by d8-d15
@ Every register in AAPCS is either non-volatile or a parameter (except
@ r9 on iOS), so this code, by the actual call, loses all its scratch
@ registers. First fill in stack parameters while there are registers
@ to spare.
cmp r3, #4
bls Lstack_args_done
mov r4, sp @ r4 is the output pointer.
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
add r2, r2, #16 @ Skip four arguments.
@ Copy argv[4] .. argv[argc-1] into the reserved area. argc is not checked
@ against the 28 bytes reserved above, so the caller must keep it in range.
Lstack_args_loop:
ldr r6, [r2], #4
cmp r2, r5
str r6, [r4], #4
bne Lstack_args_loop
Lstack_args_done:
@ Load registers from |r1|.
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Load register parameters. This uses up our remaining registers, so we
@ repurpose lr as scratch space.
ldr r3, [sp, #40] @ Reload argc.
ldr lr, [sp, #36] @ Load argv into lr.
@ Dispatch on argc. The Larg_rN labels fall through, so entering at
@ Larg_rK loads argv[K] down to argv[0]; argc == 0 loads nothing.
cmp r3, #3
bhi Larg_r3
beq Larg_r2
cmp r3, #1
bhi Larg_r1
beq Larg_r0
b Largs_done
Larg_r3:
ldr r3, [lr, #12] @ argv[3]
Larg_r2:
ldr r2, [lr, #8] @ argv[2]
Larg_r1:
ldr r1, [lr, #4] @ argv[1]
Larg_r0:
ldr r0, [lr] @ argv[0]
Largs_done:
@ With every other register in use, load the function pointer into lr
@ and call the function.
ldr lr, [sp, #28]
blx lr
@ r1-r3 are free for use again. The trampoline only supports
@ single-return functions. Pass r4-r11 to the caller.
ldr r1, [sp, #32]
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Unwind the stack and restore registers.
add sp, sp, #44 @ 44 = 28+16
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
bx lr
@ Test helper: overwrites r0 with zero and returns. Nothing is saved or
@ restored.
.globl _abi_test_clobber_r0
.private_extern _abi_test_clobber_r0
.align 4
_abi_test_clobber_r0:
mov r0, #0
bx lr
@ Test helper: overwrites r1 with zero and returns. Nothing is saved or
@ restored.
.globl _abi_test_clobber_r1
.private_extern _abi_test_clobber_r1
.align 4
_abi_test_clobber_r1:
mov r1, #0
bx lr
@ Test helper: overwrites r2 with zero and returns. Nothing is saved or
@ restored.
.globl _abi_test_clobber_r2
.private_extern _abi_test_clobber_r2
.align 4
_abi_test_clobber_r2:
mov r2, #0
bx lr
@ Test helper: overwrites r3 with zero and returns. Nothing is saved or
@ restored.
.globl _abi_test_clobber_r3
.private_extern _abi_test_clobber_r3
.align 4
_abi_test_clobber_r3:
mov r3, #0
bx lr
@ Test helper: overwrites r4 with zero and returns without restoring it.
.globl _abi_test_clobber_r4
.private_extern _abi_test_clobber_r4
.align 4
_abi_test_clobber_r4:
mov r4, #0
bx lr
@ Test helper: overwrites r5 with zero and returns without restoring it.
.globl _abi_test_clobber_r5
.private_extern _abi_test_clobber_r5
.align 4
_abi_test_clobber_r5:
mov r5, #0
bx lr
@ Test helper: overwrites r6 with zero and returns without restoring it.
.globl _abi_test_clobber_r6
.private_extern _abi_test_clobber_r6
.align 4
_abi_test_clobber_r6:
mov r6, #0
bx lr
@ Test helper: overwrites r7 with zero and returns without restoring it.
.globl _abi_test_clobber_r7
.private_extern _abi_test_clobber_r7
.align 4
_abi_test_clobber_r7:
mov r7, #0
bx lr
@ Test helper: overwrites r8 with zero and returns without restoring it.
.globl _abi_test_clobber_r8
.private_extern _abi_test_clobber_r8
.align 4
_abi_test_clobber_r8:
mov r8, #0
bx lr
@ Test helper: overwrites r9 with zero and returns without restoring it.
.globl _abi_test_clobber_r9
.private_extern _abi_test_clobber_r9
.align 4
_abi_test_clobber_r9:
mov r9, #0
bx lr
@ Test helper: overwrites r10 with zero and returns without restoring it.
.globl _abi_test_clobber_r10
.private_extern _abi_test_clobber_r10
.align 4
_abi_test_clobber_r10:
mov r10, #0
bx lr
@ Test helper: overwrites r11 with zero and returns without restoring it.
.globl _abi_test_clobber_r11
.private_extern _abi_test_clobber_r11
.align 4
_abi_test_clobber_r11:
mov r11, #0
bx lr
@ Test helper: overwrites r12 with zero and returns. Nothing is saved or
@ restored.
.globl _abi_test_clobber_r12
.private_extern _abi_test_clobber_r12
.align 4
_abi_test_clobber_r12:
mov r12, #0
bx lr
@ Test helper: zeroes d0 by writing zero to both of its single-precision
@ halves (s0, s1). r0 is used as the zero source and is left as zero.
.globl _abi_test_clobber_d0
.private_extern _abi_test_clobber_d0
.align 4
_abi_test_clobber_d0:
mov r0, #0
vmov s0, r0
vmov s1, r0
bx lr
@ Test helper: zeroes d1 by writing zero to both of its single-precision
@ halves (s2, s3). r0 is used as the zero source and is left as zero.
.globl _abi_test_clobber_d1
.private_extern _abi_test_clobber_d1
.align 4
_abi_test_clobber_d1:
mov r0, #0
vmov s2, r0
vmov s3, r0
bx lr
@ Test helper: zeroes d2 by writing zero to both of its single-precision
@ halves (s4, s5). r0 is used as the zero source and is left as zero.
.globl _abi_test_clobber_d2
.private_extern _abi_test_clobber_d2
.align 4
_abi_test_clobber_d2:
mov r0, #0
vmov s4, r0
vmov s5, r0
bx lr
@ Test helper: zeroes d3 by writing zero to both of its single-precision
@ halves (s6, s7). r0 is used as the zero source and is left as zero.
.globl _abi_test_clobber_d3
.private_extern _abi_test_clobber_d3
.align 4
_abi_test_clobber_d3:
mov r0, #0
vmov s6, r0
vmov s7, r0
bx lr
@ Test helper: zeroes d4 by writing zero to both of its single-precision
@ halves (s8, s9). r0 is used as the zero source and is left as zero.
.globl _abi_test_clobber_d4
.private_extern _abi_test_clobber_d4
.align 4
_abi_test_clobber_d4:
mov r0, #0
vmov s8, r0
vmov s9, r0
bx lr
@ Test helper: zeroes d5 by writing zero to both of its single-precision
@ halves (s10, s11). r0 is used as the zero source and is left as zero.
.globl _abi_test_clobber_d5
.private_extern _abi_test_clobber_d5
.align 4
_abi_test_clobber_d5:
mov r0, #0
vmov s10, r0
vmov s11, r0
bx lr
@ Test helper: zeroes d6 by writing zero to both of its single-precision
@ halves (s12, s13). r0 is used as the zero source and is left as zero.
.globl _abi_test_clobber_d6
.private_extern _abi_test_clobber_d6
.align 4
_abi_test_clobber_d6:
mov r0, #0
vmov s12, r0
vmov s13, r0
bx lr
@ Test helper: zeroes d7 by writing zero to both of its single-precision
@ halves (s14, s15). r0 is used as the zero source and is left as zero.
.globl _abi_test_clobber_d7
.private_extern _abi_test_clobber_d7
.align 4
_abi_test_clobber_d7:
mov r0, #0
vmov s14, r0
vmov s15, r0
bx lr
@ Test helper: zeroes d8 by writing zero to both of its single-precision
@ halves (s16, s17) and returns without restoring it. r0 is used as the
@ zero source and is left as zero.
.globl _abi_test_clobber_d8
.private_extern _abi_test_clobber_d8
.align 4
_abi_test_clobber_d8:
mov r0, #0
vmov s16, r0
vmov s17, r0
bx lr
@ Test helper: zeroes d9 by writing zero to both of its single-precision
@ halves (s18, s19) and returns without restoring it. r0 is used as the
@ zero source and is left as zero.
.globl _abi_test_clobber_d9
.private_extern _abi_test_clobber_d9
.align 4
_abi_test_clobber_d9:
mov r0, #0
vmov s18, r0
vmov s19, r0
bx lr
@ Test helper: zeroes d10 by writing zero to both of its single-precision
@ halves (s20, s21) and returns without restoring it. r0 is used as the
@ zero source and is left as zero.
.globl _abi_test_clobber_d10
.private_extern _abi_test_clobber_d10
.align 4
_abi_test_clobber_d10:
mov r0, #0
vmov s20, r0
vmov s21, r0
bx lr
@ Test helper: zeroes d11 by writing zero to both of its single-precision
@ halves (s22, s23) and returns without restoring it. r0 is used as the
@ zero source and is left as zero.
.globl _abi_test_clobber_d11
.private_extern _abi_test_clobber_d11
.align 4
_abi_test_clobber_d11:
mov r0, #0
vmov s22, r0
vmov s23, r0
bx lr
@ Test helper: zeroes d12 by writing zero to both of its single-precision
@ halves (s24, s25) and returns without restoring it. r0 is used as the
@ zero source and is left as zero.
.globl _abi_test_clobber_d12
.private_extern _abi_test_clobber_d12
.align 4
_abi_test_clobber_d12:
mov r0, #0
vmov s24, r0
vmov s25, r0
bx lr
@ Test helper: zeroes d13 by writing zero to both of its single-precision
@ halves (s26, s27) and returns without restoring it. r0 is used as the
@ zero source and is left as zero.
.globl _abi_test_clobber_d13
.private_extern _abi_test_clobber_d13
.align 4
_abi_test_clobber_d13:
mov r0, #0
vmov s26, r0
vmov s27, r0
bx lr
@ Test helper: zeroes d14 by writing zero to both of its single-precision
@ halves (s28, s29) and returns without restoring it. r0 is used as the
@ zero source and is left as zero.
.globl _abi_test_clobber_d14
.private_extern _abi_test_clobber_d14
.align 4
_abi_test_clobber_d14:
mov r0, #0
vmov s28, r0
vmov s29, r0
bx lr
@ Test helper: zeroes d15 by writing zero to both of its single-precision
@ halves (s30, s31) and returns without restoring it. r0 is used as the
@ zero source and is left as zero.
.globl _abi_test_clobber_d15
.private_extern _abi_test_clobber_d15
.align 4
_abi_test_clobber_d15:
mov r0, #0
vmov s30, r0
vmov s31, r0
bx lr
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,785 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv8-a+crypto
// Constants for the key expansion in aes_hw_set_encrypt_key, read via x3:
//   row 0 - initial round constant; loaded into v1 and doubled with shl
//           after each round
//   row 1 - byte-permutation mask for tbl; loaded into v2
//   row 2 - 0x1b; loaded into v1 once the 128-bit loop has finished, for the
//           two remaining rounds
.section .rodata
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
// aes_hw_set_encrypt_key: expand a user key into an AES encryption key
// schedule.
// In:  x0 = pointer to the user key
//      w1 = key length in bits (128, 192 or 256)
//      x2 = pointer to the output key schedule
// Out: x0 = 0 on success, -1 if x0 or x2 is NULL, -2 if w1 is below 128,
//      above 256 or not a multiple of 64.
// On success the round keys are written upward from x2 and the round count
// (10, 12 or 14) is stored as a 32-bit word at offset 240 from the original
// x2. On return x2 points at that word and w12 holds the count;
// aes_hw_set_decrypt_key relies on both after calling .Lenc_key.
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
// x3 carries the return code until .Lenc_key_abort.
mov x3,#-1
cmp x0,#0
b.eq .Lenc_key_abort
cmp x2,#0
b.eq .Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt .Lenc_key_abort
cmp w1,#256
b.gt .Lenc_key_abort
// Reject lengths that are not a multiple of 64 bits.
tst w1,#0x3f
b.ne .Lenc_key_abort
// From here x3 is reused as the pointer into the constant table.
adrp x3,.Lrcon
add x3,x3,:lo12:.Lrcon
// The flags from this compare select the path at the b.lt/b.eq below; the
// instructions in between do not modify them.
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32
b.lt .Loop128
b.eq .L192
b .L256
.align 4
// 128-bit key: eight rounds in the loop, then two more with the round
// constant reloaded from the third table row.
.Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne .Loop128
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
// x2 is 160 bytes past the start here; advance it to offset 240.
add x2,x2,#0x50
mov w12,#10
b .Ldone
.align 4
// 192-bit key: the remaining 8 key bytes go into v4, and each pass of the
// loop stores 24 bytes of schedule.
.L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
.Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne .Loop192
mov w12,#12
// x2 is 208 bytes past the start here; advance it to offset 240.
add x2,x2,#0x20
b .Ldone
.align 4
// 256-bit key: the second 16 key bytes go into v4; seven passes, the last
// of which exits at the b.eq with x2 already at offset 240.
.L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
.Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq .Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b .Loop256
.Ldone:
// Store the round count at offset 240 of the schedule.
str w12,[x2]
mov x3,#0
.Lenc_key_abort:
mov x0,x3 // return value
// Only x29 is reloaded; the saved x30 slot is discarded by the post-index.
ldr x29,[sp],#16
ret
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
// aes_hw_set_decrypt_key: build an AES decryption key schedule.
// In:  x0 = pointer to the user key, w1 = key length in bits,
//      x2 = pointer to the output key schedule (passed through unchanged to
//      .Lenc_key).
// Out: x0 = 0 on success, otherwise the nonzero code returned by .Lenc_key.
// The encryption schedule is generated first; the round keys are then
// reversed in place, and aesimc is applied to every round key except the
// first and the last.
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
cmp x0,#0
b.ne .Ldec_key_abort
// .Lenc_key leaves x2 at offset 240 and the round count in w12.
sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
// Swap the first and last round keys without transforming them.
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
// Walk inward from both ends, swapping pairs and applying aesimc to each.
.Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi .Loop_imc
// The two pointers have met: transform the middle round key in place.
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
.Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
// aes_hw_encrypt: encrypt a single 16-byte block.
// In:  x0 = pointer to the input block
//      x1 = pointer to the output block
//      x2 = pointer to the key schedule; the round count is read from
//           offset 240
// Round keys are consumed alternately through v0 and v1, two rounds per
// loop pass.
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
// The last two rounds are handled after the loop.
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
.Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
// Final round: no aesmc, followed by XOR with the last round key.
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_hw_encrypt,.-aes_hw_encrypt
// aes_hw_decrypt: decrypt a single 16-byte block.
// In:  x0 = pointer to the input block
//      x1 = pointer to the output block
//      x2 = pointer to the key schedule; the round count is read from
//           offset 240
// Same structure as aes_hw_encrypt, using aesd/aesimc in place of
// aese/aesmc.
// NOTE(review): presumably x2 must be a schedule produced by
// aes_hw_set_decrypt_key - confirm against callers.
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
// The last two rounds are handled after the loop.
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
.Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
// Final round: no aesimc, followed by XOR with the last round key.
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_hw_decrypt,.-aes_hw_decrypt
// aes_hw_cbc_encrypt: AES-CBC encryption or decryption.
// In:  x0 = pointer to the input
//      x1 = pointer to the output
//      x2 = length in bytes
//      x3 = pointer to the key schedule (round count read from offset 240)
//      x4 = pointer to the 16-byte IV; read on entry, rewritten on exit
//      w5 = nonzero to encrypt, zero to decrypt
// A length below 16 returns immediately without touching the output or the
// IV. Otherwise only whole 16-byte blocks are processed.
// Encryption is done one block at a time, with separate paths for 10-round
// keys and for 12/14-round keys. Decryption handles three blocks per pass,
// with a tail for one or two remaining blocks.
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo .Lcbc_abort
// x8 is the input post-increment; it becomes zero when only one block is
// left so that the look-ahead load does not step past the input.
csel x8,xzr,x8,eq
// The flags from this compare are consumed by the b.eq .Lcbc_dec below;
// none of the instructions in between modify them.
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
b.eq .Lcbc_dec
// Encryption. w5 is the round count minus 8, so 2 means a 10-round key.
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
// v5 = first round key XOR last round key. It is XORed into each newly
// loaded plaintext block; the next state is that value XOR v0, where v0 is
// the previous cipher state before its final round-key XOR.
eor v5.16b,v16.16b,v7.16b
b.eq .Lcbc_enc128
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
// w5 == 4 means a 12-round key, which skips the two extra rounds below.
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq .Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
.Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
// Encryption with a 10-round key: all round keys stay in registers.
.Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
// Decryption. Up to three blocks are in flight in v0, v1 and v18, and
// copies of the ciphertext are kept in v2, v3 and v19 for the chaining XOR.
.Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo .Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
.Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
// Fold the last round key into the chaining values ahead of time.
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
// This branch uses the flags from the subs x2,x2,#0x30 above.
b.hs .Loop3x_cbc_dec
// x2 == -0x30 means the input was consumed exactly by the 3x loop.
cmn x2,#0x30
b.eq .Lcbc_done
nop
// Tail: one or two blocks remain, held in v1 and v18.
.Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
// x2 == -0x20 selects the single-block case at the b.eq below.
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq .Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b .Lcbc_done
.Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
.Lcbc_done:
// Write the updated IV back for the next call.
st1 {v6.16b},[x4]
.Lcbc_abort:
// Only x29 is reloaded; the saved x30 slot is discarded by the post-index.
ldr x29,[sp],#16
ret
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
// aes_hw_ctr32_encrypt_blocks: AES counter-mode encryption using the AESE/AESMC
// instructions, three blocks per main-loop iteration plus a 1-2 block tail.
// Register usage as read by the code below:
//   x0 - input pointer (16-byte blocks are loaded from it)
//   x1 - output pointer (16-byte blocks are stored to it)
//   x2 - number of 16-byte blocks
//   x3 - key schedule; the 32-bit word at offset 240 sizes the round loop
//   x4 - 16-byte counter block; its last 32-bit word is the incrementing counter
// NOTE(review): with x2 == 0 the tail path still appears to store two blocks;
// presumably callers never pass a zero block count - confirm.
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
// w5 = word at key+240; used below to locate the last round keys and to size
// the round loop.
ldr w5,[x3,#240]
// w8 = last 32-bit word of the counter block, v0 = the whole counter block.
ldr w8, [x4, #12]
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
// The flags set here are consumed by the csel (lo) and b.ls further down;
// nothing in between modifies them.
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
// v7 = final round key.
ld1 {v7.4s},[x7]
// x7 -> third round key (v16/v17 already hold the first two).
add x7,x3,#32
// w6 = working copy of the round-loop counter; w5 keeps the reload value.
mov w6,w5
// x12 is the post-increment used by the tail's first input load. Zero it when
// fewer than two blocks were requested so the second load re-reads the same
// block instead of stepping past the input.
csel x12,xzr,x12,lo
#ifndef __ARMEB__
// Counter word is byte-swapped on little-endian builds before arithmetic.
rev w8, w8
#endif
// v1 = counter block + 1, v18 = counter block + 2 (filled in below),
// v6 = untouched copy of the counter block used as a template later.
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
// Two blocks or fewer: skip the 3x loop entirely.
b.ls .Lctr32_tail
rev w12, w8
sub x2,x2,#3 // bias
mov v18.s[3],w12
b .Loop3x_ctr32
.align 4
// Round loop for three blocks in parallel (v0, v1, v18). Each pass applies two
// round keys (v16, v17) and pre-loads the next two.
.Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_ctr32
// Remaining rounds, interleaved with loading three input blocks (v2, v3, v19)
// and building the next three counter blocks in v0, v1, v18. The cipher state
// moves to v4, v5 and v17 so the counter registers can be reused.
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
// Rewind the round-key pointer for the next iteration.
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
// The final round key is XORed into the input early, so the last eor of each
// block below yields input ^ last-round-key ^ state.
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
eor v19.16b,v19.16b,v7.16b
rev w9,w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
// Three more blocks consumed; the flags drive the b.hs at the loop bottom.
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs .Loop3x_ctr32
// Undo the bias: x2 is now the number of blocks left (0, 1 or 2).
adds x2,x2,#3
b.eq .Lctr32_done
// With exactly one block left, zero the post-increment so the tail's second
// load re-reads the same block rather than reading past the input.
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
// Tail: always encrypts two counter blocks (v0, v1); the second output block
// is only stored when more than one block remains.
.Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt .Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
// First input block; x0 advances by x12 (16, or 0 for a single block).
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
// Exactly one block: skip the second store.
b.eq .Lctr32_done
st1 {v3.16b},[x1]
.Lctr32_done:
// Only x29 is reloaded; x30 was saved on entry but this function contains no
// call that would overwrite it, so it is not popped (see the PAuth note above).
ldr x29,[sp],#16
ret
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,346 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// gcm_init_neon: derives the "twisted H" GHASH key used by the NEON routines.
//   x0 - output: 16 bytes written as Htable[0]
//   x1 - input: 16-byte H
// Only SIMD registers v3, v5, v16-v19 are written; no stack is used.
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
// v3 = H with its two 64-bit halves swapped.
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
// v18 = top bit of each 64-bit half of v3, i.e. the bits shifted out by the
// shl #1 below; they are swapped across halves and OR-ed back in.
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
// Keep the 0xc2....01 constant only when the broadcast carry bit is set.
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
.size gcm_init_neon,.-gcm_init_neon
// gcm_gmult_neon: multiplies the 16-byte value Xi by the twisted H, in place.
//   x0 - Xi (read here; the result is written back by the shared code)
//   x1 - twisted H (two 8-byte halves are loaded; x1 is advanced by 8)
// This entry point only sets up registers and then branches to .Lgmult_neon,
// a label inside gcm_ghash_neon. The multiply, reduction, store to [x0] and
// ret all happen there. x3 is set to 16 so that the shared loop's
// "subs x3, x3, #16" reaches zero after exactly one block.
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
// v24 and v25 receive the four 64-bit masks stored at .Lmasks.
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
// Single block: make the shared loop terminate after one pass.
mov x3, #16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
// gcm_ghash_neon: folds input blocks into the GHASH accumulator Xi using only
// 8-bit polynomial multiplies (pmull on .8b operands).
//   x0 - Xi, 16 bytes, read at entry and written at exit
//   x1 - twisted H (two 8-byte halves are loaded; x1 is advanced by 8)
//   x2 - input pointer, advanced by 16 per block
//   x3 - remaining length in bytes, decremented by 16 per block
// The loop exits only when x3 reaches exactly zero (subs/bne), so x3 is
// presumably a non-zero multiple of 16 on entry - confirm against callers.
// gcm_gmult_neon enters this code at .Lgmult_neon with v3 = Xi and x3 = 16.
// Per block, three 64x64-bit carry-less products are assembled from 8-bit
// pmulls: v0 = v5*v3, v1 = v7*(v3^v4), v2 = v6*v4, followed by Karatsuba
// post-processing and reduction.
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
// v24 = {k48, k32}, v25 = {k16, k0} (see .Lmasks).
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
.Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
.Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
// ---- Product 1: v0 = v5 * v3 (first half of H times low half of input) ----
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
// v20 = {t0.lo, t1.lo}, v21 = {t0.hi, t1.hi}; v22/v23 likewise for t2/t3.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
// Apply k48/k32 to the t0/t1 high halves and k16/k0 to the t2/t3 high halves.
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
// Re-interleave back into per-term registers t0..t3 (v16..v19).
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
// ---- Product 2: v1 = v7 * (v3 ^ v4), the Karatsuba middle term ----
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
// ---- Product 3: v2 = v6 * v4 (second half of H times high half of input) ----
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
// ---- Combine the three products (v0, v1, v2) and reduce ----
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
// One 16-byte block consumed; loop until the byte count is exactly zero.
subs x3, x3, #16
bne .Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
.size gcm_ghash_neon,.-gcm_ghash_neon
// Constant masks used by gcm_gmult_neon / gcm_ghash_neon. Both load all four
// quads with a single "ld1 {v24.2d, v25.2d}", giving v24 = {k48, k32} and
// v25 = {k16, k0}. kN has the bottom N bits set.
.section .rodata
.align 4
.Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
// NUL-terminated ASCII identification string:
// "GHASH for ARMv8, derived from ARMv4 version by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
// The directive appears twice in the generated output; the repeat has no
// additional effect.
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -0,0 +1,252 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.arch armv8-a+crypto
// gcm_init_v8: builds the GHASH key table used by gcm_gmult_v8/gcm_ghash_v8,
// using the 64x64-bit PMULL/PMULL2 instructions.
//   x0 - output table; three 16-byte entries are written:
//          Htable[0] = twisted H (v20)
//          Htable[1] = packed Karatsuba pre-processed halves (v21)
//          Htable[2] = H^2 (v22)
//   x1 - input: 16-byte H
// x0 is advanced by 16 by the first store. No stack is used.
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
// Karatsuba squaring of v20: v0 and v2 are the products of its two 64-bit
// halves with themselves, v1 is the product of the XOR of the halves.
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
// Two-phase reduction using the 0xc2.0 constant held in v19.
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
// v16 holds the pre-processed halves of twisted H, v17 those of H^2.
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
.size gcm_init_v8,.-gcm_init_v8
// gcm_gmult_v8: multiplies the 16-byte value Xi by H in place using PMULL.
//   x0 - Xi, 16 bytes, read at entry and overwritten with the product
//   x1 - key table; the first two 16-byte entries are loaded (v20, v21)
// On little-endian builds (__ARMEB__ undefined) Xi is byte-reversed within
// each 64-bit half on load and again before the store.
.globl gcm_gmult_v8
.hidden gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
// v19 becomes the reduction constant (0xe1 bytes shifted left by 57 per lane).
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_gmult_v8,.-gcm_gmult_v8
// gcm_ghash_v8: folds input blocks into the GHASH accumulator Xi using PMULL.
//   x0 - Xi, 16 bytes, read at entry and written at exit
//   x1 - key table; three 16-byte entries are loaded (v20, v21, v22)
//   x2 - input pointer
//   x3 - input length in bytes
// The main loop (.Loop_mod2x_v8) consumes 32 bytes per iteration, multiplying
// the older block by H^2 (v22) and the newer by H (v20); a single leftover
// block is handled at .Lodd_tail_v8.
// The first input block is loaded unconditionally, so x3 is presumably a
// non-zero multiple of 16 on entry - confirm against callers.
.globl gcm_ghash_v8
.hidden gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
// The flags from this subs are consumed by the csel (eq) and the b.lo below;
// the instructions in between do not modify them.
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo .Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
// Start the H * I[1] product (v4, v6 here; v5 in the loop) ahead of the loop.
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b .Loop_mod2x_v8
.align 4
// Each iteration finishes two blocks: (Xi ^ I[i]) * H^2 is accumulated with
// I[i+1] * H, reduced, and the next pair of blocks is loaded and pre-processed.
.Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
// Flags are still those of the subs at the top of the loop body.
b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
// Loop exit: v3 already had the reduced Xi folded in for a next iteration that
// will not run, so rebuild plain Xi in v0 and the plain rotated input in v3.
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq .Ldone_v8 //is x3 zero?
// One remaining 16-byte block: Xi = (Xi ^ inp) * H.
.Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
.Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_ghash_v8,.-gcm_ghash_v8
// NUL-terminated ASCII identification string:
// "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
// The directive appears twice in the generated output; the repeat has no
// additional effect.
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,761 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
// with |argv|, then saves the callee-saved registers into |state|. It returns
// the result of |func|. The |unwind| argument is unused.
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
// const uint64_t *argv, size_t argc,
// uint64_t unwind);
//
// Incoming registers as used below: x0 = func, x1 = state, x2 = argv,
// x3 = argc. At most eight arguments are forwarded (x0-x7); entries of |argv|
// beyond the eighth are never read.
.type abi_test_trampoline, %function
.globl abi_test_trampoline
.hidden abi_test_trampoline
.align 4
abi_test_trampoline:
.Labi_test_trampoline_begin:
// NOTE(review): macro comes from <openssl/arm_arch.h>; presumably signs x30
// when pointer authentication is enabled - confirm in that header.
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
// x19-x28 (80 bytes)
// x1 (8 bytes)
// padding (8 bytes)
// Total: 16 + 64 + 80 + 8 + 8 = 176 bytes.
stp x29, x30, [sp, #-176]!
mov x29, sp
// Saved callee-saved registers and |state|.
stp d8, d9, [sp, #16]
stp d10, d11, [sp, #32]
stp d12, d13, [sp, #48]
stp d14, d15, [sp, #64]
stp x19, x20, [sp, #80]
stp x21, x22, [sp, #96]
stp x23, x24, [sp, #112]
stp x25, x26, [sp, #128]
stp x27, x28, [sp, #144]
str x1, [sp, #160]
// Load registers from |state|, with the exception of x29. x29 is the
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
// mandate that x29 always point to a frame. iOS64 does so, which means
// we cannot fill x29 with entropy without violating ABI rules
// ourselves. x29 is tested separately below.
// Each ldp post-increments x1 by 16; after these nine loads x1 has advanced
// 144 bytes into |state|.
ldp d8, d9, [x1], #16
ldp d10, d11, [x1], #16
ldp d12, d13, [x1], #16
ldp d14, d15, [x1], #16
ldp x19, x20, [x1], #16
ldp x21, x22, [x1], #16
ldp x23, x24, [x1], #16
ldp x25, x26, [x1], #16
ldp x27, x28, [x1], #16
// Move parameters into temporary registers.
// x9 = func, x10 = argv cursor, x11 = remaining argument count.
mov x9, x0
mov x10, x2
mov x11, x3
// Load parameters into registers.
// Unrolled: load one argument, decrement the count, stop when it hits zero.
cbz x11, .Largs_done
ldr x0, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x1, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x2, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x3, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x4, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x5, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x6, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
// Eighth and last register argument; no further count check follows.
ldr x7, [x10], #8
.Largs_done:
blr x9
// Reload |state| and store registers.
// x0 (the return value of |func|) is left untouched from here to the ret.
ldr x1, [sp, #160]
stp d8, d9, [x1], #16
stp d10, d11, [x1], #16
stp d12, d13, [x1], #16
stp d14, d15, [x1], #16
stp x19, x20, [x1], #16
stp x21, x22, [x1], #16
stp x23, x24, [x1], #16
stp x25, x26, [x1], #16
stp x27, x28, [x1], #16
// |func| is required to preserve x29, the frame pointer. We cannot load
// random values into x29 (see comment above), so compare it against the
// expected value and zero the field of |state| if corrupted.
// x29 was set equal to sp on entry and sp has not moved since, so sp is the
// expected value. x1 now points 144 bytes into |state|, just past the slots
// written above.
mov x9, sp
cmp x29, x9
b.eq .Lx29_ok
str xzr, [x1]
.Lx29_ok:
// Restore callee-saved registers.
ldp d8, d9, [sp, #16]
ldp d10, d11, [sp, #32]
ldp d12, d13, [sp, #48]
ldp d14, d15, [sp, #64]
ldp x19, x20, [sp, #80]
ldp x21, x22, [sp, #96]
ldp x23, x24, [sp, #112]
ldp x25, x26, [sp, #128]
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
// NOTE(review): counterpart of AARCH64_SIGN_LINK_REGISTER above; presumably
// authenticates x30 before the return - confirm in <openssl/arm_arch.h>.
AARCH64_VALIDATE_LINK_REGISTER
ret
.size abi_test_trampoline,.-abi_test_trampoline
// abi_test_clobber_x0 sets x0 to zero and returns immediately.
.type abi_test_clobber_x0, %function
.globl abi_test_clobber_x0
.hidden abi_test_clobber_x0
.align 4
abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr
ret
.size abi_test_clobber_x0,.-abi_test_clobber_x0
// abi_test_clobber_x1 sets x1 to zero and returns immediately.
.type abi_test_clobber_x1, %function
.globl abi_test_clobber_x1
.hidden abi_test_clobber_x1
.align 4
abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr
ret
.size abi_test_clobber_x1,.-abi_test_clobber_x1
// abi_test_clobber_x2 sets x2 to zero and returns immediately.
.type abi_test_clobber_x2, %function
.globl abi_test_clobber_x2
.hidden abi_test_clobber_x2
.align 4
abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr
ret
.size abi_test_clobber_x2,.-abi_test_clobber_x2
// abi_test_clobber_x3 sets x3 to zero and returns immediately.
.type abi_test_clobber_x3, %function
.globl abi_test_clobber_x3
.hidden abi_test_clobber_x3
.align 4
abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr
ret
.size abi_test_clobber_x3,.-abi_test_clobber_x3
// abi_test_clobber_x4 sets x4 to zero and returns immediately.
.type abi_test_clobber_x4, %function
.globl abi_test_clobber_x4
.hidden abi_test_clobber_x4
.align 4
abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr
ret
.size abi_test_clobber_x4,.-abi_test_clobber_x4
// abi_test_clobber_x5 sets x5 to zero and returns immediately.
.type abi_test_clobber_x5, %function
.globl abi_test_clobber_x5
.hidden abi_test_clobber_x5
.align 4
abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr
ret
.size abi_test_clobber_x5,.-abi_test_clobber_x5
// abi_test_clobber_x6 sets x6 to zero and returns immediately.
.type abi_test_clobber_x6, %function
.globl abi_test_clobber_x6
.hidden abi_test_clobber_x6
.align 4
abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr
ret
.size abi_test_clobber_x6,.-abi_test_clobber_x6
// abi_test_clobber_x7 sets x7 to zero and returns immediately.
.type abi_test_clobber_x7, %function
.globl abi_test_clobber_x7
.hidden abi_test_clobber_x7
.align 4
abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr
ret
.size abi_test_clobber_x7,.-abi_test_clobber_x7
// abi_test_clobber_x8 sets x8 to zero and returns immediately.
.type abi_test_clobber_x8, %function
.globl abi_test_clobber_x8
.hidden abi_test_clobber_x8
.align 4
abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr
ret
.size abi_test_clobber_x8,.-abi_test_clobber_x8
// abi_test_clobber_x9 sets x9 to zero and returns immediately.
.type abi_test_clobber_x9, %function
.globl abi_test_clobber_x9
.hidden abi_test_clobber_x9
.align 4
abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr
ret
.size abi_test_clobber_x9,.-abi_test_clobber_x9
// abi_test_clobber_x10 sets x10 to zero and returns immediately.
.type abi_test_clobber_x10, %function
.globl abi_test_clobber_x10
.hidden abi_test_clobber_x10
.align 4
abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr
ret
.size abi_test_clobber_x10,.-abi_test_clobber_x10
// abi_test_clobber_x11 sets x11 to zero and returns immediately.
.type abi_test_clobber_x11, %function
.globl abi_test_clobber_x11
.hidden abi_test_clobber_x11
.align 4
abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr
ret
.size abi_test_clobber_x11,.-abi_test_clobber_x11
// abi_test_clobber_x12 sets x12 to zero and returns immediately.
.type abi_test_clobber_x12, %function
.globl abi_test_clobber_x12
.hidden abi_test_clobber_x12
.align 4
abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr
ret
.size abi_test_clobber_x12,.-abi_test_clobber_x12
// abi_test_clobber_x13 sets x13 to zero and returns immediately.
.type abi_test_clobber_x13, %function
.globl abi_test_clobber_x13
.hidden abi_test_clobber_x13
.align 4
abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr
ret
.size abi_test_clobber_x13,.-abi_test_clobber_x13
// abi_test_clobber_x14 sets x14 to zero and returns immediately.
.type abi_test_clobber_x14, %function
.globl abi_test_clobber_x14
.hidden abi_test_clobber_x14
.align 4
abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr
ret
.size abi_test_clobber_x14,.-abi_test_clobber_x14
// abi_test_clobber_x15 sets x15 to zero and returns immediately.
.type abi_test_clobber_x15, %function
.globl abi_test_clobber_x15
.hidden abi_test_clobber_x15
.align 4
abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr
ret
.size abi_test_clobber_x15,.-abi_test_clobber_x15
// abi_test_clobber_x16 sets x16 to zero and returns immediately.
.type abi_test_clobber_x16, %function
.globl abi_test_clobber_x16
.hidden abi_test_clobber_x16
.align 4
abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr
ret
.size abi_test_clobber_x16,.-abi_test_clobber_x16
// abi_test_clobber_x17 sets x17 to zero and returns immediately.
.type abi_test_clobber_x17, %function
.globl abi_test_clobber_x17
.hidden abi_test_clobber_x17
.align 4
abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr
ret
.size abi_test_clobber_x17,.-abi_test_clobber_x17
// abi_test_clobber_x19 sets x19 to zero and returns without restoring it.
// x19 is one of the callee-saved registers that abi_test_trampoline loads
// from and stores back to |state| around the call.
.type abi_test_clobber_x19, %function
.globl abi_test_clobber_x19
.hidden abi_test_clobber_x19
.align 4
abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr
ret
.size abi_test_clobber_x19,.-abi_test_clobber_x19
// abi_test_clobber_x20 sets x20 to zero and returns without restoring it.
// x20 is one of the callee-saved registers that abi_test_trampoline loads
// from and stores back to |state| around the call.
.type abi_test_clobber_x20, %function
.globl abi_test_clobber_x20
.hidden abi_test_clobber_x20
.align 4
abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr
ret
.size abi_test_clobber_x20,.-abi_test_clobber_x20
// abi_test_clobber_x21 sets x21 to zero and returns without restoring it.
// x21 is one of the callee-saved registers that abi_test_trampoline loads
// from and stores back to |state| around the call.
.type abi_test_clobber_x21, %function
.globl abi_test_clobber_x21
.hidden abi_test_clobber_x21
.align 4
abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr
ret
.size abi_test_clobber_x21,.-abi_test_clobber_x21
// abi_test_clobber_x22 sets x22 to zero and returns without restoring it.
// x22 is one of the callee-saved registers that abi_test_trampoline loads
// from and stores back to |state| around the call.
.type abi_test_clobber_x22, %function
.globl abi_test_clobber_x22
.hidden abi_test_clobber_x22
.align 4
abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr
ret
.size abi_test_clobber_x22,.-abi_test_clobber_x22
// abi_test_clobber_x23 sets x23 to zero and returns without restoring it.
// x23 is one of the callee-saved registers that abi_test_trampoline loads
// from and stores back to |state| around the call.
.type abi_test_clobber_x23, %function
.globl abi_test_clobber_x23
.hidden abi_test_clobber_x23
.align 4
abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr
ret
.size abi_test_clobber_x23,.-abi_test_clobber_x23
// abi_test_clobber_x24 sets x24 to zero and returns without restoring it.
// x24 is one of the callee-saved registers that abi_test_trampoline loads
// from and stores back to |state| around the call.
.type abi_test_clobber_x24, %function
.globl abi_test_clobber_x24
.hidden abi_test_clobber_x24
.align 4
abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr
ret
.size abi_test_clobber_x24,.-abi_test_clobber_x24
// abi_test_clobber_x25 sets x25 to zero and returns without restoring it.
// x25 is one of the callee-saved registers that abi_test_trampoline loads
// from and stores back to |state| around the call.
.type abi_test_clobber_x25, %function
.globl abi_test_clobber_x25
.hidden abi_test_clobber_x25
.align 4
abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr
ret
.size abi_test_clobber_x25,.-abi_test_clobber_x25
// abi_test_clobber_x26 sets x26 to zero and returns without restoring it.
// x26 is one of the callee-saved registers that abi_test_trampoline loads
// from and stores back to |state| around the call.
.type abi_test_clobber_x26, %function
.globl abi_test_clobber_x26
.hidden abi_test_clobber_x26
.align 4
abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr
ret
.size abi_test_clobber_x26,.-abi_test_clobber_x26
// abi_test_clobber_x27 sets x27 to zero and returns without restoring it.
// x27 is one of the callee-saved registers that abi_test_trampoline loads
// from and stores back to |state| around the call.
.type abi_test_clobber_x27, %function
.globl abi_test_clobber_x27
.hidden abi_test_clobber_x27
.align 4
abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr
ret
.size abi_test_clobber_x27,.-abi_test_clobber_x27
// abi_test_clobber_x28 sets x28 to zero and returns without restoring it.
// x28 is one of the callee-saved registers that abi_test_trampoline loads
// from and stores back to |state| around the call.
.type abi_test_clobber_x28, %function
.globl abi_test_clobber_x28
.hidden abi_test_clobber_x28
.align 4
abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr
ret
.size abi_test_clobber_x28,.-abi_test_clobber_x28
// abi_test_clobber_x29 sets x29 (the frame pointer) to zero and returns
// without restoring it. abi_test_trampoline does not load x29 from |state|;
// instead it compares x29 against sp after the call and zeroes a field of
// |state| on mismatch.
.type abi_test_clobber_x29, %function
.globl abi_test_clobber_x29
.hidden abi_test_clobber_x29
.align 4
abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr
ret
.size abi_test_clobber_x29,.-abi_test_clobber_x29
// abi_test_clobber_d0 writes zero (from xzr) into d0 and returns immediately.
.type abi_test_clobber_d0, %function
.globl abi_test_clobber_d0
.hidden abi_test_clobber_d0
.align 4
abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr
ret
.size abi_test_clobber_d0,.-abi_test_clobber_d0
// abi_test_clobber_d1 writes zero (from xzr) into d1 and returns immediately.
.type abi_test_clobber_d1, %function
.globl abi_test_clobber_d1
.hidden abi_test_clobber_d1
.align 4
abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr
ret
.size abi_test_clobber_d1,.-abi_test_clobber_d1
// abi_test_clobber_d2 writes zero (from xzr) into d2 and returns immediately.
.type abi_test_clobber_d2, %function
.globl abi_test_clobber_d2
.hidden abi_test_clobber_d2
.align 4
abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr
ret
.size abi_test_clobber_d2,.-abi_test_clobber_d2
// abi_test_clobber_d3 writes zero (from xzr) into d3 and returns immediately.
.type abi_test_clobber_d3, %function
.globl abi_test_clobber_d3
.hidden abi_test_clobber_d3
.align 4
abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr
ret
.size abi_test_clobber_d3,.-abi_test_clobber_d3
// abi_test_clobber_d4 writes zero (from xzr) into d4 and returns immediately.
.type abi_test_clobber_d4, %function
.globl abi_test_clobber_d4
.hidden abi_test_clobber_d4
.align 4
abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr
ret
.size abi_test_clobber_d4,.-abi_test_clobber_d4
// abi_test_clobber_d5: moves zero (xzr) into d5 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d5, %function
.globl abi_test_clobber_d5
.hidden abi_test_clobber_d5
.align 4
abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr
ret
.size abi_test_clobber_d5,.-abi_test_clobber_d5
// abi_test_clobber_d6: moves zero (xzr) into d6 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d6, %function
.globl abi_test_clobber_d6
.hidden abi_test_clobber_d6
.align 4
abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr
ret
.size abi_test_clobber_d6,.-abi_test_clobber_d6
// abi_test_clobber_d7: moves zero (xzr) into d7 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d7, %function
.globl abi_test_clobber_d7
.hidden abi_test_clobber_d7
.align 4
abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr
ret
.size abi_test_clobber_d7,.-abi_test_clobber_d7
// abi_test_clobber_d8: moves zero (xzr) into d8 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d8, %function
.globl abi_test_clobber_d8
.hidden abi_test_clobber_d8
.align 4
abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr
ret
.size abi_test_clobber_d8,.-abi_test_clobber_d8
// abi_test_clobber_d9: moves zero (xzr) into d9 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d9, %function
.globl abi_test_clobber_d9
.hidden abi_test_clobber_d9
.align 4
abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr
ret
.size abi_test_clobber_d9,.-abi_test_clobber_d9
// abi_test_clobber_d10: moves zero (xzr) into d10 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d10, %function
.globl abi_test_clobber_d10
.hidden abi_test_clobber_d10
.align 4
abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr
ret
.size abi_test_clobber_d10,.-abi_test_clobber_d10
// abi_test_clobber_d11: moves zero (xzr) into d11 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d11, %function
.globl abi_test_clobber_d11
.hidden abi_test_clobber_d11
.align 4
abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr
ret
.size abi_test_clobber_d11,.-abi_test_clobber_d11
// abi_test_clobber_d12: moves zero (xzr) into d12 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d12, %function
.globl abi_test_clobber_d12
.hidden abi_test_clobber_d12
.align 4
abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr
ret
.size abi_test_clobber_d12,.-abi_test_clobber_d12
// abi_test_clobber_d13: moves zero (xzr) into d13 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d13, %function
.globl abi_test_clobber_d13
.hidden abi_test_clobber_d13
.align 4
abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr
ret
.size abi_test_clobber_d13,.-abi_test_clobber_d13
// abi_test_clobber_d14: moves zero (xzr) into d14 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d14, %function
.globl abi_test_clobber_d14
.hidden abi_test_clobber_d14
.align 4
abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr
ret
.size abi_test_clobber_d14,.-abi_test_clobber_d14
// abi_test_clobber_d15: moves zero (xzr) into d15 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d15, %function
.globl abi_test_clobber_d15
.hidden abi_test_clobber_d15
.align 4
abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr
ret
.size abi_test_clobber_d15,.-abi_test_clobber_d15
// abi_test_clobber_d16: moves zero (xzr) into d16 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d16, %function
.globl abi_test_clobber_d16
.hidden abi_test_clobber_d16
.align 4
abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr
ret
.size abi_test_clobber_d16,.-abi_test_clobber_d16
// abi_test_clobber_d17: moves zero (xzr) into d17 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d17, %function
.globl abi_test_clobber_d17
.hidden abi_test_clobber_d17
.align 4
abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr
ret
.size abi_test_clobber_d17,.-abi_test_clobber_d17
// abi_test_clobber_d18: moves zero (xzr) into d18 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d18, %function
.globl abi_test_clobber_d18
.hidden abi_test_clobber_d18
.align 4
abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr
ret
.size abi_test_clobber_d18,.-abi_test_clobber_d18
// abi_test_clobber_d19: moves zero (xzr) into d19 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d19, %function
.globl abi_test_clobber_d19
.hidden abi_test_clobber_d19
.align 4
abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr
ret
.size abi_test_clobber_d19,.-abi_test_clobber_d19
// abi_test_clobber_d20: moves zero (xzr) into d20 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d20, %function
.globl abi_test_clobber_d20
.hidden abi_test_clobber_d20
.align 4
abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr
ret
.size abi_test_clobber_d20,.-abi_test_clobber_d20
// abi_test_clobber_d21: moves zero (xzr) into d21 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d21, %function
.globl abi_test_clobber_d21
.hidden abi_test_clobber_d21
.align 4
abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr
ret
.size abi_test_clobber_d21,.-abi_test_clobber_d21
// abi_test_clobber_d22: moves zero (xzr) into d22 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d22, %function
.globl abi_test_clobber_d22
.hidden abi_test_clobber_d22
.align 4
abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr
ret
.size abi_test_clobber_d22,.-abi_test_clobber_d22
// abi_test_clobber_d23: moves zero (xzr) into d23 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d23, %function
.globl abi_test_clobber_d23
.hidden abi_test_clobber_d23
.align 4
abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr
ret
.size abi_test_clobber_d23,.-abi_test_clobber_d23
// abi_test_clobber_d24: moves zero (xzr) into d24 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d24, %function
.globl abi_test_clobber_d24
.hidden abi_test_clobber_d24
.align 4
abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr
ret
.size abi_test_clobber_d24,.-abi_test_clobber_d24
// abi_test_clobber_d25: moves zero (xzr) into d25 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d25, %function
.globl abi_test_clobber_d25
.hidden abi_test_clobber_d25
.align 4
abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr
ret
.size abi_test_clobber_d25,.-abi_test_clobber_d25
// abi_test_clobber_d26: moves zero (xzr) into d26 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d26, %function
.globl abi_test_clobber_d26
.hidden abi_test_clobber_d26
.align 4
abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr
ret
.size abi_test_clobber_d26,.-abi_test_clobber_d26
// abi_test_clobber_d27: moves zero (xzr) into d27 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d27, %function
.globl abi_test_clobber_d27
.hidden abi_test_clobber_d27
.align 4
abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr
ret
.size abi_test_clobber_d27,.-abi_test_clobber_d27
// abi_test_clobber_d28: moves zero (xzr) into d28 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d28, %function
.globl abi_test_clobber_d28
.hidden abi_test_clobber_d28
.align 4
abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr
ret
.size abi_test_clobber_d28,.-abi_test_clobber_d28
// abi_test_clobber_d29: moves zero (xzr) into d29 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d29, %function
.globl abi_test_clobber_d29
.hidden abi_test_clobber_d29
.align 4
abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr
ret
.size abi_test_clobber_d29,.-abi_test_clobber_d29
// abi_test_clobber_d30: moves zero (xzr) into d30 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d30, %function
.globl abi_test_clobber_d30
.hidden abi_test_clobber_d30
.align 4
abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr
ret
.size abi_test_clobber_d30,.-abi_test_clobber_d30
// abi_test_clobber_d31: moves zero (xzr) into d31 with fmov and returns,
// without saving the previous value.
.type abi_test_clobber_d31, %function
.globl abi_test_clobber_d31
.hidden abi_test_clobber_d31
.align 4
abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr
ret
.size abi_test_clobber_d31,.-abi_test_clobber_d31
// abi_test_clobber_v8_upper: writes zero (xzr) into lane d[1] of v8, the
// second 64-bit element of the register, and returns; d[0] is not written.
.type abi_test_clobber_v8_upper, %function
.globl abi_test_clobber_v8_upper
.hidden abi_test_clobber_v8_upper
.align 4
abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr
ret
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
// abi_test_clobber_v9_upper: writes zero (xzr) into lane d[1] of v9, the
// second 64-bit element of the register, and returns; d[0] is not written.
.type abi_test_clobber_v9_upper, %function
.globl abi_test_clobber_v9_upper
.hidden abi_test_clobber_v9_upper
.align 4
abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr
ret
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
// abi_test_clobber_v10_upper: writes zero (xzr) into lane d[1] of v10, the
// second 64-bit element of the register, and returns; d[0] is not written.
.type abi_test_clobber_v10_upper, %function
.globl abi_test_clobber_v10_upper
.hidden abi_test_clobber_v10_upper
.align 4
abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr
ret
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
// abi_test_clobber_v11_upper: writes zero (xzr) into lane d[1] of v11, the
// second 64-bit element of the register, and returns; d[0] is not written.
.type abi_test_clobber_v11_upper, %function
.globl abi_test_clobber_v11_upper
.hidden abi_test_clobber_v11_upper
.align 4
abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr
ret
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
// abi_test_clobber_v12_upper: writes zero (xzr) into lane d[1] of v12, the
// second 64-bit element of the register, and returns; d[0] is not written.
.type abi_test_clobber_v12_upper, %function
.globl abi_test_clobber_v12_upper
.hidden abi_test_clobber_v12_upper
.align 4
abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr
ret
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
// abi_test_clobber_v13_upper: writes zero (xzr) into lane d[1] of v13, the
// second 64-bit element of the register, and returns; d[0] is not written.
.type abi_test_clobber_v13_upper, %function
.globl abi_test_clobber_v13_upper
.hidden abi_test_clobber_v13_upper
.align 4
abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr
ret
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
// abi_test_clobber_v14_upper: writes zero (xzr) into lane d[1] of v14, the
// second 64-bit element of the register, and returns; d[0] is not written.
.type abi_test_clobber_v14_upper, %function
.globl abi_test_clobber_v14_upper
.hidden abi_test_clobber_v14_upper
.align 4
abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr
ret
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
// abi_test_clobber_v15_upper: writes zero (xzr) into lane d[1] of v15, the
// second 64-bit element of the register, and returns; d[0] is not written.
.type abi_test_clobber_v15_upper, %function
.globl abi_test_clobber_v15_upper
.hidden abi_test_clobber_v15_upper
.align 4
abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr
ret
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,781 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv7-a @ don't confuse not-so-latest binutils with armv8 :-)
.fpu neon
.code 32
#undef __thumb2__
.align 5
@ Constant table read by aes_hw_set_encrypt_key through r3.
@ Row 0: initial round constant, loaded into q1 and shifted left by one bit
@        per key-expansion iteration.
@ Row 1: byte-index mask used with vtbl, loaded into q2.
@ Row 2: constant 0x1b, loaded into q1 once the 128-bit loop has finished.
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
@ aes_hw_set_encrypt_key: expand a user key into an AES encryption key schedule.
@ In:  r0 = user key bytes, r1 = key length in bits, r2 = output key schedule.
@ Out: r0 = 0 on success, -1 if r0 or r2 is zero, -2 if r1 is below 128,
@      above 256 or not a multiple of 64.
@ On success the round count (10, 12 or 14) is stored 240 bytes past the start
@ of the schedule. r2 is left pointing at that word and r12 holds the count;
@ aes_hw_set_decrypt_key depends on both when it enters through .Lenc_key.
@ The .byte sequences are hand-encoded AES instructions, named in the trailing
@ comment on each line.
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
@ Argument validation; r3 carries the prospective return value.
mov r3,#-1
cmp r0,#0
beq .Lenc_key_abort
cmp r2,#0
beq .Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt .Lenc_key_abort
cmp r1,#256
bgt .Lenc_key_abort
tst r1,#0x3f
bne .Lenc_key_abort
adr r3,.Lrcon
@ Flags from this compare select the 128/192/256 path three branches below;
@ the instructions in between do not modify the flags.
cmp r1,#192
veor q0,q0,q0
vld1.8 {q3},[r0]!
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!
blt .Loop128
beq .L192
b .L256
.align 4
@ 128-bit key: eight loop iterations, each storing one 16-byte round key.
.Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne .Loop128
@ r3 now points at the third .Lrcon row (0x1b); two more unrolled steps follow.
vld1.32 {q1},[r3]
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
@ Advance r2 from the last round key (offset 160) to the round-count word at 240.
add r2,r2,#0x50
mov r12,#10
b .Ldone
.align 4
@ 192-bit key: load the extra 8 key bytes into d16; r1 is still 8 iterations.
.L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
.Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vdup.32 q9,d7[1]
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne .Loop192
mov r12,#12
@ Advance r2 from offset 208 to the round-count word at 240.
add r2,r2,#0x20
b .Ldone
.align 4
@ 256-bit key: second 16 key bytes go to q8; seven iterations, leaving the
@ loop through the beq in the middle once r1 reaches zero.
.L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!
.Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq .Ldone
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
veor q8,q8,q10
b .Loop256
@ Common exit: store the round count after the schedule and report success.
.Ldone:
str r12,[r2]
mov r3,#0
.Lenc_key_abort:
mov r0,r3 @ return value
bx lr
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
@ aes_hw_set_decrypt_key: build a decryption key schedule in place.
@ Takes the same arguments as aes_hw_set_encrypt_key (r0 = user key,
@ r1 = bits, r2 = key schedule) and first runs that routine via .Lenc_key.
@ A non-zero result from it is returned unchanged. Otherwise the round keys
@ are swapped end-for-end, with aesimc applied to every key except the first
@ and last, and 0 is returned in r0.
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl .Lenc_key
cmp r0,#0
bne .Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16
add r0,r2,r12,lsl#4 @ end of key schedule
@ Swap the first and last round keys without transforming them.
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
@ Walk inward from both ends (r2 upward, r0 downward by 16), swapping each
@ pair after aesimc.
.Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi .Loop_imc
@ Loop exit means r0 is no longer above r2; transform the remaining key.
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
.Ldec_key_abort:
ldmia sp!,{r4,pc}
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
@ aes_hw_encrypt: encrypt one 16-byte block.
@ In: r0 = input block, r1 = output block, r2 = key schedule whose round
@     count is read from offset 240.
@ The loop applies two rounds per iteration, alternating round keys between
@ q0 and q1; the last round key is applied with a plain XOR.
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
.Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.size aes_hw_encrypt,.-aes_hw_encrypt
@ aes_hw_decrypt: decrypt one 16-byte block.
@ In: r0 = input block, r1 = output block, r2 = key schedule whose round
@     count is read from offset 240.
@ Same structure as aes_hw_encrypt, using aesd/aesimc in place of aese/aesmc.
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
.Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.size aes_hw_decrypt,.-aes_hw_decrypt
@ aes_hw_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
@ In: r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule
@     (round count read from offset 240), first stack word = IV pointer (r4),
@     second stack word = direction flag (r5): zero selects decryption,
@     non-zero encryption.
@ A length below 16 returns without touching output or IV; otherwise the
@ length is rounded down to a multiple of 16. On completion the chaining
@ value held in q6 is written back through the IV pointer.
@ r8 is the post-increment applied to input loads: 16, or 0 once the last
@ block has been reached.
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo .Lcbc_abort
moveq r8,#0
@ The flags set here are consumed by the beq .Lcbc_dec further down; none of
@ the instructions in between updates the flags.
cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240]
and r2,r2,#-16
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
beq .Lcbc_dec
@ Encryption. r5 is now rounds-8, so r5 == 2 identifies a 10-round key.
cmp r5,#2
veor q0,q0,q6
veor q5,q8,q7
beq .Lcbc_enc128
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
.Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq .Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
.Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc
vst1.8 {q6},[r1]!
b .Lcbc_done
.align 5
@ Encryption with a 10-round key: every round key stays in registers.
.Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
.Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc128
vst1.8 {q6},[r1]!
b .Lcbc_done
.align 5
@ Decryption: three blocks per iteration in q0, q1 and q10, with a tail for
@ the last one or two blocks.
.Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo .Lcbc_dec_tail
vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10
.Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7
subs r2,r2,#0x30
veor q5,q2,q7
movlo r6,r2 @ r6 is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
vorr q6,q11,q11
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs .Loop3x_cbc_dec
@ r2 == -0x30 here means no blocks remain after the 3x loop.
cmn r2,#0x30
beq .Lcbc_done
nop
.Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
@ Flags from this compare pick the one-block exit at the beq .Lcbc_dec_one
@ further down; nothing in between updates the flags.
cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq .Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b .Lcbc_done
.Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
@ Write the final chaining value back through the IV pointer.
.Lcbc_done:
vst1.8 {q6},[r4]
.Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
@ aes_hw_ctr32_encrypt_blocks: CTR-mode processing with a 32-bit counter.
@ In: r0 = input, r1 = output, r2 = number of 16-byte blocks, r3 = key
@     schedule (round count read from offset 240), first stack word =
@     pointer to the 16-byte counter block (r4).
@ The counter is the 32-bit word at offset 12 of the counter block; it is
@ byte-reversed into r8 unless __ARMEB__ is defined, incremented as an
@ integer, and reversed again before being inserted into the top lane of
@ each working block. Three blocks are processed per main-loop iteration,
@ with a tail for a remaining one or two blocks. The counter block in
@ memory is only read here, never written back.
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
ldr r5,[r3,#240]
ldr r8, [r4, #12]
vld1.32 {q0},[r4]
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16
@ Flags from this compare drive the movlo and the bls .Lctr32_tail below;
@ nothing in between updates the flags.
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
@ r12 is the input stride used by the tail: 0 when only one block is left,
@ so the second tail load rereads the same block instead of advancing.
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
bls .Lctr32_tail
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_ctr32
@ Final rounds: results move to q4, q5 and q9 so that q0, q1 and q10 can be
@ reloaded from q6 (the saved counter block) for the next iteration.
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]!
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
veor q2,q2,q4
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs .Loop3x_ctr32
@ Undo the bias; r2 is now the number of leftover blocks (0, 1 or 2).
adds r2,r2,#3
beq .Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0
.Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt .Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
@ With exactly one block left only the first result is stored.
cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq .Lctr32_done
vst1.8 {q3},[r1]
.Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -0,0 +1,977 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.align 5
@ Offset from .Lbn_mul_mont to OPENSSL_armcap_P. bn_mul_mont adds this word to
@ its own address (obtained with adr) to load the capability word.
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.Lbn_mul_mont
#endif
@ bn_mul_mont: word-by-word Montgomery multiplication on 32-bit limbs.
@ In: r0 = rp (result), r1 = ap, r2 = bp, r3 = np, first stack word = pointer
@     to n0, second stack word = num (limb count).
@ Out: r0 = 1 when the product was computed, 0 when num is below 2 (nothing
@      is written in that case).
@ When built with __ARM_MAX_ARCH__>=7, num a multiple of 8 and the ARMV7_NEON
@ bit set in the capability word, control is transferred to
@ bn_mul8x_mont_neon with the original arguments.
@ Stack layout relative to r0 = &tp[num-1] after the prologue:
@   [r0,#12*4] saved rp, [r0,#13*4] saved bp slot (reused for the running bp),
@   [r0,#14*4] n0 pointer, replaced by the n0 value,
@   [r0,#15*4] num argument slot, overwritten with the bp end pointer.
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
.Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,.Lbn_mul_mont
ldr r2,.LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq .Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
.Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
@ First pass: tp = ap*bp[0] plus the np multiple, for limbs 1..num-1.
.L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
@ Outer loop: one iteration per remaining bp limb, until r4 reaches the
@ saved bp end pointer.
.Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
.Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne .Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
@ Final reduction: rp = tp - np, with the borrow kept in the carry flag.
.Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne .Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
@ If the subtraction borrowed (carry clear), copy tp over rp; either way
@ every tp word is overwritten with the sp value.
.Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne .Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
.Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont,.-bn_mul_mont
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
@ bn_mul8x_mont_neon: NEON path of the Montgomery multiplication.
@ Register roles as read from the code below:
@   r0      - output pointer, written by .LNEON_sub and .LNEON_copy_n_zap
@   r1      - first multiplicand, read 8 words at a time into d0-d3
@             (presumably ap - TODO confirm against the C prototype)
@   r2      - second multiplicand, read one word at a time (the b[] of the
@             existing comments)
@   r3      - vector multiplied by the per-word factor kept in d29 and later
@             subtracted in .LNEON_sub (presumably the modulus - TODO confirm)
@   [sp]    - pointer loaded into r4; one 32-bit word is read from it into
@             d30 (presumably n0 - TODO confirm)
@   [sp+4]  - word count loaded into r5; values above 8 take the .LNEON_8n
@             path, everything else takes the register-only path
@ NOTE(review): all loop counters step by 8, so r5 is presumably always a
@ non-zero multiple of 8 - confirm against the caller.
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
@ Prologue: ip first captures the entry sp (to reach the stack parameters),
@ then the sp after the register saves (used by the epilogue to unwind).
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
cmp r5,#8
bhi .LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
@ Scratch frame: r5*16 bytes below sp, rounded down to a 64-byte boundary.
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
@ r9 counts the remaining passes of .LNEON_outer8 (r5-1 of them).
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b .LNEON_outer8
.align 4
@ One pass per remaining word read from r2: accumulate into q6-q13, derive
@ the factor d29 from the lowest accumulator and d30, accumulate the r3
@ vector times d29, then shift the accumulators down by one register.
.LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne .LNEON_outer8
@ Loop finished: set up r7 (word output pointer), r8 (word count) and r6
@ for the shared tail, and start the carry chain for the first accumulator.
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b .LNEON_tail_entry
.align 4
@ General path for r5 above 8. The frame is 128 + r5*16 bytes below sp,
@ rounded down to a 64-byte boundary; q6-q13 are cleared on the way.
.LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
@ Zero-fill the frame from sp+256 upwards, 128 bytes per pass,
@ (r5-8)/8 passes.
.LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne .LNEON_8n_init
@ r6 reads accumulators back from the frame, r10 walks the save area at
@ sp+8 that holds the put-aside b/m values, r9 counts outer passes in
@ steps of 8 words.
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b .LNEON_8n_outer
.align 4
@ Outer pass: consumes eight words from r2 against the first eight words of
@ the r1 and r3 vectors. Each b word and the factor derived from it are put
@ aside on the stack (see the inline comments) for reuse by the inner loop.
.LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b .LNEON_8n_inner
.align 4
@ Inner loop: applies the eight put-aside b/m pairs to the next eight words
@ of the r1 and r3 vectors. r8 counts the remaining words in steps of 8.
@ The flags set by the subs below stay live through the whole body (the
@ NEON instructions leave them alone), which is what the it/addne/subeq
@ and the closing bne rely on.
.LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne .LNEON_8n_inner
@ End of one outer pass: spill the live accumulators, reload the first
@ eight from sp+128 and loop while r9 (words of b left) is non-zero.
@ q2/q3 are zeroed here and used below to wipe the start of the frame.
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne .LNEON_8n_outer
@ All of b consumed. The four post-incrementing stores below zero the
@ first 128 bytes of the frame and leave sp 128 bytes higher; r7 is set to
@ that same address beforehand and is where the tail writes its words.
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b .LNEON_tail_entry
.align 4
@ Tail: shared by both paths. For each accumulator register the bits above
@ bit 15 of the low lane are added into the high lane, the bits above bit
@ 15 of the high lane are carried (in d10) into the next accumulator, and
@ the two 16-bit halves are zipped into one 32-bit word stored through r7.
@ Eight words per pass; r8 counts the words left, r6 feeds the next eight
@ accumulators from the frame.
.LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
.LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne .LNEON_tail
@ The r5 result words now sit at sp, followed by one extra word holding
@ the final carry. r1 walks them, r2 marks their end.
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
@ Multi-word subtract with borrow, four words per pass: the words at r0
@ receive (stack words) minus (r3 words). r4-r11 are free to use as
@ scratch from here on; r5 no longer holds the word count.
.LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_sub
@ Fold the extra top word into the borrow chain; the carry flag left by
@ the final sbcs selects the result in the copy loop below.
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
@ Select and wipe, eight words per pass: with carry clear (cc) the words
@ from the stack replace the difference already stored at r0, otherwise
@ the difference is kept. The frame is overwritten with zeros (q0/q1)
@ along the way. None of the instructions in the loop modify the carry.
.LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_copy_n_zap
@ Epilogue: ip still holds the sp recorded after the prologue saves.
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,255 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.arch armv7-a
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
@ gcm_init_neon: derives the "twisted H" value used by the NEON GHASH code.
@   r0 - output pointer; receives the 16-byte twisted H
@   r1 - input pointer; H is read as two 64-bit words
@ H is rotated left by one bit as a 128-bit quantity, and the constant
@ 0xc2....01 is XORed in when the bit shifted out of the top was set.
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
vdup.8 q9,d7[7]
@ d26 keeps the top bit of the low half so it can be carried into d7.
vshr.u64 d26,d6,#63
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
vand q8,q8,q9
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
.size gcm_init_neon,.-gcm_init_neon
@ gcm_gmult_neon: multiplies Xi by H for a single block.
@   r0 - Xi, read here (both loads post-increment r0, so it ends 16 bytes
@        past Xi)
@   r1 - twisted H, 16 bytes
@ This routine only sets up registers and then branches to the label
@ .Lgmult_neon, which is defined outside this routine. r3 is set to 16 and
@ Xi is loaded into q3, the register the multiply body uses as its operand.
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
@ d29, d30 and d31 hold the 48-, 32- and 16-bit masks for the multiply.
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
@ gcm_ghash_neon: folds input blocks into Xi, one 16-byte block per pass.
@   r0 - Xi, read on entry and written back on exit
@   r1 - twisted H, 16 bytes
@   r2 - input pointer, advanced by 16 bytes per block
@   r3 - remaining length in bytes, reduced by 16 per block
@ NOTE(review): the loop exits only when r3 reaches exactly zero, so the
@ length is presumably always a non-zero multiple of 16 - confirm against
@ the callers.
@ The label .Lgmult_neon inside the loop is also a branch target for code
@ outside this routine.
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
@ d29, d30 and d31 hold the 48-, 32- and 16-bit masks used when the
@ partial products are combined below.
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
.Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
.Lgmult_neon:
@ First 64x64 carry-less product, built from 8-bit polynomial multiplies:
@ d26 times d6, result in q0.
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
@ Second product (the Karatsuba middle term): d28 times (d6 xor d7),
@ result in q1.
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
@ Third product: d27 times d7, result in q2.
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
@ Combine the three products into the 256-bit value held in q2:q0.
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
subs r3,#16
bne .Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
@ r0 was advanced 16 bytes past Xi by the two post-incrementing loads.
sub r0,#16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -0,0 +1,253 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.fpu neon
.code 32
#undef __thumb2__
@ gcm_init_v8: builds the three-entry table used by the PMULL-based GHASH.
@   r0 - output table, 48 bytes written:
@          Htable[0] = twisted H
@          Htable[1] = packed Karatsuba pre-processed halves of H and H^2
@          Htable[2] = H^2
@   r1 - input H, 16 bytes
@ The polynomial multiplies are emitted as raw .byte encodings; the
@ mnemonic each one stands for is given in the comment on the same line.
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
@ q14 now holds H^2.
veor q14,q0,q10
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
bx lr
.size gcm_init_v8,.-gcm_init_v8
@ gcm_gmult_v8: multiplies Xi by H in place, for a single block.
@   r0 - Xi, 16 bytes, read and written back
@   r1 - table; the first 32 bytes are read (twisted H into q12 and the
@        packed Karatsuba value into q13)
@ One Karatsuba multiplication (three pmull) followed by a two-phase
@ reduction with the 0xc2.0 constant held in q11.
.globl gcm_gmult_v8
.hidden gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
@ Undo the byte reversal and half swap applied on load before storing.
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
bx lr
.size gcm_gmult_v8,.-gcm_gmult_v8
@ gcm_ghash_v8: folds input blocks into Xi using the PMULL encodings.
@   r0 - Xi, 16 bytes, read on entry and written back at .Ldone_v8
@   r1 - table; 48 bytes are read (twisted H, packed Karatsuba value, H^2)
@   r2 - input pointer
@   r3 - length in bytes
@ The main loop (.Loop_mod2x_v8) consumes 32 bytes per pass using H^2 and
@ H; a trailing single block is handled at .Lodd_tail_v8. d8-d15 are saved
@ and restored because q4-q7 are used as scratch.
@ NOTE(review): r3 is only ever compared against multiples of 32 and 16,
@ so it is presumably a non-zero multiple of 16 - confirm against callers.
.globl gcm_ghash_v8
.hidden gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
@ The flags from the subs above are still live here and at the blo below;
@ the NEON instructions in between leave them alone.
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo .Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b .Loop_mod2x_v8
.align 4
@ Two blocks per pass: the accumulated block in q3 is multiplied by H^2
@ (q14) while the products of the following block with H (q12), started in
@ the previous pass or in the setup above, are folded in. The loads for the
@ next pass are interleaved with the reduction.
.Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
@ Loop exit: q3 and q0 were combined early for a pass that will not run,
@ so the values the single-block tail expects are rebuilt here.
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq .Ldone_v8 @ is r3 zero?
@ Single remaining block: one multiplication by H plus the reduction.
.Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
.Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,379 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.syntax unified
.arch armv7-a
.fpu vfp
.text
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ with |argv|, then saves the callee-saved registers into |state|. It returns
@ the result of |func|. The |unwind| argument is unused.
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
@ const uint32_t *argv, size_t argc,
@ int unwind);
@
@ Incoming registers: r0 = func, r1 = state, r2 = argv, r3 = argc.
@ Stack frame while |func| runs (offsets from sp, lowest address first):
@   +0  .. +27   outgoing stack arguments for |func| (argv[4] onwards)
@   +28          saved r0 (func)
@   +32          saved r1 (state)
@   +36          saved r2 (argv)
@   +40          saved r3 (argc)
@   +44 .. +79   saved r4-r11 and lr
@   +80 .. +143  saved d8-d15
.type abi_test_trampoline, %function
.globl abi_test_trampoline
.hidden abi_test_trampoline
.align 4
abi_test_trampoline:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
sub sp, sp, #28
@ Every register in AAPCS is either non-volatile or a parameter (except
@ r9 on iOS), so this code, by the actual call, loses all its scratch
@ registers. First fill in stack parameters while there are registers
@ to spare.
cmp r3, #4
bls .Lstack_args_done
mov r4, sp @ r4 is the output pointer.
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
add r2, r2, #16 @ Skip four arguments.
@ Copy argv[4] up to the end of argv into the reserved area, one word at a
@ time. The loop is only entered when argc is above 4, so at least one
@ word is copied.
.Lstack_args_loop:
ldr r6, [r2], #4
cmp r2, r5
str r6, [r4], #4
bne .Lstack_args_loop
.Lstack_args_done:
@ Load registers from |r1|.
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is left out of the load on Apple targets; the comment at the top of
@ this function notes that it is volatile on iOS.
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Load register parameters. This uses up our remaining registers, so we
@ repurpose lr as scratch space.
ldr r3, [sp, #40] @ Reload argc.
ldr lr, [sp, #36] @ Load argv into lr.
@ Dispatch on argc: 4 or more enters at .Larg_r3, 3 at .Larg_r2, 2 at
@ .Larg_r1, 1 at .Larg_r0 and 0 skips the loads. Each entry point falls
@ through to the ones below it, so every lower-numbered argument register
@ is filled as well.
cmp r3, #3
bhi .Larg_r3
beq .Larg_r2
cmp r3, #1
bhi .Larg_r1
beq .Larg_r0
b .Largs_done
.Larg_r3:
ldr r3, [lr, #12] @ argv[3]
.Larg_r2:
ldr r2, [lr, #8] @ argv[2]
.Larg_r1:
ldr r1, [lr, #4] @ argv[1]
.Larg_r0:
ldr r0, [lr] @ argv[0]
.Largs_done:
@ With every other register in use, load the function pointer into lr
@ and call the function.
ldr lr, [sp, #28]
blx lr
@ r1-r3 are free for use again. The trampoline only supports
@ single-return functions. Pass r4-r11 to the caller.
@ r0 is left untouched from here on, so the value returned by |func|
@ becomes the return value of the trampoline.
ldr r1, [sp, #32]
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is left out of the store on Apple targets, matching the load above.
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Unwind the stack and restore registers.
add sp, sp, #44 @ 44 = 28+16
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
bx lr
.size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_r0, %function
.globl abi_test_clobber_r0
.hidden abi_test_clobber_r0
.align 4
abi_test_clobber_r0:
@ Overwrites r0 with zero and returns; no other register is written.
@ NOTE(review): presumably called by the ABI test harness - confirm.
mov r0, #0
bx lr
.size abi_test_clobber_r0,.-abi_test_clobber_r0
.type abi_test_clobber_r1, %function
.globl abi_test_clobber_r1
.hidden abi_test_clobber_r1
.align 4
abi_test_clobber_r1:
@ Overwrites r1 with zero and returns; no other register is written.
@ NOTE(review): presumably called by the ABI test harness - confirm.
mov r1, #0
bx lr
.size abi_test_clobber_r1,.-abi_test_clobber_r1
.type abi_test_clobber_r2, %function
.globl abi_test_clobber_r2
.hidden abi_test_clobber_r2
.align 4
abi_test_clobber_r2:
@ Overwrites r2 with zero and returns; no other register is written.
@ NOTE(review): presumably called by the ABI test harness - confirm.
mov r2, #0
bx lr
.size abi_test_clobber_r2,.-abi_test_clobber_r2
.type abi_test_clobber_r3, %function
.globl abi_test_clobber_r3
.hidden abi_test_clobber_r3
.align 4
abi_test_clobber_r3:
@ Overwrites r3 with zero and returns; no other register is written.
@ NOTE(review): presumably called by the ABI test harness - confirm.
mov r3, #0
bx lr
.size abi_test_clobber_r3,.-abi_test_clobber_r3
.type abi_test_clobber_r4, %function
.globl abi_test_clobber_r4
.hidden abi_test_clobber_r4
.align 4
abi_test_clobber_r4:
@ Overwrites r4 with zero and returns without restoring it. r4 is one of
@ the registers abi_test_trampoline saves and restores as callee-saved.
mov r4, #0
bx lr
.size abi_test_clobber_r4,.-abi_test_clobber_r4
.type abi_test_clobber_r5, %function
.globl abi_test_clobber_r5
.hidden abi_test_clobber_r5
.align 4
abi_test_clobber_r5:
@ Overwrites r5 with zero and returns without restoring it. r5 is one of
@ the registers abi_test_trampoline saves and restores as callee-saved.
mov r5, #0
bx lr
.size abi_test_clobber_r5,.-abi_test_clobber_r5
.type abi_test_clobber_r6, %function
.globl abi_test_clobber_r6
.hidden abi_test_clobber_r6
.align 4
abi_test_clobber_r6:
@ Overwrites r6 with zero and returns without restoring it. r6 is one of
@ the registers abi_test_trampoline saves and restores as callee-saved.
mov r6, #0
bx lr
.size abi_test_clobber_r6,.-abi_test_clobber_r6
.type abi_test_clobber_r7, %function
.globl abi_test_clobber_r7
.hidden abi_test_clobber_r7
.align 4
abi_test_clobber_r7:
@ Overwrites r7 with zero and returns without restoring it. r7 is one of
@ the registers abi_test_trampoline saves and restores as callee-saved.
mov r7, #0
bx lr
.size abi_test_clobber_r7,.-abi_test_clobber_r7
.type abi_test_clobber_r8, %function
.globl abi_test_clobber_r8
.hidden abi_test_clobber_r8
.align 4
abi_test_clobber_r8:
@ Overwrites r8 with zero and returns without restoring it. r8 is one of
@ the registers abi_test_trampoline saves and restores as callee-saved.
mov r8, #0
bx lr
.size abi_test_clobber_r8,.-abi_test_clobber_r8
.type abi_test_clobber_r9, %function
.globl abi_test_clobber_r9
.hidden abi_test_clobber_r9
.align 4
abi_test_clobber_r9:
@ Overwrites r9 with zero and returns without restoring it. The trampoline
@ comments in this file note that r9 is volatile on iOS; elsewhere the
@ trampoline treats it as callee-saved.
mov r9, #0
bx lr
.size abi_test_clobber_r9,.-abi_test_clobber_r9
.type abi_test_clobber_r10, %function
.globl abi_test_clobber_r10
.hidden abi_test_clobber_r10
.align 4
abi_test_clobber_r10:
@ Overwrites r10 with zero and returns without restoring it. r10 is one of
@ the registers abi_test_trampoline saves and restores as callee-saved.
mov r10, #0
bx lr
.size abi_test_clobber_r10,.-abi_test_clobber_r10
.type abi_test_clobber_r11, %function
.globl abi_test_clobber_r11
.hidden abi_test_clobber_r11
.align 4
abi_test_clobber_r11:
@ Overwrites r11 with zero and returns without restoring it. r11 is one of
@ the registers abi_test_trampoline saves and restores as callee-saved.
mov r11, #0
bx lr
.size abi_test_clobber_r11,.-abi_test_clobber_r11
.type abi_test_clobber_r12, %function
.globl abi_test_clobber_r12
.hidden abi_test_clobber_r12
.align 4
abi_test_clobber_r12:
@ Overwrites r12 with zero and returns; no other register is written.
@ r12 is not in the set that abi_test_trampoline saves and restores.
mov r12, #0
bx lr
.size abi_test_clobber_r12,.-abi_test_clobber_r12
.type abi_test_clobber_d0, %function
.globl abi_test_clobber_d0
.hidden abi_test_clobber_d0
.align 4
abi_test_clobber_d0:
@ Zeroes d0 by writing 0 to both of its single-precision halves (s0, s1).
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s0, r0
vmov s1, r0
bx lr
.size abi_test_clobber_d0,.-abi_test_clobber_d0
.type abi_test_clobber_d1, %function
.globl abi_test_clobber_d1
.hidden abi_test_clobber_d1
.align 4
abi_test_clobber_d1:
@ Zeroes d1 by writing 0 to both of its single-precision halves (s2, s3).
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s2, r0
vmov s3, r0
bx lr
.size abi_test_clobber_d1,.-abi_test_clobber_d1
.type abi_test_clobber_d2, %function
.globl abi_test_clobber_d2
.hidden abi_test_clobber_d2
.align 4
abi_test_clobber_d2:
@ Zeroes d2 by writing 0 to both of its single-precision halves (s4, s5).
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s4, r0
vmov s5, r0
bx lr
.size abi_test_clobber_d2,.-abi_test_clobber_d2
.type abi_test_clobber_d3, %function
.globl abi_test_clobber_d3
.hidden abi_test_clobber_d3
.align 4
abi_test_clobber_d3:
@ Zeroes d3 by writing 0 to both of its single-precision halves (s6, s7).
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s6, r0
vmov s7, r0
bx lr
.size abi_test_clobber_d3,.-abi_test_clobber_d3
.type abi_test_clobber_d4, %function
.globl abi_test_clobber_d4
.hidden abi_test_clobber_d4
.align 4
abi_test_clobber_d4:
@ Zeroes d4 by writing 0 to both of its single-precision halves (s8, s9).
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s8, r0
vmov s9, r0
bx lr
.size abi_test_clobber_d4,.-abi_test_clobber_d4
.type abi_test_clobber_d5, %function
.globl abi_test_clobber_d5
.hidden abi_test_clobber_d5
.align 4
abi_test_clobber_d5:
@ Zeroes d5 by writing 0 to both of its single-precision halves (s10, s11).
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s10, r0
vmov s11, r0
bx lr
.size abi_test_clobber_d5,.-abi_test_clobber_d5
.type abi_test_clobber_d6, %function
.globl abi_test_clobber_d6
.hidden abi_test_clobber_d6
.align 4
abi_test_clobber_d6:
@ Zeroes d6 by writing 0 to both of its single-precision halves (s12, s13).
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s12, r0
vmov s13, r0
bx lr
.size abi_test_clobber_d6,.-abi_test_clobber_d6
.type abi_test_clobber_d7, %function
.globl abi_test_clobber_d7
.hidden abi_test_clobber_d7
.align 4
abi_test_clobber_d7:
@ Zeroes d7 by writing 0 to both of its single-precision halves (s14, s15).
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s14, r0
vmov s15, r0
bx lr
.size abi_test_clobber_d7,.-abi_test_clobber_d7
.type abi_test_clobber_d8, %function
.globl abi_test_clobber_d8
.hidden abi_test_clobber_d8
.align 4
abi_test_clobber_d8:
@ Zeroes d8 (halves s16, s17) without restoring it; d8 is in the d8-d15
@ set that abi_test_trampoline saves and restores as callee-saved.
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s16, r0
vmov s17, r0
bx lr
.size abi_test_clobber_d8,.-abi_test_clobber_d8
.type abi_test_clobber_d9, %function
.globl abi_test_clobber_d9
.hidden abi_test_clobber_d9
.align 4
abi_test_clobber_d9:
@ Zeroes d9 (halves s18, s19) without restoring it; d9 is in the d8-d15
@ set that abi_test_trampoline saves and restores as callee-saved.
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s18, r0
vmov s19, r0
bx lr
.size abi_test_clobber_d9,.-abi_test_clobber_d9
.type abi_test_clobber_d10, %function
.globl abi_test_clobber_d10
.hidden abi_test_clobber_d10
.align 4
abi_test_clobber_d10:
@ Zeroes d10 (halves s20, s21) without restoring it; d10 is in the d8-d15
@ set that abi_test_trampoline saves and restores as callee-saved.
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s20, r0
vmov s21, r0
bx lr
.size abi_test_clobber_d10,.-abi_test_clobber_d10
.type abi_test_clobber_d11, %function
.globl abi_test_clobber_d11
.hidden abi_test_clobber_d11
.align 4
abi_test_clobber_d11:
@ Zeroes d11 (halves s22, s23) without restoring it; d11 is in the d8-d15
@ set that abi_test_trampoline saves and restores as callee-saved.
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s22, r0
vmov s23, r0
bx lr
.size abi_test_clobber_d11,.-abi_test_clobber_d11
.type abi_test_clobber_d12, %function
.globl abi_test_clobber_d12
.hidden abi_test_clobber_d12
.align 4
abi_test_clobber_d12:
@ Zeroes d12 (halves s24, s25) without restoring it; d12 is in the d8-d15
@ set that abi_test_trampoline saves and restores as callee-saved.
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s24, r0
vmov s25, r0
bx lr
.size abi_test_clobber_d12,.-abi_test_clobber_d12
.type abi_test_clobber_d13, %function
.globl abi_test_clobber_d13
.hidden abi_test_clobber_d13
.align 4
abi_test_clobber_d13:
@ Zeroes d13 (halves s26, s27) without restoring it; d13 is in the d8-d15
@ set that abi_test_trampoline saves and restores as callee-saved.
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s26, r0
vmov s27, r0
bx lr
.size abi_test_clobber_d13,.-abi_test_clobber_d13
.type abi_test_clobber_d14, %function
.globl abi_test_clobber_d14
.hidden abi_test_clobber_d14
.align 4
abi_test_clobber_d14:
@ Zeroes d14 (halves s28, s29) without restoring it; d14 is in the d8-d15
@ set that abi_test_trampoline saves and restores as callee-saved.
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s28, r0
vmov s29, r0
bx lr
.size abi_test_clobber_d14,.-abi_test_clobber_d14
.type abi_test_clobber_d15, %function
.globl abi_test_clobber_d15
.hidden abi_test_clobber_d15
.align 4
abi_test_clobber_d15:
@ Zeroes d15 (halves s30, s31) without restoring it; d15 is in the d8-d15
@ set that abi_test_trampoline saves and restores as callee-saved.
@ Side effect: r0 is used as the zero source and is left holding 0.
mov r0, #0
vmov s30, r0
vmov s31, r0
bx lr
.size abi_test_clobber_d15,.-abi_test_clobber_d15
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,587 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__)
.machine "any"
.abiversion 2
.text
// gcm_init_p8: GHASH table initialisation for POWER8 (name-based summary).
// NOTE(review): the .long words in this function are pre-encoded
// instructions emitted by the generator; by their opcode fields they appear
// to be VSX loads/stores (0x7D..2699 / 0x7D..1F99 patterns) and vpmsumd
// carry-less multiplies (0x1.....C8 patterns) - confirm against the Power
// ISA before relying on that.
// r8/r9/r10 are stepped from 0x10/0x20/0x30 up to 0xa0/0xb0/0xc0 and are
// presumably the byte offsets used by the encoded stores.
.globl gcm_init_p8
.type gcm_init_p8,@function
.align 5
gcm_init_p8:
.localentry gcm_init_p8,0
li 0,-4096
li 8,0x10
li 12,-1
li 9,0x20
or 0,0,0
li 10,0x30
.long 0x7D202699
// Build the constants held in v8 (with v4 = 0, v5 = splat(1), v7 = splat(7)).
vspltisb 8,-16
vspltisb 5,1
vaddubm 8,8,8
vxor 4,4,4
vor 8,8,5
vsldoi 8,8,4,15
vsldoi 6,4,5,1
vaddubm 8,8,8
vspltisb 7,7
vor 8,8,6
vspltb 6,9,0
vsl 9,9,5
vsrab 6,6,7
vand 6,6,8
vxor 3,9,6
vsldoi 9,3,3,8
vsldoi 8,4,8,8
vsldoi 11,4,9,8
vsldoi 10,9,4,8
.long 0x7D001F99
.long 0x7D681F99
li 8,0x40
.long 0x7D291F99
li 9,0x50
.long 0x7D4A1F99
li 10,0x60
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 16,0,6
vsldoi 17,16,16,8
vsldoi 19,4,17,8
vsldoi 18,17,4,8
.long 0x7E681F99
li 8,0x70
.long 0x7E291F99
li 9,0x80
.long 0x7E4A1F99
li 10,0x90
.long 0x10039CC8
.long 0x11B09CC8
.long 0x10238CC8
.long 0x11D08CC8
.long 0x104394C8
.long 0x11F094C8
.long 0x10E044C8
.long 0x114D44C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vsldoi 11,14,4,8
vsldoi 9,4,14,8
vxor 0,0,5
vxor 2,2,6
vxor 13,13,11
vxor 15,15,9
vsldoi 0,0,0,8
vsldoi 13,13,13,8
vxor 0,0,7
vxor 13,13,10
vsldoi 6,0,0,8
vsldoi 9,13,13,8
.long 0x100044C8
.long 0x11AD44C8
vxor 6,6,2
vxor 9,9,15
vxor 0,0,6
vxor 13,13,9
vsldoi 9,0,0,8
vsldoi 17,13,13,8
vsldoi 11,4,9,8
vsldoi 10,9,4,8
vsldoi 19,4,17,8
vsldoi 18,17,4,8
.long 0x7D681F99
li 8,0xa0
.long 0x7D291F99
li 9,0xb0
.long 0x7D4A1F99
li 10,0xc0
.long 0x7E681F99
.long 0x7E291F99
.long 0x7E4A1F99
or 12,12,12
blr
// Trailing words after blr are data, not code (presumably a traceback
// table - TODO confirm).
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size gcm_init_p8,.-gcm_init_p8
// gcm_gmult_p8: single-block GHASH multiply for POWER8 (name-based summary).
// NOTE(review): the .long words are pre-encoded instructions from the
// generator (apparently VSX loads/stores and vpmsumd) - confirm against the
// Power ISA. The function is a leaf: it allocates no stack frame.
.globl gcm_gmult_p8
.type gcm_gmult_p8,@function
.align 5
gcm_gmult_p8:
.localentry gcm_gmult_p8,0
lis 0,0xfff8
li 8,0x10
li 12,-1
li 9,0x20
or 0,0,0
li 10,0x30
.long 0x7C601E99
.long 0x7D682699
// v12 becomes a permute control: lvsl result XORed with splat(7).
// The same control is applied to v3 on input and to v0 on output.
lvsl 12,0,0
.long 0x7D292699
vspltisb 5,0x07
.long 0x7D4A2699
vxor 12,12,5
.long 0x7D002699
vperm 3,3,3,12
vxor 4,4,4
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 0,0,6
vperm 0,0,0,12
.long 0x7C001F99
or 12,12,12
blr
// Trailing words after blr are data, not code (presumably a traceback
// table - TODO confirm).
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size gcm_gmult_p8,.-gcm_gmult_p8
// gcm_ghash_p8: multi-block GHASH for POWER8 (name-based summary).
// r5 is advanced as the input pointer and r6 is counted down as the
// remaining length. Inputs of 64 bytes or more (cmpldi 6,64 / bge) take the
// 4x path at .Lgcm_ghash_p8_4x; shorter inputs use the 2x loop and the
// .Lshort / .Leven tails.
// NOTE(review): the .long words are pre-encoded instructions from the
// generator (apparently VSX loads/stores and vpmsumd) - confirm against the
// Power ISA.
.globl gcm_ghash_p8
.type gcm_ghash_p8,@function
.align 5
gcm_ghash_p8:
.localentry gcm_ghash_p8,0
li 0,-4096
li 8,0x10
li 12,-1
li 9,0x20
or 0,0,0
li 10,0x30
.long 0x7C001E99
.long 0x7D682699
li 8,0x40
lvsl 12,0,0
.long 0x7D292699
li 9,0x50
vspltisb 5,0x07
.long 0x7D4A2699
li 10,0x60
vxor 12,12,5
.long 0x7D002699
vperm 0,0,0,12
vxor 4,4,4
cmpldi 6,64
bge .Lgcm_ghash_p8_4x
.long 0x7C602E99
addi 5,5,16
subic. 6,6,16
vperm 3,3,3,12
vxor 3,3,0
beq .Lshort
.long 0x7E682699
li 8,16
.long 0x7E292699
add 9,5,6
.long 0x7E4A2699
.align 5
// Two-blocks-per-iteration loop; r9 holds the end-of-input address and the
// loop repeats while r9 > r5.
.Loop_2x:
.long 0x7E002E99
vperm 16,16,16,12
subic 6,6,32
.long 0x10039CC8
.long 0x11B05CC8
subfe 0,0,0
.long 0x10238CC8
.long 0x11D04CC8
and 0,0,6
.long 0x104394C8
.long 0x11F054C8
add 5,5,0
vxor 0,0,13
vxor 1,1,14
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 2,2,15
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
.long 0x7C682E99
addi 5,5,32
vsldoi 6,0,0,8
.long 0x100044C8
vperm 3,3,3,12
vxor 6,6,2
vxor 3,3,6
vxor 3,3,0
cmpld 9,5
bgt .Loop_2x
cmplwi 6,0
bne .Leven
.Lshort:
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
.Leven:
vxor 0,0,6
vperm 0,0,0,12
.long 0x7C001F99
or 12,12,12
blr
// Trailing words after blr are data, not code (presumably a traceback
// table - TODO confirm).
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.align 5
// 4x path: allocates a 256-byte frame (stdu 1,-256(1)), saves v20-v31 into
// it through the alternating r10/r11 index registers, and stores r12 at
// offset 252. The matching restores and addi 1,1,256 are at .Ldone_4x.
.gcm_ghash_p8_4x:
.Lgcm_ghash_p8_4x:
stdu 1,-256(1)
li 10,63
li 11,79
stvx 20,10,1
addi 10,10,32
stvx 21,11,1
addi 11,11,32
stvx 22,10,1
addi 10,10,32
stvx 23,11,1
addi 11,11,32
stvx 24,10,1
addi 10,10,32
stvx 25,11,1
addi 11,11,32
stvx 26,10,1
addi 10,10,32
stvx 27,11,1
addi 11,11,32
stvx 28,10,1
addi 10,10,32
stvx 29,11,1
addi 11,11,32
stvx 30,10,1
li 10,0x60
stvx 31,11,1
li 0,-1
stw 12,252(1)
or 0,0,0
lvsl 5,0,8
li 8,0x70
.long 0x7E292699
li 9,0x80
vspltisb 6,8
li 10,0x90
.long 0x7EE82699
li 8,0xa0
.long 0x7F092699
li 9,0xb0
.long 0x7F2A2699
li 10,0xc0
.long 0x7FA82699
li 8,0x10
.long 0x7FC92699
li 9,0x20
.long 0x7FEA2699
li 10,0x30
vsldoi 7,4,6,8
vaddubm 18,5,7
vaddubm 19,6,18
// From here r6 counts 16-byte blocks (length shifted right by 4); 8 is
// subtracted up front and 4 per loop iteration, so the sign of r6 drives
// the blt/bge loop control below.
srdi 6,6,4
.long 0x7C602E99
.long 0x7E082E99
subic. 6,6,8
.long 0x7EC92E99
.long 0x7F8A2E99
addi 5,5,0x40
vperm 3,3,3,12
vperm 16,16,16,12
vperm 22,22,22,12
vperm 28,28,28,12
vxor 2,3,0
.long 0x11B0BCC8
.long 0x11D0C4C8
.long 0x11F0CCC8
vperm 11,17,9,18
vperm 5,22,28,19
vperm 10,17,9,19
vperm 6,22,28,18
.long 0x12B68CC8
.long 0x12855CC8
.long 0x137C4CC8
.long 0x134654C8
vxor 21,21,14
vxor 20,20,13
vxor 27,27,21
vxor 26,26,15
blt .Ltail_4x
.Loop_4x:
.long 0x7C602E99
.long 0x7E082E99
subic. 6,6,4
.long 0x7EC92E99
.long 0x7F8A2E99
addi 5,5,0x40
vperm 16,16,16,12
vperm 22,22,22,12
vperm 28,28,28,12
vperm 3,3,3,12
.long 0x1002ECC8
.long 0x1022F4C8
.long 0x1042FCC8
.long 0x11B0BCC8
.long 0x11D0C4C8
.long 0x11F0CCC8
vxor 0,0,20
vxor 1,1,27
vxor 2,2,26
vperm 5,22,28,19
vperm 6,22,28,18
.long 0x10E044C8
.long 0x12855CC8
.long 0x134654C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x12B68CC8
.long 0x137C4CC8
.long 0x100044C8
vxor 20,20,13
vxor 26,26,15
vxor 2,2,3
vxor 21,21,14
vxor 2,2,6
vxor 27,27,21
vxor 2,2,0
bge .Loop_4x
// Tail: fold the pending products, then add 4 back to r6. Zero means done;
// otherwise 1, 2 or 3 blocks remain and are set up by .Lone / .Ltwo /
// .Lthree, each of which sets r6 to -4 and re-enters .Ltail_4x once.
.Ltail_4x:
.long 0x1002ECC8
.long 0x1022F4C8
.long 0x1042FCC8
vxor 0,0,20
vxor 1,1,27
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 2,2,26
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 0,0,6
addic. 6,6,4
beq .Ldone_4x
.long 0x7C602E99
cmpldi 6,2
li 6,-4
blt .Lone
.long 0x7E082E99
beq .Ltwo
.Lthree:
.long 0x7EC92E99
vperm 3,3,3,12
vperm 16,16,16,12
vperm 22,22,22,12
vxor 2,3,0
vor 29,23,23
vor 30, 24, 24
vor 31,25,25
vperm 5,16,22,19
vperm 6,16,22,18
.long 0x12B08CC8
.long 0x13764CC8
.long 0x12855CC8
.long 0x134654C8
vxor 27,27,21
b .Ltail_4x
.align 4
.Ltwo:
vperm 3,3,3,12
vperm 16,16,16,12
vxor 2,3,0
vperm 5,4,16,19
vperm 6,4,16,18
vsldoi 29,4,17,8
vor 30, 17, 17
vsldoi 31,17,4,8
.long 0x12855CC8
.long 0x13704CC8
.long 0x134654C8
b .Ltail_4x
.align 4
.Lone:
vperm 3,3,3,12
vsldoi 29,4,9,8
vor 30, 9, 9
vsldoi 31,9,4,8
vxor 2,3,0
vxor 20,20,20
vxor 27,27,27
vxor 26,26,26
b .Ltail_4x
// Epilogue of the 4x path: write the result, reload v20-v31 from the same
// frame slots used in the prologue, and pop the 256-byte frame.
.Ldone_4x:
vperm 0,0,0,12
.long 0x7C001F99
li 10,63
li 11,79
or 12,12,12
lvx 20,10,1
addi 10,10,32
lvx 21,11,1
addi 11,11,32
lvx 22,10,1
addi 10,10,32
lvx 23,11,1
addi 11,11,32
lvx 24,10,1
addi 10,10,32
lvx 25,11,1
addi 11,11,32
lvx 26,10,1
addi 10,10,32
lvx 27,11,1
addi 11,11,32
lvx 28,10,1
addi 10,10,32
lvx 29,11,1
addi 11,11,32
lvx 30,10,1
lvx 31,11,1
addi 1,1,256
blr
// Trailing words after blr are data, not code (presumably a traceback
// table - TODO confirm).
.long 0
.byte 0,12,0x04,0,0x80,0,4,0
.long 0
.size gcm_ghash_p8,.-gcm_ghash_p8
.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM && __powerpc64__
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,975 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
// ChaCha20_ctr32 (i386, cdecl): XORs a ChaCha20 keystream into a buffer.
// Stack arguments after the four register pushes below:
//   20(%esp) output pointer   (stores go through it)
//   24(%esp) input pointer    (XOR source)
//   28(%esp) length in bytes  (zero returns immediately)
//   32(%esp) pointer to 8 key dwords
//   36(%esp) pointer to 4 counter/nonce dwords
// NOTE(review): the argument names are inferred from how each slot is used;
// confirm against the C prototype.
// If bit 24 of OPENSSL_ia32cap_P[0] and bit 9 of OPENSSL_ia32cap_P[1] are
// both set, control jumps to .Lssse3_shortcut with %eax still holding the
// address of .Lpic_point; otherwise the scalar path below is used.
.globl ChaCha20_ctr32
.hidden ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
cmpl 28(%esp),%eax
je .L000no_data
call .Lpic_point
.Lpic_point:
popl %eax
leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
testl $16777216,(%ebp)
jz .L001x86
testl $512,4(%ebp)
jz .L001x86
jmp .Lssse3_shortcut
// Scalar path. A 132-byte frame is allocated:
//   0..60(%esp)    working state, 16 dwords
//   80..108(%esp)  copy of the 8 key dwords
//   112..124(%esp) copy of the 4 counter/nonce dwords
//   128(%esp)      saved round counter
// After the subl, the caller arguments out/in/len are at 152/156/160(%esp).
.L001x86:
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
// The block counter is stored minus one because .L002entry adds one before
// every block, including the first.
subl $1,%eax
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp .L002entry
.align 16
// Per-block re-entry: write back the advanced in/out pointers and the
// remaining length.
.L003outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
.L002entry:
// Load the four ChaCha constants and the key/counter copies into the
// working state and registers, then run 10 iterations of .L004loop.
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
jmp .L004loop
.align 16
// One iteration performs eight add/xor/rotate groups using rotate counts
// 16, 12, 8 and 7. The iteration counter is parked at 128(%esp) because
// %ebx is needed as a working register.
.L004loop:
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz .L004loop
// Rounds finished: add the original input words back in. With at least 64
// bytes remaining the block is XORed straight from input to output;
// otherwise the keystream is materialised on the stack at .L005tail.
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb .L005tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz .L003outer_loop
jmp .L006done
// Partial final block: store all 16 keystream dwords at 0..60(%esp), then
// XOR them into the output one byte at a time for the %ebx bytes left.
.L005tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
.L007tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz .L007tail_loop
.L006done:
addl $132,%esp
.L000no_data:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
// ChaCha20_ssse3 (i386, cdecl): SSSE3 variant using the same five stack
// arguments as ChaCha20_ctr32 (out, in, len, key, counter at 20..36(%esp)
// after the four pushes).
// NOTE(review): the code after .Lssse3_shortcut computes the address of
// .Lssse3_data as an offset from .Lpic_point using %eax. %eax is set that
// way only when arriving via the jmp in ChaCha20_ctr32; nothing on the
// direct-entry path here initialises it. Confirm there are no direct
// callers of this symbol.
// Stack: %esp is lowered by 524 and aligned down to 64 bytes; the original
// %esp is kept at 512(%esp) and restored at .L011done. 516(%esp) and
// 520(%esp) hold the key and counter pointers for the 4-block path.
// Inputs of 256 bytes or more are processed 256 bytes per outer iteration;
// the remainder (and short inputs) use the 64-byte path at .L0081x.
.globl ChaCha20_ssse3
.hidden ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lssse3_shortcut:
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
cmpl $256,%ecx
jb .L0081x
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
// Broadcast each input dword across a full vector (pshufd $0/$85/$170/$255)
// and store the 16 resulting vectors around %ebp (offsets -128..112). The
// counter vector gets the table entry at 48(%eax) added and the entry at
// 64(%eax) subtracted; the outer loop adds 64(%eax) back on every pass.
movdqu (%edx),%xmm7
pshufd $0,%xmm3,%xmm0
pshufd $85,%xmm3,%xmm1
pshufd $170,%xmm3,%xmm2
pshufd $255,%xmm3,%xmm3
paddd 48(%eax),%xmm0
pshufd $0,%xmm7,%xmm4
pshufd $85,%xmm7,%xmm5
psubd 64(%eax),%xmm0
pshufd $170,%xmm7,%xmm6
pshufd $255,%xmm7,%xmm7
movdqa %xmm0,64(%ebp)
movdqa %xmm1,80(%ebp)
movdqa %xmm2,96(%ebp)
movdqa %xmm3,112(%ebp)
movdqu 16(%edx),%xmm3
movdqa %xmm4,-64(%ebp)
movdqa %xmm5,-48(%ebp)
movdqa %xmm6,-32(%ebp)
movdqa %xmm7,-16(%ebp)
movdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
pshufd $0,%xmm3,%xmm0
pshufd $85,%xmm3,%xmm1
pshufd $170,%xmm3,%xmm2
pshufd $255,%xmm3,%xmm3
pshufd $0,%xmm7,%xmm4
pshufd $85,%xmm7,%xmm5
pshufd $170,%xmm7,%xmm6
pshufd $255,%xmm7,%xmm7
movdqa %xmm0,(%ebp)
movdqa %xmm1,16(%ebp)
movdqa %xmm2,32(%ebp)
movdqa %xmm3,48(%ebp)
movdqa %xmm4,-128(%ebp)
movdqa %xmm5,-112(%ebp)
movdqa %xmm6,-96(%ebp)
movdqa %xmm7,-80(%ebp)
// Bias the in/out pointers by 128 so the -128/-64/0/64 displacements used
// when reading input and writing output address bytes 0/64/128/192.
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp .L009outer_loop
.align 16
// Outer loop: copy the saved input vectors from the %ebp area into the
// working area at %ebx, advance the counter vector, and run 10 iterations
// of .L010loop.
.L009outer_loop:
movdqa -112(%ebp),%xmm1
movdqa -96(%ebp),%xmm2
movdqa -80(%ebp),%xmm3
movdqa -48(%ebp),%xmm5
movdqa -32(%ebp),%xmm6
movdqa -16(%ebp),%xmm7
movdqa %xmm1,-112(%ebx)
movdqa %xmm2,-96(%ebx)
movdqa %xmm3,-80(%ebx)
movdqa %xmm5,-48(%ebx)
movdqa %xmm6,-32(%ebx)
movdqa %xmm7,-16(%ebx)
movdqa 32(%ebp),%xmm2
movdqa 48(%ebp),%xmm3
movdqa 64(%ebp),%xmm4
movdqa 80(%ebp),%xmm5
movdqa 96(%ebp),%xmm6
movdqa 112(%ebp),%xmm7
paddd 64(%eax),%xmm4
movdqa %xmm2,32(%ebx)
movdqa %xmm3,48(%ebx)
movdqa %xmm4,64(%ebx)
movdqa %xmm5,80(%ebx)
movdqa %xmm6,96(%ebx)
movdqa %xmm7,112(%ebx)
movdqa %xmm4,64(%ebp)
movdqa -128(%ebp),%xmm0
movdqa %xmm4,%xmm6
movdqa -64(%ebp),%xmm3
movdqa (%ebp),%xmm4
movdqa 16(%ebp),%xmm5
movl $10,%edx
nop
.align 16
// Round loop. Rotations by 16 and 8 are done with pshufb using the masks at
// (%eax) and 16(%eax); rotations by 12 and 7 use pslld/psrld/por pairs.
.L010loop:
paddd %xmm3,%xmm0
movdqa %xmm3,%xmm2
pxor %xmm0,%xmm6
pshufb (%eax),%xmm6
paddd %xmm6,%xmm4
pxor %xmm4,%xmm2
movdqa -48(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -112(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 80(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-128(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,64(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
movdqa %xmm4,(%ebx)
pshufb (%eax),%xmm7
movdqa %xmm2,-64(%ebx)
paddd %xmm7,%xmm5
movdqa 32(%ebx),%xmm4
pxor %xmm5,%xmm3
movdqa -32(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -96(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 96(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-112(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,80(%ebx)
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
movdqa %xmm5,16(%ebx)
pshufb (%eax),%xmm6
movdqa %xmm3,-48(%ebx)
paddd %xmm6,%xmm4
movdqa 48(%ebx),%xmm5
pxor %xmm4,%xmm2
movdqa -16(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -80(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 112(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-96(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,96(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
pshufb (%eax),%xmm7
movdqa %xmm2,-32(%ebx)
paddd %xmm7,%xmm5
pxor %xmm5,%xmm3
movdqa -48(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -128(%ebx),%xmm0
paddd %xmm3,%xmm1
pxor %xmm1,%xmm7
movdqa %xmm1,-80(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,%xmm6
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
pshufb (%eax),%xmm6
movdqa %xmm3,-16(%ebx)
paddd %xmm6,%xmm4
pxor %xmm4,%xmm2
movdqa -32(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -112(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 64(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-128(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,112(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
movdqa %xmm4,32(%ebx)
pshufb (%eax),%xmm7
movdqa %xmm2,-48(%ebx)
paddd %xmm7,%xmm5
movdqa (%ebx),%xmm4
pxor %xmm5,%xmm3
movdqa -16(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -96(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 80(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-112(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,64(%ebx)
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
movdqa %xmm5,48(%ebx)
pshufb (%eax),%xmm6
movdqa %xmm3,-32(%ebx)
paddd %xmm6,%xmm4
movdqa 16(%ebx),%xmm5
pxor %xmm4,%xmm2
movdqa -64(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -80(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 96(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-96(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,80(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
pshufb (%eax),%xmm7
movdqa %xmm2,-16(%ebx)
paddd %xmm7,%xmm5
pxor %xmm5,%xmm3
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -128(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 64(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-80(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,96(%ebx)
pxor %xmm5,%xmm3
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
por %xmm1,%xmm3
decl %edx
jnz .L010loop
// Rounds finished. Four groups of four state vectors are each: added to the
// saved inputs, transposed with punpck*, XORed with 16 input bytes at
// displacements -128/-64/0/64, and written to the output. The pointers move
// by 16 after each of the first three groups and by 208 after the fourth,
// 256 bytes in total.
movdqa %xmm3,-64(%ebx)
movdqa %xmm4,(%ebx)
movdqa %xmm5,16(%ebx)
movdqa %xmm6,64(%ebx)
movdqa %xmm7,96(%ebx)
movdqa -112(%ebx),%xmm1
movdqa -96(%ebx),%xmm2
movdqa -80(%ebx),%xmm3
paddd -128(%ebp),%xmm0
paddd -112(%ebp),%xmm1
paddd -96(%ebp),%xmm2
paddd -80(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa -64(%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa -48(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa -32(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa -16(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd -64(%ebp),%xmm0
paddd -48(%ebp),%xmm1
paddd -32(%ebp),%xmm2
paddd -16(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa (%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa 16(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa 32(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa 48(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd (%ebp),%xmm0
paddd 16(%ebp),%xmm1
paddd 32(%ebp),%xmm2
paddd 48(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa 64(%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa 80(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa 96(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa 112(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd 64(%ebp),%xmm0
paddd 80(%ebp),%xmm1
paddd 96(%ebp),%xmm2
paddd 112(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 208(%esi),%esi
pxor %xmm0,%xmm4
pxor %xmm1,%xmm5
pxor %xmm2,%xmm6
pxor %xmm3,%xmm7
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc .L009outer_loop
// Fewer than 256 bytes remain. Undo the length and pointer biases, rebuild
// the counter vector from the saved lane-0 counter plus 96(%eax) and the
// remaining caller counter words (masked by 112(%eax)), then fall into the
// single-block path.
addl $256,%ecx
jz .L011done
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
movd 64(%ebp),%xmm2
movdqu (%ebx),%xmm3
paddd 96(%eax),%xmm2
pand 112(%eax),%xmm3
por %xmm2,%xmm3
// Single-block (64-byte) path: state rows in xmm0..xmm3, with the input
// rows kept at 0/16/32/48(%esp). Note that the movl to 48(%esp) below is
// overwritten by the movdqa to the same address four lines later.
.L0081x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L012loop1x
.align 16
// Next 64-byte block: reload the input rows and add the increment at
// 80(%eax) to the counter row.
.L013outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp .L012loop1x
.align 16
// The .byte 102,15,56,0,222 and .byte 102,15,56,0,223 sequences are
// hand-encoded instructions; NOTE(review): they appear to be pshufb with
// %xmm6 / %xmm7 (the two rotate masks) applied to %xmm3 - confirm by
// disassembly.
.L012loop1x:
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz .L012loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb .L014tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L013outer1x
jmp .L011done
// Partial final block: spill the keystream to the stack and XOR it into the
// output one byte at a time for the %ecx bytes that remain.
.L014tail:
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L015tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L015tail_loop
.L011done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
// Constant table addressed relative to %eax by ChaCha20_ssse3.
// Byte offsets from .Lssse3_data:
//     0  pshufb mask that rotates each dword left by 16 bits
//    16  pshufb mask that rotates each dword left by 8 bits
//    32  the four ChaCha constant words (ASCII "expand 32-byte k")
//    48  per-lane counter offsets 0,1,2,3 for the 4-block path
//    64  per-iteration counter increment 4,4,4,4 for the 4-block path
//    80  counter increment 1,0,0,0 for the single-block path
//    96  increment 4,0,0,0 used when leaving the 4-block path
//   112  mask 0,-1,-1,-1 that clears the first dword of a vector
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
// NUL-terminated ASCII identification string:
// "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,997 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
# bn_mul_add_words
# Multiply a word array by one 32-bit word and add the product into another
# word array, propagating the carry.
#   arg1 at 4(%esp):  array that is read, accumulated into and written back
#   arg2 at 8(%esp):  array that is multiplied
#   arg3 at 12(%esp): number of 32-bit words
#   arg4 at 16(%esp): 32-bit multiplier
# Per word: arg1[i] = low 32 bits of (arg2[i]*arg4 + arg1[i] + carry); the
# high 32 bits become the next carry. The last carry is returned in %eax.
# Two implementations are selected at run time by bit 26 of the first word
# of OPENSSL_ia32cap_P (the branch label calls the fallback non_sse2).
.globl bn_mul_add_words
.hidden bn_mul_add_words
.type bn_mul_add_words,@function
.align 16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
# Position-independent addressing: call/pop yields the run-time address of
# .L000PIC_me_up, and OPENSSL_ia32cap_P is reached as an offset from it.
call .L000PIC_me_up
.L000PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L001maw_non_sse2
# MMX-register path: %eax = arg1, %edx = arg2, %ecx = word count,
# %mm0 = multiplier, %mm1 = 64-bit running carry (starts at zero).
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
movd 16(%esp),%mm0
pxor %mm1,%mm1
jmp .L002maw_sse2_entry
.align 16
.L003maw_sse2_unrolled:
# Eight words per iteration. The eight pmuludq products are interleaved
# with loads of the destination words; each result word is produced by
# adding into %mm1, storing its low dword and shifting %mm1 right by 32 so
# that only the carry remains.
movd (%eax),%mm3
paddq %mm3,%mm1
movd (%edx),%mm2
pmuludq %mm0,%mm2
movd 4(%edx),%mm4
pmuludq %mm0,%mm4
movd 8(%edx),%mm6
pmuludq %mm0,%mm6
movd 12(%edx),%mm7
pmuludq %mm0,%mm7
paddq %mm2,%mm1
movd 4(%eax),%mm3
paddq %mm4,%mm3
movd 8(%eax),%mm5
paddq %mm6,%mm5
movd 12(%eax),%mm4
paddq %mm4,%mm7
movd %mm1,(%eax)
movd 16(%edx),%mm2
pmuludq %mm0,%mm2
psrlq $32,%mm1
movd 20(%edx),%mm4
pmuludq %mm0,%mm4
paddq %mm3,%mm1
movd 24(%edx),%mm6
pmuludq %mm0,%mm6
movd %mm1,4(%eax)
psrlq $32,%mm1
movd 28(%edx),%mm3
addl $32,%edx
pmuludq %mm0,%mm3
paddq %mm5,%mm1
movd 16(%eax),%mm5
paddq %mm5,%mm2
movd %mm1,8(%eax)
psrlq $32,%mm1
paddq %mm7,%mm1
movd 20(%eax),%mm5
paddq %mm5,%mm4
movd %mm1,12(%eax)
psrlq $32,%mm1
paddq %mm2,%mm1
movd 24(%eax),%mm5
paddq %mm5,%mm6
movd %mm1,16(%eax)
psrlq $32,%mm1
paddq %mm4,%mm1
movd 28(%eax),%mm5
paddq %mm5,%mm3
movd %mm1,20(%eax)
psrlq $32,%mm1
paddq %mm6,%mm1
movd %mm1,24(%eax)
psrlq $32,%mm1
paddq %mm3,%mm1
movd %mm1,28(%eax)
leal 32(%eax),%eax
psrlq $32,%mm1
subl $8,%ecx
jz .L004maw_sse2_exit
.L002maw_sse2_entry:
# Mask 4294967288 is 0xFFFFFFF8: take the unrolled body while at least
# eight words remain.
testl $4294967288,%ecx
jnz .L003maw_sse2_unrolled
.align 4
.L005maw_sse2_loop:
# One word per iteration. The count is decremented and tested only after a
# word has been processed, and control also falls in here when the count is
# zero on entry, so this path does not special-case a zero count.
movd (%edx),%mm2
movd (%eax),%mm3
pmuludq %mm0,%mm2
leal 4(%edx),%edx
paddq %mm3,%mm1
paddq %mm2,%mm1
movd %mm1,(%eax)
subl $1,%ecx
# The psrlq and leal below leave EFLAGS alone, so jnz still sees the subl.
psrlq $32,%mm1
leal 4(%eax),%eax
jnz .L005maw_sse2_loop
.L004maw_sse2_exit:
# Return the remaining carry and leave MMX state (emms) before returning.
movd %mm1,%eax
emms
ret
.align 16
.L001maw_non_sse2:
# Integer fallback: %edi = arg1, %ebx = arg2, %ebp = multiplier,
# %esi = carry, %ecx = count rounded down to a multiple of eight.
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %esi,%esi
movl 20(%esp),%edi
movl 28(%esp),%ecx
movl 24(%esp),%ebx
andl $4294967288,%ecx
movl 32(%esp),%ebp
# The movl and pushl keep the flags of the andl, which the jz consumes.
# The pushed value is only discarded again at .L009maw_end.
pushl %ecx
jz .L006maw_finish
.align 16
.L007maw_loop:
# Word 0: %edx:%eax = arg2[0]*multiplier, plus carry, plus arg1[0]; the low
# half is stored and the high half becomes the new carry. The next seven
# groups repeat this at offsets 4..28.
movl (%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl (%edi),%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
movl 4(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 4(%edi),%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
movl 8(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 8(%edi),%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
movl 12(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 12(%edi),%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
movl 16(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 16(%edi),%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
movl 20(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 20(%edi),%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
movl 24(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 24(%edi),%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
movl 28(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 28(%edi),%eax
adcl $0,%edx
movl %eax,28(%edi)
movl %edx,%esi
subl $8,%ecx
# The two leal instructions advance the pointers without touching the
# flags set by the subl.
leal 32(%ebx),%ebx
leal 32(%edi),%edi
jnz .L007maw_loop
.L006maw_finish:
# After the extra push above, 32(%esp) is arg3 (the word count). Handle the
# remaining count modulo eight, one word at a time.
movl 32(%esp),%ecx
andl $7,%ecx
jnz .L008maw_finish2
jmp .L009maw_end
.L008maw_finish2:
movl (%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl (%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,(%edi)
movl %edx,%esi
jz .L009maw_end
movl 4(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 4(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,4(%edi)
movl %edx,%esi
jz .L009maw_end
movl 8(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 8(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,8(%edi)
movl %edx,%esi
jz .L009maw_end
movl 12(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 12(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,12(%edi)
movl %edx,%esi
jz .L009maw_end
movl 16(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 16(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,16(%edi)
movl %edx,%esi
jz .L009maw_end
movl 20(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 20(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,20(%edi)
movl %edx,%esi
jz .L009maw_end
# Seventh and last possible tail word: no further count test is needed.
movl 24(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 24(%edi),%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
.L009maw_end:
# Return the carry; the first pop discards the count pushed earlier.
movl %esi,%eax
popl %ecx
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_add_words,.-.L_bn_mul_add_words_begin
# bn_mul_words
# Multiply a word array by one 32-bit word, writing the product words to a
# destination array and propagating the carry.
#   arg1 at 4(%esp):  destination array (written only)
#   arg2 at 8(%esp):  source array
#   arg3 at 12(%esp): number of 32-bit words
#   arg4 at 16(%esp): 32-bit multiplier
# Per word: arg1[i] = low 32 bits of (arg2[i]*arg4 + carry); the high 32
# bits become the next carry. The last carry is returned in %eax.
# The implementation is chosen at run time by bit 26 of the first word of
# OPENSSL_ia32cap_P (the branch label calls the fallback non_sse2).
.globl bn_mul_words
.hidden bn_mul_words
.type bn_mul_words,@function
.align 16
bn_mul_words:
.L_bn_mul_words_begin:
# Position-independent lookup of OPENSSL_ia32cap_P via call/pop.
call .L010PIC_me_up
.L010PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L011mw_non_sse2
# MMX-register path: %eax = arg1, %edx = arg2, %ecx = count,
# %mm0 = multiplier, %mm1 = 64-bit running carry.
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
movd 16(%esp),%mm0
pxor %mm1,%mm1
.align 16
.L012mw_sse2_loop:
# One word per iteration. The count is tested only after a word has been
# processed, so a zero count is not special-cased on this path.
movd (%edx),%mm2
pmuludq %mm0,%mm2
leal 4(%edx),%edx
paddq %mm2,%mm1
movd %mm1,(%eax)
subl $1,%ecx
# The psrlq and leal below do not modify EFLAGS; jnz tests the subl result.
psrlq $32,%mm1
leal 4(%eax),%eax
jnz .L012mw_sse2_loop
movd %mm1,%eax
emms
ret
.align 16
.L011mw_non_sse2:
# Integer fallback: %edi = arg1, %ebx = arg2, %ecx = multiplier,
# %esi = carry, %ebp = count rounded down to a multiple of eight.
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %esi,%esi
movl 20(%esp),%edi
movl 24(%esp),%ebx
movl 28(%esp),%ebp
movl 32(%esp),%ecx
andl $4294967288,%ebp
jz .L013mw_finish
.L014mw_loop:
# Word 0: %edx:%eax = arg2[0]*multiplier + carry; store the low half, keep
# the high half as carry. The next seven groups repeat this at 4..28.
movl (%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
movl 4(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
movl 8(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
movl 12(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
movl 16(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
movl 20(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
movl 24(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
movl 28(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,28(%edi)
movl %edx,%esi
addl $32,%ebx
addl $32,%edi
subl $8,%ebp
jz .L013mw_finish
jmp .L014mw_loop
.L013mw_finish:
# Reload the original count and handle the remaining count modulo eight.
movl 28(%esp),%ebp
andl $7,%ebp
jnz .L015mw_finish2
jmp .L016mw_end
.L015mw_finish2:
movl (%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 4(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 8(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 12(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 16(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 20(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
# Seventh and last possible tail word: no further count test is needed.
movl 24(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
.L016mw_end:
# Return the final carry and restore the saved registers.
movl %esi,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_words,.-.L_bn_mul_words_begin
# bn_sqr_words
# Square every 32-bit word of a source array and store each 64-bit square
# as two consecutive words of the destination array.
#   arg1 at 4(%esp):  destination array, two words written per source word
#   arg2 at 8(%esp):  source array
#   arg3 at 12(%esp): number of source words
# Words are squared independently (no carry between them). Neither path
# sets %eax to a meaningful result before returning.
# The implementation is chosen at run time by bit 26 of the first word of
# OPENSSL_ia32cap_P (the branch label calls the fallback non_sse2).
.globl bn_sqr_words
.hidden bn_sqr_words
.type bn_sqr_words,@function
.align 16
bn_sqr_words:
.L_bn_sqr_words_begin:
# Position-independent lookup of OPENSSL_ia32cap_P via call/pop.
call .L017PIC_me_up
.L017PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L018sqr_non_sse2
# MMX-register path: %eax = arg1, %edx = arg2, %ecx = count.
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
.align 16
.L019sqr_sse2_loop:
# One source word per iteration; the count is tested only after a word has
# been processed, so a zero count is not special-cased on this path.
movd (%edx),%mm0
pmuludq %mm0,%mm0
leal 4(%edx),%edx
movq %mm0,(%eax)
subl $1,%ecx
# The leal keeps the flags of the subl for the jnz.
leal 8(%eax),%eax
jnz .L019sqr_sse2_loop
emms
ret
.align 16
.L018sqr_non_sse2:
# Integer fallback: %esi = arg1, %edi = arg2, %ebx = count rounded down to
# a multiple of eight.
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%ebx
andl $4294967288,%ebx
jz .L020sw_finish
.L021sw_loop:
# Eight source words per iteration: mull %eax squares %eax into %edx:%eax,
# which is stored as the low and high destination words.
movl (%edi),%eax
mull %eax
movl %eax,(%esi)
movl %edx,4(%esi)
movl 4(%edi),%eax
mull %eax
movl %eax,8(%esi)
movl %edx,12(%esi)
movl 8(%edi),%eax
mull %eax
movl %eax,16(%esi)
movl %edx,20(%esi)
movl 12(%edi),%eax
mull %eax
movl %eax,24(%esi)
movl %edx,28(%esi)
movl 16(%edi),%eax
mull %eax
movl %eax,32(%esi)
movl %edx,36(%esi)
movl 20(%edi),%eax
mull %eax
movl %eax,40(%esi)
movl %edx,44(%esi)
movl 24(%edi),%eax
mull %eax
movl %eax,48(%esi)
movl %edx,52(%esi)
movl 28(%edi),%eax
mull %eax
movl %eax,56(%esi)
movl %edx,60(%esi)
addl $32,%edi
addl $64,%esi
subl $8,%ebx
jnz .L021sw_loop
.L020sw_finish:
# Reload the original count and handle the remaining count modulo eight.
movl 28(%esp),%ebx
andl $7,%ebx
jz .L022sw_end
movl (%edi),%eax
mull %eax
movl %eax,(%esi)
decl %ebx
# The store below does not change the flags set by decl.
movl %edx,4(%esi)
jz .L022sw_end
movl 4(%edi),%eax
mull %eax
movl %eax,8(%esi)
decl %ebx
movl %edx,12(%esi)
jz .L022sw_end
movl 8(%edi),%eax
mull %eax
movl %eax,16(%esi)
decl %ebx
movl %edx,20(%esi)
jz .L022sw_end
movl 12(%edi),%eax
mull %eax
movl %eax,24(%esi)
decl %ebx
movl %edx,28(%esi)
jz .L022sw_end
movl 16(%edi),%eax
mull %eax
movl %eax,32(%esi)
decl %ebx
movl %edx,36(%esi)
jz .L022sw_end
movl 20(%edi),%eax
mull %eax
movl %eax,40(%esi)
decl %ebx
movl %edx,44(%esi)
jz .L022sw_end
# Seventh and last possible tail word: no further count test is needed.
movl 24(%edi),%eax
mull %eax
movl %eax,48(%esi)
movl %edx,52(%esi)
.L022sw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sqr_words,.-.L_bn_sqr_words_begin
# bn_div_words
# Divide a 64-bit value by a 32-bit value with a single divl.
#   arg1 at 4(%esp):  high 32 bits of the dividend (loaded into %edx)
#   arg2 at 8(%esp):  low 32 bits of the dividend (loaded into %eax)
#   arg3 at 12(%esp): 32-bit divisor (loaded into %ecx)
# Returns the quotient in %eax; divl also leaves the remainder in %edx.
# No checks are made here: divl raises a divide error when the divisor is
# zero or the quotient does not fit in 32 bits.
.globl bn_div_words
.hidden bn_div_words
.type bn_div_words,@function
.align 16
bn_div_words:
.L_bn_div_words_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
movl 12(%esp),%ecx
divl %ecx
ret
.size bn_div_words,.-.L_bn_div_words_begin
# bn_add_words
# Add two word arrays element by element with carry propagation.
#   arg1 at 4(%esp):  destination array
#   arg2 at 8(%esp):  first source array
#   arg3 at 12(%esp): second source array
#   arg4 at 16(%esp): number of 32-bit words
# Per word: arg1[i] = arg2[i] + arg3[i] + carry (mod 2^32). The final carry
# (0 or 1) is returned in %eax.
.globl bn_add_words
.hidden bn_add_words
.type bn_add_words,@function
.align 16
bn_add_words:
.L_bn_add_words_begin:
# Register use: %ebx = arg1, %esi = arg2, %edi = arg3, %eax = carry,
# %ebp = count rounded down to a multiple of eight.
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz .L023aw_finish
.L024aw_loop:
# Word 0. Carry handling: add the incoming carry to the first operand, then
# zero %eax with movl (which, unlike xorl, keeps CF) and capture CF with
# adcl; add the second operand and fold its carry-out into %eax as well.
# The next seven groups repeat this at offsets 4..28.
movl (%esi),%ecx
movl (%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl 4(%esi),%ecx
movl 4(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl 8(%esi),%ecx
movl 8(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl 12(%esi),%ecx
movl 12(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl 16(%esi),%ecx
movl 16(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl 20(%esi),%ecx
movl 20(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl 24(%esi),%ecx
movl 24(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl 28(%esi),%ecx
movl 28(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L024aw_loop
.L023aw_finish:
# Reload the original count and handle the remaining count modulo eight.
movl 32(%esp),%ebp
andl $7,%ebp
jz .L025aw_end
movl (%esi),%ecx
movl (%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
# The store below does not change the flags set by decl.
movl %ecx,(%ebx)
jz .L025aw_end
movl 4(%esi),%ecx
movl 4(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz .L025aw_end
movl 8(%esi),%ecx
movl 8(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz .L025aw_end
movl 12(%esi),%ecx
movl 12(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz .L025aw_end
movl 16(%esi),%ecx
movl 16(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz .L025aw_end
movl 20(%esi),%ecx
movl 20(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz .L025aw_end
# Seventh and last possible tail word: no further count test is needed.
movl 24(%esi),%ecx
movl 24(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
.L025aw_end:
# The carry is already in %eax; restore the saved registers.
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_add_words,.-.L_bn_add_words_begin
# bn_sub_words
# Subtract one word array from another element by element with borrow
# propagation.
#   arg1 at 4(%esp):  destination array
#   arg2 at 8(%esp):  minuend array
#   arg3 at 12(%esp): subtrahend array
#   arg4 at 16(%esp): number of 32-bit words
# Per word: arg1[i] = arg2[i] - arg3[i] - borrow (mod 2^32). The final
# borrow (0 or 1) is returned in %eax.
# The local labels keep the aw_ prefix also used by bn_add_words.
.globl bn_sub_words
.hidden bn_sub_words
.type bn_sub_words,@function
.align 16
bn_sub_words:
.L_bn_sub_words_begin:
# Register use: %ebx = arg1, %esi = arg2, %edi = arg3, %eax = borrow,
# %ebp = count rounded down to a multiple of eight.
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz .L026aw_finish
.L027aw_loop:
# Word 0. Borrow handling: subtract the incoming borrow, then zero %eax
# with movl (which, unlike xorl, keeps CF) and capture CF with adcl;
# subtract the second operand and fold its borrow-out into %eax as well.
# The next seven groups repeat this at offsets 4..28.
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl 28(%esi),%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L027aw_loop
.L026aw_finish:
# Reload the original count and handle the remaining count modulo eight.
movl 32(%esp),%ebp
andl $7,%ebp
jz .L028aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
# The store below does not change the flags set by decl.
movl %ecx,(%ebx)
jz .L028aw_end
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz .L028aw_end
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz .L028aw_end
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz .L028aw_end
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz .L028aw_end
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz .L028aw_end
# Seventh and last possible tail word: no further count test is needed.
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
.L028aw_end:
# The borrow is already in %eax; restore the saved registers.
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sub_words,.-.L_bn_sub_words_begin
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,294 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
# gcm_gmult_ssse3
# Transform the 16-byte block at arg1 in place using a 256-byte table.
#   arg1 at 4(%esp): 16-byte block, read (unaligned load) and overwritten
#   arg2 at 8(%esp): table of sixteen 16-byte rows, read with movdqa and
#                    therefore required to be 16-byte aligned
# The block is byte-reversed and split into low and high nibbles; the
# nibbles serve as pshufb indices into each table row and the looked-up
# rows are XOR-accumulated into a value that is shifted one byte per row.
# NOTE(review): going by the symbol name this is presumably the GHASH
# field multiplication with the table derived from the hash key - confirm
# against the C caller.
# All table accesses are whole-row loads at fixed offsets; no memory
# address depends on the block contents.
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.type gcm_gmult_ssse3,@function
.align 16
gcm_gmult_ssse3:
.L_gcm_gmult_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
# After the four pushes: %edi = arg1, %esi = arg2.
movl 20(%esp),%edi
movl 24(%esp),%esi
movdqu (%edi),%xmm0
# Position-independent addressing of the constants via call/pop.
call .L000pic_point
.L000pic_point:
popl %eax
movdqa .Lreverse_bytes-.L000pic_point(%eax),%xmm7
movdqa .Llow4_mask-.L000pic_point(%eax),%xmm2
# Hand-encoded pshufb %xmm7,%xmm0: reverse the byte order of the block.
.byte 102,15,56,0,199
# Split into nibbles: %xmm1 = high nibble of every byte (moved down by
# four bits), %xmm0 = low nibble of every byte.
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
# Accumulators %xmm2 and %xmm3 start at zero.
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
# Rows are consumed in three groups (5 + 5 + 6 = 16) with a fold step
# after each group.
movl $5,%eax
.L001loop_row_1:
# Load the next table row and advance the row pointer.
movdqa (%esi),%xmm4
leal 16(%esi),%esi
# Shift the pair %xmm2:%xmm3 right by one byte: the hand-encoded
# palignr $1,%xmm3,%xmm6 moves the low byte of %xmm2 into the top of %xmm3.
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
# Hand-encoded pshufb %xmm0,%xmm4 and pshufb %xmm1,%xmm5: look the row up
# by the low nibbles (%xmm4) and by the high nibbles (%xmm5).
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
# XOR in the low-nibble lookup shifted right by four bits across the
# %xmm2:%xmm3 pair; the bits shifted out of %xmm2 land in %xmm3.
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L001loop_row_1
# Fold %xmm3 back into %xmm2: XOR it in unshifted and shifted right by
# one, two and seven bits in total, then clear it. NOTE(review): presumably
# the reduction by the GHASH polynomial.
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L002loop_row_2:
# Same row step as .L001loop_row_1, for the next five rows.
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L002loop_row_2
# Same fold step as after the first group.
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L003loop_row_3:
# Same row step as .L001loop_row_1, for the last six rows.
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L003loop_row_3
# Same fold step as after the first group.
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
# Hand-encoded pshufb %xmm7,%xmm2: restore the original byte order, then
# write the result back over the input block.
.byte 102,15,56,0,215
movdqu %xmm2,(%edi)
# Zero %xmm0..%xmm6 so no intermediate values remain in them on return.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin
# gcm_ghash_ssse3
# Fold a run of 16-byte input blocks into the 16-byte state at arg1.
#   arg1 at 4(%esp):  16-byte state, read (unaligned) and overwritten
#   arg2 at 8(%esp):  table of sixteen 16-byte rows, read with movdqa and
#                     therefore required to be 16-byte aligned
#   arg3 at 12(%esp): input bytes (unaligned loads)
#   arg4 at 16(%esp): input length in bytes; rounded down to a multiple of
#                     sixteen before use
# For every input block: state = transform(state XOR block), where the
# transform is the same nibble-indexed table walk that gcm_gmult_ssse3
# performs. NOTE(review): presumably the GHASH update - confirm against the
# C caller.
# The loop tests the remaining length only after a block has been
# processed, so a rounded length of zero is not special-cased here.
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.type gcm_ghash_ssse3,@function
.align 16
gcm_ghash_ssse3:
.L_gcm_ghash_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
# After the four pushes: %edi = arg1, %esi = arg2, %edx = arg3,
# %ecx = arg4.
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%edx
movl 32(%esp),%ecx
movdqu (%edi),%xmm0
# Position-independent addressing of the constants via call/pop; %ebx
# keeps the base because .Llow4_mask is reloaded inside the loop.
call .L004pic_point
.L004pic_point:
popl %ebx
movdqa .Lreverse_bytes-.L004pic_point(%ebx),%xmm7
andl $-16,%ecx
# Hand-encoded pshufb %xmm7,%xmm0: reverse the byte order of the state.
.byte 102,15,56,0,199
pxor %xmm3,%xmm3
.L005loop_ghash:
movdqa .Llow4_mask-.L004pic_point(%ebx),%xmm2
# Load the next input block, byte-reverse it (hand-encoded
# pshufb %xmm7,%xmm1) and XOR it into the state.
movdqu (%edx),%xmm1
.byte 102,15,56,0,207
pxor %xmm1,%xmm0
# Split into nibbles: %xmm1 = high nibbles (moved down by four bits),
# %xmm0 = low nibbles; %xmm2 becomes the zeroed accumulator.
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
# Rows are consumed in three groups (5 + 5 + 6 = 16) with a fold step
# after each group.
movl $5,%eax
.L006loop_row_4:
# Load the next table row and advance the row pointer.
movdqa (%esi),%xmm4
leal 16(%esi),%esi
# Shift the pair %xmm2:%xmm3 right by one byte (hand-encoded
# palignr $1,%xmm3,%xmm6 carries the low byte of %xmm2 into %xmm3).
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
# Hand-encoded pshufb %xmm0,%xmm4 and pshufb %xmm1,%xmm5: look the row up
# by the low nibbles (%xmm4) and by the high nibbles (%xmm5).
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
# XOR in the low-nibble lookup shifted right by four bits across the
# %xmm2:%xmm3 pair.
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L006loop_row_4
# Fold %xmm3 back into %xmm2 (unshifted and shifted right by one, two and
# seven bits in total), then clear it. NOTE(review): presumably the
# reduction by the GHASH polynomial.
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L007loop_row_5:
# Same row step as .L006loop_row_4, for the next five rows.
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L007loop_row_5
# Same fold step as after the first group.
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L008loop_row_6:
# Same row step as .L006loop_row_4, for the last six rows.
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L008loop_row_6
# Same fold step as after the first group.
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
# The accumulator becomes the new state; rewind the table pointer by the
# 256 bytes just walked and advance to the next input block.
movdqa %xmm2,%xmm0
leal -256(%esi),%esi
leal 16(%edx),%edx
subl $16,%ecx
jnz .L005loop_ghash
# Hand-encoded pshufb %xmm7,%xmm0: restore the byte order and store the
# updated state.
.byte 102,15,56,0,199
movdqu %xmm0,(%edi)
# Zero %xmm0..%xmm6 so no intermediate values remain in them on return.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin
# Constants for gcm_gmult_ssse3 and gcm_ghash_ssse3. Both are loaded with
# movdqa, hence the 16-byte alignment.
.align 16
.Lreverse_bytes:
# Shuffle indices 15..0: as a pshufb mask this reverses the order of the
# sixteen bytes of a register.
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.align 16
.Llow4_mask:
# The value 252645135 is 0x0F0F0F0F: selects the low nibble of every byte.
.long 252645135,252645135,252645135,252645135
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -0,0 +1,330 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
# gcm_init_clmul
# Build a 48-byte table at arg1 from the 16-byte value at arg2.
#   arg1 at 4(%esp): output table (three unaligned 16-byte stores)
#   arg2 at 8(%esp): 16-byte input value (unaligned load)
# Output layout as written by this code:
#   offset 0:  the input with its 64-bit halves swapped, shifted left by
#              one bit as a 128-bit value and conditionally XORed with the
#              constant stored 16 bytes after .Lbswap
#   offset 16: that value multiplied by itself (three carry-less multiplies
#              followed by a shift-and-XOR reduction)
#   offset 32: low qword = XOR of the two halves of the first entry,
#              high qword = XOR of the two halves of the second entry
# NOTE(review): by the symbol names this is presumably the GHASH key setup
# (H, H^2 and the Karatsuba middle-term operands) - confirm with the caller.
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
# Register use: %edx = arg1, %eax = arg2, %ecx = address of .Lbswap
# (computed position-independently via call/pop).
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L000pic
.L000pic:
popl %ecx
leal .Lbswap-.L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
# Swap the 64-bit halves, then broadcast the top dword into %xmm4 so its
# sign bit can be turned into a mask.
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
# 128-bit shift left by one: shift each qword, then carry bit 63 of the
# low qword into the high qword.
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
# Sets %xmm5 to all ones when the top bit of the value was set (0 > dword),
# otherwise to zero.
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
# Conditionally XOR in the constant at .Lbswap+16.
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
# Square %xmm2: %xmm0 = low*low, %xmm1 = high*high, %xmm3 = product of the
# XORed halves.
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
# Hand-encoded pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,194,0
# Hand-encoded pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,202,17
# Hand-encoded pclmulqdq $0x00,%xmm4,%xmm3
.byte 102,15,58,68,220,0
# Recover the middle term and add it into the 256-bit product %xmm1:%xmm0.
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
# Reduce the 256-bit product to 128 bits with two shift-and-XOR phases.
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
# Write the table: first entry, second entry, then the XORed halves of
# both packed into one register.
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
# Hand-encoded palignr $8,%xmm3,%xmm4
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%edx)
ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
# gcm_gmult_clmul
# Multiply the 16-byte block at arg1 by the first table entry at arg2
# using carry-less multiplication, and store the reduced 16-byte result
# back to arg1.
#   arg1 at 4(%esp): 16-byte block, read (unaligned) and overwritten
#   arg2 at 8(%esp): table; the entries at offsets 0 and 32 are read
# The block is byte-reversed before the multiplication and again before
# the store. NOTE(review): the table layout presumably is the one written
# by gcm_init_clmul - confirm with the caller.
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
# Register use: %eax = arg1, %edx = arg2, %ecx = address of .Lbswap
# (computed position-independently via call/pop).
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L001pic
.L001pic:
popl %ecx
leal .Lbswap-.L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
# Hand-encoded pshufb %xmm5,%xmm0: reverse the byte order of the block.
.byte 102,15,56,0,197
movups 32(%edx),%xmm4
# Three carry-less multiplies: %xmm0 = low*low, %xmm1 = high*high,
# %xmm3 = (XOR of the halves of the block) times the low qword of the
# entry at offset 32.
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
# Hand-encoded pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,194,0
# Hand-encoded pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,202,17
# Hand-encoded pclmulqdq $0x00,%xmm4,%xmm3
.byte 102,15,58,68,220,0
# Recover the middle term and add it into the 256-bit product %xmm1:%xmm0.
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
# Reduce the 256-bit product to 128 bits with two shift-and-XOR phases.
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
# Hand-encoded pshufb %xmm5,%xmm0: restore the byte order, then store.
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
# gcm_ghash_clmul
# Fold a run of 16-byte input blocks into the 16-byte state at arg1 using
# carry-less multiplication.
#   arg1 at 4(%esp):  16-byte state, read (unaligned) and overwritten
#   arg2 at 8(%esp):  table; entries at offsets 0, 16 and 32 are read
#   arg3 at 12(%esp): input bytes (unaligned loads)
#   arg4 at 16(%esp): input length in bytes
# Control flow: a length of exactly 16 goes straight to the single-block
# tail; otherwise blocks are consumed two at a time (.L005mod_loop and
# .L004even_tail) and a possible last single block is handled by
# .L003odd_tail.
# NOTE(review): the code never checks for a zero length and steps the
# length only by 16 and 32, so it presumably relies on the caller passing a
# non-zero multiple of 16 - confirm with the caller. The table layout
# presumably is the one written by gcm_init_clmul.
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
# After the four pushes: %eax = arg1, %edx = arg2, %esi = arg3,
# %ebx = arg4; %ecx = address of .Lbswap (position-independent call/pop).
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call .L002pic
.L002pic:
popl %ecx
leal .Lbswap-.L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
# Hand-encoded pshufb %xmm5,%xmm0: reverse the byte order of the state.
.byte 102,15,56,0,197
subl $16,%ebx
jz .L003odd_tail
# Two-block setup: load and byte-reverse two input blocks (hand-encoded
# pshufb %xmm5,%xmm3 and pshufb %xmm5,%xmm6), XOR the first into the state
# and start multiplying the second by the table entry at offset 0.
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
# From here %xmm5 holds the table entry at offset 32 instead of the
# byte-swap mask; the mask is reloaded from (%ecx) where needed.
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
# Hand-encoded pclmulqdq $0x00,%xmm2,%xmm6
.byte 102,15,58,68,242,0
# Hand-encoded pclmulqdq $0x11,%xmm2,%xmm7
.byte 102,15,58,68,250,17
# Hand-encoded pclmulqdq $0x00,%xmm5,%xmm3
.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe .L004even_tail
jmp .L005mod_loop
.align 32
.L005mod_loop:
# Main loop, two blocks per iteration: multiply the running state by the
# table entry at offset 16, combine with the pending product of the
# previous second block, reduce, and meanwhile load, byte-reverse and start
# multiplying the next two input blocks.
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
# Hand-encoded pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,194,0
# Hand-encoded pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,202,17
# Hand-encoded pclmulqdq $0x10,%xmm5,%xmm4
.byte 102,15,58,68,229,16
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
# Hand-encoded pshufb %xmm5,%xmm7
.byte 102,15,56,0,253
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
# Hand-encoded pshufb %xmm5,%xmm6
.byte 102,15,56,0,245
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
# Hand-encoded pclmulqdq $0x00,%xmm2,%xmm6
.byte 102,15,58,68,242,0
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
# Hand-encoded pclmulqdq $0x11,%xmm2,%xmm7
.byte 102,15,58,68,250,17
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
# Hand-encoded pclmulqdq $0x00,%xmm5,%xmm3
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja .L005mod_loop
.L004even_tail:
# Finish the pending pair: multiply the state by the table entry at offset
# sixteen (still in %xmm2), combine with the pending product and reduce.
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
# Hand-encoded pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,194,0
# Hand-encoded pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,202,17
# Hand-encoded pclmulqdq $0x10,%xmm5,%xmm4
.byte 102,15,58,68,229,16
# Reload the byte-swap mask for the final store (and for the odd tail).
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
# A zero remainder here means one more single block is left; any other
# value means the input is exhausted.
testl %ebx,%ebx
jnz .L006done
movups (%edx),%xmm2
.L003odd_tail:
# Single block: XOR the byte-reversed block (hand-encoded
# pshufb %xmm5,%xmm3) into the state, multiply by the table entry at
# offset 0 and reduce.
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
# Hand-encoded pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,194,0
# Hand-encoded pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,202,17
# Hand-encoded pclmulqdq $0x00,%xmm4,%xmm3
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.L006done:
# Hand-encoded pshufb %xmm5,%xmm0: restore the byte order, then store the
# updated state.
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
# Constants for the gcm_*_clmul routines (64-byte aligned).
.align 64
.Lbswap:
# Shuffle indices 15..0: as a pshufb mask this reverses the order of the
# sixteen bytes of a register. Loaded from (%ecx) by gcm_gmult_clmul and
# gcm_ghash_clmul.
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
# A 128-bit little-endian constant with low byte 0x01 and high byte 0xc2
# (194); gcm_init_clmul reads it at .Lbswap+16 and conditionally XORs it
# into the shifted input.
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
# NUL-terminated ASCII tag:
#   GHASH for x86, CRYPTOGAMS by <appro@openssl.org>
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -0,0 +1,688 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
.align 16
md5_block_asm_data_order:
.L_md5_block_asm_data_order_begin:
pushl %esi
pushl %edi
movl 12(%esp),%edi
movl 16(%esp),%esi
movl 20(%esp),%ecx
pushl %ebp
shll $6,%ecx
pushl %ebx
addl %esi,%ecx
subl $64,%ecx
movl (%edi),%eax
pushl %ecx
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
.L000start:
movl %ecx,%edi
movl (%esi),%ebp
xorl %edx,%edi
andl %ebx,%edi
leal 3614090360(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 4(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 3905402710(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 8(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 606105819(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 12(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 3250441966(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 16(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 4118548399(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 20(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 1200080426(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 24(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2821735955(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 28(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 4249261313(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 32(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1770035416(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 36(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 2336552879(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 40(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 4294925233(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 44(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 2304563134(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 48(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1804603682(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 52(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 4254626195(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 56(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2792965006(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 60(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 1236535329(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 4(%esi),%ebp
addl %ecx,%ebx
leal 4129170786(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 24(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3225465664(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 44(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 643717713(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl (%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3921069994(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 3593408605(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 40(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 38016083(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 60(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 3634488961(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 16(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3889429448(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 36(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 568446438(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 56(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3275163606(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 12(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 4107603335(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 32(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 1163531501(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 52(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 2850285829(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 8(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 4243563512(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 28(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 1735328473(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 48(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 2368359562(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 4294588738(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 32(%esi),%ebp
movl %ebx,%edi
leal 2272392833(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 44(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 1839030562(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 56(%esi),%ebp
movl %edx,%edi
leal 4259657740(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 4(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 2763975236(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 16(%esi),%ebp
movl %ebx,%edi
leal 1272893353(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 28(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 4139469664(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 40(%esi),%ebp
movl %edx,%edi
leal 3200236656(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 52(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 681279174(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl (%esi),%ebp
movl %ebx,%edi
leal 3936430074(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 12(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 3572445317(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 24(%esi),%ebp
movl %edx,%edi
leal 76029189(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 36(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 3654602809(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 48(%esi),%ebp
movl %ebx,%edi
leal 3873151461(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 60(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 530742520(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 8(%esi),%ebp
movl %edx,%edi
leal 3299628645(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl (%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
orl %ebx,%edi
leal 4096336452(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 28(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 1126891415(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 56(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2878612391(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 20(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 4237533241(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 48(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1700485571(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 12(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 2399980690(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 40(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 4293915773(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 4(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 2240044497(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 32(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1873313359(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 60(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 4264355552(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 24(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2734768916(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 52(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 1309151649(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 16(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 4149444226(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 44(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 3174756917(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 8(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 718787259(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 36(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 3951481745(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 24(%esp),%ebp
addl %edi,%ebx
addl $64,%esi
roll $21,%ebx
movl (%ebp),%edi
addl %ecx,%ebx
addl %edi,%eax
movl 4(%ebp),%edi
addl %edi,%ebx
movl 8(%ebp),%edi
addl %edi,%ecx
movl 12(%ebp),%edi
addl %edi,%edx
movl %eax,(%ebp)
movl %ebx,4(%ebp)
movl (%esp),%edi
movl %ecx,8(%ebp)
movl %edx,12(%ebp)
cmpl %esi,%edi
jae .L000start
popl %eax
popl %ebx
popl %ebp
popl %edi
popl %esi
ret
/* Record the ELF symbol size of md5_block_asm_data_order: the distance from
   the function's begin label to the current location. */
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
/* Closes a preprocessor conditional that was opened earlier in this file. */
#endif
/* Emit an empty .note.GNU-stack section (no flags, so not executable): the GNU
   toolchain convention for declaring that this object does not require an
   executable stack. */
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More