mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-29 11:02:08 +00:00
Merge remote-tracking branch 'upstream/master' into speedup-apply-cidr-mask-v6
This commit is contained in:
commit
b1d7778287
18
.github/workflows/anchore-analysis.yml
vendored
18
.github/workflows/anchore-analysis.yml
vendored
@ -1,8 +1,8 @@
|
||||
# This workflow checks out code, performs an Anchore container image
|
||||
# vulnerability and compliance scan, and integrates the results with
|
||||
# GitHub Advanced Security code scanning feature. For more information on
|
||||
# GitHub Advanced Security code scanning feature. For more information on
|
||||
# the Anchore scan action usage and parameters, see
|
||||
# https://github.com/anchore/scan-action. For more information on
|
||||
# https://github.com/anchore/scan-action. For more information on
|
||||
# Anchore container image scanning in general, see
|
||||
# https://docs.anchore.com.
|
||||
|
||||
@ -28,18 +28,12 @@ jobs:
|
||||
perl -pi -e 's|=\$version||g' Dockerfile
|
||||
docker build . --file Dockerfile --tag localbuild/testimage:latest
|
||||
- name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled
|
||||
uses: anchore/scan-action@master
|
||||
uses: anchore/scan-action@v2
|
||||
id: scan
|
||||
with:
|
||||
image-reference: "localbuild/testimage:latest"
|
||||
dockerfile-path: "docker/server/Dockerfile"
|
||||
image: "localbuild/testimage:latest"
|
||||
acs-report-enable: true
|
||||
fail-build: true
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-artifact@v1.0.0
|
||||
with:
|
||||
name: AnchoreReports
|
||||
path: ./anchore-reports/
|
||||
- name: Upload Anchore Scan Report
|
||||
uses: github/codeql-action/upload-sarif@v1
|
||||
with:
|
||||
sarif_file: results.sarif
|
||||
sarif_file: ${{ steps.scan.outputs.sarif }}
|
||||
|
9
.github/workflows/codeql-analysis.yml
vendored
9
.github/workflows/codeql-analysis.yml
vendored
@ -1,3 +1,5 @@
|
||||
# See the example here: https://github.com/github/codeql-action
|
||||
|
||||
name: "CodeQL Scanning"
|
||||
|
||||
on:
|
||||
@ -16,17 +18,14 @@ jobs:
|
||||
fetch-depth: 2
|
||||
submodules: 'recursive'
|
||||
|
||||
- run: git checkout HEAD^2
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v1
|
||||
|
||||
with:
|
||||
languages: cpp
|
||||
|
||||
- run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-9 g++-9 && mkdir build
|
||||
- run: cd build && CC=gcc-9 CXX=g++-9 cmake ..
|
||||
- run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-10 g++-10 && mkdir build
|
||||
- run: cd build && CC=gcc-10 CXX=g++-10 cmake ..
|
||||
- run: cd build && ninja
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
|
17
.gitmodules
vendored
17
.gitmodules
vendored
@ -53,7 +53,8 @@
|
||||
url = https://github.com/ClickHouse-Extras/Turbo-Base64.git
|
||||
[submodule "contrib/arrow"]
|
||||
path = contrib/arrow
|
||||
url = https://github.com/apache/arrow
|
||||
url = https://github.com/ClickHouse-Extras/arrow
|
||||
branch = clickhouse-arrow-2.0.0
|
||||
[submodule "contrib/thrift"]
|
||||
path = contrib/thrift
|
||||
url = https://github.com/apache/thrift.git
|
||||
@ -124,9 +125,6 @@
|
||||
[submodule "contrib/curl"]
|
||||
path = contrib/curl
|
||||
url = https://github.com/curl/curl.git
|
||||
[submodule "contrib/openssl"]
|
||||
path = contrib/openssl
|
||||
url = https://github.com/ClickHouse-Extras/openssl.git
|
||||
[submodule "contrib/icudata"]
|
||||
path = contrib/icudata
|
||||
url = https://github.com/ClickHouse-Extras/icudata.git
|
||||
@ -183,7 +181,7 @@
|
||||
url = https://github.com/kthohr/stats.git
|
||||
[submodule "contrib/krb5"]
|
||||
path = contrib/krb5
|
||||
url = https://github.com/krb5/krb5
|
||||
url = https://github.com/ClickHouse-Extras/krb5
|
||||
[submodule "contrib/cyrus-sasl"]
|
||||
path = contrib/cyrus-sasl
|
||||
url = https://github.com/cyrusimap/cyrus-sasl
|
||||
@ -197,8 +195,7 @@
|
||||
url = https://github.com/danlark1/miniselect
|
||||
[submodule "contrib/rocksdb"]
|
||||
path = contrib/rocksdb
|
||||
url = https://github.com/facebook/rocksdb
|
||||
branch = v6.14.5
|
||||
url = https://github.com/ClickHouse-Extras/rocksdb.git
|
||||
[submodule "contrib/xz"]
|
||||
path = contrib/xz
|
||||
url = https://github.com/xz-mirror/xz
|
||||
@ -209,3 +206,9 @@
|
||||
[submodule "contrib/dragonbox"]
|
||||
path = contrib/dragonbox
|
||||
url = https://github.com/ClickHouse-Extras/dragonbox.git
|
||||
[submodule "contrib/fast_float"]
|
||||
path = contrib/fast_float
|
||||
url = https://github.com/fastfloat/fast_float
|
||||
[submodule "contrib/boringssl"]
|
||||
path = contrib/boringssl
|
||||
url = https://github.com/ClickHouse-Extras/boringssl.git
|
||||
|
376
CHANGELOG.md
376
CHANGELOG.md
@ -1,5 +1,188 @@
|
||||
### ClickHouse release 20.12
|
||||
|
||||
### ClickHouse release v20.12.4.5-stable, 2020-12-24
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fixed issue when `clickhouse-odbc-bridge` process is unreachable by server on machines with dual IPv4/IPv6 stack; - Fixed issue when ODBC dictionary updates are performed using malformed queries and/or cause crashes; Possibly closes [#14489](https://github.com/ClickHouse/ClickHouse/issues/14489). [#18278](https://github.com/ClickHouse/ClickHouse/pull/18278) ([Denis Glazachev](https://github.com/traceon)).
|
||||
* Fixed key comparison between Enum and Int types. This fixes [#17989](https://github.com/ClickHouse/ClickHouse/issues/17989). [#18214](https://github.com/ClickHouse/ClickHouse/pull/18214) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed unique key convert crash in `MaterializeMySQL` database engine. This fixes [#18186](https://github.com/ClickHouse/ClickHouse/issues/18186) and fixes [#16372](https://github.com/ClickHouse/ClickHouse/issues/16372) [#18211](https://github.com/ClickHouse/ClickHouse/pull/18211) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed `std::out_of_range: basic_string` in S3 URL parsing. [#18059](https://github.com/ClickHouse/ClickHouse/pull/18059) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Fixed the issue when some tables not synchronized to ClickHouse from MySQL caused by the fact that convertion MySQL prefix index wasn't supported for MaterializeMySQL. This fixes [#15187](https://github.com/ClickHouse/ClickHouse/issues/15187) and fixes [#17912](https://github.com/ClickHouse/ClickHouse/issues/17912) [#17944](https://github.com/ClickHouse/ClickHouse/pull/17944) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed the issue when query optimization was producing wrong result if query contains `ARRAY JOIN`. [#17887](https://github.com/ClickHouse/ClickHouse/pull/17887) ([sundyli](https://github.com/sundy-li)).
|
||||
* Fixed possible segfault in `topK` aggregate function. This closes [#17404](https://github.com/ClickHouse/ClickHouse/issues/17404). [#17845](https://github.com/ClickHouse/ClickHouse/pull/17845) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fixed empty `system.stack_trace` table when server is running in daemon mode. [#17630](https://github.com/ClickHouse/ClickHouse/pull/17630) ([Amos Bird](https://github.com/amosbird)).
|
||||
|
||||
|
||||
### ClickHouse release v20.12.3.3-stable, 2020-12-13
|
||||
|
||||
#### Backward Incompatible Change
|
||||
|
||||
* Enable `use_compact_format_in_distributed_parts_names` by default (see the documentation for the reference). [#16728](https://github.com/ClickHouse/ClickHouse/pull/16728) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Accept user settings related to file formats (e.g. `format_csv_delimiter`) in the `SETTINGS` clause when creating a table that uses `File` engine, and use these settings in all `INSERT`s and `SELECT`s. The file format settings changed in the current user session, or in the `SETTINGS` clause of a DML query itself, no longer affect the query. [#16591](https://github.com/ClickHouse/ClickHouse/pull/16591) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
|
||||
#### New Feature
|
||||
|
||||
* add `*.xz` compression/decompression support.It enables using `*.xz` in `file()` function. This closes [#8828](https://github.com/ClickHouse/ClickHouse/issues/8828). [#16578](https://github.com/ClickHouse/ClickHouse/pull/16578) ([Abi Palagashvili](https://github.com/fibersel)).
|
||||
* Introduce the query `ALTER TABLE ... DROP|DETACH PART 'part_name'`. [#15511](https://github.com/ClickHouse/ClickHouse/pull/15511) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Added new ALTER UPDATE/DELETE IN PARTITION syntax. [#13403](https://github.com/ClickHouse/ClickHouse/pull/13403) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Allow formatting named tuples as JSON objects when using JSON input/output formats, controlled by the `output_format_json_named_tuples_as_objects` setting, disabled by default. [#17175](https://github.com/ClickHouse/ClickHouse/pull/17175) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Add a possibility to input enum value as it's id in TSV and CSV formats by default. [#16834](https://github.com/ClickHouse/ClickHouse/pull/16834) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add COLLATE support for Nullable, LowCardinality, Array and Tuple, where nested type is String. Also refactor the code associated with collations in ColumnString.cpp. [#16273](https://github.com/ClickHouse/ClickHouse/pull/16273) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* New `tcpPort` function returns TCP port listened by this server. [#17134](https://github.com/ClickHouse/ClickHouse/pull/17134) ([Ivan](https://github.com/abyss7)).
|
||||
* Add new math functions: `acosh`, `asinh`, `atan2`, `atanh`, `cosh`, `hypot`, `log1p`, `sinh`. [#16636](https://github.com/ClickHouse/ClickHouse/pull/16636) ([Konstantin Malanchev](https://github.com/hombit)).
|
||||
* Possibility to distribute the merges between different replicas. Introduces the `execute_merges_on_single_replica_time_threshold` mergetree setting. [#16424](https://github.com/ClickHouse/ClickHouse/pull/16424) ([filimonov](https://github.com/filimonov)).
|
||||
* Add setting `aggregate_functions_null_for_empty` for SQL standard compatibility. This option will rewrite all aggregate functions in a query, adding -OrNull suffix to them. Implements [10273](https://github.com/ClickHouse/ClickHouse/issues/10273). [#16123](https://github.com/ClickHouse/ClickHouse/pull/16123) ([flynn](https://github.com/ucasFL)).
|
||||
* Updated DateTime, DateTime64 parsing to accept string Date literal format. [#16040](https://github.com/ClickHouse/ClickHouse/pull/16040) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Make it possible to change the path to history file in `clickhouse-client` using the `--history_file` parameter. [#15960](https://github.com/ClickHouse/ClickHouse/pull/15960) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix the issue when server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)).
|
||||
* fix incorrect initialization of `max_compress_block_size` of MergeTreeWriterSettings with `min_compress_block_size`. [#17833](https://github.com/ClickHouse/ClickHouse/pull/17833) ([flynn](https://github.com/ucasFL)).
|
||||
* Exception message about max table size to drop was displayed incorrectly. [#17764](https://github.com/ClickHouse/ClickHouse/pull/17764) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed possible segfault when there is not enough space when inserting into `Distributed` table. [#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)).
|
||||
* In might be determined incorrectly if cluster is circular- (cross-) replicated or not when executing `ON CLUSTER` query due to race condition when `pool_size` > 1. It's fixed. [#17640](https://github.com/ClickHouse/ClickHouse/pull/17640) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Exception `fmt::v7::format_error` can be logged in background for MergeTree tables. This fixes [#17613](https://github.com/ClickHouse/ClickHouse/issues/17613). [#17615](https://github.com/ClickHouse/ClickHouse/pull/17615) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* When clickhouse-client is used in interactive mode with multiline queries, single line comment was erronously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix issue when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* fix `toInt256(inf)` stack overflow. Int256 is an experimental feature. Closed [#17235](https://github.com/ClickHouse/ClickHouse/issues/17235). [#17257](https://github.com/ClickHouse/ClickHouse/pull/17257) ([flynn](https://github.com/ucasFL)).
|
||||
* Fix possible `Unexpected packet Data received from client` error logged for Distributed queries with `LIMIT`. [#17254](https://github.com/ClickHouse/ClickHouse/pull/17254) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246). [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix possible wrong index analysis when the types of the index comparison are different. This fixes [#17122](https://github.com/ClickHouse/ClickHouse/issues/17122). [#17145](https://github.com/ClickHouse/ClickHouse/pull/17145) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Multiple fixed for MaterializeMySQL (experimental feature). Fixes [#16923](https://github.com/ClickHouse/ClickHouse/issues/16923) Fixes [#15883](https://github.com/ClickHouse/ClickHouse/issues/15883) Fix MaterializeMySQL SYNC failure when the modify MySQL binlog_checksum. [#17091](https://github.com/ClickHouse/ClickHouse/pull/17091) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()` Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Bug unfinished implementation for funciton fuzzBits, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Fix LLVM's libunwind in the case when CFA register is RAX. This is the [bug](https://bugs.llvm.org/show_bug.cgi?id=48186) in [LLVM's libunwind](https://github.com/llvm/llvm-project/tree/master/libunwind). We already have workarounds for this bug. [#17046](https://github.com/ClickHouse/ClickHouse/pull/17046) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix `optimize_distributed_group_by_sharding_key` setting (that is disabled by default) for query with OFFSET only. [#16996](https://github.com/ClickHouse/ClickHouse/pull/16996) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix for Merge tables over Distributed tables with JOIN. [#16993](https://github.com/ClickHouse/ClickHouse/pull/16993) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed wrong result in big integers (128, 256 bit) when casting from double. Big integers support is experimental. [#16986](https://github.com/ClickHouse/ClickHouse/pull/16986) ([Mike](https://github.com/myrrc)).
|
||||
* Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` have `WHERE` expression on altering column and alter doesn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Blame info was not calculated correctly in `clickhouse-git-import`. [#16959](https://github.com/ClickHouse/ClickHouse/pull/16959) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix order by optimization with monotonous functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. Fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix strange code in InterpreterShowAccessQuery. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Prevent clickhouse server crashes when using the function `timeSeriesGroupSum`. The function is removed from newer ClickHouse releases. [#16865](https://github.com/ClickHouse/ClickHouse/pull/16865) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)).
|
||||
* If no memory can be allocated while writing table metadata on disk, broken metadata file can be written. [#16772](https://github.com/ClickHouse/ClickHouse/pull/16772) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix trivial query optimization with partition predicate. [#16767](https://github.com/ClickHouse/ClickHouse/pull/16767) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix remote query failure when using 'if' suffix aggregate function. Fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) Fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix inconsistent behavior caused by `select_sequential_consistency` for optimized trivial count query and system.tables. [#16309](https://github.com/ClickHouse/ClickHouse/pull/16309) ([Hao Chen](https://github.com/haoch)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm. [#16895](https://github.com/ClickHouse/ClickHouse/pull/16895) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Enable compact format of directories for asynchronous sends in Distributed tables: `use_compact_format_in_distributed_parts_names` is set to 1 by default. [#16788](https://github.com/ClickHouse/ClickHouse/pull/16788) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Abort multipart upload if no data was written to S3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)).
|
||||
* Mask password in data_path in the system.distribution_queue. [#16727](https://github.com/ClickHouse/ClickHouse/pull/16727) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Throw error when use column transformer replaces non existing column. [#16183](https://github.com/ClickHouse/ClickHouse/pull/16183) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Turn off parallel parsing when there is no enough memory for all threads to work simultaneously. Also there could be exceptions like "Memory limit exceeded" when somebody will try to insert extremely huge rows (> min_chunk_bytes_for_parallel_parsing), because each piece to parse has to be independent set of strings (one or more). [#16721](https://github.com/ClickHouse/ClickHouse/pull/16721) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)).
|
||||
* Correct grammar in error message in JSONEachRow, JSONCompactEachRow, and RegexpRow input formats. [#17205](https://github.com/ClickHouse/ClickHouse/pull/17205) ([nico piderman](https://github.com/sneako)).
|
||||
* Set default `host` and `port` parameters for `SOURCE(CLICKHOUSE(...))` to current instance and set default `user` value to `'default'`. [#16997](https://github.com/ClickHouse/ClickHouse/pull/16997) ([vdimir](https://github.com/vdimir)).
|
||||
* Throw an informative error message when doing `ATTACH/DETACH TABLE <DICTIONARY>`. Before this PR, `detach table <dict>` works but leads to an ill-formed in-memory metadata. [#16885](https://github.com/ClickHouse/ClickHouse/pull/16885) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Add cutToFirstSignificantSubdomainWithWWW(). [#16845](https://github.com/ClickHouse/ClickHouse/pull/16845) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Server refused to startup with exception message if wrong config is given (`metric_log`.`collect_interval_milliseconds` is missing). [#16815](https://github.com/ClickHouse/ClickHouse/pull/16815) ([Ivan](https://github.com/abyss7)).
|
||||
* Better exception message when configuration for distributed DDL is absent. This fixes [#5075](https://github.com/ClickHouse/ClickHouse/issues/5075). [#16769](https://github.com/ClickHouse/ClickHouse/pull/16769) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Usability improvement: better suggestions in syntax error message when `CODEC` expression is misplaced in `CREATE TABLE` query. This fixes [#12493](https://github.com/ClickHouse/ClickHouse/issues/12493). [#16768](https://github.com/ClickHouse/ClickHouse/pull/16768) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove empty directories for async INSERT at start of Distributed engine. [#16729](https://github.com/ClickHouse/ClickHouse/pull/16729) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Workaround for use S3 with nginx server as proxy. Nginx currenty does not accept urls with empty path like `http://domain.com?delete`, but vanilla aws-sdk-cpp produces this kind of urls. This commit uses patched aws-sdk-cpp version, which makes urls with "/" as path in this cases, like `http://domain.com/?delete`. [#16709](https://github.com/ClickHouse/ClickHouse/pull/16709) ([ianton-ru](https://github.com/ianton-ru)).
|
||||
* Allow `reinterpretAs*` functions to work for integers and floats of the same size. Implements [16640](https://github.com/ClickHouse/ClickHouse/issues/16640). [#16657](https://github.com/ClickHouse/ClickHouse/pull/16657) ([flynn](https://github.com/ucasFL)).
|
||||
* Now, `<auxiliary_zookeepers>` configuration can be changed in `config.xml` and reloaded without server startup. [#16627](https://github.com/ClickHouse/ClickHouse/pull/16627) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Support SNI in https connections to remote resources. This will allow to connect to Cloudflare servers that require SNI. This fixes [#10055](https://github.com/ClickHouse/ClickHouse/issues/10055). [#16252](https://github.com/ClickHouse/ClickHouse/pull/16252) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix possible stack overflow if a loop of materialized views is created. This closes [#15732](https://github.com/ClickHouse/ClickHouse/issues/15732). [#16048](https://github.com/ClickHouse/ClickHouse/pull/16048) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Simplify the implementation of background tasks processing for the MergeTree table engines family. There should be no visible changes for user. [#15983](https://github.com/ClickHouse/ClickHouse/pull/15983) ([alesapin](https://github.com/alesapin)).
|
||||
* Improvement for MaterializeMySQL (experimental feature). Throw exception about right sync privileges when MySQL sync user has error privileges. [#15977](https://github.com/ClickHouse/ClickHouse/pull/15977) ([TCeason](https://github.com/TCeason)).
|
||||
* Made `indexOf()` use BloomFilter. [#14977](https://github.com/ClickHouse/ClickHouse/pull/14977) ([achimbab](https://github.com/achimbab)).
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Use Floyd-Rivest algorithm, it is the best for the ClickHouse use case of partial sorting. Bechmarks are in https://github.com/danlark1/miniselect and [here](https://drive.google.com/drive/folders/1DHEaeXgZuX6AJ9eByeZ8iQVQv0ueP8XM). [#16825](https://github.com/ClickHouse/ClickHouse/pull/16825) ([Danila Kutenin](https://github.com/danlark1)).
|
||||
* Now `ReplicatedMergeTree` tree engines family uses a separate thread pool for replicated fetches. Size of the pool limited by setting `background_fetches_pool_size` which can be tuned with a server restart. The default value of the setting is 3 and it means that the maximum amount of parallel fetches is equal to 3 (and it allows to utilize 10G network). Fixes #520. [#16390](https://github.com/ClickHouse/ClickHouse/pull/16390) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed uncontrolled growth of the state of `quantileTDigest`. [#16680](https://github.com/ClickHouse/ClickHouse/pull/16680) ([hrissan](https://github.com/hrissan)).
|
||||
* Add `VIEW` subquery description to `EXPLAIN`. Limit push down optimisation for `VIEW`. Add local replicas of `Distributed` to query plan. [#14936](https://github.com/ClickHouse/ClickHouse/pull/14936) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix optimize_read_in_order/optimize_aggregation_in_order with max_threads > 0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Now we can safely prune partitions with exact match. Useful case: Suppose table is partitioned by `intHash64(x) % 100` and the query has condition on `intHash64(x) % 100` verbatim, not on x. [#16253](https://github.com/ClickHouse/ClickHouse/pull/16253) ([Amos Bird](https://github.com/amosbird)).
|
||||
|
||||
#### Experimental Feature
|
||||
|
||||
* Add `EmbeddedRocksDB` table engine (can be used for dictionaries). [#15073](https://github.com/ClickHouse/ClickHouse/pull/15073) ([sundyli](https://github.com/sundy-li)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
* Improvements in test coverage building images. [#17233](https://github.com/ClickHouse/ClickHouse/pull/17233) ([alesapin](https://github.com/alesapin)).
|
||||
* Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix UBSan report in Poco. This closes [#12719](https://github.com/ClickHouse/ClickHouse/issues/12719). [#16765](https://github.com/ClickHouse/ClickHouse/pull/16765) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Do not instrument 3rd-party libraries with UBSan. [#16764](https://github.com/ClickHouse/ClickHouse/pull/16764) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix UBSan report in cache dictionaries. This closes [#12641](https://github.com/ClickHouse/ClickHouse/issues/12641). [#16763](https://github.com/ClickHouse/ClickHouse/pull/16763) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix UBSan report when trying to convert infinite floating point number to integer. This closes [#14190](https://github.com/ClickHouse/ClickHouse/issues/14190). [#16677](https://github.com/ClickHouse/ClickHouse/pull/16677) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
|
||||
## ClickHouse release 20.11
|
||||
|
||||
### ClickHouse release v20.11.6.6-stable, 2020-12-24
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fixed issue when `clickhouse-odbc-bridge` process is unreachable by server on machines with dual `IPv4/IPv6 stack` and fixed issue when ODBC dictionary updates are performed using malformed queries and/or cause crashes. This possibly closes [#14489](https://github.com/ClickHouse/ClickHouse/issues/14489). [#18278](https://github.com/ClickHouse/ClickHouse/pull/18278) ([Denis Glazachev](https://github.com/traceon)).
|
||||
* Fixed key comparison between Enum and Int types. This fixes [#17989](https://github.com/ClickHouse/ClickHouse/issues/17989). [#18214](https://github.com/ClickHouse/ClickHouse/pull/18214) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed unique key convert crash in `MaterializeMySQL` database engine. This fixes [#18186](https://github.com/ClickHouse/ClickHouse/issues/18186) and fixes [#16372](https://github.com/ClickHouse/ClickHouse/issues/16372) [#18211](https://github.com/ClickHouse/ClickHouse/pull/18211) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed `std::out_of_range: basic_string` in S3 URL parsing. [#18059](https://github.com/ClickHouse/ClickHouse/pull/18059) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Fixed the issue when some tables not synchronized to ClickHouse from MySQL caused by the fact that convertion MySQL prefix index wasn't supported for MaterializeMySQL. This fixes [#15187](https://github.com/ClickHouse/ClickHouse/issues/15187) and fixes [#17912](https://github.com/ClickHouse/ClickHouse/issues/17912) [#17944](https://github.com/ClickHouse/ClickHouse/pull/17944) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed the issue when query optimization was producing wrong result if query contains `ARRAY JOIN`. [#17887](https://github.com/ClickHouse/ClickHouse/pull/17887) ([sundyli](https://github.com/sundy-li)).
|
||||
* Fix possible segfault in `topK` aggregate function. This closes [#17404](https://github.com/ClickHouse/ClickHouse/issues/17404). [#17845](https://github.com/ClickHouse/ClickHouse/pull/17845) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)).
|
||||
* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)).
|
||||
* Fixed inconsistent behaviour of `optimize_trivial_count_query` with partition predicate. [#17644](https://github.com/ClickHouse/ClickHouse/pull/17644) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed empty `system.stack_trace` table when server is running in daemon mode. [#17630](https://github.com/ClickHouse/ClickHouse/pull/17630) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed the behaviour when xxception `fmt::v7::format_error` can be logged in background for MergeTree tables. This fixes [#17613](https://github.com/ClickHouse/ClickHouse/issues/17613). [#17615](https://github.com/ClickHouse/ClickHouse/pull/17615) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed the behaviour when clickhouse-client is used in interactive mode with multiline queries and single line comment was erronously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed the issue when server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed alter query hang when the corresponding mutation was killed on the different replica. This fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed duplicates after `DISTINCT` which were possible because of incorrect optimization. This fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed crash while reading from `JOIN` table with `LowCardinality` types. This fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) . [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed possible wrong index analysis when the types of the index comparison are different. This fixes [#17122](https://github.com/ClickHouse/ClickHouse/issues/17122). [#17145](https://github.com/ClickHouse/ClickHouse/pull/17145) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed `ColumnConst` comparison which leads to crash. This fixes [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed bug when `ON CLUSTER` queries may hang forever for non-leader `ReplicatedMergeTreeTables`. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed fuzzer-found bug in funciton `fuzzBits`. This fixes [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed wrong result in big integers (128, 256 bit) when casting from double. [#16986](https://github.com/ClickHouse/ClickHouse/pull/16986) ([Mike](https://github.com/myrrc)).
|
||||
* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)).
|
||||
* Fixed possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` have `WHERE` expression on altering column and alter doesn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Blame info was not calculated correctly in `clickhouse-git-import`. [#16959](https://github.com/ClickHouse/ClickHouse/pull/16959) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed order by optimization with monotonous functions. Fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. This fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)).
|
||||
* Fixed possible error `Illegal type of argument` for queries with `ORDER BY`. This fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* Fixed crash when using `any` without any arguments. This fixes [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803). [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed the behaviour when ClickHouse used to always return 0 insted of a number of affected rows for `INSERT` queries via MySQL protocol. This fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed uncontrolled growth of TDigest. [#16680](https://github.com/ClickHouse/ClickHouse/pull/16680) ([hrissan](https://github.com/hrissan)).
|
||||
* Fixed remote query failure when using suffix `if` in Aggregate function. This fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed inconsistent behavior caused by `select_sequential_consistency` for optimized trivial count query and system.tables. [#16309](https://github.com/ClickHouse/ClickHouse/pull/16309) ([Hao Chen](https://github.com/haoch)).
|
||||
* Throw error when use ColumnTransformer replace non exist column. [#16183](https://github.com/ClickHouse/ClickHouse/pull/16183) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
|
||||
|
||||
### ClickHouse release v20.11.3.3-stable, 2020-11-13
|
||||
|
||||
#### Bug Fix
|
||||
@ -15,7 +198,7 @@
|
||||
* Restrict to use of non-comparable data types (like `AggregateFunction`) in keys (Sorting key, Primary key, Partition key, and so on). [#16601](https://github.com/ClickHouse/ClickHouse/pull/16601) ([alesapin](https://github.com/alesapin)).
|
||||
* Remove `ANALYZE` and `AST` queries, and make the setting `enable_debug_queries` obsolete since now it is the part of full featured `EXPLAIN` query. [#16536](https://github.com/ClickHouse/ClickHouse/pull/16536) ([Ivan](https://github.com/abyss7)).
|
||||
* Aggregate functions `boundingRatio`, `rankCorr`, `retention`, `timeSeriesGroupSum`, `timeSeriesGroupRateSum`, `windowFunnel` were erroneously made case-insensitive. Now their names are made case sensitive as designed. Only functions that are specified in SQL standard or made for compatibility with other DBMS or functions similar to those should be case-insensitive. [#16407](https://github.com/ClickHouse/ClickHouse/pull/16407) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Make `rankCorr` function return nan on insufficient data https://github.com/ClickHouse/ClickHouse/issues/16124. [#16135](https://github.com/ClickHouse/ClickHouse/pull/16135) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Make `rankCorr` function return nan on insufficient data [#16124](https://github.com/ClickHouse/ClickHouse/issues/16124). [#16135](https://github.com/ClickHouse/ClickHouse/pull/16135) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* When upgrading from versions older than 20.5, if rolling update is performed and cluster contains both versions 20.5 or greater and less than 20.5, if ClickHouse nodes with old versions are restarted and old version has been started up in presence of newer versions, it may lead to `Part ... intersects previous part` errors. To prevent this error, first install newer clickhouse-server packages on all cluster nodes and then do restarts (so, when clickhouse-server is restarted, it will start up with the new version).
|
||||
|
||||
#### New Feature
|
||||
@ -33,7 +216,7 @@
|
||||
* Now we can provide identifiers via query parameters. And these parameters can be used as table objects or columns. [#16594](https://github.com/ClickHouse/ClickHouse/pull/16594) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Added big integers (UInt256, Int128, Int256) and UUID data types support for MergeTree BloomFilter index. Big integers is an experimental feature. [#16642](https://github.com/ClickHouse/ClickHouse/pull/16642) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Add `farmFingerprint64` function (non-cryptographic string hashing). [#16570](https://github.com/ClickHouse/ClickHouse/pull/16570) ([Jacob Hayes](https://github.com/JacobHayes)).
|
||||
* Add `log_queries_min_query_duration_ms`, only queries slower then the value of this setting will go to `query_log`/`query_thread_log` (i.e. something like `slow_query_log` in mysql). [#16529](https://github.com/ClickHouse/ClickHouse/pull/16529) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Add `log_queries_min_query_duration_ms`, only queries slower than the value of this setting will go to `query_log`/`query_thread_log` (i.e. something like `slow_query_log` in mysql). [#16529](https://github.com/ClickHouse/ClickHouse/pull/16529) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Ability to create a docker image on the top of `Alpine`. Uses precompiled binary and glibc components from ubuntu 20.04. [#16479](https://github.com/ClickHouse/ClickHouse/pull/16479) ([filimonov](https://github.com/filimonov)).
|
||||
* Added `toUUIDOrNull`, `toUUIDOrZero` cast functions. [#16337](https://github.com/ClickHouse/ClickHouse/pull/16337) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Add `max_concurrent_queries_for_all_users` setting, see [#6636](https://github.com/ClickHouse/ClickHouse/issues/6636) for use cases. [#16154](https://github.com/ClickHouse/ClickHouse/pull/16154) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
@ -129,6 +312,46 @@
|
||||
|
||||
## ClickHouse release 20.10
|
||||
|
||||
### ClickHouse release v20.10.7.4-stable, 2020-12-24
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fixed issue when `clickhouse-odbc-bridge` process is unreachable by server on machines with dual `IPv4/IPv6` stack and fixed issue when ODBC dictionary updates are performed using malformed queries and/or cause crashes. This possibly closes [#14489](https://github.com/ClickHouse/ClickHouse/issues/14489). [#18278](https://github.com/ClickHouse/ClickHouse/pull/18278) ([Denis Glazachev](https://github.com/traceon)).
|
||||
* Fix key comparison between Enum and Int types. This fixes [#17989](https://github.com/ClickHouse/ClickHouse/issues/17989). [#18214](https://github.com/ClickHouse/ClickHouse/pull/18214) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed unique key convert crash in `MaterializeMySQL` database engine. This fixes [#18186](https://github.com/ClickHouse/ClickHouse/issues/18186) and fixes [#16372](https://github.com/ClickHouse/ClickHouse/issues/16372) [#18211](https://github.com/ClickHouse/ClickHouse/pull/18211) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed `std::out_of_range: basic_string` in S3 URL parsing. [#18059](https://github.com/ClickHouse/ClickHouse/pull/18059) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Fixed the issue when some tables not synchronized to ClickHouse from MySQL caused by the fact that convertion MySQL prefix index wasn't supported for MaterializeMySQL. This fixes [#15187](https://github.com/ClickHouse/ClickHouse/issues/15187) and fixes [#17912](https://github.com/ClickHouse/ClickHouse/issues/17912) [#17944](https://github.com/ClickHouse/ClickHouse/pull/17944) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix possible segfault in `topK` aggregate function. This closes [#17404](https://github.com/ClickHouse/ClickHouse/issues/17404). [#17845](https://github.com/ClickHouse/ClickHouse/pull/17845) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Do not restore parts from `WAL` if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)).
|
||||
* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)).
|
||||
* Fixed empty `system.stack_trace` table when server is running in daemon mode. [#17630](https://github.com/ClickHouse/ClickHouse/pull/17630) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed the behaviour when `clickhouse-client` is used in interactive mode with multiline queries and single line comment was erronously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed the issue when server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed `ALTER` query hang when the corresponding mutation was killed on the different replica. This fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed crash while reading from `JOIN` table with `LowCardinality` types. This fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) . [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed `ColumnConst` comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed bug when `ON CLUSTER` queries may hang forever for non-leader `ReplicatedMergeTreeTables`. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed fuzzer-found bug in function `fuzzBits`. This fixes [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed wrong result in big integers (128, 256 bit) when casting from double. [#16986](https://github.com/ClickHouse/ClickHouse/pull/16986) ([Mike](https://github.com/myrrc)).
|
||||
* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)).
|
||||
* Fixed possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` have `WHERE` expression on altering column and alter doesn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Blame info was not calculated correctly in `clickhouse-git-import`. [#16959](https://github.com/ClickHouse/ClickHouse/pull/16959) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed order by optimization with monotonous functions. This fixes [#16107](https://github.com/ClickHouse/ClickHouse/issues/16107). [#16956](https://github.com/ClickHouse/ClickHouse/pull/16956) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixrf optimization of group by with enabled setting `optimize_aggregators_of_group_by_keys` and joins. This fixes [#12604](https://github.com/ClickHouse/ClickHouse/issues/12604). [#16951](https://github.com/ClickHouse/ClickHouse/pull/16951) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)).
|
||||
* Fixrf possible error `Illegal type of argument` for queries with `ORDER BY`. This fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Abort multipart upload if no data was written to `WriteBufferFromS3`. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* Fixed crash when using `any` without any arguments. This fixes [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803). [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed the behaviour when ClickHouse used to always return 0 insted of a number of affected rows for `INSERT` queries via MySQL protocol. This fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed uncontrolled growth of `TDigest`. [#16680](https://github.com/ClickHouse/ClickHouse/pull/16680) ([hrissan](https://github.com/hrissan)).
|
||||
* Fixed remote query failure when using suffix `if` in Aggregate function. This fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
|
||||
|
||||
### ClickHouse release v20.10.4.1-stable, 2020-11-13
|
||||
|
||||
#### Bug Fix
|
||||
@ -178,7 +401,7 @@
|
||||
* Add `JSONStrings` format which output data in arrays of strings. [#14333](https://github.com/ClickHouse/ClickHouse/pull/14333) ([hcz](https://github.com/hczhcz)).
|
||||
* Add support for "Raw" column format for `Regexp` format. It allows to simply extract subpatterns as a whole without any escaping rules. [#15363](https://github.com/ClickHouse/ClickHouse/pull/15363) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Allow configurable `NULL` representation for `TSV` output format. It is controlled by the setting `output_format_tsv_null_representation` which is `\N` by default. This closes [#9375](https://github.com/ClickHouse/ClickHouse/issues/9375). Note that the setting only controls output format and `\N` is the only supported `NULL` representation for `TSV` input format. [#14586](https://github.com/ClickHouse/ClickHouse/pull/14586) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Support Decimal data type for `MaterializedMySQL`. `MaterializedMySQL` is an experimental feature. [#14535](https://github.com/ClickHouse/ClickHouse/pull/14535) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Support Decimal data type for `MaterializeMySQL`. `MaterializeMySQL` is an experimental feature. [#14535](https://github.com/ClickHouse/ClickHouse/pull/14535) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Add new feature: `SHOW DATABASES LIKE 'xxx'`. [#14521](https://github.com/ClickHouse/ClickHouse/pull/14521) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Added a script to import (arbitrary) git repository to ClickHouse as a sample dataset. [#14471](https://github.com/ClickHouse/ClickHouse/pull/14471) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Now insert statements can have asterisk (or variants) with column transformers in the column list. [#14453](https://github.com/ClickHouse/ClickHouse/pull/14453) ([Amos Bird](https://github.com/amosbird)).
|
||||
@ -200,18 +423,18 @@
|
||||
* Fix a very wrong code in TwoLevelStringHashTable implementation, which might lead to memory leak. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix segfault in some cases of wrong aggregation in lambdas. [#16082](https://github.com/ClickHouse/ClickHouse/pull/16082) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)).
|
||||
* `MaterializedMySQL` (experimental feature): Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* `MaterializeMySQL` (experimental feature): Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Allow to use `direct` layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* `MaterializedMySQL` (experimental feature): Fix crash on create database failure. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* `MaterializeMySQL` (experimental feature): Fix crash on create database failure. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) - Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixes [#12513](https://github.com/ClickHouse/ClickHouse/issues/12513): difference expressions with same alias when query is reanalyzed. [#15886](https://github.com/ClickHouse/ClickHouse/pull/15886) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix possible very rare deadlocks in RBAC implementation. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)).
|
||||
* `MaterializedMySQL` (experimental feature): Fix `select count()` inaccuracy. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([tavplubix](https://github.com/tavplubix)).
|
||||
* `MaterializeMySQL` (experimental feature): Fix `select count()` inaccuracy. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix some cases of queries, in which only virtual columns are selected. Previously `Not found column _nothing in block` exception may be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix drop of materialized view with inner table in Atomic database (hangs all subsequent DROP TABLE due to hang of the worker thread, due to recursive DROP TABLE for inner table of MV). [#15743](https://github.com/ClickHouse/ClickHouse/pull/15743) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Possibility to move part to another disk/volume if the first attempt was failed. [#15723](https://github.com/ClickHouse/ClickHouse/pull/15723) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
@ -243,37 +466,37 @@
|
||||
* Fix hang of queries with a lot of subqueries to same table of `MySQL` engine. Previously, if there were more than 16 subqueries to same `MySQL` table in query, it hang forever. [#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix 'Unknown identifier' in GROUP BY when query has JOIN over Merge table. [#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fix instance crash when using `joinGet` with `LowCardinality` types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix instance crash when using `joinGet` with `LowCardinality` types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
|
||||
* Adjust Decimal field size in MySQL column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
|
||||
* Fixes `Data compressed with different methods` in `join_algorithm='auto'`. Keep LowCardinality as type for left table join key in `join_algorithm='partial_merge'`. [#15088](https://github.com/ClickHouse/ClickHouse/pull/15088) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Update `jemalloc` to fix `percpu_arena` with affinity mask. [#15035](https://github.com/ClickHouse/ClickHouse/pull/15035) ([Azat Khuzhin](https://github.com/azat)). [#14957](https://github.com/ClickHouse/ClickHouse/pull/14957) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes https://github.com/ClickHouse/ClickHouse/issues/14908. [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in Docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix crash in RIGHT or FULL JOIN with join_algorith='auto' when memory limit exceeded and we should change HashJoin with MergeJoin. [#15002](https://github.com/ClickHouse/ClickHouse/pull/15002) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix to make predicate push down work when subquery contains `finalizeAggregation` function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes https://github.com/ClickHouse/ClickHouse/issues/14923. [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* `MaterializedMySQL` (experimental feature): Fixed `.metadata.tmp File exists` error. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* `MaterializeMySQL` (experimental feature): Fixed `.metadata.tmp File exists` error. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix SIGSEGV for an attempt to INSERT into StorageFile with file descriptor. [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed segfault in `cache` dictionary [#14837](https://github.com/ClickHouse/ClickHouse/issues/14837). [#14879](https://github.com/ClickHouse/ClickHouse/pull/14879) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* `MaterializedMySQL` (experimental feature): Fixed bug in parsing MySQL binlog events, which causes `Attempt to read after eof` and `Packet payload is not fully read` in `MaterializeMySQL` database engine. [#14852](https://github.com/ClickHouse/ClickHouse/pull/14852) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* `MaterializeMySQL` (experimental feature): Fixed bug in parsing MySQL binlog events, which causes `Attempt to read after eof` and `Packet payload is not fully read` in `MaterializeMySQL` database engine. [#14852](https://github.com/ClickHouse/ClickHouse/pull/14852) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix rare error in `SELECT` queries when the queried column has `DEFAULT` expression which depends on the other column which also has `DEFAULT` and not present in select query and not exists on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)).
|
||||
* `Replace` column transformer should replace identifiers with cloned ASTs. This fixes https://github.com/ClickHouse/ClickHouse/issues/14695 . [#14734](https://github.com/ClickHouse/ClickHouse/pull/14734) ([Amos Bird](https://github.com/amosbird)).
|
||||
* `Replace` column transformer should replace identifiers with cloned ASTs. This fixes [#14695](https://github.com/ClickHouse/ClickHouse/issues/14695) . [#14734](https://github.com/ClickHouse/ClickHouse/pull/14734) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed missed default database name in metadata of materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix bug when `ALTER UPDATE` mutation with `Nullable` column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix wrong Decimal multiplication result caused wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fix function `has` with `LowCardinality` of `Nullable`. [#14591](https://github.com/ClickHouse/ClickHouse/pull/14591) ([Mike](https://github.com/myrrc)).
|
||||
* Cleanup data directory after Zookeeper exceptions during CreateQuery for StorageReplicatedMergeTree Engine. [#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Fix rare segfaults in functions with combinator `-Resample`, which could appear in result of overflow with very large parameters. [#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix a bug when converting `Nullable(String)` to Enum. Introduced by https://github.com/ClickHouse/ClickHouse/pull/12745. This fixes https://github.com/ClickHouse/ClickHouse/issues/14435. [#14530](https://github.com/ClickHouse/ClickHouse/pull/14530) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix a bug when converting `Nullable(String)` to Enum. Introduced by [#12745](https://github.com/ClickHouse/ClickHouse/pull/12745). This fixes [#14435](https://github.com/ClickHouse/ClickHouse/issues/14435). [#14530](https://github.com/ClickHouse/ClickHouse/pull/14530) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix `currentDatabase()` function cannot be used in `ON CLUSTER` ddl query. [#14211](https://github.com/ClickHouse/ClickHouse/pull/14211) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* `MaterializedMySQL` (experimental feature): Fixed `Packet payload is not fully read` error in `MaterializeMySQL` database engine. [#14696](https://github.com/ClickHouse/ClickHouse/pull/14696) ([BohuTANG](https://github.com/BohuTANG)).
|
||||
* `MaterializeMySQL` (experimental feature): Fixed `Packet payload is not fully read` error in `MaterializeMySQL` database engine. [#14696](https://github.com/ClickHouse/ClickHouse/pull/14696) ([BohuTANG](https://github.com/BohuTANG)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
@ -308,7 +531,7 @@
|
||||
* Add an option to skip access checks for `DiskS3`. `s3` disk is an experimental feature. [#14497](https://github.com/ClickHouse/ClickHouse/pull/14497) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* Speed up server shutdown process if there are ongoing S3 requests. [#14496](https://github.com/ClickHouse/ClickHouse/pull/14496) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* `SYSTEM RELOAD CONFIG` now throws an exception if failed to reload and continues using the previous users.xml. The background periodic reloading also continues using the previous users.xml if failed to reload. [#14492](https://github.com/ClickHouse/ClickHouse/pull/14492) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* For INSERTs with inline data in VALUES format in the script mode of `clickhouse-client`, support semicolon as the data terminator, in addition to the new line. Closes https://github.com/ClickHouse/ClickHouse/issues/12288. [#13192](https://github.com/ClickHouse/ClickHouse/pull/13192) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* For INSERTs with inline data in VALUES format in the script mode of `clickhouse-client`, support semicolon as the data terminator, in addition to the new line. Closes [#12288](https://github.com/ClickHouse/ClickHouse/issues/12288). [#13192](https://github.com/ClickHouse/ClickHouse/pull/13192) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Support custom codecs in compact parts. [#12183](https://github.com/ClickHouse/ClickHouse/pull/12183) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### Performance Improvement
|
||||
@ -320,7 +543,7 @@
|
||||
* Improve performance of 256-bit types using (u)int64_t as base type for wide integers. Original wide integers use 8-bit types as base. [#14859](https://github.com/ClickHouse/ClickHouse/pull/14859) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Explicitly use a temporary disk to store vertical merge temporary data. [#15639](https://github.com/ClickHouse/ClickHouse/pull/15639) ([Grigory Pervakov](https://github.com/GrigoryPervakov)).
|
||||
* Use one S3 DeleteObjects request instead of multiple DeleteObject in a loop. No any functionality changes, so covered by existing tests like integration/test_log_family_s3. [#15238](https://github.com/ClickHouse/ClickHouse/pull/15238) ([ianton-ru](https://github.com/ianton-ru)).
|
||||
* Fix `DateTime <op> DateTime` mistakenly choosing the slow generic implementation. This fixes https://github.com/ClickHouse/ClickHouse/issues/15153. [#15178](https://github.com/ClickHouse/ClickHouse/pull/15178) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix `DateTime <op> DateTime` mistakenly choosing the slow generic implementation. This fixes [#15153](https://github.com/ClickHouse/ClickHouse/issues/15153). [#15178](https://github.com/ClickHouse/ClickHouse/pull/15178) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Improve performance of GROUP BY key of type `FixedString`. [#15034](https://github.com/ClickHouse/ClickHouse/pull/15034) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Only `mlock` code segment when starting clickhouse-server. In previous versions, all mapped regions were locked in memory, including debug info. Debug info is usually splitted to a separate file but if it isn't, it led to +2..3 GiB memory usage. [#14929](https://github.com/ClickHouse/ClickHouse/pull/14929) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* ClickHouse binary become smaller due to link time optimization.
|
||||
@ -387,7 +610,7 @@
|
||||
* Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes https://github.com/ClickHouse/ClickHouse/issues/15628. [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -398,7 +621,7 @@
|
||||
* Fixed too low default value of `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix error `Cannot add simple transform to empty Pipe` which happened while reading from `Buffer` table which has different structure than destination table. It was possible if destination table returned empty result for query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed bug with globs in S3 table function, region from URL was not applied to S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes https://github.com/ClickHouse/ClickHouse/issues/15598. [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
|
||||
* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
|
||||
* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
|
||||
#### Improvement
|
||||
@ -422,11 +645,11 @@
|
||||
* Fix `Missing columns` errors when selecting columns which absent in data, but depend on other columns which also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)).
|
||||
* Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)).
|
||||
* Fix bug where queries like SELECT toStartOfDay(today()) fail complaining about empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Fix bug where queries like `SELECT toStartOfDay(today())` fail complaining about empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix instance crash when using joinGet with LowCardinality types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
|
||||
* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
|
||||
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
|
||||
@ -455,10 +678,10 @@
|
||||
* Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix wrong Decimal multiplication result caused wrong decimal scale of result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fixed inconsistent comparison with primary key of type `FixedString` on index analysis if they're compered with a string of less size. This fixes https://github.com/ClickHouse/ClickHouse/issues/14908. [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed inconsistent comparison with primary key of type `FixedString` on index analysis if they're compered with a string of less size. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix bug which leads to wrong merges assignment if table has partitions with a single part. [#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)).
|
||||
* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes https://github.com/ClickHouse/ClickHouse/issues/14923. [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)).
|
||||
@ -501,7 +724,7 @@
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Optimize queries with LIMIT/LIMIT BY/ORDER BY for distributed with GROUP BY sharding_key (under optimize_skip_unused_shards and optimize_distributed_group_by_sharding_key). [#10373](https://github.com/ClickHouse/ClickHouse/pull/10373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Optimize queries with LIMIT/LIMIT BY/ORDER BY for distributed with GROUP BY sharding_key (under `optimize_skip_unused_shards` and `optimize_distributed_group_by_sharding_key`). [#10373](https://github.com/ClickHouse/ClickHouse/pull/10373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Creating sets for multiple `JOIN` and `IN` in parallel. It may slightly improve performance for queries with several different `IN subquery` expressions. [#14412](https://github.com/ClickHouse/ClickHouse/pull/14412) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Improve Kafka engine performance by providing independent thread for each consumer. Separate thread pool for streaming engines (like Kafka). [#13939](https://github.com/ClickHouse/ClickHouse/pull/13939) ([fastio](https://github.com/fastio)).
|
||||
|
||||
@ -516,6 +739,31 @@
|
||||
|
||||
## ClickHouse release 20.8
|
||||
|
||||
### ClickHouse release v20.8.10.13-lts, 2020-12-24
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* When server log rotation was configured using `logger.size` parameter with numeric value larger than 2^32, the logs were not rotated properly. [#17905](https://github.com/ClickHouse/ClickHouse/pull/17905) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fixed incorrect initialization of `max_compress_block_size` in MergeTreeWriterSettings with `min_compress_block_size`. [#17833](https://github.com/ClickHouse/ClickHouse/pull/17833) ([flynn](https://github.com/ucasFL)).
|
||||
* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)).
|
||||
* Fixed `ALTER` query hang when the corresponding mutation was killed on the different replica. This fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed a bug when mark cache size was underestimated by ClickHouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed `ColumnConst` comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)).
|
||||
* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)).
|
||||
* Fixed possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` have `WHERE` expression on altering column and alter doesn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)).
|
||||
* Fixed possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* Fixed crash when using `any` without any arguments. This fixes [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803). [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed inconsistent behaviour of `optimize_read_in_order/optimize_aggregation_in_order` with max_threads > 0 and expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed the issue when query optimization was producing wrong result if query contains `ARRAY JOIN`. [#17887](https://github.com/ClickHouse/ClickHouse/pull/17887) ([sundyli](https://github.com/sundy-li)).
|
||||
* Query is finished faster in case of exception. Cancel execution on remote replicas if exception happens. [#15578](https://github.com/ClickHouse/ClickHouse/pull/15578) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
|
||||
### ClickHouse release v20.8.6.6-lts, 2020-11-13
|
||||
|
||||
#### Bug Fix
|
||||
@ -579,15 +827,15 @@
|
||||
* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix instance crash when using joinGet with LowCardinality types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
|
||||
* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
|
||||
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes https://github.com/ClickHouse/ClickHouse/issues/14908. [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* If function `bar` was called with specifically crafter arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes https://github.com/ClickHouse/ClickHouse/issues/14923. [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)).
|
||||
@ -647,16 +895,16 @@
|
||||
|
||||
* Fix visible data clobbering by progress bar in client in interactive mode. This fixes [#12562](https://github.com/ClickHouse/ClickHouse/issues/12562) and [#13369](https://github.com/ClickHouse/ClickHouse/issues/13369) and [#13584](https://github.com/ClickHouse/ClickHouse/issues/13584) and fixes [#12964](https://github.com/ClickHouse/ClickHouse/issues/12964). [#13691](https://github.com/ClickHouse/ClickHouse/pull/13691) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed incorrect sorting order if `LowCardinality` column when sorting by multiple columns. This fixes [#13958](https://github.com/ClickHouse/ClickHouse/issues/13958). [#14223](https://github.com/ClickHouse/ClickHouse/pull/14223) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafter parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Check for array size overflow in `topK` aggregate function. Without this check the user may send a query with carefully crafted parameters that will lead to server crash. This closes [#14452](https://github.com/ClickHouse/ClickHouse/issues/14452). [#14467](https://github.com/ClickHouse/ClickHouse/pull/14467) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug which can lead to wrong merges assignment if table has partitions with a single part. [#14444](https://github.com/ClickHouse/ClickHouse/pull/14444) ([alesapin](https://github.com/alesapin)).
|
||||
* Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent rare possible query hung. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334). [#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) [#14334](https://github.com/ClickHouse/ClickHouse/pull/14334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix crash during `ALTER` query for table which was created `AS table_function`. Fixes [#14212](https://github.com/ClickHouse/ClickHouse/issues/14212). [#14326](https://github.com/ClickHouse/ClickHouse/pull/14326) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix exception during ALTER LIVE VIEW query with REFRESH command. Live view is an experimental feature. [#14320](https://github.com/ClickHouse/ClickHouse/pull/14320) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Fix QueryPlan lifetime (for EXPLAIN PIPELINE graph=1) for queries with nested interpreter. [#14315](https://github.com/ClickHouse/ClickHouse/pull/14315) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix segfault in `clickhouse-odbc-bridge` during schema fetch from some external sources. This PR fixes https://github.com/ClickHouse/ClickHouse/issues/13861. [#14267](https://github.com/ClickHouse/ClickHouse/pull/14267) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash in mark inclusion search introduced in https://github.com/ClickHouse/ClickHouse/pull/12277. [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix segfault in `clickhouse-odbc-bridge` during schema fetch from some external sources. This PR fixes [#13861](https://github.com/ClickHouse/ClickHouse/issues/13861). [#14267](https://github.com/ClickHouse/ClickHouse/pull/14267) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash in mark inclusion search introduced in [#12277](https://github.com/ClickHouse/ClickHouse/pull/12277). [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix creation of tables with named tuples. This fixes [#13027](https://github.com/ClickHouse/ClickHouse/issues/13027). [#14143](https://github.com/ClickHouse/ClickHouse/pull/14143) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix formatting of minimal negative decimal numbers. This fixes https://github.com/ClickHouse/ClickHouse/issues/14111. [#14119](https://github.com/ClickHouse/ClickHouse/pull/14119) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fix formatting of minimal negative decimal numbers. This fixes [#14111](https://github.com/ClickHouse/ClickHouse/issues/14111). [#14119](https://github.com/ClickHouse/ClickHouse/pull/14119) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fix `DistributedFilesToInsert` metric (zeroed when it should not). [#14095](https://github.com/ClickHouse/ClickHouse/pull/14095) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix `pointInPolygon` with const 2d array as polygon. [#14079](https://github.com/ClickHouse/ClickHouse/pull/14079) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fixed wrong mount point in extra info for `Poco::Exception: no space left on device`. [#14050](https://github.com/ClickHouse/ClickHouse/pull/14050) ([tavplubix](https://github.com/tavplubix)).
|
||||
@ -685,10 +933,10 @@
|
||||
* Fix wrong code in function `netloc`. This fixes [#13335](https://github.com/ClickHouse/ClickHouse/issues/13335). [#13446](https://github.com/ClickHouse/ClickHouse/pull/13446) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix possible race in `StorageMemory`. [#13416](https://github.com/ClickHouse/ClickHouse/pull/13416) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix missing or excessive headers in `TSV/CSVWithNames` formats in HTTP protocol. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504). [#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes https://github.com/ClickHouse/ClickHouse/issues/5779, https://github.com/ClickHouse/ClickHouse/issues/12527. [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix access to `redis` dictionary after connection was dropped once. It may happen with `cache` and `direct` dictionary layouts. [#13082](https://github.com/ClickHouse/ClickHouse/pull/13082) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Removed wrong auth access check when using ClickHouseDictionarySource to query remote tables. [#12756](https://github.com/ClickHouse/ClickHouse/pull/12756) ([sundyli](https://github.com/sundy-li)).
|
||||
* Properly distinguish subqueries in some cases for common subexpression elimination. https://github.com/ClickHouse/ClickHouse/issues/8333. [#8367](https://github.com/ClickHouse/ClickHouse/pull/8367) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Properly distinguish subqueries in some cases for common subexpression elimination. [#8333](https://github.com/ClickHouse/ClickHouse/issues/8333). [#8367](https://github.com/ClickHouse/ClickHouse/pull/8367) ([Amos Bird](https://github.com/amosbird)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
@ -756,7 +1004,7 @@
|
||||
* Updating LDAP user authentication suite to check that it works with RBAC. [#13656](https://github.com/ClickHouse/ClickHouse/pull/13656) ([vzakaznikov](https://github.com/vzakaznikov)).
|
||||
* Removed `-DENABLE_CURL_CLIENT` for `contrib/aws`. [#13628](https://github.com/ClickHouse/ClickHouse/pull/13628) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Increasing health-check timeouts for ClickHouse nodes and adding support to dump docker-compose logs if unhealthy containers found. [#13612](https://github.com/ClickHouse/ClickHouse/pull/13612) ([vzakaznikov](https://github.com/vzakaznikov)).
|
||||
* Make sure https://github.com/ClickHouse/ClickHouse/issues/10977 is invalid. [#13539](https://github.com/ClickHouse/ClickHouse/pull/13539) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Make sure [#10977](https://github.com/ClickHouse/ClickHouse/issues/10977) is invalid. [#13539](https://github.com/ClickHouse/ClickHouse/pull/13539) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Skip PR's from robot-clickhouse. [#13489](https://github.com/ClickHouse/ClickHouse/pull/13489) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Move Dockerfiles from integration tests to `docker/test` directory. docker_compose files are available in `runner` docker container. Docker images are built in CI and not in integration tests. [#13448](https://github.com/ClickHouse/ClickHouse/pull/13448) ([Ilya Yatsishin](https://github.com/qoega)).
|
||||
|
||||
@ -788,7 +1036,7 @@
|
||||
* Add `FROM_UNIXTIME` function for compatibility with MySQL, related to [12149](https://github.com/ClickHouse/ClickHouse/issues/12149). [#12484](https://github.com/ClickHouse/ClickHouse/pull/12484) ([flynn](https://github.com/ucasFL)).
|
||||
* Allow Nullable types as keys in MergeTree tables if `allow_nullable_key` table setting is enabled. Closes [#5319](https://github.com/ClickHouse/ClickHouse/issues/5319). [#12433](https://github.com/ClickHouse/ClickHouse/pull/12433) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Integration with [COS](https://intl.cloud.tencent.com/product/cos). [#12386](https://github.com/ClickHouse/ClickHouse/pull/12386) ([fastio](https://github.com/fastio)).
|
||||
* Add mapAdd and mapSubtract functions for adding/subtracting key-mapped values. [#11735](https://github.com/ClickHouse/ClickHouse/pull/11735) ([Ildus Kurbangaliev](https://github.com/ildus)).
|
||||
* Add `mapAdd` and `mapSubtract` functions for adding/subtracting key-mapped values. [#11735](https://github.com/ClickHouse/ClickHouse/pull/11735) ([Ildus Kurbangaliev](https://github.com/ildus)).
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
@ -1071,7 +1319,7 @@
|
||||
|
||||
* Improved performace of 'ORDER BY' and 'GROUP BY' by prefix of sorting key (enabled with `optimize_aggregation_in_order` setting, disabled by default). [#11696](https://github.com/ClickHouse/ClickHouse/pull/11696) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Removed injective functions inside `uniq*()` if `set optimize_injective_functions_inside_uniq=1`. [#12337](https://github.com/ClickHouse/ClickHouse/pull/12337) ([Ruslan Kamalov](https://github.com/kamalov-ruslan)).
|
||||
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Implemented single part uploads for DiskS3 (experimental feature). [#12026](https://github.com/ClickHouse/ClickHouse/pull/12026) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
|
||||
#### Experimental Feature
|
||||
@ -1133,7 +1381,7 @@
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
@ -1213,7 +1461,7 @@
|
||||
* Fix wrong result of comparison of FixedString with constant String. This fixes [#11393](https://github.com/ClickHouse/ClickHouse/issues/11393). This bug appeared in version 20.4. [#11828](https://github.com/ClickHouse/ClickHouse/pull/11828) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix wrong result for `if` with NULLs in condition. [#11807](https://github.com/ClickHouse/ClickHouse/pull/11807) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fix using too many threads for queries. [#11788](https://github.com/ClickHouse/ClickHouse/pull/11788) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed `Scalar doesn't exist` exception when using `WITH <scalar subquery> ...` in `SELECT ... FROM merge_tree_table ...` https://github.com/ClickHouse/ClickHouse/issues/11621. [#11767](https://github.com/ClickHouse/ClickHouse/pull/11767) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed `Scalar doesn't exist` exception when using `WITH <scalar subquery> ...` in `SELECT ... FROM merge_tree_table ...` [#11621](https://github.com/ClickHouse/ClickHouse/issues/11621). [#11767](https://github.com/ClickHouse/ClickHouse/pull/11767) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix unexpected behaviour of queries like `SELECT *, xyz.*` which were success while an error expected. [#11753](https://github.com/ClickHouse/ClickHouse/pull/11753) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Now replicated fetches will be cancelled during metadata alter. [#11744](https://github.com/ClickHouse/ClickHouse/pull/11744) ([alesapin](https://github.com/alesapin)).
|
||||
* Parse metadata stored in zookeeper before checking for equality. [#11739](https://github.com/ClickHouse/ClickHouse/pull/11739) ([Azat Khuzhin](https://github.com/azat)).
|
||||
@ -1264,8 +1512,8 @@
|
||||
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash when `SET DEFAULT ROLE` is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in `Protobuf` format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash when `SET DEFAULT ROLE` is called with wrong arguments. This fixes [#10586](https://github.com/ClickHouse/ClickHouse/issues/10586). [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in `Protobuf` format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fixed a bug when `cache` dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -1331,7 +1579,7 @@
|
||||
* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix SELECT of column ALIAS which default expression type different from column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Implemented comparison between DateTime64 and String values (just like for DateTime). [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Fix index corruption, which may accur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix index corruption, which may occur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Disable GROUP BY sharding_key optimization by default (`optimize_distributed_group_by_sharding_key` had been introduced and turned of by default, due to trickery of sharding_key analyzing, simple example is `if` in sharding key) and fix it for WITH ROLLUP/CUBE/TOTALS. [#10516](https://github.com/ClickHouse/ClickHouse/pull/10516) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixes: [#10263](https://github.com/ClickHouse/ClickHouse/issues/10263) (after that PR dist send via INSERT had been postponing on each INSERT) Fixes: [#8756](https://github.com/ClickHouse/ClickHouse/issues/8756) (that PR breaks distributed sends with all of the following conditions met (unlikely setup for now I guess): `internal_replication == false`, multiple local shards (activates the hardlinking code) and `distributed_storage_policy` (makes `link(2)` fails on `EXDEV`)). [#10486](https://github.com/ClickHouse/ClickHouse/pull/10486) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed error with "max_rows_to_sort" limit. [#10268](https://github.com/ClickHouse/ClickHouse/pull/10268) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
@ -1488,7 +1736,7 @@
|
||||
* Lower memory usage in tests. [#10617](https://github.com/ClickHouse/ClickHouse/pull/10617) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixing hard coded timeouts in new live view tests. [#10604](https://github.com/ClickHouse/ClickHouse/pull/10604) ([vzakaznikov](https://github.com/vzakaznikov)).
|
||||
* Increasing timeout when opening a client in tests/queries/0_stateless/helpers/client.py. [#10599](https://github.com/ClickHouse/ClickHouse/pull/10599) ([vzakaznikov](https://github.com/vzakaznikov)).
|
||||
* Enable ThinLTO for clang builds, continuation of https://github.com/ClickHouse/ClickHouse/pull/10435. [#10585](https://github.com/ClickHouse/ClickHouse/pull/10585) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Enable ThinLTO for clang builds, continuation of [#10435](https://github.com/ClickHouse/ClickHouse/pull/10435). [#10585](https://github.com/ClickHouse/ClickHouse/pull/10585) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Adding fuzzers and preparing for oss-fuzz integration. [#10546](https://github.com/ClickHouse/ClickHouse/pull/10546) ([kyprizel](https://github.com/kyprizel)).
|
||||
* Fix FreeBSD build. [#10150](https://github.com/ClickHouse/ClickHouse/pull/10150) ([Ivan](https://github.com/abyss7)).
|
||||
* Add new build for query tests using pytest framework. [#10039](https://github.com/ClickHouse/ClickHouse/pull/10039) ([Ivan](https://github.com/abyss7)).
|
||||
@ -1563,7 +1811,7 @@
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
@ -1617,7 +1865,7 @@
|
||||
* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fix pointInPolygon with nan as point. Fixes [#11375](https://github.com/ClickHouse/ClickHouse/issues/11375). [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fix potential uninitialized memory read in MergeTree shutdown if table was not created successfully. [#11420](https://github.com/ClickHouse/ClickHouse/pull/11420) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -1633,8 +1881,8 @@
|
||||
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes https://github.com/ClickHouse/ClickHouse/issues/10586. [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash when SET DEFAULT ROLE is called with wrong arguments. This fixes [#10586](https://github.com/ClickHouse/ClickHouse/issues/10586). [#11278](https://github.com/ClickHouse/ClickHouse/pull/11278) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -1679,7 +1927,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Now constraints are updated if the column participating in `CONSTRAINT` expression was renamed. Fixes [#10844](https://github.com/ClickHouse/ClickHouse/issues/10844). [#10847](https://github.com/ClickHouse/ClickHouse/pull/10847) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed potential read of uninitialized memory in cache-dictionary. [#10834](https://github.com/ClickHouse/ClickHouse/pull/10834) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed columns order after `Block::sortColumns()`. [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984] (https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed `UBSan` and `MSan` report in `DateLUT`. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed incorrect type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew)).
|
||||
* Fixed `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` without exception should be finished if exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -1707,15 +1955,15 @@ No changes compared to v20.4.3.16-stable.
|
||||
|
||||
#### New Feature
|
||||
* Add support for secured connection from ClickHouse to Zookeeper [#10184](https://github.com/ClickHouse/ClickHouse/pull/10184) ([Konstantin Lebedev](https://github.com/xzkostyan))
|
||||
* Support custom HTTP handlers. See ISSUES-5436 for description. [#7572](https://github.com/ClickHouse/ClickHouse/pull/7572) ([Winter Zhang](https://github.com/zhang2014))
|
||||
* Support custom HTTP handlers. See [#5436](https://github.com/ClickHouse/ClickHouse/issues/5436) for description. [#7572](https://github.com/ClickHouse/ClickHouse/pull/7572) ([Winter Zhang](https://github.com/zhang2014))
|
||||
* Add MessagePack Input/Output format. [#9889](https://github.com/ClickHouse/ClickHouse/pull/9889) ([Kruglov Pavel](https://github.com/Avogar))
|
||||
* Add Regexp input format. [#9196](https://github.com/ClickHouse/ClickHouse/pull/9196) ([Kruglov Pavel](https://github.com/Avogar))
|
||||
* Added output format `Markdown` for embedding tables in markdown documents. [#10317](https://github.com/ClickHouse/ClickHouse/pull/10317) ([Kruglov Pavel](https://github.com/Avogar))
|
||||
* Added support for custom settings section in dictionaries. Also fixes issue [#2829](https://github.com/ClickHouse/ClickHouse/issues/2829). [#10137](https://github.com/ClickHouse/ClickHouse/pull/10137) ([Artem Streltsov](https://github.com/kekekekule))
|
||||
* Added custom settings support in DDL-queries for CREATE DICTIONARY [#10465](https://github.com/ClickHouse/ClickHouse/pull/10465) ([Artem Streltsov](https://github.com/kekekekule))
|
||||
* Added custom settings support in DDL-queries for `CREATE DICTIONARY` [#10465](https://github.com/ClickHouse/ClickHouse/pull/10465) ([Artem Streltsov](https://github.com/kekekekule))
|
||||
* Add simple server-wide memory profiler that will collect allocation contexts when server memory usage becomes higher than the next allocation threshold. [#10444](https://github.com/ClickHouse/ClickHouse/pull/10444) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Add setting `always_fetch_merged_part` which restrict replica to merge parts by itself and always prefer dowloading from other replicas. [#10379](https://github.com/ClickHouse/ClickHouse/pull/10379) ([alesapin](https://github.com/alesapin))
|
||||
* Add function JSONExtractKeysAndValuesRaw which extracts raw data from JSON objects [#10378](https://github.com/ClickHouse/ClickHouse/pull/10378) ([hcz](https://github.com/hczhcz))
|
||||
* Add function `JSONExtractKeysAndValuesRaw` which extracts raw data from JSON objects [#10378](https://github.com/ClickHouse/ClickHouse/pull/10378) ([hcz](https://github.com/hczhcz))
|
||||
* Add memory usage from OS to `system.asynchronous_metrics`. [#10361](https://github.com/ClickHouse/ClickHouse/pull/10361) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Added generic variants for functions `least` and `greatest`. Now they work with arbitrary number of arguments of arbitrary types. This fixes [#4767](https://github.com/ClickHouse/ClickHouse/issues/4767) [#10318](https://github.com/ClickHouse/ClickHouse/pull/10318) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Now ClickHouse controls timeouts of dictionary sources on its side. Two new settings added to cache dictionary configuration: `strict_max_lifetime_seconds`, which is `max_lifetime` by default, and `query_wait_timeout_milliseconds`, which is one minute by default. The first settings is also useful with `allow_read_expired_keys` settings (to forbid reading very expired keys). [#10337](https://github.com/ClickHouse/ClickHouse/pull/10337) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
|
||||
@ -1728,7 +1976,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Add ability to query Distributed over Distributed (w/o `distributed_group_by_no_merge`) ... [#9923](https://github.com/ClickHouse/ClickHouse/pull/9923) ([Azat Khuzhin](https://github.com/azat))
|
||||
* Add function `arrayReduceInRanges` which aggregates array elements in given ranges. [#9598](https://github.com/ClickHouse/ClickHouse/pull/9598) ([hcz](https://github.com/hczhcz))
|
||||
* Add Dictionary Status on prometheus exporter. [#9622](https://github.com/ClickHouse/ClickHouse/pull/9622) ([Guillaume Tassery](https://github.com/YiuRULE))
|
||||
* Add function arrayAUC [#8698](https://github.com/ClickHouse/ClickHouse/pull/8698) ([taiyang-li](https://github.com/taiyang-li))
|
||||
* Add function `arrayAUC` [#8698](https://github.com/ClickHouse/ClickHouse/pull/8698) ([taiyang-li](https://github.com/taiyang-li))
|
||||
* Support `DROP VIEW` statement for better TPC-H compatibility. [#9831](https://github.com/ClickHouse/ClickHouse/pull/9831) ([Amos Bird](https://github.com/amosbird))
|
||||
* Add 'strict_order' option to windowFunnel() [#9773](https://github.com/ClickHouse/ClickHouse/pull/9773) ([achimbab](https://github.com/achimbab))
|
||||
* Support `DATE` and `TIMESTAMP` SQL operators, e.g. `SELECT date '2001-01-01'` [#9691](https://github.com/ClickHouse/ClickHouse/pull/9691) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
@ -1932,7 +2180,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Move integration tests docker files to docker/ directory. [#10335](https://github.com/ClickHouse/ClickHouse/pull/10335) ([Ilya Yatsishin](https://github.com/qoega))
|
||||
* Allow to use `clang-10` in CI. It ensures that [#10238](https://github.com/ClickHouse/ClickHouse/issues/10238) is fixed. [#10384](https://github.com/ClickHouse/ClickHouse/pull/10384) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Update OpenSSL to upstream master. Fixed the issue when TLS connections may fail with the message `OpenSSL SSL_read: error:14094438:SSL routines:ssl3_read_bytes:tlsv1 alert internal error` and `SSL Exception: error:2400006E:random number generator::error retrieving entropy`. The issue was present in version 20.1. [#8956](https://github.com/ClickHouse/ClickHouse/pull/8956) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Fix clang-10 build. https://github.com/ClickHouse/ClickHouse/issues/10238 [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird))
|
||||
* Fix clang-10 build. [#10238](https://github.com/ClickHouse/ClickHouse/issues/10238) [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird))
|
||||
* Add performance test for [Parallel INSERT for materialized view](https://github.com/ClickHouse/ClickHouse/pull/10052). [#10345](https://github.com/ClickHouse/ClickHouse/pull/10345) ([vxider](https://github.com/Vxider))
|
||||
* Fix flaky test `test_settings_constraints_distributed.test_insert_clamps_settings`. [#10346](https://github.com/ClickHouse/ClickHouse/pull/10346) ([Vitaly Baranov](https://github.com/vitlibar))
|
||||
* Add util to test results upload in CI ClickHouse [#10330](https://github.com/ClickHouse/ClickHouse/pull/10330) ([Ilya Yatsishin](https://github.com/qoega))
|
||||
@ -2106,7 +2354,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Index not used for IN operator with literals", performance regression introduced around v19.3. This fixes "[#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Index not used for IN operator with literals, performance regression introduced around v19.3. This fixes [#10574](https://github.com/ClickHouse/ClickHouse/issues/10574). [#12062](https://github.com/ClickHouse/ClickHouse/pull/12062) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
|
||||
|
||||
### ClickHouse release v20.3.12.112-lts 2020-06-25
|
||||
@ -2148,7 +2396,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fix pointInPolygon with nan as point. Fixes [#11375](https://github.com/ClickHouse/ClickHouse/issues/11375). [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fix crash in JOIN over LowCarinality(T) and Nullable(T). [#11380](https://github.com/ClickHouse/ClickHouse/issues/11380). [#11414](https://github.com/ClickHouse/ClickHouse/pull/11414) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fix error code for wrong `USING` key. [#11373](https://github.com/ClickHouse/ClickHouse/issues/11373). [#11404](https://github.com/ClickHouse/ClickHouse/pull/11404) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
@ -2165,7 +2413,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fixed a bug when cache-dictionary could return default value instead of normal (when there are only expired keys). This affects only string fields. [#11233](https://github.com/ClickHouse/ClickHouse/pull/11233) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix error `Block structure mismatch in QueryPipeline` while reading from `VIEW` with constants in inner query. Fixes [#11181](https://github.com/ClickHouse/ClickHouse/issues/11181). [#11205](https://github.com/ClickHouse/ClickHouse/pull/11205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix possible exception `Invalid status for associated output`. [#11200](https://github.com/ClickHouse/ClickHouse/pull/11200) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -2196,7 +2444,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fixed `SIGSEGV` in `StringHashTable` if such a key does not exist. [#10870](https://github.com/ClickHouse/ClickHouse/pull/10870) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed bug in `ReplicatedMergeTree` which might cause some `ALTER` on `OPTIMIZE` query to hang waiting for some replica after it become inactive. [#10849](https://github.com/ClickHouse/ClickHouse/pull/10849) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fixed columns order after `Block::sortColumns()`. [#10826](https://github.com/ClickHouse/ClickHouse/pull/10826) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984] (https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed the issue with `ODBC` bridge when no quoting of identifiers is requested. Fixes [#7984](https://github.com/ClickHouse/ClickHouse/issues/7984). [#10821](https://github.com/ClickHouse/ClickHouse/pull/10821) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed `UBSan` and `MSan` report in `DateLUT`. [#10798](https://github.com/ClickHouse/ClickHouse/pull/10798) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed incorrect type conversion in key conditions. Fixes [#6287](https://github.com/ClickHouse/ClickHouse/issues/6287). [#10791](https://github.com/ClickHouse/ClickHouse/pull/10791) ([Andrew Onyshchuk](https://github.com/oandrew))
|
||||
* Fixed `parallel_view_processing` behavior. Now all insertions into `MATERIALIZED VIEW` without exception should be finished if exception happened. Fixes [#10241](https://github.com/ClickHouse/ClickHouse/issues/10241). [#10757](https://github.com/ClickHouse/ClickHouse/pull/10757) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -2215,7 +2463,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fixed incorrect scalar results inside inner query of `MATERIALIZED VIEW` in case if this query contained dependent table. [#10603](https://github.com/ClickHouse/ClickHouse/pull/10603) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed `SELECT` of column `ALIAS` which default expression type different from column type. [#10563](https://github.com/ClickHouse/ClickHouse/pull/10563) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Implemented comparison between DateTime64 and String values. [#10560](https://github.com/ClickHouse/ClickHouse/pull/10560) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Fixed index corruption, which may accur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed index corruption, which may occur in some cases after merge compact parts into another compact part. [#10531](https://github.com/ClickHouse/ClickHouse/pull/10531) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed the situation, when mutation finished all parts, but hung up in `is_done=0`. [#10526](https://github.com/ClickHouse/ClickHouse/pull/10526) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed overflow at beginning of unix epoch for timezones with fractional offset from `UTC`. This fixes [#9335](https://github.com/ClickHouse/ClickHouse/issues/9335). [#10513](https://github.com/ClickHouse/ClickHouse/pull/10513) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed improper shutdown of `Distributed` storage. [#10491](https://github.com/ClickHouse/ClickHouse/pull/10491) ([Azat Khuzhin](https://github.com/azat)).
|
||||
@ -2225,14 +2473,14 @@ No changes compared to v20.4.3.16-stable.
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
* Fix UBSan report in LZ4 library. [#10631](https://github.com/ClickHouse/ClickHouse/pull/10631) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix clang-10 build. https://github.com/ClickHouse/ClickHouse/issues/10238. [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix clang-10 build. [#10238](https://github.com/ClickHouse/ClickHouse/issues/10238). [#10370](https://github.com/ClickHouse/ClickHouse/pull/10370) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Added failing tests about `max_rows_to_sort` setting. [#10268](https://github.com/ClickHouse/ClickHouse/pull/10268) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Added some improvements in printing diagnostic info in input formats. Fixes [#10204](https://github.com/ClickHouse/ClickHouse/issues/10204). [#10418](https://github.com/ClickHouse/ClickHouse/pull/10418) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Added CA certificates to clickhouse-server docker image. [#10476](https://github.com/ClickHouse/ClickHouse/pull/10476) ([filimonov](https://github.com/filimonov)).
|
||||
|
||||
#### Bug fix
|
||||
|
||||
* #10551. [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix error `the BloomFilter false positive must be a double number between 0 and 1` [#10551](https://github.com/ClickHouse/ClickHouse/issues/10551). [#10569](https://github.com/ClickHouse/ClickHouse/pull/10569) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
|
||||
|
||||
### ClickHouse release v20.3.8.53, 2020-04-23
|
||||
@ -2424,7 +2672,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fixed the behaviour of `match` and `extract` functions when haystack has zero bytes. The behaviour was wrong when haystack was constant. This fixes [#9160](https://github.com/ClickHouse/ClickHouse/issues/9160) [#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#9345](https://github.com/ClickHouse/ClickHouse/pull/9345) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||
* Avoid throwing from destructor in Apache Avro 3rd-party library. [#9066](https://github.com/ClickHouse/ClickHouse/pull/9066) ([Andrew Onyshchuk](https://github.com/oandrew))
|
||||
* Don't commit a batch polled from `Kafka` partially as it can lead to holes in data. [#8876](https://github.com/ClickHouse/ClickHouse/pull/8876) ([filimonov](https://github.com/filimonov))
|
||||
* Fix `joinGet` with nullable return types. https://github.com/ClickHouse/ClickHouse/issues/8919 [#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) ([Amos Bird](https://github.com/amosbird))
|
||||
* Fix `joinGet` with nullable return types. [#8919](https://github.com/ClickHouse/ClickHouse/issues/8919) [#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) ([Amos Bird](https://github.com/amosbird))
|
||||
* Fix data incompatibility when compressed with `T64` codec. [#9016](https://github.com/ClickHouse/ClickHouse/pull/9016) ([Artem Zuikov](https://github.com/4ertus2)) Fix data type ids in `T64` compression codec that leads to wrong (de)compression in affected versions. [#9033](https://github.com/ClickHouse/ClickHouse/pull/9033) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
* Add setting `enable_early_constant_folding` and disable it in some cases that leads to errors. [#9010](https://github.com/ClickHouse/ClickHouse/pull/9010) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
* Fix pushdown predicate optimizer with VIEW and enable the test [#9011](https://github.com/ClickHouse/ClickHouse/pull/9011) ([Winter Zhang](https://github.com/zhang2014))
|
||||
@ -2626,7 +2874,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fix the error `Data compressed with different methods` that can happen if `min_bytes_to_use_direct_io` is enabled and PREWHERE is active and using SAMPLE or high number of threads. This fixes [#11539](https://github.com/ClickHouse/ClickHouse/issues/11539). [#11540](https://github.com/ClickHouse/ClickHouse/pull/11540) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix return compressed size for codecs. [#11448](https://github.com/ClickHouse/ClickHouse/pull/11448) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix server crash when a column has compression codec with non-literal arguments. Fixes [#11365](https://github.com/ClickHouse/ClickHouse/issues/11365). [#11431](https://github.com/ClickHouse/ClickHouse/pull/11431) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix pointInPolygon with nan as point. Fixes https://github.com/ClickHouse/ClickHouse/issues/11375. [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fix pointInPolygon with nan as point. Fixes [#11375](https://github.com/ClickHouse/ClickHouse/issues/11375). [#11421](https://github.com/ClickHouse/ClickHouse/pull/11421) ([Alexey Ilyukhov](https://github.com/livace)).
|
||||
* Fixed geohashesInBox with arguments outside of latitude/longitude range. [#11403](https://github.com/ClickHouse/ClickHouse/pull/11403) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Fix possible `Pipeline stuck` error for queries with external sort and limit. Fixes [#11359](https://github.com/ClickHouse/ClickHouse/issues/11359). [#11366](https://github.com/ClickHouse/ClickHouse/pull/11366) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix crash in `quantilesExactWeightedArray`. [#11337](https://github.com/ClickHouse/ClickHouse/pull/11337) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
@ -2636,7 +2884,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Fix potential uninitialized memory in conversion. Example: `SELECT toIntervalSecond(now64())`. [#11311](https://github.com/ClickHouse/ClickHouse/pull/11311) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix the issue when index analysis cannot work if a table has Array column in primary key and if a query is filtering by this column with `empty` or `notEmpty` functions. This fixes [#11286](https://github.com/ClickHouse/ClickHouse/issues/11286). [#11303](https://github.com/ClickHouse/ClickHouse/pull/11303) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug when query speed estimation can be incorrect and the limit of `min_execution_speed` may not work or work incorrectly if the query is throttled by `max_network_bandwidth`, `max_execution_speed` or `priority` settings. Change the default value of `timeout_before_checking_execution_speed` to non-zero, because otherwise the settings `min_execution_speed` and `max_execution_speed` have no effect. This fixes [#11297](https://github.com/ClickHouse/ClickHouse/issues/11297). This fixes [#5732](https://github.com/ClickHouse/ClickHouse/issues/5732). This fixes [#6228](https://github.com/ClickHouse/ClickHouse/issues/6228). Usability improvement: avoid concatenation of exception message with progress bar in `clickhouse-client`. [#11296](https://github.com/ClickHouse/ClickHouse/pull/11296) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes https://github.com/ClickHouse/ClickHouse/issues/5957, fixes https://github.com/ClickHouse/ClickHouse/issues/11203. [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix crash while reading malformed data in Protobuf format. This fixes [#5957](https://github.com/ClickHouse/ClickHouse/issues/5957), fixes [#11203](https://github.com/ClickHouse/ClickHouse/issues/11203). [#11258](https://github.com/ClickHouse/ClickHouse/pull/11258) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix possible error `Cannot capture column` for higher-order functions with `Array(Array(LowCardinality))` captured argument. [#11185](https://github.com/ClickHouse/ClickHouse/pull/11185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* If data skipping index is dependent on columns that are going to be modified during background merge (for SummingMergeTree, AggregatingMergeTree as well as for TTL GROUP BY), it was calculated incorrectly. This issue is fixed by moving index calculation after merge so the index is calculated on merged data. [#11162](https://github.com/ClickHouse/ClickHouse/pull/11162) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Remove logging from mutation finalization task if nothing was finalized. [#11109](https://github.com/ClickHouse/ClickHouse/pull/11109) ([alesapin](https://github.com/alesapin)).
|
||||
@ -2914,7 +3162,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Several improvements ClickHouse grammar in `.g4` file. [#8294](https://github.com/ClickHouse/ClickHouse/pull/8294) ([taiyang-li](https://github.com/taiyang-li))
|
||||
* Fix bug that leads to crashes in `JOIN`s with tables with engine `Join`. This fixes [#7556](https://github.com/ClickHouse/ClickHouse/issues/7556) [#8254](https://github.com/ClickHouse/ClickHouse/issues/8254) [#7915](https://github.com/ClickHouse/ClickHouse/issues/7915) [#8100](https://github.com/ClickHouse/ClickHouse/issues/8100). [#8298](https://github.com/ClickHouse/ClickHouse/pull/8298) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
* Fix redundant dictionaries reload on `CREATE DATABASE`. [#7916](https://github.com/ClickHouse/ClickHouse/pull/7916) ([Azat Khuzhin](https://github.com/azat))
|
||||
* Limit maximum number of streams for read from `StorageFile` and `StorageHDFS`. Fixes https://github.com/ClickHouse/ClickHouse/issues/7650. [#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
|
||||
* Limit maximum number of streams for read from `StorageFile` and `StorageHDFS`. Fixes [#7650](https://github.com/ClickHouse/ClickHouse/issues/7650). [#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
|
||||
* Fix bug in `ALTER ... MODIFY ... CODEC` query, when user specify both default expression and codec. Fixes [8593](https://github.com/ClickHouse/ClickHouse/issues/8593). [#8614](https://github.com/ClickHouse/ClickHouse/pull/8614) ([alesapin](https://github.com/alesapin))
|
||||
* Fix error in background merge of columns with `SimpleAggregateFunction(LowCardinality)` type. [#8613](https://github.com/ClickHouse/ClickHouse/pull/8613) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
|
||||
* Fixed type check in function `toDateTime64`. [#8375](https://github.com/ClickHouse/ClickHouse/pull/8375) ([Vasily Nemkov](https://github.com/Enmk))
|
||||
@ -2998,7 +3246,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* Added check for extra parts of `MergeTree` at different disks, in order to not allow to miss data parts at undefined disks. [#8118](https://github.com/ClickHouse/ClickHouse/pull/8118) ([Vladimir Chebotarev](https://github.com/excitoon))
|
||||
* Enable SSL support for Mac client and server. [#8297](https://github.com/ClickHouse/ClickHouse/pull/8297) ([Ivan](https://github.com/abyss7))
|
||||
* Now ClickHouse can work as MySQL federated server (see https://dev.mysql.com/doc/refman/5.7/en/federated-create-server.html). [#7717](https://github.com/ClickHouse/ClickHouse/pull/7717) ([Maxim Fedotov](https://github.com/MaxFedotov))
|
||||
* `clickhouse-client` now only enable `bracketed-paste` when multiquery is on and multiline is off. This fixes (#7757)[https://github.com/ClickHouse/ClickHouse/issues/7757]. [#7761](https://github.com/ClickHouse/ClickHouse/pull/7761) ([Amos Bird](https://github.com/amosbird))
|
||||
* `clickhouse-client` now only enable `bracketed-paste` when multiquery is on and multiline is off. This fixes [#7757](https://github.com/ClickHouse/ClickHouse/issues/7757). [#7761](https://github.com/ClickHouse/ClickHouse/pull/7761) ([Amos Bird](https://github.com/amosbird))
|
||||
* Support `Array(Decimal)` in `if` function. [#7721](https://github.com/ClickHouse/ClickHouse/pull/7721) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
* Support Decimals in `arrayDifference`, `arrayCumSum` and `arrayCumSumNegative` functions. [#7724](https://github.com/ClickHouse/ClickHouse/pull/7724) ([Artem Zuikov](https://github.com/4ertus2))
|
||||
* Added `lifetime` column to `system.dictionaries` table. [#6820](https://github.com/ClickHouse/ClickHouse/issues/6820) [#7727](https://github.com/ClickHouse/ClickHouse/pull/7727) ([kekekekule](https://github.com/kekekekule))
|
||||
|
@ -112,6 +112,12 @@ if (ENABLE_FUZZING)
|
||||
set (FUZZER "libfuzzer")
|
||||
endif()
|
||||
|
||||
# Global libraries
|
||||
# See:
|
||||
# - default_libs.cmake
|
||||
# - sanitize.cmake
|
||||
add_library(global-libs INTERFACE)
|
||||
|
||||
include (cmake/fuzzer.cmake)
|
||||
include (cmake/sanitize.cmake)
|
||||
|
||||
@ -223,16 +229,16 @@ if (ARCH_NATIVE)
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
|
||||
endif ()
|
||||
|
||||
if (UNBUNDLED AND (COMPILER_GCC OR COMPILER_CLANG))
|
||||
# to make numeric_limits<__int128> works for unbundled build
|
||||
set (_CXX_STANDARD "-std=gnu++2a")
|
||||
if (COMPILER_GCC OR COMPILER_CLANG)
|
||||
# to make numeric_limits<__int128> works with GCC
|
||||
set (_CXX_STANDARD "gnu++2a")
|
||||
else()
|
||||
set (_CXX_STANDARD "-std=c++2a")
|
||||
set (_CXX_STANDARD "c++2a")
|
||||
endif()
|
||||
|
||||
# cmake < 3.12 doesn't support 20. We'll set CMAKE_CXX_FLAGS for now
|
||||
# set (CMAKE_CXX_STANDARD 20)
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_CXX_STANDARD}")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=${_CXX_STANDARD}")
|
||||
|
||||
set (CMAKE_CXX_EXTENSIONS 0) # https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html#prop_tgt:CXX_EXTENSIONS
|
||||
set (CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
@ -457,6 +463,7 @@ include (cmake/find/s3.cmake)
|
||||
include (cmake/find/base64.cmake)
|
||||
include (cmake/find/parquet.cmake)
|
||||
include (cmake/find/simdjson.cmake)
|
||||
include (cmake/find/fast_float.cmake)
|
||||
include (cmake/find/rapidjson.cmake)
|
||||
include (cmake/find/fastops.cmake)
|
||||
include (cmake/find/odbc.cmake)
|
||||
@ -512,8 +519,11 @@ macro (add_executable target)
|
||||
|
||||
get_target_property (type ${target} TYPE)
|
||||
if (${type} STREQUAL EXECUTABLE)
|
||||
# operator::new/delete for executables (MemoryTracker stuff)
|
||||
target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES})
|
||||
# disabled for TSAN and gcc since libtsan.a provides overrides too
|
||||
if (TARGET clickhouse_new_delete)
|
||||
# operator::new/delete for executables (MemoryTracker stuff)
|
||||
target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES})
|
||||
endif()
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
@ -524,8 +534,8 @@ include_directories(${ConfigIncludePath})
|
||||
include (cmake/warnings.cmake)
|
||||
|
||||
add_subdirectory (base)
|
||||
add_subdirectory (programs)
|
||||
add_subdirectory (src)
|
||||
add_subdirectory (programs)
|
||||
add_subdirectory (tests)
|
||||
add_subdirectory (utils)
|
||||
|
||||
|
@ -6,6 +6,7 @@ set (SRCS
|
||||
demangle.cpp
|
||||
getFQDNOrHostName.cpp
|
||||
getMemoryAmount.cpp
|
||||
getPageSize.cpp
|
||||
getThreadId.cpp
|
||||
JSON.cpp
|
||||
LineReader.cpp
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <common/ReadlineLineReader.h>
|
||||
#include <common/errnoToString.h>
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
#include <errno.h>
|
||||
@ -69,7 +70,7 @@ ReadlineLineReader::ReadlineLineReader(
|
||||
{
|
||||
int res = read_history(history_file_path.c_str());
|
||||
if (res)
|
||||
std::cerr << "Cannot read history from file " + history_file_path + ": "+ strerror(errno) << std::endl;
|
||||
std::cerr << "Cannot read history from file " + history_file_path + ": "+ errnoToString(errno) << std::endl;
|
||||
}
|
||||
|
||||
/// Added '.' to the default list. Because it is used to separate database and table.
|
||||
@ -107,7 +108,7 @@ ReadlineLineReader::ReadlineLineReader(
|
||||
};
|
||||
|
||||
if (signal(SIGINT, clear_prompt_or_exit) == SIG_ERR)
|
||||
throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + strerror(errno));
|
||||
throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + errnoToString(errno));
|
||||
|
||||
rl_variable_bind("completion-ignore-case", "on");
|
||||
// TODO: it doesn't work
|
||||
|
@ -6,6 +6,12 @@
|
||||
#include <unistd.h>
|
||||
#include <functional>
|
||||
#include <sys/file.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <csignal>
|
||||
#include <dlfcn.h>
|
||||
#include <fcntl.h>
|
||||
#include <fstream>
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -47,7 +53,7 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
{
|
||||
if (!rx.history_load(history_file_path))
|
||||
{
|
||||
rx.print("Loading history failed: %s\n", strerror(errno));
|
||||
rx.print("Loading history failed: %s\n", errnoToString(errno).c_str());
|
||||
}
|
||||
|
||||
if (flock(history_file_fd, LOCK_UN))
|
||||
@ -58,6 +64,8 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
}
|
||||
}
|
||||
|
||||
rx.install_window_change_handler();
|
||||
|
||||
auto callback = [&suggest] (const String & context, size_t context_size)
|
||||
{
|
||||
if (auto range = suggest.getCompletions(context, context_size))
|
||||
@ -81,12 +89,14 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
/// it also binded to M-p/M-n).
|
||||
rx.bind_key(Replxx::KEY::meta('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMPLETE_NEXT, code); });
|
||||
rx.bind_key(Replxx::KEY::meta('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMPLETE_PREVIOUS, code); });
|
||||
|
||||
rx.bind_key(Replxx::KEY::meta('E'), [this](char32_t) { openEditor(); return Replxx::ACTION_RESULT::CONTINUE; });
|
||||
}
|
||||
|
||||
ReplxxLineReader::~ReplxxLineReader()
|
||||
{
|
||||
if (close(history_file_fd))
|
||||
rx.print("Close of history file failed: %s\n", strerror(errno));
|
||||
rx.print("Close of history file failed: %s\n", errnoToString(errno).c_str());
|
||||
}
|
||||
|
||||
LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
|
||||
@ -111,7 +121,7 @@ void ReplxxLineReader::addToHistory(const String & line)
|
||||
// and that is why flock() is added here.
|
||||
bool locked = false;
|
||||
if (flock(history_file_fd, LOCK_EX))
|
||||
rx.print("Lock of history file failed: %s\n", strerror(errno));
|
||||
rx.print("Lock of history file failed: %s\n", errnoToString(errno).c_str());
|
||||
else
|
||||
locked = true;
|
||||
|
||||
@ -119,13 +129,120 @@ void ReplxxLineReader::addToHistory(const String & line)
|
||||
|
||||
// flush changes to the disk
|
||||
if (!rx.history_save(history_file_path))
|
||||
rx.print("Saving history failed: %s\n", strerror(errno));
|
||||
rx.print("Saving history failed: %s\n", errnoToString(errno).c_str());
|
||||
|
||||
if (locked && 0 != flock(history_file_fd, LOCK_UN))
|
||||
rx.print("Unlock of history file failed: %s\n", strerror(errno));
|
||||
rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
|
||||
}
|
||||
|
||||
int ReplxxLineReader::execute(const std::string & command)
|
||||
{
|
||||
std::vector<char> argv0("sh", &("sh"[3]));
|
||||
std::vector<char> argv1("-c", &("-c"[3]));
|
||||
std::vector<char> argv2(command.data(), command.data() + command.size() + 1);
|
||||
|
||||
const char * filename = "/bin/sh";
|
||||
char * const argv[] = {argv0.data(), argv1.data(), argv2.data(), nullptr};
|
||||
|
||||
static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");
|
||||
if (!real_vfork)
|
||||
{
|
||||
rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString(errno).c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
pid_t pid = reinterpret_cast<pid_t (*)()>(real_vfork)();
|
||||
|
||||
if (-1 == pid)
|
||||
{
|
||||
rx.print("Cannot vfork: %s\n", errnoToString(errno).c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 == pid)
|
||||
{
|
||||
sigset_t mask;
|
||||
sigemptyset(&mask);
|
||||
sigprocmask(0, nullptr, &mask);
|
||||
sigprocmask(SIG_UNBLOCK, &mask, nullptr);
|
||||
|
||||
execv(filename, argv);
|
||||
_exit(-1);
|
||||
}
|
||||
|
||||
int status = 0;
|
||||
if (-1 == waitpid(pid, &status, 0))
|
||||
{
|
||||
rx.print("Cannot waitpid: %s\n", errnoToString(errno).c_str());
|
||||
return -1;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
void ReplxxLineReader::openEditor()
|
||||
{
|
||||
char filename[] = "clickhouse_replxx_XXXXXX.sql";
|
||||
int fd = ::mkstemps(filename, 4);
|
||||
if (-1 == fd)
|
||||
{
|
||||
rx.print("Cannot create temporary file to edit query: %s\n", errnoToString(errno).c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
String editor = std::getenv("EDITOR");
|
||||
if (editor.empty())
|
||||
editor = "vim";
|
||||
|
||||
replxx::Replxx::State state(rx.get_state());
|
||||
|
||||
size_t bytes_written = 0;
|
||||
const char * begin = state.text();
|
||||
size_t offset = strlen(state.text());
|
||||
while (bytes_written != offset)
|
||||
{
|
||||
ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written);
|
||||
if ((-1 == res || 0 == res) && errno != EINTR)
|
||||
{
|
||||
rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
|
||||
return;
|
||||
}
|
||||
bytes_written += res;
|
||||
}
|
||||
|
||||
if (0 != ::close(fd))
|
||||
{
|
||||
rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
if (0 == execute(editor + " " + filename))
|
||||
{
|
||||
try
|
||||
{
|
||||
std::ifstream t(filename);
|
||||
std::string str;
|
||||
t.seekg(0, std::ios::end);
|
||||
str.reserve(t.tellg());
|
||||
t.seekg(0, std::ios::beg);
|
||||
str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
|
||||
rx.set_state(replxx::Replxx::State(str.c_str(), str.size()));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (bracketed_paste_enabled)
|
||||
enableBracketedPaste();
|
||||
|
||||
if (0 != ::unlink(filename))
|
||||
rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
|
||||
}
|
||||
|
||||
void ReplxxLineReader::enableBracketedPaste()
|
||||
{
|
||||
bracketed_paste_enabled = true;
|
||||
rx.enable_bracketed_paste();
|
||||
};
|
||||
|
@ -22,10 +22,13 @@ public:
|
||||
private:
|
||||
InputStatus readOneLine(const String & prompt) override;
|
||||
void addToHistory(const String & line) override;
|
||||
int execute(const std::string & command);
|
||||
void openEditor();
|
||||
|
||||
replxx::Replxx rx;
|
||||
replxx::Replxx::highlighter_callback_t highlighter;
|
||||
|
||||
// used to call flock() to synchronize multiple clients using same history file
|
||||
int history_file_fd = -1;
|
||||
bool bracketed_paste_enabled = false;
|
||||
};
|
||||
|
@ -61,6 +61,20 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(ADDRESS_SANITIZER)
|
||||
# define BOOST_USE_ASAN 1
|
||||
# define BOOST_USE_UCONTEXT 1
|
||||
#endif
|
||||
|
||||
#if defined(THREAD_SANITIZER)
|
||||
# define BOOST_USE_TSAN 1
|
||||
# define BOOST_USE_UCONTEXT 1
|
||||
#endif
|
||||
|
||||
#if defined(ARCADIA_BUILD) && defined(BOOST_USE_UCONTEXT)
|
||||
# undef BOOST_USE_UCONTEXT
|
||||
#endif
|
||||
|
||||
/// TODO: Strange enough, there is no way to detect UB sanitizer.
|
||||
|
||||
/// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute.
|
||||
@ -76,6 +90,8 @@
|
||||
# define NO_SANITIZE_THREAD
|
||||
#endif
|
||||
|
||||
/// A macro for suppressing warnings about unused variables or function results.
|
||||
/// Useful for structured bindings which have no standard way to declare this.
|
||||
#define UNUSED(...) (void)(__VA_ARGS__)
|
||||
/// A template function for suppressing warnings about unused variables or function results.
|
||||
template <typename... Args>
|
||||
constexpr void UNUSED(Args &&... args [[maybe_unused]])
|
||||
{
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <stdexcept>
|
||||
#include "common/getMemoryAmount.h"
|
||||
#include "common/getPageSize.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
@ -18,7 +19,7 @@ uint64_t getMemoryAmountOrZero()
|
||||
if (num_pages <= 0)
|
||||
return 0;
|
||||
|
||||
int64_t page_size = sysconf(_SC_PAGESIZE);
|
||||
int64_t page_size = getPageSize();
|
||||
if (page_size <= 0)
|
||||
return 0;
|
||||
|
||||
|
8
base/common/getPageSize.cpp
Normal file
8
base/common/getPageSize.cpp
Normal file
@ -0,0 +1,8 @@
|
||||
#include "common/getPageSize.h"
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
Int64 getPageSize()
|
||||
{
|
||||
return sysconf(_SC_PAGESIZE);
|
||||
}
|
6
base/common/getPageSize.h
Normal file
6
base/common/getPageSize.h
Normal file
@ -0,0 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
/// Get memory page size
|
||||
Int64 getPageSize();
|
@ -1,6 +1,7 @@
|
||||
// https://stackoverflow.com/questions/1413445/reading-a-password-from-stdcin
|
||||
|
||||
#include <common/setTerminalEcho.h>
|
||||
#include <common/errnoToString.h>
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
@ -31,7 +32,7 @@ void setTerminalEcho(bool enable)
|
||||
#else
|
||||
struct termios tty;
|
||||
if (tcgetattr(STDIN_FILENO, &tty))
|
||||
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + strerror(errno));
|
||||
throw std::runtime_error(std::string("setTerminalEcho failed get: ") + errnoToString(errno));
|
||||
if (!enable)
|
||||
tty.c_lflag &= ~ECHO;
|
||||
else
|
||||
@ -39,6 +40,6 @@ void setTerminalEcho(bool enable)
|
||||
|
||||
auto ret = tcsetattr(STDIN_FILENO, TCSANOW, &tty);
|
||||
if (ret)
|
||||
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + strerror(errno));
|
||||
throw std::runtime_error(std::string("setTerminalEcho failed set: ") + errnoToString(errno));
|
||||
#endif
|
||||
}
|
||||
|
@ -58,8 +58,7 @@ public:
|
||||
using signed_base_type = int64_t;
|
||||
|
||||
// ctors
|
||||
integer() = default;
|
||||
|
||||
constexpr integer() noexcept;
|
||||
template <typename T>
|
||||
constexpr integer(T rhs) noexcept;
|
||||
template <typename T>
|
||||
|
@ -5,9 +5,11 @@
|
||||
/// (See at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
#include "throwError.h"
|
||||
#include <cmath>
|
||||
#include <cfloat>
|
||||
#include <limits>
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
|
||||
|
||||
namespace wide
|
||||
{
|
||||
@ -239,6 +241,14 @@ struct integer<Bits, Signed>::_impl
|
||||
template <class T>
|
||||
constexpr static void set_multiplier(integer<Bits, Signed> & self, T t) noexcept {
|
||||
constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
/// Implementation specific behaviour on overflow (if we don't check here, stack overflow will triggered in bigint_cast).
|
||||
if (!std::isfinite(t))
|
||||
{
|
||||
self = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
const T alpha = t / max_int;
|
||||
|
||||
if (alpha <= max_int)
|
||||
@ -916,6 +926,11 @@ public:
|
||||
|
||||
// Members
|
||||
|
||||
template <size_t Bits, typename Signed>
|
||||
constexpr integer<Bits, Signed>::integer() noexcept
|
||||
: items{}
|
||||
{}
|
||||
|
||||
template <size_t Bits, typename Signed>
|
||||
template <typename T>
|
||||
constexpr integer<Bits, Signed>::integer(T rhs) noexcept
|
||||
|
@ -47,6 +47,7 @@ SRCS(
|
||||
errnoToString.cpp
|
||||
getFQDNOrHostName.cpp
|
||||
getMemoryAmount.cpp
|
||||
getPageSize.cpp
|
||||
getResource.cpp
|
||||
getThreadId.cpp
|
||||
mremap.cpp
|
||||
|
@ -4,6 +4,11 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/resource.h>
|
||||
#if defined(__linux__)
|
||||
#include <sys/prctl.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
@ -12,7 +17,6 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include <typeinfo>
|
||||
#include <sys/resource.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
@ -22,7 +26,6 @@
|
||||
#include <Poco/Observer.h>
|
||||
#include <Poco/AutoPtr.h>
|
||||
#include <Poco/PatternFormatter.h>
|
||||
#include <Poco/TaskManager.h>
|
||||
#include <Poco/File.h>
|
||||
#include <Poco/Path.h>
|
||||
#include <Poco/Message.h>
|
||||
@ -470,7 +473,6 @@ BaseDaemon::~BaseDaemon()
|
||||
|
||||
void BaseDaemon::terminate()
|
||||
{
|
||||
getTaskManager().cancelAll();
|
||||
if (::raise(SIGTERM) != 0)
|
||||
throw Poco::SystemException("cannot terminate process");
|
||||
}
|
||||
@ -478,22 +480,11 @@ void BaseDaemon::terminate()
|
||||
void BaseDaemon::kill()
|
||||
{
|
||||
dumpCoverageReportIfPossible();
|
||||
pid.reset();
|
||||
pid_file.reset();
|
||||
if (::raise(SIGKILL) != 0)
|
||||
throw Poco::SystemException("cannot kill process");
|
||||
}
|
||||
|
||||
void BaseDaemon::sleep(double seconds)
|
||||
{
|
||||
wakeup_event.reset();
|
||||
wakeup_event.tryWait(seconds * 1000);
|
||||
}
|
||||
|
||||
void BaseDaemon::wakeup()
|
||||
{
|
||||
wakeup_event.set();
|
||||
}
|
||||
|
||||
std::string BaseDaemon::getDefaultCorePath() const
|
||||
{
|
||||
return "/opt/cores/";
|
||||
@ -564,7 +555,6 @@ void BaseDaemon::initialize(Application & self)
|
||||
{
|
||||
closeFDs();
|
||||
|
||||
task_manager = std::make_unique<Poco::TaskManager>();
|
||||
ServerApplication::initialize(self);
|
||||
|
||||
/// now highest priority (lowest value) is PRIO_APPLICATION = -100, we want higher!
|
||||
@ -648,10 +638,6 @@ void BaseDaemon::initialize(Application & self)
|
||||
throw Poco::OpenFileException("Cannot attach stdout to " + stdout_path);
|
||||
}
|
||||
|
||||
/// Create pid file.
|
||||
if (config().has("pid"))
|
||||
pid.emplace(config().getString("pid"), DB::StatusFile::write_pid);
|
||||
|
||||
/// Change path for logging.
|
||||
if (!log_path.empty())
|
||||
{
|
||||
@ -667,9 +653,17 @@ void BaseDaemon::initialize(Application & self)
|
||||
throw Poco::Exception("Cannot change directory to /tmp");
|
||||
}
|
||||
|
||||
// sensitive data masking rules are not used here
|
||||
/// sensitive data masking rules are not used here
|
||||
buildLoggers(config(), logger(), self.commandName());
|
||||
|
||||
/// After initialized loggers but before initialized signal handling.
|
||||
if (should_setup_watchdog)
|
||||
setupWatchdog();
|
||||
|
||||
/// Create pid file.
|
||||
if (config().has("pid"))
|
||||
pid_file.emplace(config().getString("pid"), DB::StatusFile::write_pid);
|
||||
|
||||
if (is_daemon)
|
||||
{
|
||||
/** Change working directory to the directory to write core dumps.
|
||||
@ -704,54 +698,71 @@ void BaseDaemon::initialize(Application & self)
|
||||
}
|
||||
|
||||
|
||||
static void addSignalHandler(const std::vector<int> & signals, signal_function handler, std::vector<int> * out_handled_signals)
|
||||
{
|
||||
struct sigaction sa;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.sa_sigaction = handler;
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
sigemptyset(&sa.sa_mask);
|
||||
for (auto signal : signals)
|
||||
sigaddset(&sa.sa_mask, signal);
|
||||
#else
|
||||
if (sigemptyset(&sa.sa_mask))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaddset(&sa.sa_mask, signal))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
#endif
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaction(signal, &sa, nullptr))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
|
||||
if (out_handled_signals)
|
||||
std::copy(signals.begin(), signals.end(), std::back_inserter(*out_handled_signals));
|
||||
};
|
||||
|
||||
|
||||
static void blockSignals(const std::vector<int> & signals)
|
||||
{
|
||||
sigset_t sig_set;
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
sigemptyset(&sig_set);
|
||||
for (auto signal : signals)
|
||||
sigaddset(&sig_set, signal);
|
||||
#else
|
||||
if (sigemptyset(&sig_set))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaddset(&sig_set, signal))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
#endif
|
||||
|
||||
if (pthread_sigmask(SIG_BLOCK, &sig_set, nullptr))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
};
|
||||
|
||||
|
||||
void BaseDaemon::initializeTerminationAndSignalProcessing()
|
||||
{
|
||||
SentryWriter::initialize(config());
|
||||
std::set_terminate(terminate_handler);
|
||||
|
||||
/// We want to avoid SIGPIPE when working with sockets and pipes, and just handle return value/errno instead.
|
||||
{
|
||||
sigset_t sig_set;
|
||||
if (sigemptyset(&sig_set) || sigaddset(&sig_set, SIGPIPE) || pthread_sigmask(SIG_BLOCK, &sig_set, nullptr))
|
||||
throw Poco::Exception("Cannot block signal.");
|
||||
}
|
||||
blockSignals({SIGPIPE});
|
||||
|
||||
/// Setup signal handlers.
|
||||
auto add_signal_handler =
|
||||
[this](const std::vector<int> & signals, signal_function handler)
|
||||
{
|
||||
struct sigaction sa;
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
sa.sa_sigaction = handler;
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
|
||||
{
|
||||
#if defined(OS_DARWIN)
|
||||
sigemptyset(&sa.sa_mask);
|
||||
for (auto signal : signals)
|
||||
sigaddset(&sa.sa_mask, signal);
|
||||
#else
|
||||
if (sigemptyset(&sa.sa_mask))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaddset(&sa.sa_mask, signal))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
#endif
|
||||
|
||||
for (auto signal : signals)
|
||||
if (sigaction(signal, &sa, nullptr))
|
||||
throw Poco::Exception("Cannot set signal handler.");
|
||||
|
||||
std::copy(signals.begin(), signals.end(), std::back_inserter(handled_signals));
|
||||
}
|
||||
};
|
||||
|
||||
/// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.
|
||||
|
||||
add_signal_handler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, signalHandler);
|
||||
add_signal_handler({SIGHUP, SIGUSR1}, closeLogsSignalHandler);
|
||||
add_signal_handler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler);
|
||||
addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, signalHandler, &handled_signals);
|
||||
addSignalHandler({SIGHUP, SIGUSR1}, closeLogsSignalHandler, &handled_signals);
|
||||
addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals);
|
||||
|
||||
#if defined(SANITIZER)
|
||||
__sanitizer_set_death_callback(sanitizerDeathCallback);
|
||||
@ -761,14 +772,14 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
|
||||
static KillingErrorHandler killing_error_handler;
|
||||
Poco::ErrorHandler::set(&killing_error_handler);
|
||||
|
||||
signal_pipe.setNonBlocking();
|
||||
signal_pipe.setNonBlockingWrite();
|
||||
signal_pipe.tryIncreaseSize(1 << 20);
|
||||
|
||||
signal_listener = std::make_unique<SignalListener>(*this);
|
||||
signal_listener_thread.start(*signal_listener);
|
||||
|
||||
#if defined(__ELF__) && !defined(__FreeBSD__)
|
||||
String build_id_hex = DB::SymbolIndex::instance().getBuildIDHex();
|
||||
String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
|
||||
if (build_id_hex.empty())
|
||||
build_id_info = "no build id";
|
||||
else
|
||||
@ -786,23 +797,6 @@ void BaseDaemon::logRevision() const
|
||||
+ ", PID " + std::to_string(getpid()));
|
||||
}
|
||||
|
||||
/// Makes server shutdown if at least one Poco::Task have failed.
|
||||
void BaseDaemon::exitOnTaskError()
|
||||
{
|
||||
Poco::Observer<BaseDaemon, Poco::TaskFailedNotification> obs(*this, &BaseDaemon::handleNotification);
|
||||
getTaskManager().addObserver(obs);
|
||||
}
|
||||
|
||||
/// Used for exitOnTaskError()
|
||||
void BaseDaemon::handleNotification(Poco::TaskFailedNotification *_tfn)
|
||||
{
|
||||
task_failed = true;
|
||||
Poco::AutoPtr<Poco::TaskFailedNotification> fn(_tfn);
|
||||
Poco::Logger * lg = &(logger());
|
||||
LOG_ERROR(lg, "Task '{}' failed. Daemon is shutting down. Reason - {}", fn->task()->name(), fn->reason().displayText());
|
||||
ServerApplication::terminate();
|
||||
}
|
||||
|
||||
void BaseDaemon::defineOptions(Poco::Util::OptionSet & new_options)
|
||||
{
|
||||
new_options.addOption(
|
||||
@ -863,13 +857,144 @@ void BaseDaemon::onInterruptSignals(int signal_id)
|
||||
if (sigint_signals_counter >= 2)
|
||||
{
|
||||
LOG_INFO(&logger(), "Received second signal Interrupt. Immediately terminate.");
|
||||
kill();
|
||||
call_default_signal_handler(signal_id);
|
||||
/// If the above did not help.
|
||||
_exit(128 + signal_id);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void BaseDaemon::waitForTerminationRequest()
|
||||
{
|
||||
/// NOTE: as we already process signals via pipe, we don't have to block them with sigprocmask in threads
|
||||
std::unique_lock<std::mutex> lock(signal_handler_mutex);
|
||||
signal_event.wait(lock, [this](){ return terminate_signals_counter > 0; });
|
||||
}
|
||||
|
||||
|
||||
void BaseDaemon::shouldSetupWatchdog(char * argv0_)
|
||||
{
|
||||
should_setup_watchdog = true;
|
||||
argv0 = argv0_;
|
||||
}
|
||||
|
||||
|
||||
void BaseDaemon::setupWatchdog()
|
||||
{
|
||||
/// Initialize in advance to avoid double initialization in forked processes.
|
||||
DateLUT::instance();
|
||||
|
||||
std::string original_process_name;
|
||||
if (argv0)
|
||||
original_process_name = argv0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
static pid_t pid = -1;
|
||||
pid = fork();
|
||||
|
||||
if (-1 == pid)
|
||||
throw Poco::Exception("Cannot fork");
|
||||
|
||||
if (0 == pid)
|
||||
{
|
||||
logger().information("Forked a child process to watch");
|
||||
#if defined(__linux__)
|
||||
if (0 != prctl(PR_SET_PDEATHSIG, SIGKILL))
|
||||
logger().warning("Cannot do prctl to ask termination with parent.");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
/// Change short thread name and process name.
|
||||
setThreadName("clckhouse-watch"); /// 15 characters
|
||||
|
||||
if (argv0)
|
||||
{
|
||||
const char * new_process_name = "clickhouse-watchdog";
|
||||
memset(argv0, 0, original_process_name.size());
|
||||
memcpy(argv0, new_process_name, std::min(strlen(new_process_name), original_process_name.size()));
|
||||
}
|
||||
|
||||
logger().information(fmt::format("Will watch for the process with pid {}", pid));
|
||||
|
||||
/// Forward signals to the child process.
|
||||
addSignalHandler(
|
||||
{SIGHUP, SIGUSR1, SIGINT, SIGQUIT, SIGTERM},
|
||||
[](int sig, siginfo_t *, void *)
|
||||
{
|
||||
/// Forward all signals except INT as it can be send by terminal to the process group when user press Ctrl+C,
|
||||
/// and we process double delivery of this signal as immediate termination.
|
||||
if (sig == SIGINT)
|
||||
return;
|
||||
|
||||
const char * error_message = "Cannot forward signal to the child process.\n";
|
||||
if (0 != ::kill(pid, sig))
|
||||
{
|
||||
auto res = write(STDERR_FILENO, error_message, strlen(error_message));
|
||||
(void)res;
|
||||
}
|
||||
},
|
||||
nullptr);
|
||||
|
||||
int status = 0;
|
||||
do
|
||||
{
|
||||
if (-1 != waitpid(pid, &status, WUNTRACED | WCONTINUED) || errno == ECHILD)
|
||||
{
|
||||
if (WIFSTOPPED(status))
|
||||
logger().warning(fmt::format("Child process was stopped by signal {}.", WSTOPSIG(status)));
|
||||
else if (WIFCONTINUED(status))
|
||||
logger().warning(fmt::format("Child process was continued."));
|
||||
else
|
||||
break;
|
||||
}
|
||||
else if (errno != EINTR)
|
||||
throw Poco::Exception("Cannot waitpid, errno: " + std::string(strerror(errno)));
|
||||
} while (true);
|
||||
|
||||
if (errno == ECHILD)
|
||||
{
|
||||
logger().information("Child process no longer exists.");
|
||||
_exit(status);
|
||||
}
|
||||
|
||||
if (WIFEXITED(status))
|
||||
{
|
||||
logger().information(fmt::format("Child process exited normally with code {}.", WEXITSTATUS(status)));
|
||||
_exit(status);
|
||||
}
|
||||
|
||||
if (WIFSIGNALED(status))
|
||||
{
|
||||
int sig = WTERMSIG(status);
|
||||
|
||||
if (sig == SIGKILL)
|
||||
{
|
||||
logger().fatal(fmt::format("Child process was terminated by signal {} (KILL)."
|
||||
" If it is not done by 'forcestop' command or manually,"
|
||||
" the possible cause is OOM Killer (see 'dmesg' and look at the '/var/log/kern.log' for the details).", sig));
|
||||
}
|
||||
else
|
||||
{
|
||||
logger().fatal(fmt::format("Child process was terminated by signal {}.", sig));
|
||||
|
||||
if (sig == SIGINT || sig == SIGTERM || sig == SIGQUIT)
|
||||
_exit(status);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
logger().fatal("Child process was not exited normally by unknown reason.");
|
||||
}
|
||||
|
||||
/// Automatic restart is not enabled but you can play with it.
|
||||
#if 1
|
||||
_exit(status);
|
||||
#else
|
||||
logger().information("Will restart.");
|
||||
if (argv0)
|
||||
memcpy(argv0, original_process_name.c_str(), original_process_name.size());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <chrono>
|
||||
#include <Poco/Process.h>
|
||||
#include <Poco/ThreadPool.h>
|
||||
#include <Poco/TaskNotification.h>
|
||||
#include <Poco/Util/Application.h>
|
||||
#include <Poco/Util/ServerApplication.h>
|
||||
#include <Poco/Net/SocketAddress.h>
|
||||
@ -26,9 +25,6 @@
|
||||
#include <loggers/Loggers.h>
|
||||
|
||||
|
||||
namespace Poco { class TaskManager; }
|
||||
|
||||
|
||||
/// \brief Base class for applications that can run as daemons.
|
||||
///
|
||||
/// \code
|
||||
@ -52,31 +48,26 @@ public:
|
||||
BaseDaemon();
|
||||
~BaseDaemon() override;
|
||||
|
||||
/// Загружает конфигурацию и "строит" логгеры на запись в файлы
|
||||
/// Load configuration, prepare loggers, etc.
|
||||
void initialize(Poco::Util::Application &) override;
|
||||
|
||||
/// Читает конфигурацию
|
||||
void reloadConfiguration();
|
||||
|
||||
/// Определяет параметр командной строки
|
||||
/// Process command line parameters
|
||||
void defineOptions(Poco::Util::OptionSet & new_options) override;
|
||||
|
||||
/// Заставляет демон завершаться, если хотя бы одна задача завершилась неудачно
|
||||
void exitOnTaskError();
|
||||
/// Graceful shutdown
|
||||
static void terminate();
|
||||
|
||||
/// Завершение демона ("мягкое")
|
||||
void terminate();
|
||||
|
||||
/// Завершение демона ("жёсткое")
|
||||
/// Forceful shutdown
|
||||
void kill();
|
||||
|
||||
/// Получен ли сигнал на завершение?
|
||||
/// Cancellation request has been received.
|
||||
bool isCancelled() const
|
||||
{
|
||||
return is_cancelled;
|
||||
}
|
||||
|
||||
/// Получение ссылки на экземпляр демона
|
||||
static BaseDaemon & instance()
|
||||
{
|
||||
return dynamic_cast<BaseDaemon &>(Poco::Util::Application::instance());
|
||||
@ -85,12 +76,6 @@ public:
|
||||
/// return none if daemon doesn't exist, reference to the daemon otherwise
|
||||
static std::optional<std::reference_wrapper<BaseDaemon>> tryGetInstance() { return tryGetInstance<BaseDaemon>(); }
|
||||
|
||||
/// Спит заданное количество секунд или до события wakeup
|
||||
void sleep(double seconds);
|
||||
|
||||
/// Разбудить
|
||||
void wakeup();
|
||||
|
||||
/// В Graphite компоненты пути(папки) разделяются точкой.
|
||||
/// У нас принят путь формата root_path.hostname_yandex_ru.key
|
||||
/// root_path по умолчанию one_min
|
||||
@ -131,24 +116,23 @@ public:
|
||||
/// also doesn't close global internal pipes for signal handling
|
||||
static void closeFDs();
|
||||
|
||||
/// If this method is called after initialization and before run,
|
||||
/// will fork child process and setup watchdog that will print diagnostic info, if the child terminates.
|
||||
/// argv0 is needed to change process name (consequently, it is needed for scripts involving "pgrep", "pidof" to work correctly).
|
||||
void shouldSetupWatchdog(char * argv0_);
|
||||
|
||||
protected:
|
||||
/// Возвращает TaskManager приложения
|
||||
/// все методы task_manager следует вызывать из одного потока
|
||||
/// иначе возможен deadlock, т.к. joinAll выполняется под локом, а любой метод тоже берет лок
|
||||
Poco::TaskManager & getTaskManager() { return *task_manager; }
|
||||
|
||||
virtual void logRevision() const;
|
||||
|
||||
/// Используется при exitOnTaskError()
|
||||
void handleNotification(Poco::TaskFailedNotification *);
|
||||
|
||||
/// thread safe
|
||||
virtual void handleSignal(int signal_id);
|
||||
|
||||
/// initialize termination process and signal handlers
|
||||
virtual void initializeTerminationAndSignalProcessing();
|
||||
|
||||
/// реализация обработки сигналов завершения через pipe не требует блокировки сигнала с помощью sigprocmask во всех потоках
|
||||
/// fork the main process and watch if it was killed
|
||||
void setupWatchdog();
|
||||
|
||||
void waitForTerminationRequest()
|
||||
#if defined(POCO_CLICKHOUSE_PATCH) || POCO_VERSION >= 0x02000000 // in old upstream poco not vitrual
|
||||
override
|
||||
@ -162,21 +146,13 @@ protected:
|
||||
|
||||
virtual std::string getDefaultCorePath() const;
|
||||
|
||||
std::unique_ptr<Poco::TaskManager> task_manager;
|
||||
|
||||
std::optional<DB::StatusFile> pid;
|
||||
std::optional<DB::StatusFile> pid_file;
|
||||
|
||||
std::atomic_bool is_cancelled{false};
|
||||
|
||||
/// Флаг устанавливается по сообщению из Task (при аварийном завершении).
|
||||
bool task_failed = false;
|
||||
|
||||
bool log_to_console = false;
|
||||
|
||||
/// Событие, чтобы проснуться во время ожидания
|
||||
Poco::Event wakeup_event;
|
||||
|
||||
/// Поток, в котором принимается сигнал HUP/USR1 для закрытия логов.
|
||||
/// A thread that acts on HUP and USR1 signal (close logs).
|
||||
Poco::Thread signal_listener_thread;
|
||||
std::unique_ptr<Poco::Runnable> signal_listener;
|
||||
|
||||
@ -194,6 +170,9 @@ protected:
|
||||
String build_id_info;
|
||||
|
||||
std::vector<int> handled_signals;
|
||||
|
||||
bool should_setup_watchdog = false;
|
||||
char * argv0 = nullptr;
|
||||
};
|
||||
|
||||
|
||||
|
@ -179,7 +179,7 @@ void SentryWriter::onFault(int sig, const std::string & error_message, const Sta
|
||||
sentry_set_extra("signal_number", sentry_value_new_int32(sig));
|
||||
|
||||
#if defined(__ELF__) && !defined(__FreeBSD__)
|
||||
const String & build_id_hex = DB::SymbolIndex::instance().getBuildIDHex();
|
||||
const String & build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
|
||||
sentry_set_tag("build_id", build_id_hex.c_str());
|
||||
#endif
|
||||
|
||||
|
93
base/glibc-compatibility/musl/__polevll.c
Normal file
93
base/glibc-compatibility/musl/__polevll.c
Normal file
@ -0,0 +1,93 @@
|
||||
/* origin: OpenBSD /usr/src/lib/libm/src/polevll.c */
|
||||
/*
|
||||
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
/*
|
||||
* Evaluate polynomial
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* int N;
|
||||
* long double x, y, coef[N+1], polevl[];
|
||||
*
|
||||
* y = polevll( x, coef, N );
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Evaluates polynomial of degree N:
|
||||
*
|
||||
* 2 N
|
||||
* y = C + C x + C x +...+ C x
|
||||
* 0 1 2 N
|
||||
*
|
||||
* Coefficients are stored in reverse order:
|
||||
*
|
||||
* coef[0] = C , ..., coef[N] = C .
|
||||
* N 0
|
||||
*
|
||||
* The function p1evll() assumes that coef[N] = 1.0 and is
|
||||
* omitted from the array. Its calling arguments are
|
||||
* otherwise the same as polevll().
|
||||
*
|
||||
*
|
||||
* SPEED:
|
||||
*
|
||||
* In the interest of speed, there are no checks for out
|
||||
* of bounds arithmetic. This routine is used by most of
|
||||
* the functions in the library. Depending on available
|
||||
* equipment features, the user may wish to rewrite the
|
||||
* program in microcode or assembly language.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "libm.h"
|
||||
|
||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
||||
#else
|
||||
/*
|
||||
* Polynomial evaluator:
|
||||
* P[0] x^n + P[1] x^(n-1) + ... + P[n]
|
||||
*/
|
||||
long double __polevll(long double x, const long double *P, int n)
|
||||
{
|
||||
long double y;
|
||||
|
||||
y = *P++;
|
||||
do {
|
||||
y = y * x + *P++;
|
||||
} while (--n);
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
/*
|
||||
* Polynomial evaluator:
|
||||
* x^n + P[0] x^(n-1) + P[1] x^(n-2) + ... + P[n]
|
||||
*/
|
||||
long double __p1evll(long double x, const long double *P, int n)
|
||||
{
|
||||
long double y;
|
||||
|
||||
n -= 1;
|
||||
y = x + *P++;
|
||||
do {
|
||||
y = y * x + *P++;
|
||||
} while (--n);
|
||||
|
||||
return y;
|
||||
}
|
||||
#endif
|
@ -38,7 +38,7 @@
|
||||
* = log(6.3*5.3) + lgamma(5.3)
|
||||
* = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
|
||||
* 2. Polynomial approximation of lgamma around its
|
||||
* minimun ymin=1.461632144968362245 to maintain monotonicity.
|
||||
* minimum ymin=1.461632144968362245 to maintain monotonicity.
|
||||
* On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
|
||||
* Let z = x-ymin;
|
||||
* lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
|
||||
|
44
base/glibc-compatibility/musl/mkstemps.c
Normal file
44
base/glibc-compatibility/musl/mkstemps.c
Normal file
@ -0,0 +1,44 @@
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* This assumes that a check for the
|
||||
template size has already been made */
|
||||
static char * __randname(char * template)
|
||||
{
|
||||
int i;
|
||||
struct timespec ts;
|
||||
unsigned long r;
|
||||
|
||||
clock_gettime(CLOCK_REALTIME, &ts);
|
||||
r = (ts.tv_nsec * 65537) ^ ((((intptr_t)(&ts)) / 16) + ((intptr_t)template));
|
||||
for (i = 0; i < 6; i++, r >>= 5)
|
||||
template[i] = 'A' + (r & 15) + (r & 16) * 2;
|
||||
|
||||
return template;
|
||||
}
|
||||
|
||||
int mkstemps(char * template, int len)
|
||||
{
|
||||
size_t l = strlen(template);
|
||||
if (l < 6 || len > l - 6 || memcmp(template + l - len - 6, "XXXXXX", 6))
|
||||
{
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int fd, retries = 100;
|
||||
do
|
||||
{
|
||||
__randname(template + l - len - 6);
|
||||
if ((fd = open(template, O_RDWR | O_CREAT | O_EXCL, 0600)) >= 0)
|
||||
return fd;
|
||||
} while (--retries && errno == EEXIST);
|
||||
|
||||
memcpy(template + l - len - 6, "XXXXXX", 6);
|
||||
return -1;
|
||||
}
|
185
base/glibc-compatibility/musl/powf.c
Normal file
185
base/glibc-compatibility/musl/powf.c
Normal file
@ -0,0 +1,185 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "exp2f_data.h"
|
||||
#include "powf_data.h"
|
||||
|
||||
/*
|
||||
POWF_LOG2_POLY_ORDER = 5
|
||||
EXP2F_TABLE_BITS = 5
|
||||
|
||||
ULP error: 0.82 (~ 0.5 + relerr*2^24)
|
||||
relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
|
||||
relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
|
||||
relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
|
||||
*/
|
||||
|
||||
#define N (1 << POWF_LOG2_TABLE_BITS)
|
||||
#define T __powf_log2_data.tab
|
||||
#define A __powf_log2_data.poly
|
||||
#define OFF 0x3f330000
|
||||
|
||||
/* Subnormal input is normalized so ix has negative biased exponent.
|
||||
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set. */
|
||||
static inline double_t log2_inline(uint32_t ix)
|
||||
{
|
||||
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
|
||||
uint32_t iz, top, tmp;
|
||||
int k, i;
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
|
||||
top = tmp & 0xff800000;
|
||||
iz = ix - top;
|
||||
k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = (double_t)asfloat(iz);
|
||||
|
||||
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
|
||||
r = z * invc - 1;
|
||||
y0 = logc + (double_t)k;
|
||||
|
||||
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
|
||||
r2 = r * r;
|
||||
y = A[0] * r + A[1];
|
||||
p = A[2] * r + A[3];
|
||||
r4 = r2 * r2;
|
||||
q = A[4] * r + y0;
|
||||
q = p * r2 + q;
|
||||
y = y * r4 + q;
|
||||
return y;
|
||||
}
|
||||
|
||||
#undef N
|
||||
#undef T
|
||||
#define N (1 << EXP2F_TABLE_BITS)
|
||||
#define T __exp2f_data.tab
|
||||
#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11))
|
||||
|
||||
/* The output of log2 and thus the input of exp2 is either scaled by N
|
||||
(in case of fast toint intrinsics) or not. The unscaled xd must be
|
||||
in [-1021,1023], sign_bias sets the sign of the result. */
|
||||
static inline float exp2_inline(double_t xd, uint32_t sign_bias)
|
||||
{
|
||||
uint64_t ki, ski, t;
|
||||
double_t kd, z, r, r2, y, s;
|
||||
|
||||
#if TOINT_INTRINSICS
|
||||
#define C __exp2f_data.poly_scaled
|
||||
/* N*x = k + r with r in [-1/2, 1/2] */
|
||||
kd = roundtoint(xd); /* k */
|
||||
ki = converttoint(xd);
|
||||
#else
|
||||
#define C __exp2f_data.poly
|
||||
#define SHIFT __exp2f_data.shift_scaled
|
||||
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
|
||||
kd = eval_as_double(xd + SHIFT);
|
||||
ki = asuint64(kd);
|
||||
kd -= SHIFT; /* k/N */
|
||||
#endif
|
||||
r = xd - kd;
|
||||
|
||||
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
||||
t = T[ki % N];
|
||||
ski = ki + sign_bias;
|
||||
t += ski << (52 - EXP2F_TABLE_BITS);
|
||||
s = asdouble(t);
|
||||
z = C[0] * r + C[1];
|
||||
r2 = r * r;
|
||||
y = C[2] * r + 1;
|
||||
y = z * r2 + y;
|
||||
y = y * s;
|
||||
return eval_as_float(y);
|
||||
}
|
||||
|
||||
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
|
||||
the bit representation of a non-zero finite floating-point value. */
|
||||
static inline int checkint(uint32_t iy)
|
||||
{
|
||||
int e = iy >> 23 & 0xff;
|
||||
if (e < 0x7f)
|
||||
return 0;
|
||||
if (e > 0x7f + 23)
|
||||
return 2;
|
||||
if (iy & ((1 << (0x7f + 23 - e)) - 1))
|
||||
return 0;
|
||||
if (iy & (1 << (0x7f + 23 - e)))
|
||||
return 1;
|
||||
return 2;
|
||||
}
|
||||
|
||||
static inline int zeroinfnan(uint32_t ix)
|
||||
{
|
||||
return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
|
||||
}
|
||||
|
||||
float powf(float x, float y)
|
||||
{
|
||||
uint32_t sign_bias = 0;
|
||||
uint32_t ix, iy;
|
||||
|
||||
ix = asuint(x);
|
||||
iy = asuint(y);
|
||||
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
|
||||
zeroinfnan(iy))) {
|
||||
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
|
||||
if (predict_false(zeroinfnan(iy))) {
|
||||
if (2 * iy == 0)
|
||||
return issignalingf_inline(x) ? x + y : 1.0f;
|
||||
if (ix == 0x3f800000)
|
||||
return issignalingf_inline(y) ? x + y : 1.0f;
|
||||
if (2 * ix > 2u * 0x7f800000 ||
|
||||
2 * iy > 2u * 0x7f800000)
|
||||
return x + y;
|
||||
if (2 * ix == 2 * 0x3f800000)
|
||||
return 1.0f;
|
||||
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
|
||||
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
|
||||
return y * y;
|
||||
}
|
||||
if (predict_false(zeroinfnan(ix))) {
|
||||
float_t x2 = x * x;
|
||||
if (ix & 0x80000000 && checkint(iy) == 1)
|
||||
x2 = -x2;
|
||||
/* Without the barrier some versions of clang hoist the 1/x2 and
|
||||
thus division by zero exception can be signaled spuriously. */
|
||||
return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
|
||||
}
|
||||
/* x and y are non-zero finite. */
|
||||
if (ix & 0x80000000) {
|
||||
/* Finite x < 0. */
|
||||
int yint = checkint(iy);
|
||||
if (yint == 0)
|
||||
return __math_invalidf(x);
|
||||
if (yint == 1)
|
||||
sign_bias = SIGN_BIAS;
|
||||
ix &= 0x7fffffff;
|
||||
}
|
||||
if (ix < 0x00800000) {
|
||||
/* Normalize subnormal x so exponent becomes negative. */
|
||||
ix = asuint(x * 0x1p23f);
|
||||
ix &= 0x7fffffff;
|
||||
ix -= 23 << 23;
|
||||
}
|
||||
}
|
||||
double_t logx = log2_inline(ix);
|
||||
double_t ylogx = y * logx; /* cannot overflow, y is single prec. */
|
||||
if (predict_false((asuint64(ylogx) >> 47 & 0xffff) >=
|
||||
asuint64(126.0 * POWF_SCALE) >> 47)) {
|
||||
/* |y*log(x)| >= 126. */
|
||||
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
|
||||
return __math_oflowf(sign_bias);
|
||||
if (ylogx <= -150.0 * POWF_SCALE)
|
||||
return __math_uflowf(sign_bias);
|
||||
}
|
||||
return exp2_inline(ylogx, sign_bias);
|
||||
}
|
34
base/glibc-compatibility/musl/powf_data.c
Normal file
34
base/glibc-compatibility/musl/powf_data.c
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Data definition for powf.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "powf_data.h"
|
||||
|
||||
const struct powf_log2_data __powf_log2_data = {
|
||||
.tab = {
|
||||
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
|
||||
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
|
||||
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
|
||||
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
|
||||
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
|
||||
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
|
||||
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
|
||||
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
|
||||
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
|
||||
{ 0x1p+0, 0x0p+0 * POWF_SCALE },
|
||||
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
|
||||
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
|
||||
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
|
||||
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
|
||||
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
|
||||
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
|
||||
},
|
||||
.poly = {
|
||||
0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
|
||||
0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
|
||||
0x1.71547652ab82bp0 * POWF_SCALE,
|
||||
}
|
||||
};
|
26
base/glibc-compatibility/musl/powf_data.h
Normal file
26
base/glibc-compatibility/musl/powf_data.h
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _POWF_DATA_H
|
||||
#define _POWF_DATA_H
|
||||
|
||||
#include "libm.h"
|
||||
#include "exp2f_data.h"
|
||||
|
||||
#define POWF_LOG2_TABLE_BITS 4
|
||||
#define POWF_LOG2_POLY_ORDER 5
|
||||
#if TOINT_INTRINSICS
|
||||
#define POWF_SCALE_BITS EXP2F_TABLE_BITS
|
||||
#else
|
||||
#define POWF_SCALE_BITS 0
|
||||
#endif
|
||||
#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
|
||||
extern hidden const struct powf_log2_data {
|
||||
struct {
|
||||
double invc, logc;
|
||||
} tab[1 << POWF_LOG2_TABLE_BITS];
|
||||
double poly[POWF_LOG2_POLY_ORDER];
|
||||
} __powf_log2_data;
|
||||
|
||||
#endif
|
525
base/glibc-compatibility/musl/powl.c
Normal file
525
base/glibc-compatibility/musl/powl.c
Normal file
@ -0,0 +1,525 @@
|
||||
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */
|
||||
/*
|
||||
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
/* powl.c
|
||||
*
|
||||
* Power function, long double precision
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* long double x, y, z, powl();
|
||||
*
|
||||
* z = powl( x, y );
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Computes x raised to the yth power. Analytically,
|
||||
*
|
||||
* x**y = exp( y log(x) ).
|
||||
*
|
||||
* Following Cody and Waite, this program uses a lookup table
|
||||
* of 2**-i/32 and pseudo extended precision arithmetic to
|
||||
* obtain several extra bits of accuracy in both the logarithm
|
||||
* and the exponential.
|
||||
*
|
||||
*
|
||||
* ACCURACY:
|
||||
*
|
||||
* The relative error of pow(x,y) can be estimated
|
||||
* by y dl ln(2), where dl is the absolute error of
|
||||
* the internally computed base 2 logarithm. At the ends
|
||||
* of the approximation interval the logarithm equal 1/32
|
||||
* and its relative error is about 1 lsb = 1.1e-19. Hence
|
||||
* the predicted relative error in the result is 2.3e-21 y .
|
||||
*
|
||||
* Relative error:
|
||||
* arithmetic domain # trials peak rms
|
||||
*
|
||||
* IEEE +-1000 40000 2.8e-18 3.7e-19
|
||||
* .001 < x < 1000, with log(x) uniformly distributed.
|
||||
* -1000 < y < 1000, y uniformly distributed.
|
||||
*
|
||||
* IEEE 0,8700 60000 6.5e-18 1.0e-18
|
||||
* 0.99 < x < 1.01, 0 < y < 8700, uniformly distributed.
|
||||
*
|
||||
*
|
||||
* ERROR MESSAGES:
|
||||
*
|
||||
* message condition value returned
|
||||
* pow overflow x**y > MAXNUM INFINITY
|
||||
* pow underflow x**y < 1/MAXNUM 0.0
|
||||
* pow domain x<0 and y noninteger 0.0
|
||||
*
|
||||
*/
|
||||
|
||||
#include "libm.h"
|
||||
|
||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
||||
long double powl(long double x, long double y)
|
||||
{
|
||||
return pow(x, y);
|
||||
}
|
||||
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
|
||||
|
||||
/* Table size */
|
||||
#define NXT 32
|
||||
|
||||
/* log(1+x) = x - .5x^2 + x^3 * P(z)/Q(z)
|
||||
* on the domain 2^(-1/32) - 1 <= x <= 2^(1/32) - 1
|
||||
*/
|
||||
static const long double P[] = {
|
||||
8.3319510773868690346226E-4L,
|
||||
4.9000050881978028599627E-1L,
|
||||
1.7500123722550302671919E0L,
|
||||
1.4000100839971580279335E0L,
|
||||
};
|
||||
static const long double Q[] = {
|
||||
/* 1.0000000000000000000000E0L,*/
|
||||
5.2500282295834889175431E0L,
|
||||
8.4000598057587009834666E0L,
|
||||
4.2000302519914740834728E0L,
|
||||
};
|
||||
/* A[i] = 2^(-i/32), rounded to IEEE long double precision.
|
||||
* If i is even, A[i] + B[i/2] gives additional accuracy.
|
||||
*/
|
||||
static const long double A[33] = {
|
||||
1.0000000000000000000000E0L,
|
||||
9.7857206208770013448287E-1L,
|
||||
9.5760328069857364691013E-1L,
|
||||
9.3708381705514995065011E-1L,
|
||||
9.1700404320467123175367E-1L,
|
||||
8.9735453750155359320742E-1L,
|
||||
8.7812608018664974155474E-1L,
|
||||
8.5930964906123895780165E-1L,
|
||||
8.4089641525371454301892E-1L,
|
||||
8.2287773907698242225554E-1L,
|
||||
8.0524516597462715409607E-1L,
|
||||
7.8799042255394324325455E-1L,
|
||||
7.7110541270397041179298E-1L,
|
||||
7.5458221379671136985669E-1L,
|
||||
7.3841307296974965571198E-1L,
|
||||
7.2259040348852331001267E-1L,
|
||||
7.0710678118654752438189E-1L,
|
||||
6.9195494098191597746178E-1L,
|
||||
6.7712777346844636413344E-1L,
|
||||
6.6261832157987064729696E-1L,
|
||||
6.4841977732550483296079E-1L,
|
||||
6.3452547859586661129850E-1L,
|
||||
6.2092890603674202431705E-1L,
|
||||
6.0762367999023443907803E-1L,
|
||||
5.9460355750136053334378E-1L,
|
||||
5.8186242938878875689693E-1L,
|
||||
5.6939431737834582684856E-1L,
|
||||
5.5719337129794626814472E-1L,
|
||||
5.4525386633262882960438E-1L,
|
||||
5.3357020033841180906486E-1L,
|
||||
5.2213689121370692017331E-1L,
|
||||
5.1094857432705833910408E-1L,
|
||||
5.0000000000000000000000E-1L,
|
||||
};
|
||||
static const long double B[17] = {
|
||||
0.0000000000000000000000E0L,
|
||||
2.6176170809902549338711E-20L,
|
||||
-1.0126791927256478897086E-20L,
|
||||
1.3438228172316276937655E-21L,
|
||||
1.2207982955417546912101E-20L,
|
||||
-6.3084814358060867200133E-21L,
|
||||
1.3164426894366316434230E-20L,
|
||||
-1.8527916071632873716786E-20L,
|
||||
1.8950325588932570796551E-20L,
|
||||
1.5564775779538780478155E-20L,
|
||||
6.0859793637556860974380E-21L,
|
||||
-2.0208749253662532228949E-20L,
|
||||
1.4966292219224761844552E-20L,
|
||||
3.3540909728056476875639E-21L,
|
||||
-8.6987564101742849540743E-22L,
|
||||
-1.2327176863327626135542E-20L,
|
||||
0.0000000000000000000000E0L,
|
||||
};
|
||||
|
||||
/* 2^x = 1 + x P(x),
|
||||
* on the interval -1/32 <= x <= 0
|
||||
*/
|
||||
static const long double R[] = {
|
||||
1.5089970579127659901157E-5L,
|
||||
1.5402715328927013076125E-4L,
|
||||
1.3333556028915671091390E-3L,
|
||||
9.6181291046036762031786E-3L,
|
||||
5.5504108664798463044015E-2L,
|
||||
2.4022650695910062854352E-1L,
|
||||
6.9314718055994530931447E-1L,
|
||||
};
|
||||
|
||||
#define MEXP (NXT*16384.0L)
|
||||
/* The following if denormal numbers are supported, else -MEXP: */
|
||||
#define MNEXP (-NXT*(16384.0L+64.0L))
|
||||
/* log2(e) - 1 */
|
||||
#define LOG2EA 0.44269504088896340735992L
|
||||
|
||||
#define F W
|
||||
#define Fa Wa
|
||||
#define Fb Wb
|
||||
#define G W
|
||||
#define Ga Wa
|
||||
#define Gb u
|
||||
#define H W
|
||||
#define Ha Wb
|
||||
#define Hb Wb
|
||||
|
||||
static const long double MAXLOGL = 1.1356523406294143949492E4L;
|
||||
static const long double MINLOGL = -1.13994985314888605586758E4L;
|
||||
static const long double LOGE2L = 6.9314718055994530941723E-1L;
|
||||
static const long double huge = 0x1p10000L;
|
||||
/* XXX Prevent gcc from erroneously constant folding this. */
|
||||
static const volatile long double twom10000 = 0x1p-10000L;
|
||||
|
||||
static long double reducl(long double);
|
||||
static long double powil(long double, int);
|
||||
|
||||
long double __polevll(long double x, const long double *P, int n);
|
||||
long double __p1evll(long double x, const long double *P, int n);
|
||||
|
||||
long double powl(long double x, long double y)
|
||||
{
|
||||
/* double F, Fa, Fb, G, Ga, Gb, H, Ha, Hb */
|
||||
int i, nflg, iyflg, yoddint;
|
||||
long e;
|
||||
volatile long double z=0;
|
||||
long double w=0, W=0, Wa=0, Wb=0, ya=0, yb=0, u=0;
|
||||
|
||||
/* make sure no invalid exception is raised by nan comparison */
|
||||
if (isnan(x)) {
|
||||
if (!isnan(y) && y == 0.0)
|
||||
return 1.0;
|
||||
return x;
|
||||
}
|
||||
if (isnan(y)) {
|
||||
if (x == 1.0)
|
||||
return 1.0;
|
||||
return y;
|
||||
}
|
||||
if (x == 1.0)
|
||||
return 1.0; /* 1**y = 1, even if y is nan */
|
||||
if (x == -1.0 && !isfinite(y))
|
||||
return 1.0; /* -1**inf = 1 */
|
||||
if (y == 0.0)
|
||||
return 1.0; /* x**0 = 1, even if x is nan */
|
||||
if (y == 1.0)
|
||||
return x;
|
||||
if (y >= LDBL_MAX) {
|
||||
if (x > 1.0 || x < -1.0)
|
||||
return INFINITY;
|
||||
if (x != 0.0)
|
||||
return 0.0;
|
||||
}
|
||||
if (y <= -LDBL_MAX) {
|
||||
if (x > 1.0 || x < -1.0)
|
||||
return 0.0;
|
||||
if (x != 0.0 || y == -INFINITY)
|
||||
return INFINITY;
|
||||
}
|
||||
if (x >= LDBL_MAX) {
|
||||
if (y > 0.0)
|
||||
return INFINITY;
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
w = floorl(y);
|
||||
|
||||
/* Set iyflg to 1 if y is an integer. */
|
||||
iyflg = 0;
|
||||
if (w == y)
|
||||
iyflg = 1;
|
||||
|
||||
/* Test for odd integer y. */
|
||||
yoddint = 0;
|
||||
if (iyflg) {
|
||||
ya = fabsl(y);
|
||||
ya = floorl(0.5 * ya);
|
||||
yb = 0.5 * fabsl(w);
|
||||
if( ya != yb )
|
||||
yoddint = 1;
|
||||
}
|
||||
|
||||
if (x <= -LDBL_MAX) {
|
||||
if (y > 0.0) {
|
||||
if (yoddint)
|
||||
return -INFINITY;
|
||||
return INFINITY;
|
||||
}
|
||||
if (y < 0.0) {
|
||||
if (yoddint)
|
||||
return -0.0;
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
nflg = 0; /* (x<0)**(odd int) */
|
||||
if (x <= 0.0) {
|
||||
if (x == 0.0) {
|
||||
if (y < 0.0) {
|
||||
if (signbit(x) && yoddint)
|
||||
/* (-0.0)**(-odd int) = -inf, divbyzero */
|
||||
return -1.0/0.0;
|
||||
/* (+-0.0)**(negative) = inf, divbyzero */
|
||||
return 1.0/0.0;
|
||||
}
|
||||
if (signbit(x) && yoddint)
|
||||
return -0.0;
|
||||
return 0.0;
|
||||
}
|
||||
if (iyflg == 0)
|
||||
return (x - x) / (x - x); /* (x<0)**(non-int) is NaN */
|
||||
/* (x<0)**(integer) */
|
||||
if (yoddint)
|
||||
nflg = 1; /* negate result */
|
||||
x = -x;
|
||||
}
|
||||
/* (+integer)**(integer) */
|
||||
if (iyflg && floorl(x) == x && fabsl(y) < 32768.0) {
|
||||
w = powil(x, (int)y);
|
||||
return nflg ? -w : w;
|
||||
}
|
||||
|
||||
/* separate significand from exponent */
|
||||
x = frexpl(x, &i);
|
||||
e = i;
|
||||
|
||||
/* find significand in antilog table A[] */
|
||||
i = 1;
|
||||
if (x <= A[17])
|
||||
i = 17;
|
||||
if (x <= A[i+8])
|
||||
i += 8;
|
||||
if (x <= A[i+4])
|
||||
i += 4;
|
||||
if (x <= A[i+2])
|
||||
i += 2;
|
||||
if (x >= A[1])
|
||||
i = -1;
|
||||
i += 1;
|
||||
|
||||
/* Find (x - A[i])/A[i]
|
||||
* in order to compute log(x/A[i]):
|
||||
*
|
||||
* log(x) = log( a x/a ) = log(a) + log(x/a)
|
||||
*
|
||||
* log(x/a) = log(1+v), v = x/a - 1 = (x-a)/a
|
||||
*/
|
||||
x -= A[i];
|
||||
x -= B[i/2];
|
||||
x /= A[i];
|
||||
|
||||
/* rational approximation for log(1+v):
|
||||
*
|
||||
* log(1+v) = v - v**2/2 + v**3 P(v) / Q(v)
|
||||
*/
|
||||
z = x*x;
|
||||
w = x * (z * __polevll(x, P, 3) / __p1evll(x, Q, 3));
|
||||
w = w - 0.5*z;
|
||||
|
||||
/* Convert to base 2 logarithm:
|
||||
* multiply by log2(e) = 1 + LOG2EA
|
||||
*/
|
||||
z = LOG2EA * w;
|
||||
z += w;
|
||||
z += LOG2EA * x;
|
||||
z += x;
|
||||
|
||||
/* Compute exponent term of the base 2 logarithm. */
|
||||
w = -i;
|
||||
w /= NXT;
|
||||
w += e;
|
||||
/* Now base 2 log of x is w + z. */
|
||||
|
||||
/* Multiply base 2 log by y, in extended precision. */
|
||||
|
||||
/* separate y into large part ya
|
||||
* and small part yb less than 1/NXT
|
||||
*/
|
||||
ya = reducl(y);
|
||||
yb = y - ya;
|
||||
|
||||
/* (w+z)(ya+yb)
|
||||
* = w*ya + w*yb + z*y
|
||||
*/
|
||||
F = z * y + w * yb;
|
||||
Fa = reducl(F);
|
||||
Fb = F - Fa;
|
||||
|
||||
G = Fa + w * ya;
|
||||
Ga = reducl(G);
|
||||
Gb = G - Ga;
|
||||
|
||||
H = Fb + Gb;
|
||||
Ha = reducl(H);
|
||||
w = (Ga + Ha) * NXT;
|
||||
|
||||
/* Test the power of 2 for overflow */
|
||||
if (w > MEXP)
|
||||
return huge * huge; /* overflow */
|
||||
if (w < MNEXP)
|
||||
return twom10000 * twom10000; /* underflow */
|
||||
|
||||
e = w;
|
||||
Hb = H - Ha;
|
||||
|
||||
if (Hb > 0.0) {
|
||||
e += 1;
|
||||
Hb -= 1.0/NXT; /*0.0625L;*/
|
||||
}
|
||||
|
||||
/* Now the product y * log2(x) = Hb + e/NXT.
|
||||
*
|
||||
* Compute base 2 exponential of Hb,
|
||||
* where -0.0625 <= Hb <= 0.
|
||||
*/
|
||||
z = Hb * __polevll(Hb, R, 6); /* z = 2**Hb - 1 */
|
||||
|
||||
/* Express e/NXT as an integer plus a negative number of (1/NXT)ths.
|
||||
* Find lookup table entry for the fractional power of 2.
|
||||
*/
|
||||
if (e < 0)
|
||||
i = 0;
|
||||
else
|
||||
i = 1;
|
||||
i = e/NXT + i;
|
||||
e = NXT*i - e;
|
||||
w = A[e];
|
||||
z = w * z; /* 2**-e * ( 1 + (2**Hb-1) ) */
|
||||
z = z + w;
|
||||
z = scalbnl(z, i); /* multiply by integer power of 2 */
|
||||
|
||||
if (nflg)
|
||||
z = -z;
|
||||
return z;
|
||||
}
|
||||
|
||||
|
||||
/* Find a multiple of 1/NXT that is within 1/NXT of x. */
|
||||
static long double reducl(long double x)
|
||||
{
|
||||
long double t;
|
||||
|
||||
t = x * NXT;
|
||||
t = floorl(t);
|
||||
t = t / NXT;
|
||||
return t;
|
||||
}
|
||||
|
||||
/*
|
||||
* Positive real raised to integer power, long double precision
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* long double x, y, powil();
|
||||
* int n;
|
||||
*
|
||||
* y = powil( x, n );
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Returns argument x>0 raised to the nth power.
|
||||
* The routine efficiently decomposes n as a sum of powers of
|
||||
* two. The desired power is a product of two-to-the-kth
|
||||
* powers of x. Thus to compute the 32767 power of x requires
|
||||
* 28 multiplications instead of 32767 multiplications.
|
||||
*
|
||||
*
|
||||
* ACCURACY:
|
||||
*
|
||||
* Relative error:
|
||||
* arithmetic x domain n domain # trials peak rms
|
||||
* IEEE .001,1000 -1022,1023 50000 4.3e-17 7.8e-18
|
||||
* IEEE 1,2 -1022,1023 20000 3.9e-17 7.6e-18
|
||||
* IEEE .99,1.01 0,8700 10000 3.6e-16 7.2e-17
|
||||
*
|
||||
* Returns MAXNUM on overflow, zero on underflow.
|
||||
*/
|
||||
|
||||
static long double powil(long double x, int nn)
|
||||
{
|
||||
long double ww, y;
|
||||
long double s;
|
||||
int n, e, sign, lx;
|
||||
|
||||
if (nn == 0)
|
||||
return 1.0;
|
||||
|
||||
if (nn < 0) {
|
||||
sign = -1;
|
||||
n = -nn;
|
||||
} else {
|
||||
sign = 1;
|
||||
n = nn;
|
||||
}
|
||||
|
||||
/* Overflow detection */
|
||||
|
||||
/* Calculate approximate logarithm of answer */
|
||||
s = x;
|
||||
s = frexpl( s, &lx);
|
||||
e = (lx - 1)*n;
|
||||
if ((e == 0) || (e > 64) || (e < -64)) {
|
||||
s = (s - 7.0710678118654752e-1L) / (s + 7.0710678118654752e-1L);
|
||||
s = (2.9142135623730950L * s - 0.5 + lx) * nn * LOGE2L;
|
||||
} else {
|
||||
s = LOGE2L * e;
|
||||
}
|
||||
|
||||
if (s > MAXLOGL)
|
||||
return huge * huge; /* overflow */
|
||||
|
||||
if (s < MINLOGL)
|
||||
return twom10000 * twom10000; /* underflow */
|
||||
/* Handle tiny denormal answer, but with less accuracy
|
||||
* since roundoff error in 1.0/x will be amplified.
|
||||
* The precise demarcation should be the gradual underflow threshold.
|
||||
*/
|
||||
if (s < -MAXLOGL+2.0) {
|
||||
x = 1.0/x;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
/* First bit of the power */
|
||||
if (n & 1)
|
||||
y = x;
|
||||
else
|
||||
y = 1.0;
|
||||
|
||||
ww = x;
|
||||
n >>= 1;
|
||||
while (n) {
|
||||
ww = ww * ww; /* arg to the 2-to-the-kth power */
|
||||
if (n & 1) /* if that bit is set, then include in product */
|
||||
y *= ww;
|
||||
n >>= 1;
|
||||
}
|
||||
|
||||
if (sign < 0)
|
||||
y = 1.0/y;
|
||||
return y;
|
||||
}
|
||||
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
|
||||
// TODO: broken implementation to make things compile
|
||||
long double powl(long double x, long double y)
|
||||
{
|
||||
return pow(x, y);
|
||||
}
|
||||
#endif
|
17
base/glibc-compatibility/musl/timerfd.c
Normal file
17
base/glibc-compatibility/musl/timerfd.c
Normal file
@ -0,0 +1,17 @@
|
||||
#include <sys/timerfd.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int timerfd_create(int clockid, int flags)
|
||||
{
|
||||
return syscall(SYS_timerfd_create, clockid, flags);
|
||||
}
|
||||
|
||||
int timerfd_settime(int fd, int flags, const struct itimerspec *new, struct itimerspec *old)
|
||||
{
|
||||
return syscall(SYS_timerfd_settime, fd, flags, new, old);
|
||||
}
|
||||
|
||||
int timerfd_gettime(int fd, struct itimerspec *cur)
|
||||
{
|
||||
return syscall(SYS_timerfd_gettime, fd, cur);
|
||||
}
|
@ -248,7 +248,7 @@ bool Pool::Entry::tryForceConnected() const
|
||||
if (prev_connection_id != current_connection_id)
|
||||
{
|
||||
auto & logger = Poco::Util::Application::instance().logger();
|
||||
logger.information("Connection to mysql server has been reestablished. Connection id changed: %d -> %d",
|
||||
logger.information("Connection to mysql server has been reestablished. Connection id changed: %lu -> %lu",
|
||||
prev_connection_id, current_connection_id);
|
||||
}
|
||||
return true;
|
||||
|
@ -22,4 +22,12 @@ ResultBase::~ResultBase()
|
||||
mysql_free_result(res);
|
||||
}
|
||||
|
||||
std::string ResultBase::getFieldName(size_t n) const
|
||||
{
|
||||
if (num_fields <= n)
|
||||
throw Exception(std::string("Unknown column position ") + std::to_string(n));
|
||||
|
||||
return fields[n].name;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -31,6 +31,8 @@ public:
|
||||
MYSQL_RES * getRes() { return res; }
|
||||
const Query * getQuery() const { return query; }
|
||||
|
||||
std::string getFieldName(size_t n) const;
|
||||
|
||||
virtual ~ResultBase();
|
||||
|
||||
protected:
|
||||
|
@ -129,7 +129,7 @@ using namespace pcg_extras;
|
||||
*
|
||||
* default_multiplier<uint32_t>::multiplier()
|
||||
*
|
||||
* gives you the default multipler for 32-bit integers. We use the name
|
||||
* gives you the default multiplier for 32-bit integers. We use the name
|
||||
* of the constant and not a generic word like value to allow these classes
|
||||
* to be used as mixins.
|
||||
*/
|
||||
|
@ -14,10 +14,6 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
# Minimal supported SDK version
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
|
||||
|
||||
# Global libraries
|
||||
|
||||
add_library(global-libs INTERFACE)
|
||||
|
||||
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
|
||||
# Just make sure we have pthreads at all.
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
|
@ -31,8 +31,20 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
|
||||
|
||||
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND})
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND})
|
||||
|
||||
# 4+ ccache respect SOURCE_DATE_EPOCH (always includes it into the hash
|
||||
# of the manifest) and debian will extract these from d/changelog, and
|
||||
# makes cache of ccache unusable
|
||||
#
|
||||
# FIXME: once sloppiness will be introduced for this this can be removed.
|
||||
if (CCACHE_VERSION VERSION_GREATER "4.0")
|
||||
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
|
||||
else()
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND})
|
||||
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND})
|
||||
endif()
|
||||
else ()
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118")
|
||||
endif ()
|
||||
|
6
cmake/find/fast_float.cmake
Normal file
6
cmake/find/fast_float.cmake
Normal file
@ -0,0 +1,6 @@
|
||||
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/fast_float/fast_float.h")
|
||||
message (FATAL_ERROR "submodule contrib/fast_float is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
endif ()
|
||||
|
||||
set(FAST_FLOAT_LIBRARY fast_float)
|
||||
set(FAST_FLOAT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/")
|
@ -1,4 +1,11 @@
|
||||
option(ENABLE_GRPC "Use gRPC" ${ENABLE_LIBRARIES})
|
||||
# disable grpc due to conflicts of abseil (required by grpc) dynamic annotations with libtsan.a
|
||||
if (SANITIZE STREQUAL "thread" AND COMPILER_GCC)
|
||||
set(ENABLE_GRPC_DEFAULT OFF)
|
||||
else()
|
||||
set(ENABLE_GRPC_DEFAULT ${ENABLE_LIBRARIES})
|
||||
endif()
|
||||
|
||||
option(ENABLE_GRPC "Use gRPC" ${ENABLE_GRPC_DEFAULT})
|
||||
|
||||
if(NOT ENABLE_GRPC)
|
||||
if(USE_INTERNAL_GRPC_LIBRARY)
|
||||
|
@ -141,11 +141,6 @@ if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT O
|
||||
else()
|
||||
set(USE_INTERNAL_PARQUET_LIBRARY 1)
|
||||
|
||||
if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
|
||||
set(ARROW_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src")
|
||||
set(PARQUET_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" ${ClickHouse_BINARY_DIR}/contrib/arrow/cpp/src)
|
||||
endif()
|
||||
|
||||
if(MAKE_STATIC_LIBRARIES)
|
||||
set(FLATBUFFERS_LIBRARY flatbuffers)
|
||||
set(ARROW_LIBRARY arrow_static)
|
||||
@ -155,9 +150,6 @@ if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT O
|
||||
set(FLATBUFFERS_LIBRARY flatbuffers_shared)
|
||||
set(ARROW_LIBRARY arrow_shared)
|
||||
set(PARQUET_LIBRARY parquet_shared)
|
||||
if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
|
||||
list(APPEND PARQUET_LIBRARY boost::regex)
|
||||
endif()
|
||||
set(THRIFT_LIBRARY thrift)
|
||||
endif()
|
||||
|
||||
|
@ -1,8 +1,6 @@
|
||||
option (USE_INTERNAL_POCO_LIBRARY "Use internal Poco library" ON)
|
||||
|
||||
if (USE_INTERNAL_POCO_LIBRARY)
|
||||
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/poco)
|
||||
else ()
|
||||
if (NOT USE_INTERNAL_POCO_LIBRARY)
|
||||
find_path (ROOT_DIR NAMES Foundation/include/Poco/Poco.h include/Poco/Poco.h)
|
||||
if (NOT ROOT_DIR)
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system poco")
|
||||
|
@ -11,9 +11,9 @@ endif()
|
||||
|
||||
option(USE_INTERNAL_SSL_LIBRARY "Set to FALSE to use system *ssl library instead of bundled" ${NOT_UNBUNDLED})
|
||||
|
||||
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/openssl/README")
|
||||
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boringssl/README.md")
|
||||
if(USE_INTERNAL_SSL_LIBRARY)
|
||||
message(WARNING "submodule contrib/openssl is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
message(WARNING "submodule contrib/boringssl is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ssl library")
|
||||
endif()
|
||||
set(USE_INTERNAL_SSL_LIBRARY 0)
|
||||
@ -52,12 +52,12 @@ endif ()
|
||||
|
||||
if (NOT OPENSSL_FOUND AND NOT MISSING_INTERNAL_SSL_LIBRARY)
|
||||
set (USE_INTERNAL_SSL_LIBRARY 1)
|
||||
set (OPENSSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl")
|
||||
set (OPENSSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/boringssl")
|
||||
|
||||
if (ARCH_AMD64)
|
||||
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include" "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake/linux_x86_64/include")
|
||||
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include")
|
||||
elseif (ARCH_AARCH64)
|
||||
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include" "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake/linux_aarch64/include")
|
||||
set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include")
|
||||
endif ()
|
||||
set (OPENSSL_CRYPTO_LIBRARY crypto)
|
||||
set (OPENSSL_SSL_LIBRARY ssl)
|
||||
|
@ -17,10 +17,6 @@ message(STATUS "Default libraries: ${DEFAULT_LIBS}")
|
||||
set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
|
||||
# Global libraries
|
||||
|
||||
add_library(global-libs INTERFACE)
|
||||
|
||||
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
|
||||
# Just make sure we have pthreads at all.
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
|
@ -35,6 +35,15 @@ if (NOT PARALLEL_LINK_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# ThinLTO provides its own parallel linking
|
||||
# But use 2 parallel jobs, since:
|
||||
# - this is what llvm does
|
||||
# - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary
|
||||
if (ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2)
|
||||
message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
|
||||
set (PARALLEL_LINK_JOBS 2)
|
||||
endif()
|
||||
|
||||
if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
|
||||
set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR})
|
||||
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK})
|
||||
|
@ -12,10 +12,10 @@ else ()
|
||||
endif ()
|
||||
|
||||
if (OS_ANDROID)
|
||||
# pthread and rt are included in libc
|
||||
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -ldl")
|
||||
# pthread and rt are included in libc
|
||||
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -ldl")
|
||||
else ()
|
||||
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl")
|
||||
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl")
|
||||
endif ()
|
||||
|
||||
message(STATUS "Default libraries: ${DEFAULT_LIBS}")
|
||||
@ -31,10 +31,6 @@ if (ARCH_AMD64 AND NOT_UNBUNDLED)
|
||||
set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
|
||||
endif ()
|
||||
|
||||
# Global libraries
|
||||
|
||||
add_library(global-libs INTERFACE)
|
||||
|
||||
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
|
||||
# Just make sure we have pthreads at all.
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
|
@ -1,18 +1,34 @@
|
||||
# Possible values: `address` (ASan), `memory` (MSan), `thread` (TSan), `undefined` (UBSan), and "" (no sanitizing)
|
||||
# Possible values:
|
||||
# - `address` (ASan)
|
||||
# - `memory` (MSan)
|
||||
# - `thread` (TSan)
|
||||
# - `undefined` (UBSan)
|
||||
# - "" (no sanitizing)
|
||||
option (SANITIZE "Enable one of the code sanitizers" "")
|
||||
|
||||
set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
|
||||
|
||||
# gcc with -nodefaultlibs does not add sanitizer libraries
|
||||
# with -static-libasan and similar
|
||||
macro(add_explicit_sanitizer_library lib)
|
||||
target_link_libraries(global-libs INTERFACE "-Wl,-static -l${lib} -Wl,-Bdynamic")
|
||||
endmacro()
|
||||
|
||||
if (SANITIZE)
|
||||
if (SANITIZE STREQUAL "address")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
|
||||
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_FLAGS}")
|
||||
endif()
|
||||
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan")
|
||||
endif ()
|
||||
if (COMPILER_GCC)
|
||||
add_explicit_sanitizer_library(asan)
|
||||
endif()
|
||||
|
||||
elseif (SANITIZE STREQUAL "memory")
|
||||
# MemorySanitizer flags are set according to the official documentation:
|
||||
@ -41,9 +57,10 @@ if (SANITIZE)
|
||||
if (COMPILER_CLANG)
|
||||
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt")
|
||||
else()
|
||||
message (WARNING "TSAN suppressions was not passed to the compiler (since the compiler is not clang)")
|
||||
message (WARNING "Use the following command to pass them manually:")
|
||||
message (WARNING " export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"")
|
||||
set (MESSAGE "TSAN suppressions was not passed to the compiler (since the compiler is not clang)\n")
|
||||
set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n")
|
||||
set (MESSAGE "${MESSAGE} export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"")
|
||||
message (WARNING "${MESSAGE}")
|
||||
endif()
|
||||
|
||||
|
||||
@ -55,16 +72,32 @@ if (SANITIZE)
|
||||
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan")
|
||||
endif ()
|
||||
if (COMPILER_GCC)
|
||||
add_explicit_sanitizer_library(tsan)
|
||||
endif()
|
||||
|
||||
elseif (SANITIZE STREQUAL "undefined")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
set (UBSAN_FLAGS "-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
|
||||
if (COMPILER_CLANG)
|
||||
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
else()
|
||||
set (MESSAGE "UBSAN suppressions was not passed to the compiler (since the compiler is not clang)\n")
|
||||
set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n")
|
||||
set (MESSAGE "${MESSAGE} export UBSAN_OPTIONS=\"$UBSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt\"")
|
||||
message (WARNING "${MESSAGE}")
|
||||
endif()
|
||||
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
|
||||
endif()
|
||||
if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan")
|
||||
endif ()
|
||||
if (COMPILER_GCC)
|
||||
add_explicit_sanitizer_library(ubsan)
|
||||
endif()
|
||||
|
||||
# llvm-tblgen, that is used during LLVM build, doesn't work with UBSan.
|
||||
set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "")
|
||||
|
71
contrib/CMakeLists.txt
vendored
71
contrib/CMakeLists.txt
vendored
@ -26,6 +26,7 @@ add_subdirectory (boost-cmake)
|
||||
add_subdirectory (cctz-cmake)
|
||||
add_subdirectory (consistent-hashing-sumbur)
|
||||
add_subdirectory (consistent-hashing)
|
||||
add_subdirectory (dragonbox-cmake)
|
||||
add_subdirectory (FastMemcpy)
|
||||
add_subdirectory (hyperscan-cmake)
|
||||
add_subdirectory (jemalloc-cmake)
|
||||
@ -98,10 +99,10 @@ if (USE_INTERNAL_H3_LIBRARY)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_SSL_LIBRARY)
|
||||
add_subdirectory (openssl-cmake)
|
||||
add_subdirectory (boringssl-cmake)
|
||||
|
||||
add_library(OpenSSL::Crypto ALIAS ${OPENSSL_CRYPTO_LIBRARY})
|
||||
add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY})
|
||||
add_library(OpenSSL::Crypto ALIAS crypto)
|
||||
add_library(OpenSSL::SSL ALIAS ssl)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_LDAP_LIBRARY)
|
||||
@ -162,51 +163,21 @@ if(USE_INTERNAL_SNAPPY_LIBRARY)
|
||||
endif()
|
||||
|
||||
if (USE_INTERNAL_PARQUET_LIBRARY)
|
||||
if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
|
||||
# We dont use arrow's cmakefiles because they uses too many depends and download some libs in compile time
|
||||
# But this mode can be used for updating auto-generated parquet files:
|
||||
# cmake -DUSE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE=1 -DUSE_STATIC_LIBRARIES=0
|
||||
# copy {BUILD_DIR}/contrib/arrow/cpp/src/parquet/*.cpp,*.h -> /contrib/arrow-cmake/cpp/src/parquet/
|
||||
# But you can update auto-generated parquet files manually:
|
||||
# cd {BUILD_DIR}/contrib/arrow/cpp/src/parquet && mkdir -p build && cd build
|
||||
# cmake .. -DARROW_COMPUTE=ON -DARROW_PARQUET=ON -DARROW_SIMD_LEVEL=NONE -DARROW_VERBOSE_THIRDPARTY_BUILD=ON
|
||||
# -DARROW_BUILD_SHARED=1 -DARROW_BUILD_UTILITIES=OFF -DARROW_BUILD_INTEGRATION=OFF
|
||||
# -DBoost_FOUND=1 -DARROW_TEST_LINKAGE="shared"
|
||||
# make -j8
|
||||
# copy {BUILD_DIR}/contrib/arrow/cpp/src/parquet/*.cpp,*.h -> {BUILD_DIR}/contrib/arrow-cmake/cpp/src/parquet/
|
||||
|
||||
# Also useful parquet reader:
|
||||
# cd contrib/arrow/cpp/build && mkdir -p build && cmake .. -DPARQUET_BUILD_EXECUTABLES=1 && make -j8
|
||||
# contrib/arrow/cpp/build/debug/parquet-reader some_file.parquet
|
||||
# cd {BUILD_DIR}/contrib/arrow/cpp && mkdir -p build && cd build
|
||||
# cmake .. -DARROW_PARQUET=1 -DARROW_WITH_SNAPPY=1 -DPARQUET_BUILD_EXECUTABLES=1
|
||||
# make -j8
|
||||
# {BUILD_DIR}/contrib/arrow/cpp/build/release/parquet-reader some_file.parquet
|
||||
|
||||
set (ARROW_COMPUTE ON CACHE INTERNAL "")
|
||||
set (ARROW_PARQUET ON CACHE INTERNAL "")
|
||||
set (ARROW_VERBOSE_THIRDPARTY_BUILD ON CACHE INTERNAL "")
|
||||
set (ARROW_BUILD_SHARED 1 CACHE INTERNAL "")
|
||||
set (ARROW_BUILD_UTILITIES OFF CACHE INTERNAL "")
|
||||
set (ARROW_BUILD_INTEGRATION OFF CACHE INTERNAL "")
|
||||
set (ARROW_BOOST_HEADER_ONLY ON CACHE INTERNAL "")
|
||||
set (Boost_FOUND 1 CACHE INTERNAL "")
|
||||
if (MAKE_STATIC_LIBRARIES)
|
||||
set (PARQUET_ARROW_LINKAGE "static" CACHE INTERNAL "")
|
||||
set (ARROW_TEST_LINKAGE "static" CACHE INTERNAL "")
|
||||
set (ARROW_BUILD_STATIC ${MAKE_STATIC_LIBRARIES} CACHE INTERNAL "")
|
||||
else ()
|
||||
set (PARQUET_ARROW_LINKAGE "shared" CACHE INTERNAL "")
|
||||
set (ARROW_TEST_LINKAGE "shared" CACHE INTERNAL "")
|
||||
endif ()
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
|
||||
set (_save_build_type ${CMAKE_BUILD_TYPE})
|
||||
set (CMAKE_BUILD_TYPE Release)
|
||||
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
|
||||
endif ()
|
||||
|
||||
# Because Arrow uses CMAKE_SOURCE_DIR as a project path
|
||||
# Hopefully will be fixed in https://github.com/apache/arrow/pull/2676
|
||||
set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/cmake_modules")
|
||||
add_subdirectory (arrow/cpp)
|
||||
|
||||
if (_save_build_type)
|
||||
set (CMAKE_BUILD_TYPE ${_save_build_type})
|
||||
unset (_save_build_type)
|
||||
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
|
||||
endif ()
|
||||
|
||||
else()
|
||||
add_subdirectory(arrow-cmake)
|
||||
|
||||
# The library is large - avoid bloat.
|
||||
@ -214,7 +185,6 @@ else()
|
||||
target_compile_options (${THRIFT_LIBRARY} PRIVATE -g0)
|
||||
target_compile_options (${PARQUET_LIBRARY} PRIVATE -g0)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (USE_INTERNAL_AVRO_LIBRARY)
|
||||
add_subdirectory(avro-cmake)
|
||||
@ -240,6 +210,14 @@ if (USE_EMBEDDED_COMPILER AND USE_INTERNAL_LLVM_LIBRARY)
|
||||
set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "")
|
||||
set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "")
|
||||
set (LLVM_TARGETS_TO_BUILD "X86;AArch64" CACHE STRING "")
|
||||
# Yes it is set globally, but this is not enough, since llvm will add -std=c++11 after default
|
||||
# And c++2a cannot be used, due to ambiguous operator !=
|
||||
if (COMPILER_GCC OR COMPILER_CLANG)
|
||||
set (_CXX_STANDARD "gnu++17")
|
||||
else()
|
||||
set (_CXX_STANDARD "c++17")
|
||||
endif()
|
||||
set (LLVM_CXX_STD ${_CXX_STANDARD} CACHE STRING "" FORCE)
|
||||
add_subdirectory (llvm/llvm)
|
||||
target_include_directories(LLVMSupport SYSTEM BEFORE PRIVATE ${ZLIB_INCLUDE_DIR})
|
||||
endif ()
|
||||
@ -322,4 +300,5 @@ if (USE_INTERNAL_ROCKSDB_LIBRARY)
|
||||
add_subdirectory(rocksdb-cmake)
|
||||
endif()
|
||||
|
||||
add_subdirectory(dragonbox)
|
||||
add_subdirectory(fast_float)
|
||||
|
||||
|
2
contrib/arrow
vendored
2
contrib/arrow
vendored
@ -1 +1 @@
|
||||
Subproject commit 3cbcb7b62c2f2d02851bff837758637eb592a64b
|
||||
Subproject commit 744bdfe188f018e5e05f5deebd4e9ee0a7706cf4
|
@ -144,15 +144,16 @@ set(ORC_SRCS
|
||||
|
||||
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow)
|
||||
|
||||
configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/arrow/util/config.h")
|
||||
configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/cpp/src/arrow/util/config.h")
|
||||
|
||||
# arrow/cpp/src/arrow/CMakeLists.txt
|
||||
set(ARROW_SRCS
|
||||
${LIBRARY_DIR}/array.cc
|
||||
${LIBRARY_DIR}/buffer.cc
|
||||
${LIBRARY_DIR}/device.cc
|
||||
${LIBRARY_DIR}/builder.cc
|
||||
${LIBRARY_DIR}/chunked_array.cc
|
||||
${LIBRARY_DIR}/compare.cc
|
||||
${LIBRARY_DIR}/datum.cc
|
||||
${LIBRARY_DIR}/device.cc
|
||||
${LIBRARY_DIR}/extension_type.cc
|
||||
${LIBRARY_DIR}/memory_pool.cc
|
||||
${LIBRARY_DIR}/pretty_print.cc
|
||||
@ -167,11 +168,12 @@ set(ARROW_SRCS
|
||||
${LIBRARY_DIR}/type.cc
|
||||
${LIBRARY_DIR}/visitor.cc
|
||||
|
||||
${LIBRARY_DIR}/tensor/coo_converter.cc
|
||||
${LIBRARY_DIR}/tensor/csc_converter.cc
|
||||
${LIBRARY_DIR}/tensor/csf_converter.cc
|
||||
${LIBRARY_DIR}/tensor/csr_converter.cc
|
||||
|
||||
${LIBRARY_DIR}/array/array_base.cc
|
||||
${LIBRARY_DIR}/array/array_binary.cc
|
||||
${LIBRARY_DIR}/array/array_decimal.cc
|
||||
${LIBRARY_DIR}/array/array_dict.cc
|
||||
${LIBRARY_DIR}/array/array_nested.cc
|
||||
${LIBRARY_DIR}/array/array_primitive.cc
|
||||
${LIBRARY_DIR}/array/builder_adaptive.cc
|
||||
${LIBRARY_DIR}/array/builder_base.cc
|
||||
${LIBRARY_DIR}/array/builder_binary.cc
|
||||
@ -181,17 +183,50 @@ set(ARROW_SRCS
|
||||
${LIBRARY_DIR}/array/builder_primitive.cc
|
||||
${LIBRARY_DIR}/array/builder_union.cc
|
||||
${LIBRARY_DIR}/array/concatenate.cc
|
||||
${LIBRARY_DIR}/array/dict_internal.cc
|
||||
${LIBRARY_DIR}/array/data.cc
|
||||
${LIBRARY_DIR}/array/diff.cc
|
||||
${LIBRARY_DIR}/array/util.cc
|
||||
${LIBRARY_DIR}/array/validate.cc
|
||||
|
||||
${LIBRARY_DIR}/csv/converter.cc
|
||||
${LIBRARY_DIR}/compute/api_scalar.cc
|
||||
${LIBRARY_DIR}/compute/api_vector.cc
|
||||
${LIBRARY_DIR}/compute/cast.cc
|
||||
${LIBRARY_DIR}/compute/exec.cc
|
||||
${LIBRARY_DIR}/compute/function.cc
|
||||
${LIBRARY_DIR}/compute/kernel.cc
|
||||
${LIBRARY_DIR}/compute/registry.cc
|
||||
|
||||
${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc
|
||||
${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc
|
||||
${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc
|
||||
${LIBRARY_DIR}/compute/kernels/codegen_internal.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_compare.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_fill_null.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_nested.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_string.cc
|
||||
${LIBRARY_DIR}/compute/kernels/scalar_validity.cc
|
||||
${LIBRARY_DIR}/compute/kernels/vector_hash.cc
|
||||
${LIBRARY_DIR}/compute/kernels/vector_nested.cc
|
||||
${LIBRARY_DIR}/compute/kernels/vector_selection.cc
|
||||
${LIBRARY_DIR}/compute/kernels/vector_sort.cc
|
||||
${LIBRARY_DIR}/compute/kernels/util_internal.cc
|
||||
|
||||
${LIBRARY_DIR}/csv/chunker.cc
|
||||
${LIBRARY_DIR}/csv/column_builder.cc
|
||||
${LIBRARY_DIR}/csv/column_decoder.cc
|
||||
${LIBRARY_DIR}/csv/converter.cc
|
||||
${LIBRARY_DIR}/csv/options.cc
|
||||
${LIBRARY_DIR}/csv/parser.cc
|
||||
${LIBRARY_DIR}/csv/reader.cc
|
||||
${LIBRARY_DIR}/csv/column_decoder.cc
|
||||
|
||||
${LIBRARY_DIR}/ipc/dictionary.cc
|
||||
${LIBRARY_DIR}/ipc/feather.cc
|
||||
@ -202,14 +237,25 @@ set(ARROW_SRCS
|
||||
${LIBRARY_DIR}/ipc/writer.cc
|
||||
|
||||
${LIBRARY_DIR}/io/buffered.cc
|
||||
${LIBRARY_DIR}/io/caching.cc
|
||||
${LIBRARY_DIR}/io/compressed.cc
|
||||
${LIBRARY_DIR}/io/file.cc
|
||||
${LIBRARY_DIR}/io/interfaces.cc
|
||||
${LIBRARY_DIR}/io/memory.cc
|
||||
${LIBRARY_DIR}/io/slow.cc
|
||||
|
||||
${LIBRARY_DIR}/tensor/coo_converter.cc
|
||||
${LIBRARY_DIR}/tensor/csf_converter.cc
|
||||
${LIBRARY_DIR}/tensor/csx_converter.cc
|
||||
|
||||
${LIBRARY_DIR}/util/basic_decimal.cc
|
||||
${LIBRARY_DIR}/util/bit_block_counter.cc
|
||||
${LIBRARY_DIR}/util/bit_run_reader.cc
|
||||
${LIBRARY_DIR}/util/bit_util.cc
|
||||
${LIBRARY_DIR}/util/bitmap.cc
|
||||
${LIBRARY_DIR}/util/bitmap_builders.cc
|
||||
${LIBRARY_DIR}/util/bitmap_ops.cc
|
||||
${LIBRARY_DIR}/util/bpacking.cc
|
||||
${LIBRARY_DIR}/util/compression.cc
|
||||
${LIBRARY_DIR}/util/compression_lz4.cc
|
||||
${LIBRARY_DIR}/util/compression_snappy.cc
|
||||
@ -217,8 +263,12 @@ set(ARROW_SRCS
|
||||
${LIBRARY_DIR}/util/compression_zstd.cc
|
||||
${LIBRARY_DIR}/util/cpu_info.cc
|
||||
${LIBRARY_DIR}/util/decimal.cc
|
||||
${LIBRARY_DIR}/util/delimiting.cc
|
||||
${LIBRARY_DIR}/util/formatting.cc
|
||||
${LIBRARY_DIR}/util/future.cc
|
||||
${LIBRARY_DIR}/util/int_util.cc
|
||||
${LIBRARY_DIR}/util/io_util.cc
|
||||
${LIBRARY_DIR}/util/iterator.cc
|
||||
${LIBRARY_DIR}/util/key_value_metadata.cc
|
||||
${LIBRARY_DIR}/util/logging.cc
|
||||
${LIBRARY_DIR}/util/memory.cc
|
||||
@ -226,27 +276,15 @@ set(ARROW_SRCS
|
||||
${LIBRARY_DIR}/util/string.cc
|
||||
${LIBRARY_DIR}/util/task_group.cc
|
||||
${LIBRARY_DIR}/util/thread_pool.cc
|
||||
${LIBRARY_DIR}/util/time.cc
|
||||
${LIBRARY_DIR}/util/trie.cc
|
||||
${LIBRARY_DIR}/util/utf8.cc
|
||||
${LIBRARY_DIR}/util/future.cc
|
||||
${LIBRARY_DIR}/util/formatting.cc
|
||||
${LIBRARY_DIR}/util/parsing.cc
|
||||
${LIBRARY_DIR}/util/time.cc
|
||||
${LIBRARY_DIR}/util/delimiting.cc
|
||||
${LIBRARY_DIR}/util/iterator.cc
|
||||
${LIBRARY_DIR}/util/value_parsing.cc
|
||||
|
||||
${LIBRARY_DIR}/vendored/base64.cpp
|
||||
${ORC_SRCS}
|
||||
)
|
||||
|
||||
set(ARROW_SRCS ${ARROW_SRCS}
|
||||
${LIBRARY_DIR}/compute/context.cc
|
||||
${LIBRARY_DIR}/compute/kernels/boolean.cc
|
||||
${LIBRARY_DIR}/compute/kernels/cast.cc
|
||||
${LIBRARY_DIR}/compute/kernels/hash.cc
|
||||
${LIBRARY_DIR}/compute/kernels/util_internal.cc
|
||||
)
|
||||
|
||||
if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
|
||||
set(ARROW_WITH_SNAPPY 1)
|
||||
endif ()
|
||||
@ -289,7 +327,8 @@ if (USE_INTERNAL_PROTOBUF_LIBRARY)
|
||||
add_dependencies(${ARROW_LIBRARY} protoc)
|
||||
endif ()
|
||||
|
||||
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
|
||||
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src)
|
||||
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/cpp/src)
|
||||
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY})
|
||||
target_link_libraries(${ARROW_LIBRARY} PRIVATE lz4)
|
||||
if (ARROW_WITH_SNAPPY)
|
||||
@ -319,19 +358,26 @@ set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet)
|
||||
set(GEN_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/generated)
|
||||
# arrow/cpp/src/parquet/CMakeLists.txt
|
||||
set(PARQUET_SRCS
|
||||
${LIBRARY_DIR}/arrow/path_internal.cc
|
||||
${LIBRARY_DIR}/arrow/reader.cc
|
||||
${LIBRARY_DIR}/arrow/reader_internal.cc
|
||||
${LIBRARY_DIR}/arrow/schema.cc
|
||||
${LIBRARY_DIR}/arrow/schema_internal.cc
|
||||
${LIBRARY_DIR}/arrow/writer.cc
|
||||
${LIBRARY_DIR}/arrow/path_internal.cc
|
||||
${LIBRARY_DIR}/bloom_filter.cc
|
||||
${LIBRARY_DIR}/column_reader.cc
|
||||
${LIBRARY_DIR}/column_scanner.cc
|
||||
${LIBRARY_DIR}/column_writer.cc
|
||||
${LIBRARY_DIR}/deprecated_io.cc
|
||||
${LIBRARY_DIR}/encoding.cc
|
||||
${LIBRARY_DIR}/encryption.cc
|
||||
${LIBRARY_DIR}/encryption_internal.cc
|
||||
${LIBRARY_DIR}/file_reader.cc
|
||||
${LIBRARY_DIR}/file_writer.cc
|
||||
${LIBRARY_DIR}/internal_file_decryptor.cc
|
||||
${LIBRARY_DIR}/internal_file_encryptor.cc
|
||||
${LIBRARY_DIR}/level_conversion.cc
|
||||
${LIBRARY_DIR}/level_comparison.cc
|
||||
${LIBRARY_DIR}/metadata.cc
|
||||
${LIBRARY_DIR}/murmur3.cc
|
||||
${LIBRARY_DIR}/platform.cc
|
||||
@ -340,10 +386,6 @@ set(PARQUET_SRCS
|
||||
${LIBRARY_DIR}/schema.cc
|
||||
${LIBRARY_DIR}/statistics.cc
|
||||
${LIBRARY_DIR}/types.cc
|
||||
${LIBRARY_DIR}/encryption.cc
|
||||
${LIBRARY_DIR}/encryption_internal.cc
|
||||
${LIBRARY_DIR}/internal_file_decryptor.cc
|
||||
${LIBRARY_DIR}/internal_file_encryptor.cc
|
||||
|
||||
${GEN_LIBRARY_DIR}/parquet_constants.cpp
|
||||
${GEN_LIBRARY_DIR}/parquet_types.cpp
|
||||
|
@ -1,26 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#define ARROW_VERSION_MAJOR
|
||||
#define ARROW_VERSION_MINOR
|
||||
#define ARROW_VERSION_PATCH
|
||||
#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
|
||||
|
||||
#define ARROW_SO_VERSION ""
|
||||
#define ARROW_FULL_SO_VERSION ""
|
||||
|
||||
/* #undef GRPCPP_PP_INCLUDE */
|
@ -22,8 +22,8 @@
|
||||
#define PARQUET_VERSION_MINOR 5
|
||||
#define PARQUET_VERSION_PATCH 1
|
||||
|
||||
#define PARQUET_SO_VERSION 0
|
||||
#define PARQUET_FULL_SO_VERSION 0.17
|
||||
#define PARQUET_SO_VERSION "200"
|
||||
#define PARQUET_FULL_SO_VERSION "200.0.0"
|
||||
|
||||
// define the parquet created by version
|
||||
#define CREATED_BY_VERSION "parquet-cpp version 1.5.1-SNAPSHOT"
|
||||
|
2
contrib/boost
vendored
2
contrib/boost
vendored
@ -1 +1 @@
|
||||
Subproject commit a04e72c0464f0c31d3384f18f0c0db36a05538e0
|
||||
Subproject commit 0b98b443aa7bb77d65efd7b23b3b8c8a0ab5f1f3
|
@ -11,10 +11,11 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
|
||||
iostreams
|
||||
program_options
|
||||
regex
|
||||
context
|
||||
)
|
||||
|
||||
if(Boost_INCLUDE_DIR AND Boost_FILESYSTEM_LIBRARY AND Boost_FILESYSTEM_LIBRARY AND
|
||||
Boost_PROGRAM_OPTIONS_LIBRARY AND Boost_REGEX_LIBRARY AND Boost_SYSTEM_LIBRARY)
|
||||
Boost_PROGRAM_OPTIONS_LIBRARY AND Boost_REGEX_LIBRARY AND Boost_SYSTEM_LIBRARY AND Boost_CONTEXT_LIBRARY)
|
||||
|
||||
set(EXTERNAL_BOOST_FOUND 1)
|
||||
|
||||
@ -27,18 +28,21 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY)
|
||||
add_library (_boost_program_options INTERFACE)
|
||||
add_library (_boost_regex INTERFACE)
|
||||
add_library (_boost_system INTERFACE)
|
||||
add_library (_boost_context INTERFACE)
|
||||
|
||||
target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY})
|
||||
target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY})
|
||||
target_link_libraries (_boost_program_options INTERFACE ${Boost_PROGRAM_OPTIONS_LIBRARY})
|
||||
target_link_libraries (_boost_regex INTERFACE ${Boost_REGEX_LIBRARY})
|
||||
target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY})
|
||||
target_link_libraries (_boost_context INTERFACE ${Boost_CONTEXT_LIBRARY})
|
||||
|
||||
add_library (boost::filesystem ALIAS _boost_filesystem)
|
||||
add_library (boost::iostreams ALIAS _boost_iostreams)
|
||||
add_library (boost::program_options ALIAS _boost_program_options)
|
||||
add_library (boost::regex ALIAS _boost_regex)
|
||||
add_library (boost::system ALIAS _boost_system)
|
||||
add_library (boost::context ALIAS _boost_context)
|
||||
else()
|
||||
set(EXTERNAL_BOOST_FOUND 0)
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system boost")
|
||||
@ -142,4 +146,57 @@ if (NOT EXTERNAL_BOOST_FOUND)
|
||||
add_library (_boost_system ${SRCS_SYSTEM})
|
||||
add_library (boost::system ALIAS _boost_system)
|
||||
target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR})
|
||||
|
||||
# context
|
||||
enable_language(ASM)
|
||||
SET(ASM_OPTIONS "-x assembler-with-cpp")
|
||||
|
||||
if (SANITIZE AND (SANITIZE STREQUAL "address" OR SANITIZE STREQUAL "thread"))
|
||||
add_compile_definitions(BOOST_USE_UCONTEXT)
|
||||
|
||||
if (SANITIZE STREQUAL "address")
|
||||
add_compile_definitions(BOOST_USE_ASAN)
|
||||
elseif (SANITIZE STREQUAL "thread")
|
||||
add_compile_definitions(BOOST_USE_TSAN)
|
||||
endif()
|
||||
|
||||
set (SRCS_CONTEXT
|
||||
${LIBRARY_DIR}/libs/context/src/fiber.cpp
|
||||
${LIBRARY_DIR}/libs/context/src/continuation.cpp
|
||||
${LIBRARY_DIR}/libs/context/src/dummy.cpp
|
||||
${LIBRARY_DIR}/libs/context/src/execution_context.cpp
|
||||
${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp
|
||||
)
|
||||
elseif (ARCH_ARM)
|
||||
set (SRCS_CONTEXT
|
||||
${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S
|
||||
${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S
|
||||
${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S
|
||||
${LIBRARY_DIR}/libs/context/src/dummy.cpp
|
||||
${LIBRARY_DIR}/libs/context/src/execution_context.cpp
|
||||
${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp
|
||||
)
|
||||
elseif(OS_DARWIN)
|
||||
set (SRCS_CONTEXT
|
||||
${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_macho_gas.S
|
||||
${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_macho_gas.S
|
||||
${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_macho_gas.S
|
||||
${LIBRARY_DIR}/libs/context/src/dummy.cpp
|
||||
${LIBRARY_DIR}/libs/context/src/execution_context.cpp
|
||||
${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp
|
||||
)
|
||||
else()
|
||||
set (SRCS_CONTEXT
|
||||
${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_elf_gas.S
|
||||
${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_elf_gas.S
|
||||
${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_elf_gas.S
|
||||
${LIBRARY_DIR}/libs/context/src/dummy.cpp
|
||||
${LIBRARY_DIR}/libs/context/src/execution_context.cpp
|
||||
${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
add_library (_boost_context ${SRCS_CONTEXT})
|
||||
add_library (boost::context ALIAS _boost_context)
|
||||
target_include_directories (_boost_context PRIVATE ${LIBRARY_DIR})
|
||||
endif ()
|
||||
|
1
contrib/boringssl
vendored
Submodule
1
contrib/boringssl
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 8b2bf912ba04823cfe9e7e8f5bb60cb7f6252449
|
661
contrib/boringssl-cmake/CMakeLists.txt
Normal file
661
contrib/boringssl-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,661 @@
|
||||
# Copyright (c) 2019 The Chromium Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
# This file is created by generate_build_files.py and edited accordingly.
|
||||
|
||||
cmake_minimum_required(VERSION 3.0)
|
||||
|
||||
project(BoringSSL LANGUAGES C CXX)
|
||||
|
||||
set(BORINGSSL_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/boringssl)
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
set(CLANG 1)
|
||||
endif()
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX OR CLANG)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fvisibility=hidden -fno-common -fno-exceptions -fno-rtti")
|
||||
if(APPLE)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -fno-common")
|
||||
if((CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.8.99") OR CLANG)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
|
||||
else()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# pthread_rwlock_t requires a feature flag.
|
||||
if(NOT WIN32)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_XOPEN_SOURCE=700")
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
add_definitions(-D_HAS_EXCEPTIONS=0)
|
||||
add_definitions(-DWIN32_LEAN_AND_MEAN)
|
||||
add_definitions(-DNOMINMAX)
|
||||
# Allow use of fopen.
|
||||
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
|
||||
# VS 2017 and higher supports STL-only warning suppressions.
|
||||
# A bug in CMake < 3.13.0 may cause the space in this value to
|
||||
# cause issues when building with NASM. In that case, update CMake.
|
||||
add_definitions("-D_STL_EXTRA_DISABLED_WARNINGS=4774 4987")
|
||||
endif()
|
||||
|
||||
add_definitions(-DBORINGSSL_IMPLEMENTATION)
|
||||
|
||||
# CMake's iOS support uses Apple's multiple-architecture toolchain. It takes an
|
||||
# architecture list from CMAKE_OSX_ARCHITECTURES, leaves CMAKE_SYSTEM_PROCESSOR
|
||||
# alone, and expects all architecture-specific logic to be conditioned within
|
||||
# the source files rather than the build. This does not work for our assembly
|
||||
# files, so we fix CMAKE_SYSTEM_PROCESSOR and only support single-architecture
|
||||
# builds.
|
||||
if(NOT OPENSSL_NO_ASM AND CMAKE_OSX_ARCHITECTURES)
|
||||
list(LENGTH CMAKE_OSX_ARCHITECTURES NUM_ARCHES)
|
||||
if(NOT ${NUM_ARCHES} EQUAL 1)
|
||||
message(FATAL_ERROR "Universal binaries not supported.")
|
||||
endif()
|
||||
list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR)
|
||||
endif()
|
||||
|
||||
if(OPENSSL_NO_ASM)
|
||||
add_definitions(-DOPENSSL_NO_ASM)
|
||||
set(ARCH "generic")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
|
||||
set(ARCH "x86_64")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64")
|
||||
set(ARCH "x86_64")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
|
||||
# cmake reports AMD64 on Windows, but we might be building for 32-bit.
|
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(ARCH "x86_64")
|
||||
else()
|
||||
set(ARCH "x86")
|
||||
endif()
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86")
|
||||
set(ARCH "x86")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386")
|
||||
set(ARCH "x86")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686")
|
||||
set(ARCH "x86")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
|
||||
set(ARCH "aarch64")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64")
|
||||
set(ARCH "aarch64")
|
||||
# Apple A12 Bionic chipset which is added in iPhone XS/XS Max/XR uses arm64e architecture.
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64e")
|
||||
set(ARCH "aarch64")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm*")
|
||||
set(ARCH "arm")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips")
|
||||
# Just to avoid the “unknown processor” error.
|
||||
set(ARCH "generic")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc64le")
|
||||
set(ARCH "ppc64le")
|
||||
else()
|
||||
message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
|
||||
endif()
|
||||
|
||||
if(NOT OPENSSL_NO_ASM)
|
||||
if(UNIX)
|
||||
enable_language(ASM)
|
||||
|
||||
# Clang's integerated assembler does not support debug symbols.
|
||||
if(NOT CMAKE_ASM_COMPILER_ID MATCHES "Clang")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,-g")
|
||||
endif()
|
||||
|
||||
# CMake does not add -isysroot and -arch flags to assembly.
|
||||
if(APPLE)
|
||||
if(CMAKE_OSX_SYSROOT)
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -isysroot \"${CMAKE_OSX_SYSROOT}\"")
|
||||
endif()
|
||||
foreach(arch ${CMAKE_OSX_ARCHITECTURES})
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -arch ${arch}")
|
||||
endforeach()
|
||||
endif()
|
||||
else()
|
||||
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -gcv8")
|
||||
enable_language(ASM_NASM)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_definitions(-DBORINGSSL_SHARED_LIBRARY)
|
||||
# Enable position-independent code globally. This is needed because
|
||||
# some library targets are OBJECT libraries.
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
|
||||
endif()
|
||||
|
||||
include_directories(${BORINGSSL_SOURCE_DIR}/include)
|
||||
|
||||
set(
|
||||
CRYPTO_ios_aarch64_SOURCES
|
||||
|
||||
ios-aarch64/crypto/chacha/chacha-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
|
||||
ios-aarch64/crypto/fipsmodule/armv8-mont.S
|
||||
ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
|
||||
ios-aarch64/crypto/fipsmodule/sha1-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/sha256-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/sha512-armv8.S
|
||||
ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
|
||||
ios-aarch64/crypto/test/trampoline-armv8.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_ios_arm_SOURCES
|
||||
|
||||
ios-arm/crypto/chacha/chacha-armv4.S
|
||||
ios-arm/crypto/fipsmodule/aesv8-armx32.S
|
||||
ios-arm/crypto/fipsmodule/armv4-mont.S
|
||||
ios-arm/crypto/fipsmodule/bsaes-armv7.S
|
||||
ios-arm/crypto/fipsmodule/ghash-armv4.S
|
||||
ios-arm/crypto/fipsmodule/ghashv8-armx32.S
|
||||
ios-arm/crypto/fipsmodule/sha1-armv4-large.S
|
||||
ios-arm/crypto/fipsmodule/sha256-armv4.S
|
||||
ios-arm/crypto/fipsmodule/sha512-armv4.S
|
||||
ios-arm/crypto/fipsmodule/vpaes-armv7.S
|
||||
ios-arm/crypto/test/trampoline-armv4.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_aarch64_SOURCES
|
||||
|
||||
linux-aarch64/crypto/chacha/chacha-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
|
||||
linux-aarch64/crypto/fipsmodule/armv8-mont.S
|
||||
linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
|
||||
linux-aarch64/crypto/fipsmodule/sha1-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/sha256-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/sha512-armv8.S
|
||||
linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
|
||||
linux-aarch64/crypto/test/trampoline-armv8.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_arm_SOURCES
|
||||
|
||||
linux-arm/crypto/chacha/chacha-armv4.S
|
||||
linux-arm/crypto/fipsmodule/aesv8-armx32.S
|
||||
linux-arm/crypto/fipsmodule/armv4-mont.S
|
||||
linux-arm/crypto/fipsmodule/bsaes-armv7.S
|
||||
linux-arm/crypto/fipsmodule/ghash-armv4.S
|
||||
linux-arm/crypto/fipsmodule/ghashv8-armx32.S
|
||||
linux-arm/crypto/fipsmodule/sha1-armv4-large.S
|
||||
linux-arm/crypto/fipsmodule/sha256-armv4.S
|
||||
linux-arm/crypto/fipsmodule/sha512-armv4.S
|
||||
linux-arm/crypto/fipsmodule/vpaes-armv7.S
|
||||
linux-arm/crypto/test/trampoline-armv4.S
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/curve25519/asm/x25519-asm-arm.S
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm_asm.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_ppc64le_SOURCES
|
||||
|
||||
linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
|
||||
linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
|
||||
linux-ppc64le/crypto/test/trampoline-ppc.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_x86_SOURCES
|
||||
|
||||
linux-x86/crypto/chacha/chacha-x86.S
|
||||
linux-x86/crypto/fipsmodule/aesni-x86.S
|
||||
linux-x86/crypto/fipsmodule/bn-586.S
|
||||
linux-x86/crypto/fipsmodule/co-586.S
|
||||
linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S
|
||||
linux-x86/crypto/fipsmodule/ghash-x86.S
|
||||
linux-x86/crypto/fipsmodule/md5-586.S
|
||||
linux-x86/crypto/fipsmodule/sha1-586.S
|
||||
linux-x86/crypto/fipsmodule/sha256-586.S
|
||||
linux-x86/crypto/fipsmodule/sha512-586.S
|
||||
linux-x86/crypto/fipsmodule/vpaes-x86.S
|
||||
linux-x86/crypto/fipsmodule/x86-mont.S
|
||||
linux-x86/crypto/test/trampoline-x86.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_linux_x86_64_SOURCES
|
||||
|
||||
linux-x86_64/crypto/chacha/chacha-x86_64.S
|
||||
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
|
||||
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/md5-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
|
||||
linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
|
||||
linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
|
||||
linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/sha256-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/sha512-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
|
||||
linux-x86_64/crypto/fipsmodule/x86_64-mont.S
|
||||
linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
|
||||
linux-x86_64/crypto/test/trampoline-x86_64.S
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_mac_x86_SOURCES
|
||||
|
||||
mac-x86/crypto/chacha/chacha-x86.S
|
||||
mac-x86/crypto/fipsmodule/aesni-x86.S
|
||||
mac-x86/crypto/fipsmodule/bn-586.S
|
||||
mac-x86/crypto/fipsmodule/co-586.S
|
||||
mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S
|
||||
mac-x86/crypto/fipsmodule/ghash-x86.S
|
||||
mac-x86/crypto/fipsmodule/md5-586.S
|
||||
mac-x86/crypto/fipsmodule/sha1-586.S
|
||||
mac-x86/crypto/fipsmodule/sha256-586.S
|
||||
mac-x86/crypto/fipsmodule/sha512-586.S
|
||||
mac-x86/crypto/fipsmodule/vpaes-x86.S
|
||||
mac-x86/crypto/fipsmodule/x86-mont.S
|
||||
mac-x86/crypto/test/trampoline-x86.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_mac_x86_64_SOURCES
|
||||
|
||||
mac-x86_64/crypto/chacha/chacha-x86_64.S
|
||||
mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
|
||||
mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/ghash-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/md5-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
|
||||
mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
|
||||
mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/rsaz-avx2.S
|
||||
mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/sha256-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/sha512-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
|
||||
mac-x86_64/crypto/fipsmodule/x86_64-mont.S
|
||||
mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
|
||||
mac-x86_64/crypto/test/trampoline-x86_64.S
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_win_x86_SOURCES
|
||||
|
||||
win-x86/crypto/chacha/chacha-x86.asm
|
||||
win-x86/crypto/fipsmodule/aesni-x86.asm
|
||||
win-x86/crypto/fipsmodule/bn-586.asm
|
||||
win-x86/crypto/fipsmodule/co-586.asm
|
||||
win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm
|
||||
win-x86/crypto/fipsmodule/ghash-x86.asm
|
||||
win-x86/crypto/fipsmodule/md5-586.asm
|
||||
win-x86/crypto/fipsmodule/sha1-586.asm
|
||||
win-x86/crypto/fipsmodule/sha256-586.asm
|
||||
win-x86/crypto/fipsmodule/sha512-586.asm
|
||||
win-x86/crypto/fipsmodule/vpaes-x86.asm
|
||||
win-x86/crypto/fipsmodule/x86-mont.asm
|
||||
win-x86/crypto/test/trampoline-x86.asm
|
||||
)
|
||||
|
||||
set(
|
||||
CRYPTO_win_x86_64_SOURCES
|
||||
|
||||
win-x86_64/crypto/chacha/chacha-x86_64.asm
|
||||
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
|
||||
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/md5-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm
|
||||
win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm
|
||||
win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/rsaz-avx2.asm
|
||||
win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/sha256-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/sha512-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
|
||||
win-x86_64/crypto/fipsmodule/x86_64-mont.asm
|
||||
win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
|
||||
win-x86_64/crypto/test/trampoline-x86_64.asm
|
||||
)
|
||||
|
||||
if(APPLE AND ${ARCH} STREQUAL "aarch64")
|
||||
set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_aarch64_SOURCES})
|
||||
elseif(APPLE AND ${ARCH} STREQUAL "arm")
|
||||
set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_arm_SOURCES})
|
||||
elseif(APPLE)
|
||||
set(CRYPTO_ARCH_SOURCES ${CRYPTO_mac_${ARCH}_SOURCES})
|
||||
elseif(UNIX)
|
||||
set(CRYPTO_ARCH_SOURCES ${CRYPTO_linux_${ARCH}_SOURCES})
|
||||
elseif(WIN32)
|
||||
set(CRYPTO_ARCH_SOURCES ${CRYPTO_win_${ARCH}_SOURCES})
|
||||
endif()
|
||||
|
||||
add_library(
|
||||
crypto
|
||||
|
||||
${CRYPTO_ARCH_SOURCES}
|
||||
err_data.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bitstr.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bool.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_d2i_fp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_dup.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_enum.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_gentm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_i2d_fp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_int.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_mbstr.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utctm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utf8.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_lib.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_par.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn_pack.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_enum.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_int.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_string.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_dec.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_enc.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_fre.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_new.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_typ.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_utl.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/asn1/time_support.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/base64/base64.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/bio.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/bio_mem.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/connect.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/fd.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/file.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/hexdump.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/pair.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/asn1_compat.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/ber.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbb.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbs.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/bytestring/unicode.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/chacha/chacha.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/cipher_extra.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/derive_key.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesccm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesctrhmac.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesgcmsiv.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_chacha20poly1305.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_null.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc2.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc4.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_tls.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/tls_cbc.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cmac/cmac.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/crypto.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/err/err.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/digestsign.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/evp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_ctx.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_dsa_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/pbkdf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/print.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/scrypt.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/evp/sign.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/mem.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/obj/obj.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/obj/obj_xref.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_all.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_info.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_lib.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_oth.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pk8.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pkey.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_xaux.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7_x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/p5_pbev2.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8_x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_vec.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/pool/pool.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/refcount_c11.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/refcount_lock.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_asn1.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_print.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/siphash/siphash.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/stack/stack.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/thread.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509a.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_att.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_cmp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_d2.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_def.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_ext.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_lu.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_obj.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_r2x.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_req.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_set.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_trs.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_txt.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_v3.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vfy.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vpm.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509cset.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509name.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509rset.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x509spki.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_algor.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_all.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_attrib.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_crl.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_exten.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_info.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_name.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pkey.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pubkey.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_req.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_sig.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_spki.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_val.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509a.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_cache.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_data.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_lib.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_map.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_node.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_tree.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akey.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akeya.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_alt.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bcons.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bitst.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_conf.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_cpols.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_crld.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_enum.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_extku.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_genn.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ia5.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_info.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_int.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_lib.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ncons.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ocsp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c
|
||||
${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c
|
||||
)
|
||||
|
||||
add_library(
|
||||
ssl
|
||||
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/bio_ssl.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/d1_both.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/d1_lib.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/d1_pkt.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/handshake_server.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/s3_both.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/s3_lib.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/s3_pkt.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_aead_ctx.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_asn1.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_buffer.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_cert.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_cipher.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_file.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_key_share.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_lib.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_privkey.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_session.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_stat.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_transcript.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls13_server.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls_method.cc
|
||||
${BORINGSSL_SOURCE_DIR}/ssl/tls_record.cc
|
||||
|
||||
${BORINGSSL_SOURCE_DIR}/decrepit/ssl/ssl_decrepit.c
|
||||
${BORINGSSL_SOURCE_DIR}/decrepit/cfb/cfb.c
|
||||
)
|
||||
|
||||
add_executable(
|
||||
bssl
|
||||
|
||||
${BORINGSSL_SOURCE_DIR}/tool/args.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/ciphers.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/client.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/const.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/digest.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/fd.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/file.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/rand.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/server.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/sign.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/speed.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/tool.cc
|
||||
${BORINGSSL_SOURCE_DIR}/tool/transport_common.cc
|
||||
)
|
||||
|
||||
target_link_libraries(ssl crypto)
|
||||
target_link_libraries(bssl ssl)
|
||||
|
||||
if(NOT WIN32 AND NOT ANDROID)
|
||||
target_link_libraries(crypto pthread)
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
target_link_libraries(bssl ws2_32)
|
||||
endif()
|
||||
|
||||
target_include_directories(crypto SYSTEM PUBLIC ${BORINGSSL_SOURCE_DIR}/include)
|
||||
target_include_directories(ssl SYSTEM PUBLIC ${BORINGSSL_SOURCE_DIR}/include)
|
||||
|
||||
target_compile_options(crypto PRIVATE -Wno-gnu-anonymous-struct)
|
5012
contrib/boringssl-cmake/crypto_test_data.cc
Normal file
5012
contrib/boringssl-cmake/crypto_test_data.cc
Normal file
File diff suppressed because one or more lines are too long
1457
contrib/boringssl-cmake/err_data.c
Normal file
1457
contrib/boringssl-cmake/err_data.c
Normal file
File diff suppressed because it is too large
Load Diff
1991
contrib/boringssl-cmake/ios-aarch64/crypto/chacha/chacha-armv8.S
Normal file
1991
contrib/boringssl-cmake/ios-aarch64/crypto/chacha/chacha-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,782 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
|
||||
.section __TEXT,__const
|
||||
.align 5
|
||||
Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
.globl _aes_hw_set_encrypt_key
|
||||
.private_extern _aes_hw_set_encrypt_key
|
||||
|
||||
.align 5
|
||||
_aes_hw_set_encrypt_key:
|
||||
Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
mov x3,#-1
|
||||
cmp x0,#0
|
||||
b.eq Lenc_key_abort
|
||||
cmp x2,#0
|
||||
b.eq Lenc_key_abort
|
||||
mov x3,#-2
|
||||
cmp w1,#128
|
||||
b.lt Lenc_key_abort
|
||||
cmp w1,#256
|
||||
b.gt Lenc_key_abort
|
||||
tst w1,#0x3f
|
||||
b.ne Lenc_key_abort
|
||||
|
||||
adrp x3,Lrcon@PAGE
|
||||
add x3,x3,Lrcon@PAGEOFF
|
||||
cmp w1,#192
|
||||
|
||||
eor v0.16b,v0.16b,v0.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
mov w1,#8 // reuse w1
|
||||
ld1 {v1.4s,v2.4s},[x3],#32
|
||||
|
||||
b.lt Loop128
|
||||
b.eq L192
|
||||
b L256
|
||||
|
||||
.align 4
|
||||
Loop128:
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
b.ne Loop128
|
||||
|
||||
ld1 {v1.4s},[x3]
|
||||
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
st1 {v3.4s},[x2]
|
||||
add x2,x2,#0x50
|
||||
|
||||
mov w12,#10
|
||||
b Ldone
|
||||
|
||||
.align 4
|
||||
L192:
|
||||
ld1 {v4.8b},[x0],#8
|
||||
movi v6.16b,#8 // borrow v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
sub v2.16b,v2.16b,v6.16b // adjust the mask
|
||||
|
||||
Loop192:
|
||||
tbl v6.16b,{v4.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v4.8b},[x2],#8
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
|
||||
dup v5.4s,v3.s[3]
|
||||
eor v5.16b,v5.16b,v4.16b
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
ext v4.16b,v0.16b,v4.16b,#12
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
eor v4.16b,v4.16b,v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
b.ne Loop192
|
||||
|
||||
mov w12,#12
|
||||
add x2,x2,#0x20
|
||||
b Ldone
|
||||
|
||||
.align 4
|
||||
L256:
|
||||
ld1 {v4.16b},[x0]
|
||||
mov w1,#7
|
||||
mov w12,#14
|
||||
st1 {v3.4s},[x2],#16
|
||||
|
||||
Loop256:
|
||||
tbl v6.16b,{v4.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v4.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
b.eq Ldone
|
||||
|
||||
dup v6.4s,v3.s[3] // just splat
|
||||
ext v5.16b,v0.16b,v4.16b,#12
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
|
||||
eor v4.16b,v4.16b,v6.16b
|
||||
b Loop256
|
||||
|
||||
Ldone:
|
||||
str w12,[x2]
|
||||
mov x3,#0
|
||||
|
||||
Lenc_key_abort:
|
||||
mov x0,x3 // return value
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
|
||||
|
||||
.globl _aes_hw_set_decrypt_key
|
||||
.private_extern _aes_hw_set_decrypt_key
|
||||
|
||||
.align 5
|
||||
_aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
bl Lenc_key
|
||||
|
||||
cmp x0,#0
|
||||
b.ne Ldec_key_abort
|
||||
|
||||
sub x2,x2,#240 // restore original x2
|
||||
mov x4,#-16
|
||||
add x0,x2,x12,lsl#4 // end of key schedule
|
||||
|
||||
ld1 {v0.4s},[x2]
|
||||
ld1 {v1.4s},[x0]
|
||||
st1 {v0.4s},[x0],x4
|
||||
st1 {v1.4s},[x2],#16
|
||||
|
||||
Loop_imc:
|
||||
ld1 {v0.4s},[x2]
|
||||
ld1 {v1.4s},[x0]
|
||||
aesimc v0.16b,v0.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
st1 {v0.4s},[x0],x4
|
||||
st1 {v1.4s},[x2],#16
|
||||
cmp x0,x2
|
||||
b.hi Loop_imc
|
||||
|
||||
ld1 {v0.4s},[x2]
|
||||
aesimc v0.16b,v0.16b
|
||||
st1 {v0.4s},[x0]
|
||||
|
||||
eor x0,x0,x0 // return value
|
||||
Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
.globl _aes_hw_encrypt
|
||||
.private_extern _aes_hw_encrypt
|
||||
|
||||
.align 5
|
||||
_aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
sub w3,w3,#2
|
||||
ld1 {v1.4s},[x2],#16
|
||||
|
||||
Loop_enc:
|
||||
aese v2.16b,v0.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2],#16
|
||||
subs w3,w3,#2
|
||||
aese v2.16b,v1.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v1.4s},[x2],#16
|
||||
b.gt Loop_enc
|
||||
|
||||
aese v2.16b,v0.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2]
|
||||
aese v2.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
|
||||
st1 {v2.16b},[x1]
|
||||
ret
|
||||
|
||||
.globl _aes_hw_decrypt
|
||||
.private_extern _aes_hw_decrypt
|
||||
|
||||
.align 5
|
||||
_aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
sub w3,w3,#2
|
||||
ld1 {v1.4s},[x2],#16
|
||||
|
||||
Loop_dec:
|
||||
aesd v2.16b,v0.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2],#16
|
||||
subs w3,w3,#2
|
||||
aesd v2.16b,v1.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v1.4s},[x2],#16
|
||||
b.gt Loop_dec
|
||||
|
||||
aesd v2.16b,v0.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2]
|
||||
aesd v2.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
|
||||
st1 {v2.16b},[x1]
|
||||
ret
|
||||
|
||||
.globl _aes_hw_cbc_encrypt
|
||||
.private_extern _aes_hw_cbc_encrypt
|
||||
|
||||
.align 5
|
||||
_aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
subs x2,x2,#16
|
||||
mov x8,#16
|
||||
b.lo Lcbc_abort
|
||||
csel x8,xzr,x8,eq
|
||||
|
||||
cmp w5,#0 // en- or decrypting?
|
||||
ldr w5,[x3,#240]
|
||||
and x2,x2,#-16
|
||||
ld1 {v6.16b},[x4]
|
||||
ld1 {v0.16b},[x0],x8
|
||||
|
||||
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
|
||||
sub w5,w5,#6
|
||||
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
|
||||
sub w5,w5,#2
|
||||
ld1 {v18.4s,v19.4s},[x7],#32
|
||||
ld1 {v20.4s,v21.4s},[x7],#32
|
||||
ld1 {v22.4s,v23.4s},[x7],#32
|
||||
ld1 {v7.4s},[x7]
|
||||
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
b.eq Lcbc_dec
|
||||
|
||||
cmp w5,#2
|
||||
eor v0.16b,v0.16b,v6.16b
|
||||
eor v5.16b,v16.16b,v7.16b
|
||||
b.eq Lcbc_enc128
|
||||
|
||||
ld1 {v2.4s,v3.4s},[x7]
|
||||
add x7,x3,#16
|
||||
add x6,x3,#16*4
|
||||
add x12,x3,#16*5
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
add x14,x3,#16*6
|
||||
add x3,x3,#16*7
|
||||
b Lenter_cbc_enc
|
||||
|
||||
.align 4
|
||||
Loop_cbc_enc:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
st1 {v6.16b},[x1],#16
|
||||
Lenter_cbc_enc:
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v2.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.4s},[x6]
|
||||
cmp w5,#4
|
||||
aese v0.16b,v3.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x12]
|
||||
b.eq Lcbc_enc192
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.4s},[x14]
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x3]
|
||||
nop
|
||||
|
||||
Lcbc_enc192:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
subs x2,x2,#16
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
csel x8,xzr,x8,eq
|
||||
aese v0.16b,v18.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v19.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.16b},[x0],x8
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
eor v16.16b,v16.16b,v5.16b
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v23.16b
|
||||
eor v6.16b,v0.16b,v7.16b
|
||||
b.hs Loop_cbc_enc
|
||||
|
||||
st1 {v6.16b},[x1],#16
|
||||
b Lcbc_done
|
||||
|
||||
.align 5
|
||||
Lcbc_enc128:
|
||||
ld1 {v2.4s,v3.4s},[x7]
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
b Lenter_cbc_enc128
|
||||
Loop_cbc_enc128:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
st1 {v6.16b},[x1],#16
|
||||
Lenter_cbc_enc128:
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
subs x2,x2,#16
|
||||
aese v0.16b,v2.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
csel x8,xzr,x8,eq
|
||||
aese v0.16b,v3.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v18.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v19.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.16b},[x0],x8
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
eor v16.16b,v16.16b,v5.16b
|
||||
aese v0.16b,v23.16b
|
||||
eor v6.16b,v0.16b,v7.16b
|
||||
b.hs Loop_cbc_enc128
|
||||
|
||||
st1 {v6.16b},[x1],#16
|
||||
b Lcbc_done
|
||||
.align 5
|
||||
Lcbc_dec:
|
||||
ld1 {v18.16b},[x0],#16
|
||||
subs x2,x2,#32 // bias
|
||||
add w6,w5,#2
|
||||
orr v3.16b,v0.16b,v0.16b
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
orr v19.16b,v18.16b,v18.16b
|
||||
b.lo Lcbc_dec_tail
|
||||
|
||||
orr v1.16b,v18.16b,v18.16b
|
||||
ld1 {v18.16b},[x0],#16
|
||||
orr v2.16b,v0.16b,v0.16b
|
||||
orr v3.16b,v1.16b,v1.16b
|
||||
orr v19.16b,v18.16b,v18.16b
|
||||
|
||||
Loop3x_cbc_dec:
|
||||
aesd v0.16b,v16.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aesd v0.16b,v17.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt Loop3x_cbc_dec
|
||||
|
||||
aesd v0.16b,v16.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v4.16b,v6.16b,v7.16b
|
||||
subs x2,x2,#0x30
|
||||
eor v5.16b,v2.16b,v7.16b
|
||||
csel x6,x2,x6,lo // x6, w6, is zero at this point
|
||||
aesd v0.16b,v17.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v17.16b,v3.16b,v7.16b
|
||||
add x0,x0,x6 // x0 is adjusted in such way that
|
||||
// at exit from the loop v1.16b-v18.16b
|
||||
// are loaded with last "words"
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
mov x7,x3
|
||||
aesd v0.16b,v20.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v20.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v20.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
aesd v0.16b,v21.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v21.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v21.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
aesd v0.16b,v22.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v22.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v22.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v19.16b},[x0],#16
|
||||
aesd v0.16b,v23.16b
|
||||
aesd v1.16b,v23.16b
|
||||
aesd v18.16b,v23.16b
|
||||
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
|
||||
add w6,w5,#2
|
||||
eor v4.16b,v4.16b,v0.16b
|
||||
eor v5.16b,v5.16b,v1.16b
|
||||
eor v18.16b,v18.16b,v17.16b
|
||||
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
|
||||
st1 {v4.16b},[x1],#16
|
||||
orr v0.16b,v2.16b,v2.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
orr v1.16b,v3.16b,v3.16b
|
||||
st1 {v18.16b},[x1],#16
|
||||
orr v18.16b,v19.16b,v19.16b
|
||||
b.hs Loop3x_cbc_dec
|
||||
|
||||
cmn x2,#0x30
|
||||
b.eq Lcbc_done
|
||||
nop
|
||||
|
||||
Lcbc_dec_tail:
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt Lcbc_dec_tail
|
||||
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
aesd v1.16b,v20.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v20.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
cmn x2,#0x20
|
||||
aesd v1.16b,v21.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v21.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v5.16b,v6.16b,v7.16b
|
||||
aesd v1.16b,v22.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v22.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v17.16b,v3.16b,v7.16b
|
||||
aesd v1.16b,v23.16b
|
||||
aesd v18.16b,v23.16b
|
||||
b.eq Lcbc_dec_one
|
||||
eor v5.16b,v5.16b,v1.16b
|
||||
eor v17.16b,v17.16b,v18.16b
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
st1 {v17.16b},[x1],#16
|
||||
b Lcbc_done
|
||||
|
||||
Lcbc_dec_one:
|
||||
eor v5.16b,v5.16b,v18.16b
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
|
||||
Lcbc_done:
|
||||
st1 {v6.16b},[x4]
|
||||
Lcbc_abort:
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
|
||||
.globl _aes_hw_ctr32_encrypt_blocks
|
||||
.private_extern _aes_hw_ctr32_encrypt_blocks
|
||||
|
||||
.align 5
|
||||
_aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
|
||||
ldr w8, [x4, #12]
|
||||
ld1 {v0.4s},[x4]
|
||||
|
||||
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
|
||||
sub w5,w5,#4
|
||||
mov x12,#16
|
||||
cmp x2,#2
|
||||
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
|
||||
sub w5,w5,#2
|
||||
ld1 {v20.4s,v21.4s},[x7],#32
|
||||
ld1 {v22.4s,v23.4s},[x7],#32
|
||||
ld1 {v7.4s},[x7]
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
csel x12,xzr,x12,lo
|
||||
#ifndef __ARMEB__
|
||||
rev w8, w8
|
||||
#endif
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
add w10, w8, #1
|
||||
orr v18.16b,v0.16b,v0.16b
|
||||
add w8, w8, #2
|
||||
orr v6.16b,v0.16b,v0.16b
|
||||
rev w10, w10
|
||||
mov v1.s[3],w10
|
||||
b.ls Lctr32_tail
|
||||
rev w12, w8
|
||||
sub x2,x2,#3 // bias
|
||||
mov v18.s[3],w12
|
||||
b Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
Loop3x_ctr32:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt Loop3x_ctr32
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v4.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v5.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
orr v0.16b,v6.16b,v6.16b
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
aese v4.16b,v17.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v17.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
ld1 {v19.16b},[x0],#16
|
||||
mov x7,x3
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v17.16b,v18.16b
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
add w9,w8,#1
|
||||
aese v4.16b,v20.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v20.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
eor v2.16b,v2.16b,v7.16b
|
||||
add w10,w8,#2
|
||||
aese v17.16b,v20.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
eor v3.16b,v3.16b,v7.16b
|
||||
add w8,w8,#3
|
||||
aese v4.16b,v21.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v21.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
eor v19.16b,v19.16b,v7.16b
|
||||
rev w9,w9
|
||||
aese v17.16b,v21.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v0.s[3], w9
|
||||
rev w10,w10
|
||||
aese v4.16b,v22.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v22.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
mov v1.s[3], w10
|
||||
rev w12,w8
|
||||
aese v17.16b,v22.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v18.s[3], w12
|
||||
subs x2,x2,#3
|
||||
aese v4.16b,v23.16b
|
||||
aese v5.16b,v23.16b
|
||||
aese v17.16b,v23.16b
|
||||
|
||||
eor v2.16b,v2.16b,v4.16b
|
||||
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
|
||||
st1 {v2.16b},[x1],#16
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
mov w6,w5
|
||||
st1 {v3.16b},[x1],#16
|
||||
eor v19.16b,v19.16b,v17.16b
|
||||
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
|
||||
st1 {v19.16b},[x1],#16
|
||||
b.hs Loop3x_ctr32
|
||||
|
||||
adds x2,x2,#3
|
||||
b.eq Lctr32_done
|
||||
cmp x2,#1
|
||||
mov x12,#16
|
||||
csel x12,xzr,x12,eq
|
||||
|
||||
Lctr32_tail:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt Lctr32_tail
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],x12
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v20.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v3.16b},[x0]
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v21.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v7.16b
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v22.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v7.16b
|
||||
aese v0.16b,v23.16b
|
||||
aese v1.16b,v23.16b
|
||||
|
||||
cmp x2,#1
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
eor v3.16b,v3.16b,v1.16b
|
||||
st1 {v2.16b},[x1],#16
|
||||
b.eq Lctr32_done
|
||||
st1 {v3.16b},[x1]
|
||||
|
||||
Lctr32_done:
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
1433
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/armv8-mont.S
Normal file
1433
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/armv8-mont.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,343 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl _gcm_init_neon
|
||||
.private_extern _gcm_init_neon
|
||||
|
||||
.align 4
|
||||
_gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
shl v19.2d, v19.2d, #57 // 0xc2.0
|
||||
ext v3.16b, v17.16b, v17.16b, #8
|
||||
ushr v18.2d, v19.2d, #63
|
||||
dup v17.4s, v17.s[1]
|
||||
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
|
||||
ushr v18.2d, v3.2d, #63
|
||||
sshr v17.4s, v17.4s, #31 // broadcast carry bit
|
||||
and v18.16b, v18.16b, v16.16b
|
||||
shl v3.2d, v3.2d, #1
|
||||
ext v18.16b, v18.16b, v18.16b, #8
|
||||
and v16.16b, v16.16b, v17.16b
|
||||
orr v3.16b, v3.16b, v18.16b // H<<<=1
|
||||
eor v5.16b, v3.16b, v16.16b // twisted H
|
||||
st1 {v5.2d}, [x0] // store Htable[0]
|
||||
ret
|
||||
|
||||
|
||||
.globl _gcm_gmult_neon
|
||||
.private_extern _gcm_gmult_neon
|
||||
|
||||
.align 4
|
||||
_gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, Lmasks@PAGE // load constants
|
||||
add x9, x9, Lmasks@PAGEOFF
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v3.16b, v3.16b // byteswap Xi
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
mov x3, #16
|
||||
b Lgmult_neon
|
||||
|
||||
|
||||
.globl _gcm_ghash_neon
|
||||
.private_extern _gcm_ghash_neon
|
||||
|
||||
.align 4
|
||||
_gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, Lmasks@PAGE // load constants
|
||||
add x9, x9, Lmasks@PAGEOFF
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v0.16b, v0.16b // byteswap Xi
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
Loop_neon:
|
||||
ld1 {v3.16b}, [x2], #16 // load inp
|
||||
rev64 v3.16b, v3.16b // byteswap inp
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
|
||||
|
||||
Lgmult_neon:
|
||||
// Split the input into v3 and v4. (The upper halves are unused,
|
||||
// so it is okay to leave them alone.)
|
||||
ins v4.d[0], v3.d[1]
|
||||
ext v16.8b, v5.8b, v5.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v0.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v0.8h, v5.8b, v0.8b // E = A*B1
|
||||
ext v17.8b, v5.8b, v5.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v5.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v5.8b, v5.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v0.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v0.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v0.8h, v5.8b, v0.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v0.16b // N = I + J
|
||||
pmull v19.8h, v5.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v0.8h, v5.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v0.16b, v0.16b, v16.16b
|
||||
eor v0.16b, v0.16b, v18.16b
|
||||
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
|
||||
ext v16.8b, v7.8b, v7.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v1.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v1.8h, v7.8b, v1.8b // E = A*B1
|
||||
ext v17.8b, v7.8b, v7.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v7.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v7.8b, v7.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v1.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v1.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v1.8h, v7.8b, v1.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v1.16b // N = I + J
|
||||
pmull v19.8h, v7.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v1.8h, v7.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v1.16b, v1.16b, v16.16b
|
||||
eor v1.16b, v1.16b, v18.16b
|
||||
ext v16.8b, v6.8b, v6.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v4.8b // F = A1*B
|
||||
ext v2.8b, v4.8b, v4.8b, #1 // B1
|
||||
pmull v2.8h, v6.8b, v2.8b // E = A*B1
|
||||
ext v17.8b, v6.8b, v6.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v4.8b // H = A2*B
|
||||
ext v19.8b, v4.8b, v4.8b, #2 // B2
|
||||
pmull v19.8h, v6.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v6.8b, v6.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v2.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v4.8b // J = A3*B
|
||||
ext v2.8b, v4.8b, v4.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v2.8h, v6.8b, v2.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v4.8b, v4.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v2.16b // N = I + J
|
||||
pmull v19.8h, v6.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v2.8h, v6.8b, v4.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v2.16b, v2.16b, v16.16b
|
||||
eor v2.16b, v2.16b, v18.16b
|
||||
ext v16.16b, v0.16b, v2.16b, #8
|
||||
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
|
||||
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
|
||||
// This is a no-op due to the ins instruction below.
|
||||
// ins v2.d[0], v1.d[1]
|
||||
|
||||
// equivalent of reduction_avx from ghash-x86_64.pl
|
||||
shl v17.2d, v0.2d, #57 // 1st phase
|
||||
shl v18.2d, v0.2d, #62
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
shl v17.2d, v0.2d, #63
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
// Note Xm contains {Xl.d[1], Xh.d[0]}.
|
||||
eor v18.16b, v18.16b, v1.16b
|
||||
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
|
||||
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
|
||||
|
||||
ushr v18.2d, v0.2d, #1 // 2nd phase
|
||||
eor v2.16b, v2.16b,v0.16b
|
||||
eor v0.16b, v0.16b,v18.16b //
|
||||
ushr v18.2d, v18.2d, #6
|
||||
ushr v0.2d, v0.2d, #1 //
|
||||
eor v0.16b, v0.16b, v2.16b //
|
||||
eor v0.16b, v0.16b, v18.16b //
|
||||
|
||||
subs x3, x3, #16
|
||||
bne Loop_neon
|
||||
|
||||
rev64 v0.16b, v0.16b // byteswap Xi and write
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
st1 {v0.16b}, [x0]
|
||||
|
||||
ret
|
||||
|
||||
|
||||
.section __TEXT,__const
|
||||
.align 4
|
||||
Lmasks:
|
||||
.quad 0x0000ffffffffffff // k48
|
||||
.quad 0x00000000ffffffff // k32
|
||||
.quad 0x000000000000ffff // k16
|
||||
.quad 0x0000000000000000 // k0
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
@ -0,0 +1,249 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl _gcm_init_v8
|
||||
.private_extern _gcm_init_v8
|
||||
|
||||
.align 4
|
||||
_gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
ushr v18.2d,v19.2d,#63
|
||||
dup v17.4s,v17.s[1]
|
||||
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
|
||||
ushr v18.2d,v3.2d,#63
|
||||
sshr v17.4s,v17.4s,#31 //broadcast carry bit
|
||||
and v18.16b,v18.16b,v16.16b
|
||||
shl v3.2d,v3.2d,#1
|
||||
ext v18.16b,v18.16b,v18.16b,#8
|
||||
and v16.16b,v16.16b,v17.16b
|
||||
orr v3.16b,v3.16b,v18.16b //H<<<=1
|
||||
eor v20.16b,v3.16b,v16.16b //twisted H
|
||||
st1 {v20.2d},[x0],#16 //store Htable[0]
|
||||
|
||||
//calculate H^2
|
||||
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
|
||||
pmull v0.1q,v20.1d,v20.1d
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
pmull2 v2.1q,v20.2d,v20.2d
|
||||
pmull v1.1q,v16.1d,v16.1d
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v22.16b,v0.16b,v18.16b
|
||||
|
||||
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
|
||||
|
||||
ret
|
||||
|
||||
.globl _gcm_gmult_v8
|
||||
.private_extern _gcm_gmult_v8
|
||||
|
||||
.align 4
|
||||
_gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
shl v19.2d,v19.2d,#57
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
|
||||
.globl _gcm_ghash_v8
|
||||
.private_extern _gcm_ghash_v8
|
||||
|
||||
.align 4
|
||||
_gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
//to be rotated in order to
|
||||
//make it appear as in
|
||||
//algorithm specification
|
||||
subs x3,x3,#32 //see if x3 is 32 or larger
|
||||
mov x12,#16 //x12 is used as post-
|
||||
//increment for input pointer;
|
||||
//as loop is modulo-scheduled
|
||||
//x12 is zeroed just in time
|
||||
//to preclude overstepping
|
||||
//inp[len], which means that
|
||||
//last block[s] are actually
|
||||
//loaded twice, but last
|
||||
//copy is not processed
|
||||
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v22.2d},[x1]
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
|
||||
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
|
||||
b.lo Lodd_tail_v8 //x3 was less than 32
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
Loop_mod2x_v8:
|
||||
ext v18.16b,v3.16b,v3.16b,#8
|
||||
subs x3,x3,#32 //is there more data?
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
|
||||
csel x12,xzr,x12,lo //is it time to zero x12?
|
||||
|
||||
pmull v5.1q,v21.1d,v17.1d
|
||||
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
|
||||
eor v0.16b,v0.16b,v4.16b //accumulate
|
||||
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
|
||||
|
||||
eor v2.16b,v2.16b,v6.16b
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
eor v1.16b,v1.16b,v5.16b
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
#endif
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v3.16b,v3.16b,v18.16b
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
eor v3.16b,v3.16b,v0.16b
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b.hs Loop_mod2x_v8 //there was at least 32 more bytes
|
||||
|
||||
eor v2.16b,v2.16b,v18.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
|
||||
adds x3,x3,#32 //re-construct x3
|
||||
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
|
||||
b.eq Ldone_v8 //is x3 zero?
|
||||
Lodd_tail_v8:
|
||||
ext v18.16b,v0.16b,v0.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //inp^=Xi
|
||||
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
1236
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
Normal file
1236
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
1214
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
Normal file
1214
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
1084
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
Normal file
1084
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
1232
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
Normal file
1232
contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,758 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
// with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
// the result of |func|. The |unwind| argument is unused.
|
||||
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
// const uint64_t *argv, size_t argc,
|
||||
// uint64_t unwind);
|
||||
|
||||
.globl _abi_test_trampoline
|
||||
.private_extern _abi_test_trampoline
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
Labi_test_trampoline_begin:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
// Stack layout (low to high addresses)
|
||||
// x29,x30 (16 bytes)
|
||||
// d8-d15 (64 bytes)
|
||||
// x19-x28 (80 bytes)
|
||||
// x1 (8 bytes)
|
||||
// padding (8 bytes)
|
||||
stp x29, x30, [sp, #-176]!
|
||||
mov x29, sp
|
||||
|
||||
// Saved callee-saved registers and |state|.
|
||||
stp d8, d9, [sp, #16]
|
||||
stp d10, d11, [sp, #32]
|
||||
stp d12, d13, [sp, #48]
|
||||
stp d14, d15, [sp, #64]
|
||||
stp x19, x20, [sp, #80]
|
||||
stp x21, x22, [sp, #96]
|
||||
stp x23, x24, [sp, #112]
|
||||
stp x25, x26, [sp, #128]
|
||||
stp x27, x28, [sp, #144]
|
||||
str x1, [sp, #160]
|
||||
|
||||
// Load registers from |state|, with the exception of x29. x29 is the
|
||||
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
|
||||
// mandate that x29 always point to a frame. iOS64 does so, which means
|
||||
// we cannot fill x29 with entropy without violating ABI rules
|
||||
// ourselves. x29 is tested separately below.
|
||||
ldp d8, d9, [x1], #16
|
||||
ldp d10, d11, [x1], #16
|
||||
ldp d12, d13, [x1], #16
|
||||
ldp d14, d15, [x1], #16
|
||||
ldp x19, x20, [x1], #16
|
||||
ldp x21, x22, [x1], #16
|
||||
ldp x23, x24, [x1], #16
|
||||
ldp x25, x26, [x1], #16
|
||||
ldp x27, x28, [x1], #16
|
||||
|
||||
// Move parameters into temporary registers.
|
||||
mov x9, x0
|
||||
mov x10, x2
|
||||
mov x11, x3
|
||||
|
||||
// Load parameters into registers.
|
||||
cbz x11, Largs_done
|
||||
ldr x0, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x1, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x2, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x3, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x4, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x5, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x6, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq Largs_done
|
||||
ldr x7, [x10], #8
|
||||
|
||||
Largs_done:
|
||||
blr x9
|
||||
|
||||
// Reload |state| and store registers.
|
||||
ldr x1, [sp, #160]
|
||||
stp d8, d9, [x1], #16
|
||||
stp d10, d11, [x1], #16
|
||||
stp d12, d13, [x1], #16
|
||||
stp d14, d15, [x1], #16
|
||||
stp x19, x20, [x1], #16
|
||||
stp x21, x22, [x1], #16
|
||||
stp x23, x24, [x1], #16
|
||||
stp x25, x26, [x1], #16
|
||||
stp x27, x28, [x1], #16
|
||||
|
||||
// |func| is required to preserve x29, the frame pointer. We cannot load
|
||||
// random values into x29 (see comment above), so compare it against the
|
||||
// expected value and zero the field of |state| if corrupted.
|
||||
mov x9, sp
|
||||
cmp x29, x9
|
||||
b.eq Lx29_ok
|
||||
str xzr, [x1]
|
||||
|
||||
Lx29_ok:
|
||||
// Restore callee-saved registers.
|
||||
ldp d8, d9, [sp, #16]
|
||||
ldp d10, d11, [sp, #32]
|
||||
ldp d12, d13, [sp, #48]
|
||||
ldp d14, d15, [sp, #64]
|
||||
ldp x19, x20, [sp, #80]
|
||||
ldp x21, x22, [sp, #96]
|
||||
ldp x23, x24, [sp, #112]
|
||||
ldp x25, x26, [sp, #128]
|
||||
ldp x27, x28, [sp, #144]
|
||||
|
||||
ldp x29, x30, [sp], #176
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x0
|
||||
.private_extern _abi_test_clobber_x0
|
||||
.align 4
|
||||
_abi_test_clobber_x0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x0, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x1
|
||||
.private_extern _abi_test_clobber_x1
|
||||
.align 4
|
||||
_abi_test_clobber_x1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x1, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x2
|
||||
.private_extern _abi_test_clobber_x2
|
||||
.align 4
|
||||
_abi_test_clobber_x2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x2, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x3
|
||||
.private_extern _abi_test_clobber_x3
|
||||
.align 4
|
||||
_abi_test_clobber_x3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x3, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x4
|
||||
.private_extern _abi_test_clobber_x4
|
||||
.align 4
|
||||
_abi_test_clobber_x4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x4, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x5
|
||||
.private_extern _abi_test_clobber_x5
|
||||
.align 4
|
||||
_abi_test_clobber_x5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x5, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x6
|
||||
.private_extern _abi_test_clobber_x6
|
||||
.align 4
|
||||
_abi_test_clobber_x6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x6, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x7
|
||||
.private_extern _abi_test_clobber_x7
|
||||
.align 4
|
||||
_abi_test_clobber_x7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x7, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x8
|
||||
.private_extern _abi_test_clobber_x8
|
||||
.align 4
|
||||
_abi_test_clobber_x8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x8, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x9
|
||||
.private_extern _abi_test_clobber_x9
|
||||
.align 4
|
||||
_abi_test_clobber_x9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x9, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x10
|
||||
.private_extern _abi_test_clobber_x10
|
||||
.align 4
|
||||
_abi_test_clobber_x10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x10, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x11
|
||||
.private_extern _abi_test_clobber_x11
|
||||
.align 4
|
||||
_abi_test_clobber_x11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x11, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x12
|
||||
.private_extern _abi_test_clobber_x12
|
||||
.align 4
|
||||
_abi_test_clobber_x12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x12, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x13
|
||||
.private_extern _abi_test_clobber_x13
|
||||
.align 4
|
||||
_abi_test_clobber_x13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x13, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x14
|
||||
.private_extern _abi_test_clobber_x14
|
||||
.align 4
|
||||
_abi_test_clobber_x14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x14, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x15
|
||||
.private_extern _abi_test_clobber_x15
|
||||
.align 4
|
||||
_abi_test_clobber_x15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x15, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x16
|
||||
.private_extern _abi_test_clobber_x16
|
||||
.align 4
|
||||
_abi_test_clobber_x16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x16, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x17
|
||||
.private_extern _abi_test_clobber_x17
|
||||
.align 4
|
||||
_abi_test_clobber_x17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x17, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x19
|
||||
.private_extern _abi_test_clobber_x19
|
||||
.align 4
|
||||
_abi_test_clobber_x19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x19, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x20
|
||||
.private_extern _abi_test_clobber_x20
|
||||
.align 4
|
||||
_abi_test_clobber_x20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x20, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x21
|
||||
.private_extern _abi_test_clobber_x21
|
||||
.align 4
|
||||
_abi_test_clobber_x21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x21, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x22
|
||||
.private_extern _abi_test_clobber_x22
|
||||
.align 4
|
||||
_abi_test_clobber_x22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x22, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x23
|
||||
.private_extern _abi_test_clobber_x23
|
||||
.align 4
|
||||
_abi_test_clobber_x23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x23, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x24
|
||||
.private_extern _abi_test_clobber_x24
|
||||
.align 4
|
||||
_abi_test_clobber_x24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x24, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x25
|
||||
.private_extern _abi_test_clobber_x25
|
||||
.align 4
|
||||
_abi_test_clobber_x25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x25, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x26
|
||||
.private_extern _abi_test_clobber_x26
|
||||
.align 4
|
||||
_abi_test_clobber_x26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x26, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x27
|
||||
.private_extern _abi_test_clobber_x27
|
||||
.align 4
|
||||
_abi_test_clobber_x27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x27, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x28
|
||||
.private_extern _abi_test_clobber_x28
|
||||
.align 4
|
||||
_abi_test_clobber_x28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x28, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_x29
|
||||
.private_extern _abi_test_clobber_x29
|
||||
.align 4
|
||||
_abi_test_clobber_x29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x29, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d0
|
||||
.private_extern _abi_test_clobber_d0
|
||||
.align 4
|
||||
_abi_test_clobber_d0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d0, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d1
|
||||
.private_extern _abi_test_clobber_d1
|
||||
.align 4
|
||||
_abi_test_clobber_d1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d1, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d2
|
||||
.private_extern _abi_test_clobber_d2
|
||||
.align 4
|
||||
_abi_test_clobber_d2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d2, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d3
|
||||
.private_extern _abi_test_clobber_d3
|
||||
.align 4
|
||||
_abi_test_clobber_d3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d3, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d4
|
||||
.private_extern _abi_test_clobber_d4
|
||||
.align 4
|
||||
_abi_test_clobber_d4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d4, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d5
|
||||
.private_extern _abi_test_clobber_d5
|
||||
.align 4
|
||||
_abi_test_clobber_d5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d5, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d6
|
||||
.private_extern _abi_test_clobber_d6
|
||||
.align 4
|
||||
_abi_test_clobber_d6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d6, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d7
|
||||
.private_extern _abi_test_clobber_d7
|
||||
.align 4
|
||||
_abi_test_clobber_d7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d7, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d8
|
||||
.private_extern _abi_test_clobber_d8
|
||||
.align 4
|
||||
_abi_test_clobber_d8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d8, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d9
|
||||
.private_extern _abi_test_clobber_d9
|
||||
.align 4
|
||||
_abi_test_clobber_d9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d9, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d10
|
||||
.private_extern _abi_test_clobber_d10
|
||||
.align 4
|
||||
_abi_test_clobber_d10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d10, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d11
|
||||
.private_extern _abi_test_clobber_d11
|
||||
.align 4
|
||||
_abi_test_clobber_d11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d11, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d12
|
||||
.private_extern _abi_test_clobber_d12
|
||||
.align 4
|
||||
_abi_test_clobber_d12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d12, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d13
|
||||
.private_extern _abi_test_clobber_d13
|
||||
.align 4
|
||||
_abi_test_clobber_d13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d13, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d14
|
||||
.private_extern _abi_test_clobber_d14
|
||||
.align 4
|
||||
_abi_test_clobber_d14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d14, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d15
|
||||
.private_extern _abi_test_clobber_d15
|
||||
.align 4
|
||||
_abi_test_clobber_d15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d15, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d16
|
||||
.private_extern _abi_test_clobber_d16
|
||||
.align 4
|
||||
_abi_test_clobber_d16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d16, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d17
|
||||
.private_extern _abi_test_clobber_d17
|
||||
.align 4
|
||||
_abi_test_clobber_d17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d17, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d18
|
||||
.private_extern _abi_test_clobber_d18
|
||||
.align 4
|
||||
_abi_test_clobber_d18:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d18, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d19
|
||||
.private_extern _abi_test_clobber_d19
|
||||
.align 4
|
||||
_abi_test_clobber_d19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d19, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d20
|
||||
.private_extern _abi_test_clobber_d20
|
||||
.align 4
|
||||
_abi_test_clobber_d20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d20, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d21
|
||||
.private_extern _abi_test_clobber_d21
|
||||
.align 4
|
||||
_abi_test_clobber_d21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d21, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d22
|
||||
.private_extern _abi_test_clobber_d22
|
||||
.align 4
|
||||
_abi_test_clobber_d22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d22, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d23
|
||||
.private_extern _abi_test_clobber_d23
|
||||
.align 4
|
||||
_abi_test_clobber_d23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d23, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d24
|
||||
.private_extern _abi_test_clobber_d24
|
||||
.align 4
|
||||
_abi_test_clobber_d24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d24, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d25
|
||||
.private_extern _abi_test_clobber_d25
|
||||
.align 4
|
||||
_abi_test_clobber_d25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d25, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d26
|
||||
.private_extern _abi_test_clobber_d26
|
||||
.align 4
|
||||
_abi_test_clobber_d26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d26, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d27
|
||||
.private_extern _abi_test_clobber_d27
|
||||
.align 4
|
||||
_abi_test_clobber_d27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d27, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d28
|
||||
.private_extern _abi_test_clobber_d28
|
||||
.align 4
|
||||
_abi_test_clobber_d28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d28, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d29
|
||||
.private_extern _abi_test_clobber_d29
|
||||
.align 4
|
||||
_abi_test_clobber_d29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d29, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d30
|
||||
.private_extern _abi_test_clobber_d30
|
||||
.align 4
|
||||
_abi_test_clobber_d30:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d30, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d31
|
||||
.private_extern _abi_test_clobber_d31
|
||||
.align 4
|
||||
_abi_test_clobber_d31:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d31, xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v8_upper
|
||||
.private_extern _abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v8_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v9_upper
|
||||
.private_extern _abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v9_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v10_upper
|
||||
.private_extern _abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v10_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v11_upper
|
||||
.private_extern _abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v11_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v12_upper
|
||||
.private_extern _abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v12_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v13_upper
|
||||
.private_extern _abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v13_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v14_upper
|
||||
.private_extern _abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v14_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
|
||||
|
||||
.globl _abi_test_clobber_v15_upper
|
||||
.private_extern _abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
_abi_test_clobber_v15_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
1498
contrib/boringssl-cmake/ios-arm/crypto/chacha/chacha-armv4.S
Normal file
1498
contrib/boringssl-cmake/ios-arm/crypto/chacha/chacha-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
790
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/aesv8-armx32.S
Normal file
790
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/aesv8-armx32.S
Normal file
@ -0,0 +1,790 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
|
||||
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
.align 5
|
||||
Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
.globl _aes_hw_set_encrypt_key
|
||||
.private_extern _aes_hw_set_encrypt_key
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_set_encrypt_key
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_set_encrypt_key:
|
||||
Lenc_key:
|
||||
mov r3,#-1
|
||||
cmp r0,#0
|
||||
beq Lenc_key_abort
|
||||
cmp r2,#0
|
||||
beq Lenc_key_abort
|
||||
mov r3,#-2
|
||||
cmp r1,#128
|
||||
blt Lenc_key_abort
|
||||
cmp r1,#256
|
||||
bgt Lenc_key_abort
|
||||
tst r1,#0x3f
|
||||
bne Lenc_key_abort
|
||||
|
||||
adr r3,Lrcon
|
||||
cmp r1,#192
|
||||
|
||||
veor q0,q0,q0
|
||||
vld1.8 {q3},[r0]!
|
||||
mov r1,#8 @ reuse r1
|
||||
vld1.32 {q1,q2},[r3]!
|
||||
|
||||
blt Loop128
|
||||
beq L192
|
||||
b L256
|
||||
|
||||
.align 4
|
||||
Loop128:
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
bne Loop128
|
||||
|
||||
vld1.32 {q1},[r3]
|
||||
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
veor q3,q3,q10
|
||||
vst1.32 {q3},[r2]
|
||||
add r2,r2,#0x50
|
||||
|
||||
mov r12,#10
|
||||
b Ldone
|
||||
|
||||
.align 4
|
||||
L192:
|
||||
vld1.8 {d16},[r0]!
|
||||
vmov.i8 q10,#8 @ borrow q10
|
||||
vst1.32 {q3},[r2]!
|
||||
vsub.i8 q2,q2,q10 @ adjust the mask
|
||||
|
||||
Loop192:
|
||||
vtbl.8 d20,{q8},d4
|
||||
vtbl.8 d21,{q8},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {d16},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
|
||||
vdup.32 q9,d7[1]
|
||||
veor q9,q9,q8
|
||||
veor q10,q10,q1
|
||||
vext.8 q8,q0,q8,#12
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q8,q8,q9
|
||||
veor q3,q3,q10
|
||||
veor q8,q8,q10
|
||||
vst1.32 {q3},[r2]!
|
||||
bne Loop192
|
||||
|
||||
mov r12,#12
|
||||
add r2,r2,#0x20
|
||||
b Ldone
|
||||
|
||||
.align 4
|
||||
L256:
|
||||
vld1.8 {q8},[r0]
|
||||
mov r1,#7
|
||||
mov r12,#14
|
||||
vst1.32 {q3},[r2]!
|
||||
|
||||
Loop256:
|
||||
vtbl.8 d20,{q8},d4
|
||||
vtbl.8 d21,{q8},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q8},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
vst1.32 {q3},[r2]!
|
||||
beq Ldone
|
||||
|
||||
vdup.32 q10,d7[1]
|
||||
vext.8 q9,q0,q8,#12
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q8,q8,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q8,q8,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q8,q8,q9
|
||||
|
||||
veor q8,q8,q10
|
||||
b Loop256
|
||||
|
||||
Ldone:
|
||||
str r12,[r2]
|
||||
mov r3,#0
|
||||
|
||||
Lenc_key_abort:
|
||||
mov r0,r3 @ return value
|
||||
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _aes_hw_set_decrypt_key
|
||||
.private_extern _aes_hw_set_decrypt_key
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_set_decrypt_key
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_set_decrypt_key:
|
||||
stmdb sp!,{r4,lr}
|
||||
bl Lenc_key
|
||||
|
||||
cmp r0,#0
|
||||
bne Ldec_key_abort
|
||||
|
||||
sub r2,r2,#240 @ restore original r2
|
||||
mov r4,#-16
|
||||
add r0,r2,r12,lsl#4 @ end of key schedule
|
||||
|
||||
vld1.32 {q0},[r2]
|
||||
vld1.32 {q1},[r0]
|
||||
vst1.32 {q0},[r0],r4
|
||||
vst1.32 {q1},[r2]!
|
||||
|
||||
Loop_imc:
|
||||
vld1.32 {q0},[r2]
|
||||
vld1.32 {q1},[r0]
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
vst1.32 {q0},[r0],r4
|
||||
vst1.32 {q1},[r2]!
|
||||
cmp r0,r2
|
||||
bhi Loop_imc
|
||||
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
vst1.32 {q0},[r0]
|
||||
|
||||
eor r0,r0,r0 @ return value
|
||||
Ldec_key_abort:
|
||||
ldmia sp!,{r4,pc}
|
||||
|
||||
.globl _aes_hw_encrypt
|
||||
.private_extern _aes_hw_encrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_encrypt
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_encrypt:
|
||||
ldr r3,[r2,#240]
|
||||
vld1.32 {q0},[r2]!
|
||||
vld1.8 {q2},[r0]
|
||||
sub r3,r3,#2
|
||||
vld1.32 {q1},[r2]!
|
||||
|
||||
Loop_enc:
|
||||
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q0},[r2]!
|
||||
subs r3,r3,#2
|
||||
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q1},[r2]!
|
||||
bgt Loop_enc
|
||||
|
||||
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
|
||||
veor q2,q2,q0
|
||||
|
||||
vst1.8 {q2},[r1]
|
||||
bx lr
|
||||
|
||||
.globl _aes_hw_decrypt
|
||||
.private_extern _aes_hw_decrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_decrypt
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_decrypt:
|
||||
ldr r3,[r2,#240]
|
||||
vld1.32 {q0},[r2]!
|
||||
vld1.8 {q2},[r0]
|
||||
sub r3,r3,#2
|
||||
vld1.32 {q1},[r2]!
|
||||
|
||||
Loop_dec:
|
||||
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q0},[r2]!
|
||||
subs r3,r3,#2
|
||||
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q1},[r2]!
|
||||
bgt Loop_dec
|
||||
|
||||
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
|
||||
veor q2,q2,q0
|
||||
|
||||
vst1.8 {q2},[r1]
|
||||
bx lr
|
||||
|
||||
.globl _aes_hw_cbc_encrypt
|
||||
.private_extern _aes_hw_cbc_encrypt
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_cbc_encrypt
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_cbc_encrypt:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,lr}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load remaining args
|
||||
subs r2,r2,#16
|
||||
mov r8,#16
|
||||
blo Lcbc_abort
|
||||
moveq r8,#0
|
||||
|
||||
cmp r5,#0 @ en- or decrypting?
|
||||
ldr r5,[r3,#240]
|
||||
and r2,r2,#-16
|
||||
vld1.8 {q6},[r4]
|
||||
vld1.8 {q0},[r0],r8
|
||||
|
||||
vld1.32 {q8,q9},[r3] @ load key schedule...
|
||||
sub r5,r5,#6
|
||||
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
|
||||
sub r5,r5,#2
|
||||
vld1.32 {q10,q11},[r7]!
|
||||
vld1.32 {q12,q13},[r7]!
|
||||
vld1.32 {q14,q15},[r7]!
|
||||
vld1.32 {q7},[r7]
|
||||
|
||||
add r7,r3,#32
|
||||
mov r6,r5
|
||||
beq Lcbc_dec
|
||||
|
||||
cmp r5,#2
|
||||
veor q0,q0,q6
|
||||
veor q5,q8,q7
|
||||
beq Lcbc_enc128
|
||||
|
||||
vld1.32 {q2,q3},[r7]
|
||||
add r7,r3,#16
|
||||
add r6,r3,#16*4
|
||||
add r12,r3,#16*5
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
add r14,r3,#16*6
|
||||
add r3,r3,#16*7
|
||||
b Lenter_cbc_enc
|
||||
|
||||
.align 4
|
||||
Loop_cbc_enc:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vst1.8 {q6},[r1]!
|
||||
Lenter_cbc_enc:
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q8},[r6]
|
||||
cmp r5,#4
|
||||
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r12]
|
||||
beq Lcbc_enc192
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q8},[r14]
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r3]
|
||||
nop
|
||||
|
||||
Lcbc_enc192:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
subs r2,r2,#16
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
moveq r8,#0
|
||||
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.8 {q8},[r0],r8
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
veor q8,q8,q5
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
veor q6,q0,q7
|
||||
bhs Loop_cbc_enc
|
||||
|
||||
vst1.8 {q6},[r1]!
|
||||
b Lcbc_done
|
||||
|
||||
.align 5
|
||||
Lcbc_enc128:
|
||||
vld1.32 {q2,q3},[r7]
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
b Lenter_cbc_enc128
|
||||
Loop_cbc_enc128:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vst1.8 {q6},[r1]!
|
||||
Lenter_cbc_enc128:
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
subs r2,r2,#16
|
||||
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
moveq r8,#0
|
||||
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.8 {q8},[r0],r8
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
veor q8,q8,q5
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
veor q6,q0,q7
|
||||
bhs Loop_cbc_enc128
|
||||
|
||||
vst1.8 {q6},[r1]!
|
||||
b Lcbc_done
|
||||
.align 5
|
||||
Lcbc_dec:
|
||||
vld1.8 {q10},[r0]!
|
||||
subs r2,r2,#32 @ bias
|
||||
add r6,r5,#2
|
||||
vorr q3,q0,q0
|
||||
vorr q1,q0,q0
|
||||
vorr q11,q10,q10
|
||||
blo Lcbc_dec_tail
|
||||
|
||||
vorr q1,q10,q10
|
||||
vld1.8 {q10},[r0]!
|
||||
vorr q2,q0,q0
|
||||
vorr q3,q1,q1
|
||||
vorr q11,q10,q10
|
||||
|
||||
Loop3x_cbc_dec:
|
||||
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Loop3x_cbc_dec
|
||||
|
||||
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q4,q6,q7
|
||||
subs r2,r2,#0x30
|
||||
veor q5,q2,q7
|
||||
movlo r6,r2 @ r6, r6, is zero at this point
|
||||
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q9,q3,q7
|
||||
add r0,r0,r6 @ r0 is adjusted in such way that
|
||||
@ at exit from the loop q1-q10
|
||||
@ are loaded with last "words"
|
||||
vorr q6,q11,q11
|
||||
mov r7,r3
|
||||
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q2},[r0]!
|
||||
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q3},[r0]!
|
||||
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q11},[r0]!
|
||||
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
|
||||
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
|
||||
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
|
||||
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
|
||||
add r6,r5,#2
|
||||
veor q4,q4,q0
|
||||
veor q5,q5,q1
|
||||
veor q10,q10,q9
|
||||
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
|
||||
vst1.8 {q4},[r1]!
|
||||
vorr q0,q2,q2
|
||||
vst1.8 {q5},[r1]!
|
||||
vorr q1,q3,q3
|
||||
vst1.8 {q10},[r1]!
|
||||
vorr q10,q11,q11
|
||||
bhs Loop3x_cbc_dec
|
||||
|
||||
cmn r2,#0x30
|
||||
beq Lcbc_done
|
||||
nop
|
||||
|
||||
Lcbc_dec_tail:
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Lcbc_dec_tail
|
||||
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
cmn r2,#0x20
|
||||
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q5,q6,q7
|
||||
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q9,q3,q7
|
||||
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
|
||||
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
|
||||
beq Lcbc_dec_one
|
||||
veor q5,q5,q1
|
||||
veor q9,q9,q10
|
||||
vorr q6,q11,q11
|
||||
vst1.8 {q5},[r1]!
|
||||
vst1.8 {q9},[r1]!
|
||||
b Lcbc_done
|
||||
|
||||
Lcbc_dec_one:
|
||||
veor q5,q5,q10
|
||||
vorr q6,q11,q11
|
||||
vst1.8 {q5},[r1]!
|
||||
|
||||
Lcbc_done:
|
||||
vst1.8 {q6},[r4]
|
||||
Lcbc_abort:
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,pc}
|
||||
|
||||
.globl _aes_hw_ctr32_encrypt_blocks
|
||||
.private_extern _aes_hw_ctr32_encrypt_blocks
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _aes_hw_ctr32_encrypt_blocks
|
||||
#endif
|
||||
.align 5
|
||||
_aes_hw_ctr32_encrypt_blocks:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldr r4, [ip] @ load remaining arg
|
||||
ldr r5,[r3,#240]
|
||||
|
||||
ldr r8, [r4, #12]
|
||||
vld1.32 {q0},[r4]
|
||||
|
||||
vld1.32 {q8,q9},[r3] @ load key schedule...
|
||||
sub r5,r5,#4
|
||||
mov r12,#16
|
||||
cmp r2,#2
|
||||
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
|
||||
sub r5,r5,#2
|
||||
vld1.32 {q12,q13},[r7]!
|
||||
vld1.32 {q14,q15},[r7]!
|
||||
vld1.32 {q7},[r7]
|
||||
add r7,r3,#32
|
||||
mov r6,r5
|
||||
movlo r12,#0
|
||||
#ifndef __ARMEB__
|
||||
rev r8, r8
|
||||
#endif
|
||||
vorr q1,q0,q0
|
||||
add r10, r8, #1
|
||||
vorr q10,q0,q0
|
||||
add r8, r8, #2
|
||||
vorr q6,q0,q0
|
||||
rev r10, r10
|
||||
vmov.32 d3[1],r10
|
||||
bls Lctr32_tail
|
||||
rev r12, r8
|
||||
sub r2,r2,#3 @ bias
|
||||
vmov.32 d21[1],r12
|
||||
b Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
Loop3x_ctr32:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Loop3x_ctr32
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
|
||||
vld1.8 {q2},[r0]!
|
||||
vorr q0,q6,q6
|
||||
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.8 {q3},[r0]!
|
||||
vorr q1,q6,q6
|
||||
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
vld1.8 {q11},[r0]!
|
||||
mov r7,r3
|
||||
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
|
||||
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
|
||||
vorr q10,q6,q6
|
||||
add r9,r8,#1
|
||||
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
veor q2,q2,q7
|
||||
add r10,r8,#2
|
||||
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
veor q3,q3,q7
|
||||
add r8,r8,#3
|
||||
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
veor q11,q11,q7
|
||||
rev r9,r9
|
||||
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
vmov.32 d1[1], r9
|
||||
rev r10,r10
|
||||
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
vmov.32 d3[1], r10
|
||||
rev r12,r8
|
||||
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
vmov.32 d21[1], r12
|
||||
subs r2,r2,#3
|
||||
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
|
||||
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
|
||||
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
|
||||
|
||||
veor q2,q2,q4
|
||||
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
|
||||
vst1.8 {q2},[r1]!
|
||||
veor q3,q3,q5
|
||||
mov r6,r5
|
||||
vst1.8 {q3},[r1]!
|
||||
veor q11,q11,q9
|
||||
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
|
||||
vst1.8 {q11},[r1]!
|
||||
bhs Loop3x_ctr32
|
||||
|
||||
adds r2,r2,#3
|
||||
beq Lctr32_done
|
||||
cmp r2,#1
|
||||
mov r12,#16
|
||||
moveq r12,#0
|
||||
|
||||
Lctr32_tail:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt Lctr32_tail
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.8 {q2},[r0],r12
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.8 {q3},[r0]
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
veor q2,q2,q7
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
veor q3,q3,q7
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
|
||||
|
||||
cmp r2,#1
|
||||
veor q2,q2,q0
|
||||
veor q3,q3,q1
|
||||
vst1.8 {q2},[r1]!
|
||||
beq Lctr32_done
|
||||
vst1.8 {q3},[r1]
|
||||
|
||||
Lctr32_done:
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
|
||||
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
982
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/armv4-mont.S
Normal file
982
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/armv4-mont.S
Normal file
@ -0,0 +1,982 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
|
||||
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__)
|
||||
.syntax unified
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.align 5
|
||||
LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-Lbn_mul_mont
|
||||
#endif
|
||||
|
||||
.globl _bn_mul_mont
|
||||
.private_extern _bn_mul_mont
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _bn_mul_mont
|
||||
#endif
|
||||
|
||||
.align 5
|
||||
_bn_mul_mont:
|
||||
Lbn_mul_mont:
|
||||
ldr ip,[sp,#4] @ load num
|
||||
stmdb sp!,{r0,r2} @ sp points at argument block
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
tst ip,#7
|
||||
bne Lialu
|
||||
adr r0,Lbn_mul_mont
|
||||
ldr r2,LOPENSSL_armcap
|
||||
ldr r0,[r0,r2]
|
||||
#ifdef __APPLE__
|
||||
ldr r0,[r0]
|
||||
#endif
|
||||
tst r0,#ARMV7_NEON @ NEON available?
|
||||
ldmia sp, {r0,r2}
|
||||
beq Lialu
|
||||
add sp,sp,#8
|
||||
b bn_mul8x_mont_neon
|
||||
.align 4
|
||||
Lialu:
|
||||
#endif
|
||||
cmp ip,#2
|
||||
mov r0,ip @ load num
|
||||
#ifdef __thumb2__
|
||||
ittt lt
|
||||
#endif
|
||||
movlt r0,#0
|
||||
addlt sp,sp,#2*4
|
||||
blt Labrt
|
||||
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
|
||||
|
||||
mov r0,r0,lsl#2 @ rescale r0 for byte count
|
||||
sub sp,sp,r0 @ alloca(4*num)
|
||||
sub sp,sp,#4 @ +extra dword
|
||||
sub r0,r0,#4 @ "num=num-1"
|
||||
add r4,r2,r0 @ &bp[num-1]
|
||||
|
||||
add r0,sp,r0 @ r0 to point at &tp[num-1]
|
||||
ldr r8,[r0,#14*4] @ &n0
|
||||
ldr r2,[r2] @ bp[0]
|
||||
ldr r5,[r1],#4 @ ap[0],ap++
|
||||
ldr r6,[r3],#4 @ np[0],np++
|
||||
ldr r8,[r8] @ *n0
|
||||
str r4,[r0,#15*4] @ save &bp[num]
|
||||
|
||||
umull r10,r11,r5,r2 @ ap[0]*bp[0]
|
||||
str r8,[r0,#14*4] @ save n0 value
|
||||
mul r8,r10,r8 @ "tp[0]"*n0
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
|
||||
mov r4,sp
|
||||
|
||||
L1st:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
mov r10,r11
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne L1st
|
||||
|
||||
adds r12,r12,r11
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
mov r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
mov r7,sp
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
Louter:
|
||||
sub r7,r0,r7 @ "original" r0-1 value
|
||||
sub r1,r1,r7 @ "rewind" ap to &ap[1]
|
||||
ldr r2,[r4,#4]! @ *(++bp)
|
||||
sub r3,r3,r7 @ "rewind" np to &np[1]
|
||||
ldr r5,[r1,#-4] @ ap[0]
|
||||
ldr r10,[sp] @ tp[0]
|
||||
ldr r6,[r3,#-4] @ np[0]
|
||||
ldr r7,[sp,#4] @ tp[1]
|
||||
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
|
||||
str r4,[r0,#13*4] @ save bp
|
||||
mul r8,r10,r8
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
|
||||
mov r4,sp
|
||||
|
||||
Linner:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
adds r10,r11,r7 @ +=tp[j]
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adc r11,r11,#0
|
||||
ldr r7,[r4,#8] @ tp[j+1]
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne Linner
|
||||
|
||||
adds r12,r12,r11
|
||||
mov r14,#0
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
adc r14,r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adds r12,r12,r7
|
||||
ldr r7,[r0,#15*4] @ restore &bp[num]
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
cmp r4,r7
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
movne r7,sp
|
||||
bne Louter
|
||||
|
||||
ldr r2,[r0,#12*4] @ pull rp
|
||||
mov r5,sp
|
||||
add r0,r0,#4 @ r0 to point at &tp[num]
|
||||
sub r5,r0,r5 @ "original" num value
|
||||
mov r4,sp @ "rewind" r4
|
||||
mov r1,r4 @ "borrow" r1
|
||||
sub r3,r3,r5 @ "rewind" r3 to &np[0]
|
||||
|
||||
subs r7,r7,r7 @ "clear" carry flag
|
||||
Lsub: ldr r7,[r4],#4
|
||||
ldr r6,[r3],#4
|
||||
sbcs r7,r7,r6 @ tp[j]-np[j]
|
||||
str r7,[r2],#4 @ rp[j]=
|
||||
teq r4,r0 @ preserve carry
|
||||
bne Lsub
|
||||
sbcs r14,r14,#0 @ upmost carry
|
||||
mov r4,sp @ "rewind" r4
|
||||
sub r2,r2,r5 @ "rewind" r2
|
||||
|
||||
Lcopy: ldr r7,[r4] @ conditional copy
|
||||
ldr r5,[r2]
|
||||
str sp,[r4],#4 @ zap tp
|
||||
#ifdef __thumb2__
|
||||
it cc
|
||||
#endif
|
||||
movcc r5,r7
|
||||
str r5,[r2],#4
|
||||
teq r4,r0 @ preserve carry
|
||||
bne Lcopy
|
||||
|
||||
mov sp,r0
|
||||
add sp,sp,#4 @ skip over tp[num+1]
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
|
||||
add sp,sp,#2*4 @ skip over {r0,r2}
|
||||
mov r0,#1
|
||||
Labrt:
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr @ bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
|
||||
|
||||
|
||||
#ifdef __thumb2__
|
||||
.thumb_func bn_mul8x_mont_neon
|
||||
#endif
|
||||
.align 5
|
||||
bn_mul8x_mont_neon:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load rest of parameter block
|
||||
mov ip,sp
|
||||
|
||||
cmp r5,#8
|
||||
bhi LNEON_8n
|
||||
|
||||
@ special case for r5==8, everything is in register bank...
|
||||
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
sub r7,sp,r5,lsl#4
|
||||
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
|
||||
and r7,r7,#-64
|
||||
vld1.32 {d30[0]}, [r4,:32]
|
||||
mov sp,r7 @ alloca
|
||||
vzip.16 d28,d8
|
||||
|
||||
vmull.u32 q6,d28,d0[0]
|
||||
vmull.u32 q7,d28,d0[1]
|
||||
vmull.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmull.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmull.u32 q10,d28,d2[0]
|
||||
vld1.32 {d4,d5,d6,d7}, [r3]!
|
||||
vmull.u32 q11,d28,d2[1]
|
||||
vmull.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmull.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
sub r9,r5,#1
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
b LNEON_outer8
|
||||
|
||||
.align 4
|
||||
LNEON_outer8:
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
vadd.u64 d12,d12,d10
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
subs r9,r9,#1
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
bne LNEON_outer8
|
||||
|
||||
vadd.u64 d12,d12,d10
|
||||
mov r7,sp
|
||||
vshr.u64 d10,d12,#16
|
||||
mov r8,r5
|
||||
vadd.u64 d13,d13,d10
|
||||
add r6,sp,#96
|
||||
vshr.u64 d10,d13,#16
|
||||
vzip.16 d12,d13
|
||||
|
||||
b LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
LNEON_8n:
|
||||
veor q6,q6,q6
|
||||
sub r7,sp,#128
|
||||
veor q7,q7,q7
|
||||
sub r7,r7,r5,lsl#4
|
||||
veor q8,q8,q8
|
||||
and r7,r7,#-64
|
||||
veor q9,q9,q9
|
||||
mov sp,r7 @ alloca
|
||||
veor q10,q10,q10
|
||||
add r7,r7,#256
|
||||
veor q11,q11,q11
|
||||
sub r8,r5,#8
|
||||
veor q12,q12,q12
|
||||
veor q13,q13,q13
|
||||
|
||||
LNEON_8n_init:
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
subs r8,r8,#8
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12,q13},[r7,:256]!
|
||||
bne LNEON_8n_init
|
||||
|
||||
add r6,sp,#256
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
add r10,sp,#8
|
||||
vld1.32 {d30[0]},[r4,:32]
|
||||
mov r9,r5
|
||||
b LNEON_8n_outer
|
||||
|
||||
.align 4
|
||||
LNEON_8n_outer:
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
add r7,sp,#128
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
vadd.u64 d29,d29,d12
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vadd.u64 d12,d12,d13
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vadd.u64 d14,d14,d12
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]!
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
vshl.i64 d29,d15,#16
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vadd.u64 d29,d29,d14
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vadd.u64 d14,d14,d15
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vadd.u64 d16,d16,d14
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]!
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
vshl.i64 d29,d17,#16
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vadd.u64 d29,d29,d16
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vadd.u64 d16,d16,d17
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vadd.u64 d18,d18,d16
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]!
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
vshl.i64 d29,d19,#16
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vadd.u64 d29,d29,d18
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vadd.u64 d18,d18,d19
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vadd.u64 d20,d20,d18
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]!
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
vshl.i64 d29,d21,#16
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vadd.u64 d29,d29,d20
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vadd.u64 d20,d20,d21
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vadd.u64 d22,d22,d20
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]!
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
vshl.i64 d29,d23,#16
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vadd.u64 d29,d29,d22
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vadd.u64 d22,d22,d23
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vadd.u64 d24,d24,d22
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]!
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
vshl.i64 d29,d25,#16
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vadd.u64 d29,d29,d24
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vadd.u64 d24,d24,d25
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vadd.u64 d26,d26,d24
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]!
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
vshl.i64 d29,d27,#16
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vadd.u64 d29,d29,d26
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vadd.u64 d26,d26,d27
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
vadd.u64 d12,d12,d26
|
||||
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
|
||||
add r10,sp,#8 @ rewind
|
||||
sub r8,r5,#8
|
||||
b LNEON_8n_inner
|
||||
|
||||
.align 4
|
||||
LNEON_8n_inner:
|
||||
subs r8,r8,#8
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vld1.64 {q13},[r6,:128]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vst1.64 {q6},[r7,:128]!
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vst1.64 {q7},[r7,:128]!
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vst1.64 {q8},[r7,:128]!
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vst1.64 {q9},[r7,:128]!
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vst1.64 {q10},[r7,:128]!
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vst1.64 {q11},[r7,:128]!
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vst1.64 {q12},[r7,:128]!
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
it eq
|
||||
subeq r1,r1,r5,lsl#2 @ rewind
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
add r10,sp,#8 @ rewind
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vst1.64 {q13},[r7,:128]!
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
|
||||
bne LNEON_8n_inner
|
||||
add r6,sp,#128
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
veor q2,q2,q2 @ d4-d5
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
veor q3,q3,q3 @ d6-d7
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12},[r7,:128]
|
||||
|
||||
subs r9,r9,#8
|
||||
vld1.64 {q6,q7},[r6,:256]!
|
||||
vld1.64 {q8,q9},[r6,:256]!
|
||||
vld1.64 {q10,q11},[r6,:256]!
|
||||
vld1.64 {q12,q13},[r6,:256]!
|
||||
|
||||
itt ne
|
||||
subne r3,r3,r5,lsl#2 @ rewind
|
||||
bne LNEON_8n_outer
|
||||
|
||||
add r7,sp,#128
|
||||
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
|
||||
vshr.u64 d10,d12,#16
|
||||
vst1.64 {q2,q3},[sp,:256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
mov r8,r5
|
||||
b LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
LNEON_tail:
|
||||
vadd.u64 d12,d12,d10
|
||||
vshr.u64 d10,d12,#16
|
||||
vld1.64 {q8,q9}, [r6, :256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vld1.64 {q10,q11}, [r6, :256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vld1.64 {q12,q13}, [r6, :256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
LNEON_tail_entry:
|
||||
vadd.u64 d14,d14,d10
|
||||
vst1.32 {d12[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d14,#16
|
||||
vadd.u64 d15,d15,d10
|
||||
vshr.u64 d10,d15,#16
|
||||
vzip.16 d14,d15
|
||||
vadd.u64 d16,d16,d10
|
||||
vst1.32 {d14[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d16,#16
|
||||
vadd.u64 d17,d17,d10
|
||||
vshr.u64 d10,d17,#16
|
||||
vzip.16 d16,d17
|
||||
vadd.u64 d18,d18,d10
|
||||
vst1.32 {d16[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d18,#16
|
||||
vadd.u64 d19,d19,d10
|
||||
vshr.u64 d10,d19,#16
|
||||
vzip.16 d18,d19
|
||||
vadd.u64 d20,d20,d10
|
||||
vst1.32 {d18[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d20,#16
|
||||
vadd.u64 d21,d21,d10
|
||||
vshr.u64 d10,d21,#16
|
||||
vzip.16 d20,d21
|
||||
vadd.u64 d22,d22,d10
|
||||
vst1.32 {d20[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d22,#16
|
||||
vadd.u64 d23,d23,d10
|
||||
vshr.u64 d10,d23,#16
|
||||
vzip.16 d22,d23
|
||||
vadd.u64 d24,d24,d10
|
||||
vst1.32 {d22[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d24,#16
|
||||
vadd.u64 d25,d25,d10
|
||||
vshr.u64 d10,d25,#16
|
||||
vzip.16 d24,d25
|
||||
vadd.u64 d26,d26,d10
|
||||
vst1.32 {d24[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d26,#16
|
||||
vadd.u64 d27,d27,d10
|
||||
vshr.u64 d10,d27,#16
|
||||
vzip.16 d26,d27
|
||||
vld1.64 {q6,q7}, [r6, :256]!
|
||||
subs r8,r8,#8
|
||||
vst1.32 {d26[0]}, [r7, :32]!
|
||||
bne LNEON_tail
|
||||
|
||||
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
|
||||
sub r3,r3,r5,lsl#2 @ rewind r3
|
||||
subs r1,sp,#0 @ clear carry flag
|
||||
add r2,sp,r5,lsl#2
|
||||
|
||||
LNEON_sub:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r3!, {r8,r9,r10,r11}
|
||||
sbcs r8, r4,r8
|
||||
sbcs r9, r5,r9
|
||||
sbcs r10,r6,r10
|
||||
sbcs r11,r7,r11
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne LNEON_sub
|
||||
|
||||
ldr r10, [r1] @ load top-most bit
|
||||
mov r11,sp
|
||||
veor q0,q0,q0
|
||||
sub r11,r2,r11 @ this is num*4
|
||||
veor q1,q1,q1
|
||||
mov r1,sp
|
||||
sub r0,r0,r11 @ rewind r0
|
||||
mov r3,r2 @ second 3/4th of frame
|
||||
sbcs r10,r10,#0 @ result is carry flag
|
||||
|
||||
LNEON_copy_n_zap:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
ldmia r1, {r4,r5,r6,r7}
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
sub r1,r1,#16
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r1,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne LNEON_copy_n_zap
|
||||
|
||||
mov sp,ip
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
bx lr @ bx lr
|
||||
|
||||
#endif
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.comm _OPENSSL_armcap_P,4
|
||||
.non_lazy_symbol_pointer
|
||||
OPENSSL_armcap_P:
|
||||
.indirect_symbol _OPENSSL_armcap_P
|
||||
.long 0
|
||||
.private_extern _OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
1536
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/bsaes-armv7.S
Normal file
1536
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/bsaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
258
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghash-armv4.S
Normal file
258
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghash-armv4.S
Normal file
@ -0,0 +1,258 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
|
||||
@ instructions are in aesv8-armx.pl.)
|
||||
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__) || defined(__clang__)
|
||||
.syntax unified
|
||||
#define ldrplb ldrbpl
|
||||
#define ldrneb ldrbne
|
||||
#endif
|
||||
#if defined(__thumb2__)
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
|
||||
|
||||
|
||||
.globl _gcm_init_neon
|
||||
.private_extern _gcm_init_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_init_neon
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_init_neon:
|
||||
vld1.64 d7,[r1]! @ load H
|
||||
vmov.i8 q8,#0xe1
|
||||
vld1.64 d6,[r1]
|
||||
vshl.i64 d17,#57
|
||||
vshr.u64 d16,#63 @ t0=0xc2....01
|
||||
vdup.8 q9,d7[7]
|
||||
vshr.u64 d26,d6,#63
|
||||
vshr.s8 q9,#7 @ broadcast carry bit
|
||||
vshl.i64 q3,q3,#1
|
||||
vand q8,q8,q9
|
||||
vorr d7,d26 @ H<<<=1
|
||||
veor q3,q3,q8 @ twisted H
|
||||
vstmia r0,{q3}
|
||||
|
||||
bx lr @ bx lr
|
||||
|
||||
|
||||
.globl _gcm_gmult_neon
|
||||
.private_extern _gcm_gmult_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_neon
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_gmult_neon:
|
||||
vld1.64 d7,[r0]! @ load Xi
|
||||
vld1.64 d6,[r0]!
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
mov r3,#16
|
||||
b Lgmult_neon
|
||||
|
||||
|
||||
.globl _gcm_ghash_neon
|
||||
.private_extern _gcm_ghash_neon
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_neon
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_ghash_neon:
|
||||
vld1.64 d1,[r0]! @ load Xi
|
||||
vld1.64 d0,[r0]!
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
|
||||
Loop_neon:
|
||||
vld1.64 d7,[r2]! @ load inp
|
||||
vld1.64 d6,[r2]!
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
veor q3,q0 @ inp^=Xi
|
||||
Lgmult_neon:
|
||||
vext.8 d16, d26, d26, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d0, d6, d6, #1 @ B1
|
||||
vmull.p8 q0, d26, d0 @ E = A*B1
|
||||
vext.8 d18, d26, d26, #2 @ A2
|
||||
vmull.p8 q9, d18, d6 @ H = A2*B
|
||||
vext.8 d22, d6, d6, #2 @ B2
|
||||
vmull.p8 q11, d26, d22 @ G = A*B2
|
||||
vext.8 d20, d26, d26, #3 @ A3
|
||||
veor q8, q8, q0 @ L = E + F
|
||||
vmull.p8 q10, d20, d6 @ J = A3*B
|
||||
vext.8 d0, d6, d6, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q0, d26, d0 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d6, d6, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d26, d22 @ K = A*B4
|
||||
veor q10, q10, q0 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q0, d26, d6 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q0, q0, q8
|
||||
veor q0, q0, q10
|
||||
veor d6,d6,d7 @ Karatsuba pre-processing
|
||||
vext.8 d16, d28, d28, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d2, d6, d6, #1 @ B1
|
||||
vmull.p8 q1, d28, d2 @ E = A*B1
|
||||
vext.8 d18, d28, d28, #2 @ A2
|
||||
vmull.p8 q9, d18, d6 @ H = A2*B
|
||||
vext.8 d22, d6, d6, #2 @ B2
|
||||
vmull.p8 q11, d28, d22 @ G = A*B2
|
||||
vext.8 d20, d28, d28, #3 @ A3
|
||||
veor q8, q8, q1 @ L = E + F
|
||||
vmull.p8 q10, d20, d6 @ J = A3*B
|
||||
vext.8 d2, d6, d6, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q1, d28, d2 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d6, d6, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d28, d22 @ K = A*B4
|
||||
veor q10, q10, q1 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q1, d28, d6 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q1, q1, q8
|
||||
veor q1, q1, q10
|
||||
vext.8 d16, d27, d27, #1 @ A1
|
||||
vmull.p8 q8, d16, d7 @ F = A1*B
|
||||
vext.8 d4, d7, d7, #1 @ B1
|
||||
vmull.p8 q2, d27, d4 @ E = A*B1
|
||||
vext.8 d18, d27, d27, #2 @ A2
|
||||
vmull.p8 q9, d18, d7 @ H = A2*B
|
||||
vext.8 d22, d7, d7, #2 @ B2
|
||||
vmull.p8 q11, d27, d22 @ G = A*B2
|
||||
vext.8 d20, d27, d27, #3 @ A3
|
||||
veor q8, q8, q2 @ L = E + F
|
||||
vmull.p8 q10, d20, d7 @ J = A3*B
|
||||
vext.8 d4, d7, d7, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q2, d27, d4 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d7, d7, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d27, d22 @ K = A*B4
|
||||
veor q10, q10, q2 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q2, d27, d7 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q2, q2, q8
|
||||
veor q2, q2, q10
|
||||
veor q1,q1,q0 @ Karatsuba post-processing
|
||||
veor q1,q1,q2
|
||||
veor d1,d1,d2
|
||||
veor d4,d4,d3 @ Xh|Xl - 256-bit result
|
||||
|
||||
@ equivalent of reduction_avx from ghash-x86_64.pl
|
||||
vshl.i64 q9,q0,#57 @ 1st phase
|
||||
vshl.i64 q10,q0,#62
|
||||
veor q10,q10,q9 @
|
||||
vshl.i64 q9,q0,#63
|
||||
veor q10, q10, q9 @
|
||||
veor d1,d1,d20 @
|
||||
veor d4,d4,d21
|
||||
|
||||
vshr.u64 q10,q0,#1 @ 2nd phase
|
||||
veor q2,q2,q0
|
||||
veor q0,q0,q10 @
|
||||
vshr.u64 q10,q10,#6
|
||||
vshr.u64 q0,q0,#1 @
|
||||
veor q0,q0,q2 @
|
||||
veor q0,q0,q10 @
|
||||
|
||||
subs r3,#16
|
||||
bne Loop_neon
|
||||
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
sub r0,#16
|
||||
vst1.64 d1,[r0]! @ write out Xi
|
||||
vst1.64 d0,[r0]
|
||||
|
||||
bx lr @ bx lr
|
||||
|
||||
#endif
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
@ -0,0 +1,256 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
.globl _gcm_init_v8
|
||||
.private_extern _gcm_init_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_init_v8
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_init_v8:
|
||||
vld1.64 {q9},[r1] @ load input H
|
||||
vmov.i8 q11,#0xe1
|
||||
vshl.i64 q11,q11,#57 @ 0xc2.0
|
||||
vext.8 q3,q9,q9,#8
|
||||
vshr.u64 q10,q11,#63
|
||||
vdup.32 q9,d18[1]
|
||||
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
|
||||
vshr.u64 q10,q3,#63
|
||||
vshr.s32 q9,q9,#31 @ broadcast carry bit
|
||||
vand q10,q10,q8
|
||||
vshl.i64 q3,q3,#1
|
||||
vext.8 q10,q10,q10,#8
|
||||
vand q8,q8,q9
|
||||
vorr q3,q3,q10 @ H<<<=1
|
||||
veor q12,q3,q8 @ twisted H
|
||||
vst1.64 {q12},[r0]! @ store Htable[0]
|
||||
|
||||
@ calculate H^2
|
||||
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
|
||||
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
|
||||
veor q8,q8,q12
|
||||
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
|
||||
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q14,q0,q10
|
||||
|
||||
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
|
||||
veor q9,q9,q14
|
||||
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
|
||||
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
|
||||
|
||||
bx lr
|
||||
|
||||
.globl _gcm_gmult_v8
|
||||
.private_extern _gcm_gmult_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_gmult_v8
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_gmult_v8:
|
||||
vld1.64 {q9},[r0] @ load Xi
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q12,q13},[r1] @ load twisted H, ...
|
||||
vshl.u64 q11,q11,#57
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vext.8 q3,q9,q9,#8
|
||||
|
||||
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
|
||||
veor q9,q9,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
|
||||
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q0,q0,q0,#8
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
bx lr
|
||||
|
||||
.globl _gcm_ghash_v8
|
||||
.private_extern _gcm_ghash_v8
|
||||
#ifdef __thumb2__
|
||||
.thumb_func _gcm_ghash_v8
|
||||
#endif
|
||||
.align 4
|
||||
_gcm_ghash_v8:
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
vld1.64 {q0},[r0] @ load [rotated] Xi
|
||||
@ "[rotated]" means that
|
||||
@ loaded value would have
|
||||
@ to be rotated in order to
|
||||
@ make it appear as in
|
||||
@ algorithm specification
|
||||
subs r3,r3,#32 @ see if r3 is 32 or larger
|
||||
mov r12,#16 @ r12 is used as post-
|
||||
@ increment for input pointer;
|
||||
@ as loop is modulo-scheduled
|
||||
@ r12 is zeroed just in time
|
||||
@ to preclude overstepping
|
||||
@ inp[len], which means that
|
||||
@ last block[s] are actually
|
||||
@ loaded twice, but last
|
||||
@ copy is not processed
|
||||
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q14},[r1]
|
||||
moveq r12,#0 @ is it time to zero r12?
|
||||
vext.8 q0,q0,q0,#8 @ rotate Xi
|
||||
vld1.64 {q8},[r2]! @ load [rotated] I[0]
|
||||
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q8,q8
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q3,q8,q8,#8 @ rotate I[0]
|
||||
blo Lodd_tail_v8 @ r3 was less than 32
|
||||
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vext.8 q7,q9,q9,#8
|
||||
veor q3,q3,q0 @ I[i]^=Xi
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
b Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
Loop_mod2x_v8:
|
||||
vext.8 q10,q3,q3,#8
|
||||
subs r3,r3,#32 @ is there more data?
|
||||
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
|
||||
movlo r12,#0 @ is it time to zero r12?
|
||||
|
||||
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
|
||||
veor q10,q10,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
|
||||
veor q0,q0,q4 @ accumulate
|
||||
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
|
||||
|
||||
veor q2,q2,q6
|
||||
moveq r12,#0 @ is it time to zero r12?
|
||||
veor q1,q1,q5
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q8,q8
|
||||
#endif
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
vext.8 q7,q9,q9,#8
|
||||
vext.8 q3,q8,q8,#8
|
||||
veor q0,q1,q10
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q3,q3,q2 @ accumulate q3 early
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q3,q3,q10
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
veor q3,q3,q0
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
bhs Loop_mod2x_v8 @ there was at least 32 more bytes
|
||||
|
||||
veor q2,q2,q10
|
||||
vext.8 q3,q8,q8,#8 @ re-construct q3
|
||||
adds r3,r3,#32 @ re-construct r3
|
||||
veor q0,q0,q2 @ re-construct q0
|
||||
beq Ldone_v8 @ is r3 zero?
|
||||
Lodd_tail_v8:
|
||||
vext.8 q10,q0,q0,#8
|
||||
veor q3,q3,q0 @ inp^=Xi
|
||||
veor q9,q8,q10 @ q9 is rotated inp^Xi
|
||||
|
||||
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
|
||||
veor q9,q9,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
|
||||
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q0,q0,q0,#8
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
bx lr
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM
|
1518
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha1-armv4-large.S
Normal file
1518
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha1-armv4-large.S
Normal file
File diff suppressed because it is too large
Load Diff
2846
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha256-armv4.S
Normal file
2846
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha256-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
1899
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha512-armv4.S
Normal file
1899
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha512-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
1265
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
1265
contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
376
contrib/boringssl-cmake/ios-arm/crypto/test/trampoline-armv4.S
Normal file
376
contrib/boringssl-cmake/ios-arm/crypto/test/trampoline-armv4.S
Normal file
@ -0,0 +1,376 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.syntax unified
|
||||
|
||||
|
||||
|
||||
|
||||
.text
|
||||
|
||||
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
@ the result of |func|. The |unwind| argument is unused.
|
||||
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
@ const uint32_t *argv, size_t argc,
|
||||
@ int unwind);
|
||||
|
||||
.globl _abi_test_trampoline
|
||||
.private_extern _abi_test_trampoline
|
||||
.align 4
|
||||
_abi_test_trampoline:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
|
||||
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
|
||||
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
|
||||
sub sp, sp, #28
|
||||
|
||||
@ Every register in AAPCS is either non-volatile or a parameter (except
|
||||
@ r9 on iOS), so this code, by the actual call, loses all its scratch
|
||||
@ registers. First fill in stack parameters while there are registers
|
||||
@ to spare.
|
||||
cmp r3, #4
|
||||
bls Lstack_args_done
|
||||
mov r4, sp @ r4 is the output pointer.
|
||||
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
|
||||
add r2, r2, #16 @ Skip four arguments.
|
||||
Lstack_args_loop:
|
||||
ldr r6, [r2], #4
|
||||
cmp r2, r5
|
||||
str r6, [r4], #4
|
||||
bne Lstack_args_loop
|
||||
|
||||
Lstack_args_done:
|
||||
@ Load registers from |r1|.
|
||||
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Load register parameters. This uses up our remaining registers, so we
|
||||
@ repurpose lr as scratch space.
|
||||
ldr r3, [sp, #40] @ Reload argc.
|
||||
ldr lr, [sp, #36] @ Load argv into lr.
|
||||
cmp r3, #3
|
||||
bhi Larg_r3
|
||||
beq Larg_r2
|
||||
cmp r3, #1
|
||||
bhi Larg_r1
|
||||
beq Larg_r0
|
||||
b Largs_done
|
||||
|
||||
Larg_r3:
|
||||
ldr r3, [lr, #12] @ argv[3]
|
||||
Larg_r2:
|
||||
ldr r2, [lr, #8] @ argv[2]
|
||||
Larg_r1:
|
||||
ldr r1, [lr, #4] @ argv[1]
|
||||
Larg_r0:
|
||||
ldr r0, [lr] @ argv[0]
|
||||
Largs_done:
|
||||
|
||||
@ With every other register in use, load the function pointer into lr
|
||||
@ and call the function.
|
||||
ldr lr, [sp, #28]
|
||||
blx lr
|
||||
|
||||
@ r1-r3 are free for use again. The trampoline only supports
|
||||
@ single-return functions. Pass r4-r11 to the caller.
|
||||
ldr r1, [sp, #32]
|
||||
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Unwind the stack and restore registers.
|
||||
add sp, sp, #44 @ 44 = 28+16
|
||||
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r0
|
||||
.private_extern _abi_test_clobber_r0
|
||||
.align 4
|
||||
_abi_test_clobber_r0:
|
||||
mov r0, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r1
|
||||
.private_extern _abi_test_clobber_r1
|
||||
.align 4
|
||||
_abi_test_clobber_r1:
|
||||
mov r1, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r2
|
||||
.private_extern _abi_test_clobber_r2
|
||||
.align 4
|
||||
_abi_test_clobber_r2:
|
||||
mov r2, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r3
|
||||
.private_extern _abi_test_clobber_r3
|
||||
.align 4
|
||||
_abi_test_clobber_r3:
|
||||
mov r3, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r4
|
||||
.private_extern _abi_test_clobber_r4
|
||||
.align 4
|
||||
_abi_test_clobber_r4:
|
||||
mov r4, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r5
|
||||
.private_extern _abi_test_clobber_r5
|
||||
.align 4
|
||||
_abi_test_clobber_r5:
|
||||
mov r5, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r6
|
||||
.private_extern _abi_test_clobber_r6
|
||||
.align 4
|
||||
_abi_test_clobber_r6:
|
||||
mov r6, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r7
|
||||
.private_extern _abi_test_clobber_r7
|
||||
.align 4
|
||||
_abi_test_clobber_r7:
|
||||
mov r7, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r8
|
||||
.private_extern _abi_test_clobber_r8
|
||||
.align 4
|
||||
_abi_test_clobber_r8:
|
||||
mov r8, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r9
|
||||
.private_extern _abi_test_clobber_r9
|
||||
.align 4
|
||||
_abi_test_clobber_r9:
|
||||
mov r9, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r10
|
||||
.private_extern _abi_test_clobber_r10
|
||||
.align 4
|
||||
_abi_test_clobber_r10:
|
||||
mov r10, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r11
|
||||
.private_extern _abi_test_clobber_r11
|
||||
.align 4
|
||||
_abi_test_clobber_r11:
|
||||
mov r11, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_r12
|
||||
.private_extern _abi_test_clobber_r12
|
||||
.align 4
|
||||
_abi_test_clobber_r12:
|
||||
mov r12, #0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d0
|
||||
.private_extern _abi_test_clobber_d0
|
||||
.align 4
|
||||
_abi_test_clobber_d0:
|
||||
mov r0, #0
|
||||
vmov s0, r0
|
||||
vmov s1, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d1
|
||||
.private_extern _abi_test_clobber_d1
|
||||
.align 4
|
||||
_abi_test_clobber_d1:
|
||||
mov r0, #0
|
||||
vmov s2, r0
|
||||
vmov s3, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d2
|
||||
.private_extern _abi_test_clobber_d2
|
||||
.align 4
|
||||
_abi_test_clobber_d2:
|
||||
mov r0, #0
|
||||
vmov s4, r0
|
||||
vmov s5, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d3
|
||||
.private_extern _abi_test_clobber_d3
|
||||
.align 4
|
||||
_abi_test_clobber_d3:
|
||||
mov r0, #0
|
||||
vmov s6, r0
|
||||
vmov s7, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d4
|
||||
.private_extern _abi_test_clobber_d4
|
||||
.align 4
|
||||
_abi_test_clobber_d4:
|
||||
mov r0, #0
|
||||
vmov s8, r0
|
||||
vmov s9, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d5
|
||||
.private_extern _abi_test_clobber_d5
|
||||
.align 4
|
||||
_abi_test_clobber_d5:
|
||||
mov r0, #0
|
||||
vmov s10, r0
|
||||
vmov s11, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d6
|
||||
.private_extern _abi_test_clobber_d6
|
||||
.align 4
|
||||
_abi_test_clobber_d6:
|
||||
mov r0, #0
|
||||
vmov s12, r0
|
||||
vmov s13, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d7
|
||||
.private_extern _abi_test_clobber_d7
|
||||
.align 4
|
||||
_abi_test_clobber_d7:
|
||||
mov r0, #0
|
||||
vmov s14, r0
|
||||
vmov s15, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d8
|
||||
.private_extern _abi_test_clobber_d8
|
||||
.align 4
|
||||
_abi_test_clobber_d8:
|
||||
mov r0, #0
|
||||
vmov s16, r0
|
||||
vmov s17, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d9
|
||||
.private_extern _abi_test_clobber_d9
|
||||
.align 4
|
||||
_abi_test_clobber_d9:
|
||||
mov r0, #0
|
||||
vmov s18, r0
|
||||
vmov s19, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d10
|
||||
.private_extern _abi_test_clobber_d10
|
||||
.align 4
|
||||
_abi_test_clobber_d10:
|
||||
mov r0, #0
|
||||
vmov s20, r0
|
||||
vmov s21, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d11
|
||||
.private_extern _abi_test_clobber_d11
|
||||
.align 4
|
||||
_abi_test_clobber_d11:
|
||||
mov r0, #0
|
||||
vmov s22, r0
|
||||
vmov s23, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d12
|
||||
.private_extern _abi_test_clobber_d12
|
||||
.align 4
|
||||
_abi_test_clobber_d12:
|
||||
mov r0, #0
|
||||
vmov s24, r0
|
||||
vmov s25, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d13
|
||||
.private_extern _abi_test_clobber_d13
|
||||
.align 4
|
||||
_abi_test_clobber_d13:
|
||||
mov r0, #0
|
||||
vmov s26, r0
|
||||
vmov s27, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d14
|
||||
.private_extern _abi_test_clobber_d14
|
||||
.align 4
|
||||
_abi_test_clobber_d14:
|
||||
mov r0, #0
|
||||
vmov s28, r0
|
||||
vmov s29, r0
|
||||
bx lr
|
||||
|
||||
|
||||
.globl _abi_test_clobber_d15
|
||||
.private_extern _abi_test_clobber_d15
|
||||
.align 4
|
||||
_abi_test_clobber_d15:
|
||||
mov r0, #0
|
||||
vmov s30, r0
|
||||
vmov s31, r0
|
||||
bx lr
|
||||
|
||||
#endif // !OPENSSL_NO_ASM
|
1994
contrib/boringssl-cmake/linux-aarch64/crypto/chacha/chacha-armv8.S
Normal file
1994
contrib/boringssl-cmake/linux-aarch64/crypto/chacha/chacha-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,785 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
.section .rodata
|
||||
.align 5
|
||||
.Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
.globl aes_hw_set_encrypt_key
|
||||
.hidden aes_hw_set_encrypt_key
|
||||
.type aes_hw_set_encrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_encrypt_key:
|
||||
.Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
mov x3,#-1
|
||||
cmp x0,#0
|
||||
b.eq .Lenc_key_abort
|
||||
cmp x2,#0
|
||||
b.eq .Lenc_key_abort
|
||||
mov x3,#-2
|
||||
cmp w1,#128
|
||||
b.lt .Lenc_key_abort
|
||||
cmp w1,#256
|
||||
b.gt .Lenc_key_abort
|
||||
tst w1,#0x3f
|
||||
b.ne .Lenc_key_abort
|
||||
|
||||
adrp x3,.Lrcon
|
||||
add x3,x3,:lo12:.Lrcon
|
||||
cmp w1,#192
|
||||
|
||||
eor v0.16b,v0.16b,v0.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
mov w1,#8 // reuse w1
|
||||
ld1 {v1.4s,v2.4s},[x3],#32
|
||||
|
||||
b.lt .Loop128
|
||||
b.eq .L192
|
||||
b .L256
|
||||
|
||||
.align 4
|
||||
.Loop128:
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
b.ne .Loop128
|
||||
|
||||
ld1 {v1.4s},[x3]
|
||||
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
|
||||
tbl v6.16b,{v3.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v3.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
st1 {v3.4s},[x2]
|
||||
add x2,x2,#0x50
|
||||
|
||||
mov w12,#10
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
.L192:
|
||||
ld1 {v4.8b},[x0],#8
|
||||
movi v6.16b,#8 // borrow v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
sub v2.16b,v2.16b,v6.16b // adjust the mask
|
||||
|
||||
.Loop192:
|
||||
tbl v6.16b,{v4.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v4.8b},[x2],#8
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
|
||||
dup v5.4s,v3.s[3]
|
||||
eor v5.16b,v5.16b,v4.16b
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
ext v4.16b,v0.16b,v4.16b,#12
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
eor v4.16b,v4.16b,v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
b.ne .Loop192
|
||||
|
||||
mov w12,#12
|
||||
add x2,x2,#0x20
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
.L256:
|
||||
ld1 {v4.16b},[x0]
|
||||
mov w1,#7
|
||||
mov w12,#14
|
||||
st1 {v3.4s},[x2],#16
|
||||
|
||||
.Loop256:
|
||||
tbl v6.16b,{v4.16b},v2.16b
|
||||
ext v5.16b,v0.16b,v3.16b,#12
|
||||
st1 {v4.4s},[x2],#16
|
||||
aese v6.16b,v0.16b
|
||||
subs w1,w1,#1
|
||||
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v6.16b,v6.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
shl v1.16b,v1.16b,#1
|
||||
eor v3.16b,v3.16b,v6.16b
|
||||
st1 {v3.4s},[x2],#16
|
||||
b.eq .Ldone
|
||||
|
||||
dup v6.4s,v3.s[3] // just splat
|
||||
ext v5.16b,v0.16b,v4.16b,#12
|
||||
aese v6.16b,v0.16b
|
||||
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
ext v5.16b,v0.16b,v5.16b,#12
|
||||
eor v4.16b,v4.16b,v5.16b
|
||||
|
||||
eor v4.16b,v4.16b,v6.16b
|
||||
b .Loop256
|
||||
|
||||
.Ldone:
|
||||
str w12,[x2]
|
||||
mov x3,#0
|
||||
|
||||
.Lenc_key_abort:
|
||||
mov x0,x3 // return value
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
|
||||
|
||||
.globl aes_hw_set_decrypt_key
|
||||
.hidden aes_hw_set_decrypt_key
|
||||
.type aes_hw_set_decrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
bl .Lenc_key
|
||||
|
||||
cmp x0,#0
|
||||
b.ne .Ldec_key_abort
|
||||
|
||||
sub x2,x2,#240 // restore original x2
|
||||
mov x4,#-16
|
||||
add x0,x2,x12,lsl#4 // end of key schedule
|
||||
|
||||
ld1 {v0.4s},[x2]
|
||||
ld1 {v1.4s},[x0]
|
||||
st1 {v0.4s},[x0],x4
|
||||
st1 {v1.4s},[x2],#16
|
||||
|
||||
.Loop_imc:
|
||||
ld1 {v0.4s},[x2]
|
||||
ld1 {v1.4s},[x0]
|
||||
aesimc v0.16b,v0.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
st1 {v0.4s},[x0],x4
|
||||
st1 {v1.4s},[x2],#16
|
||||
cmp x0,x2
|
||||
b.hi .Loop_imc
|
||||
|
||||
ld1 {v0.4s},[x2]
|
||||
aesimc v0.16b,v0.16b
|
||||
st1 {v0.4s},[x0]
|
||||
|
||||
eor x0,x0,x0 // return value
|
||||
.Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
|
||||
.globl aes_hw_encrypt
|
||||
.hidden aes_hw_encrypt
|
||||
.type aes_hw_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
sub w3,w3,#2
|
||||
ld1 {v1.4s},[x2],#16
|
||||
|
||||
.Loop_enc:
|
||||
aese v2.16b,v0.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2],#16
|
||||
subs w3,w3,#2
|
||||
aese v2.16b,v1.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v1.4s},[x2],#16
|
||||
b.gt .Loop_enc
|
||||
|
||||
aese v2.16b,v0.16b
|
||||
aesmc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2]
|
||||
aese v2.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
|
||||
st1 {v2.16b},[x1]
|
||||
ret
|
||||
.size aes_hw_encrypt,.-aes_hw_encrypt
|
||||
.globl aes_hw_decrypt
|
||||
.hidden aes_hw_decrypt
|
||||
.type aes_hw_decrypt,%function
|
||||
.align 5
|
||||
aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
sub w3,w3,#2
|
||||
ld1 {v1.4s},[x2],#16
|
||||
|
||||
.Loop_dec:
|
||||
aesd v2.16b,v0.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2],#16
|
||||
subs w3,w3,#2
|
||||
aesd v2.16b,v1.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v1.4s},[x2],#16
|
||||
b.gt .Loop_dec
|
||||
|
||||
aesd v2.16b,v0.16b
|
||||
aesimc v2.16b,v2.16b
|
||||
ld1 {v0.4s},[x2]
|
||||
aesd v2.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
|
||||
st1 {v2.16b},[x1]
|
||||
ret
|
||||
.size aes_hw_decrypt,.-aes_hw_decrypt
|
||||
.globl aes_hw_cbc_encrypt
|
||||
.hidden aes_hw_cbc_encrypt
|
||||
.type aes_hw_cbc_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
subs x2,x2,#16
|
||||
mov x8,#16
|
||||
b.lo .Lcbc_abort
|
||||
csel x8,xzr,x8,eq
|
||||
|
||||
cmp w5,#0 // en- or decrypting?
|
||||
ldr w5,[x3,#240]
|
||||
and x2,x2,#-16
|
||||
ld1 {v6.16b},[x4]
|
||||
ld1 {v0.16b},[x0],x8
|
||||
|
||||
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
|
||||
sub w5,w5,#6
|
||||
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
|
||||
sub w5,w5,#2
|
||||
ld1 {v18.4s,v19.4s},[x7],#32
|
||||
ld1 {v20.4s,v21.4s},[x7],#32
|
||||
ld1 {v22.4s,v23.4s},[x7],#32
|
||||
ld1 {v7.4s},[x7]
|
||||
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
b.eq .Lcbc_dec
|
||||
|
||||
cmp w5,#2
|
||||
eor v0.16b,v0.16b,v6.16b
|
||||
eor v5.16b,v16.16b,v7.16b
|
||||
b.eq .Lcbc_enc128
|
||||
|
||||
ld1 {v2.4s,v3.4s},[x7]
|
||||
add x7,x3,#16
|
||||
add x6,x3,#16*4
|
||||
add x12,x3,#16*5
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
add x14,x3,#16*6
|
||||
add x3,x3,#16*7
|
||||
b .Lenter_cbc_enc
|
||||
|
||||
.align 4
|
||||
.Loop_cbc_enc:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
st1 {v6.16b},[x1],#16
|
||||
.Lenter_cbc_enc:
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v2.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.4s},[x6]
|
||||
cmp w5,#4
|
||||
aese v0.16b,v3.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x12]
|
||||
b.eq .Lcbc_enc192
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.4s},[x14]
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x3]
|
||||
nop
|
||||
|
||||
.Lcbc_enc192:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
subs x2,x2,#16
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
csel x8,xzr,x8,eq
|
||||
aese v0.16b,v18.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v19.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.16b},[x0],x8
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
eor v16.16b,v16.16b,v5.16b
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v23.16b
|
||||
eor v6.16b,v0.16b,v7.16b
|
||||
b.hs .Loop_cbc_enc
|
||||
|
||||
st1 {v6.16b},[x1],#16
|
||||
b .Lcbc_done
|
||||
|
||||
.align 5
|
||||
.Lcbc_enc128:
|
||||
ld1 {v2.4s,v3.4s},[x7]
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
b .Lenter_cbc_enc128
|
||||
.Loop_cbc_enc128:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
st1 {v6.16b},[x1],#16
|
||||
.Lenter_cbc_enc128:
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
subs x2,x2,#16
|
||||
aese v0.16b,v2.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
csel x8,xzr,x8,eq
|
||||
aese v0.16b,v3.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v18.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v19.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
ld1 {v16.16b},[x0],x8
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
eor v16.16b,v16.16b,v5.16b
|
||||
aese v0.16b,v23.16b
|
||||
eor v6.16b,v0.16b,v7.16b
|
||||
b.hs .Loop_cbc_enc128
|
||||
|
||||
st1 {v6.16b},[x1],#16
|
||||
b .Lcbc_done
|
||||
.align 5
|
||||
.Lcbc_dec:
|
||||
ld1 {v18.16b},[x0],#16
|
||||
subs x2,x2,#32 // bias
|
||||
add w6,w5,#2
|
||||
orr v3.16b,v0.16b,v0.16b
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
orr v19.16b,v18.16b,v18.16b
|
||||
b.lo .Lcbc_dec_tail
|
||||
|
||||
orr v1.16b,v18.16b,v18.16b
|
||||
ld1 {v18.16b},[x0],#16
|
||||
orr v2.16b,v0.16b,v0.16b
|
||||
orr v3.16b,v1.16b,v1.16b
|
||||
orr v19.16b,v18.16b,v18.16b
|
||||
|
||||
.Loop3x_cbc_dec:
|
||||
aesd v0.16b,v16.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aesd v0.16b,v17.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Loop3x_cbc_dec
|
||||
|
||||
aesd v0.16b,v16.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v4.16b,v6.16b,v7.16b
|
||||
subs x2,x2,#0x30
|
||||
eor v5.16b,v2.16b,v7.16b
|
||||
csel x6,x2,x6,lo // x6, w6, is zero at this point
|
||||
aesd v0.16b,v17.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v17.16b,v3.16b,v7.16b
|
||||
add x0,x0,x6 // x0 is adjusted in such way that
|
||||
// at exit from the loop v1.16b-v18.16b
|
||||
// are loaded with last "words"
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
mov x7,x3
|
||||
aesd v0.16b,v20.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v20.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v20.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
aesd v0.16b,v21.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v21.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v21.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
aesd v0.16b,v22.16b
|
||||
aesimc v0.16b,v0.16b
|
||||
aesd v1.16b,v22.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v22.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v19.16b},[x0],#16
|
||||
aesd v0.16b,v23.16b
|
||||
aesd v1.16b,v23.16b
|
||||
aesd v18.16b,v23.16b
|
||||
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
|
||||
add w6,w5,#2
|
||||
eor v4.16b,v4.16b,v0.16b
|
||||
eor v5.16b,v5.16b,v1.16b
|
||||
eor v18.16b,v18.16b,v17.16b
|
||||
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
|
||||
st1 {v4.16b},[x1],#16
|
||||
orr v0.16b,v2.16b,v2.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
orr v1.16b,v3.16b,v3.16b
|
||||
st1 {v18.16b},[x1],#16
|
||||
orr v18.16b,v19.16b,v19.16b
|
||||
b.hs .Loop3x_cbc_dec
|
||||
|
||||
cmn x2,#0x30
|
||||
b.eq .Lcbc_done
|
||||
nop
|
||||
|
||||
.Lcbc_dec_tail:
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Lcbc_dec_tail
|
||||
|
||||
aesd v1.16b,v16.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v16.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
aesd v1.16b,v17.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v17.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
aesd v1.16b,v20.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v20.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
cmn x2,#0x20
|
||||
aesd v1.16b,v21.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v21.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v5.16b,v6.16b,v7.16b
|
||||
aesd v1.16b,v22.16b
|
||||
aesimc v1.16b,v1.16b
|
||||
aesd v18.16b,v22.16b
|
||||
aesimc v18.16b,v18.16b
|
||||
eor v17.16b,v3.16b,v7.16b
|
||||
aesd v1.16b,v23.16b
|
||||
aesd v18.16b,v23.16b
|
||||
b.eq .Lcbc_dec_one
|
||||
eor v5.16b,v5.16b,v1.16b
|
||||
eor v17.16b,v17.16b,v18.16b
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
st1 {v17.16b},[x1],#16
|
||||
b .Lcbc_done
|
||||
|
||||
.Lcbc_dec_one:
|
||||
eor v5.16b,v5.16b,v18.16b
|
||||
orr v6.16b,v19.16b,v19.16b
|
||||
st1 {v5.16b},[x1],#16
|
||||
|
||||
.Lcbc_done:
|
||||
st1 {v6.16b},[x4]
|
||||
.Lcbc_abort:
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
|
||||
.globl aes_hw_ctr32_encrypt_blocks
|
||||
.hidden aes_hw_ctr32_encrypt_blocks
|
||||
.type aes_hw_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
|
||||
ldr w8, [x4, #12]
|
||||
ld1 {v0.4s},[x4]
|
||||
|
||||
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
|
||||
sub w5,w5,#4
|
||||
mov x12,#16
|
||||
cmp x2,#2
|
||||
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
|
||||
sub w5,w5,#2
|
||||
ld1 {v20.4s,v21.4s},[x7],#32
|
||||
ld1 {v22.4s,v23.4s},[x7],#32
|
||||
ld1 {v7.4s},[x7]
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
csel x12,xzr,x12,lo
|
||||
#ifndef __ARMEB__
|
||||
rev w8, w8
|
||||
#endif
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
add w10, w8, #1
|
||||
orr v18.16b,v0.16b,v0.16b
|
||||
add w8, w8, #2
|
||||
orr v6.16b,v0.16b,v0.16b
|
||||
rev w10, w10
|
||||
mov v1.s[3],w10
|
||||
b.ls .Lctr32_tail
|
||||
rev w12, w8
|
||||
sub x2,x2,#3 // bias
|
||||
mov v18.s[3],w12
|
||||
b .Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
.Loop3x_ctr32:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Loop3x_ctr32
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v4.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v5.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
orr v0.16b,v6.16b,v6.16b
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
aese v4.16b,v17.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v17.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
ld1 {v19.16b},[x0],#16
|
||||
mov x7,x3
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v17.16b,v18.16b
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
add w9,w8,#1
|
||||
aese v4.16b,v20.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v20.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
eor v2.16b,v2.16b,v7.16b
|
||||
add w10,w8,#2
|
||||
aese v17.16b,v20.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
eor v3.16b,v3.16b,v7.16b
|
||||
add w8,w8,#3
|
||||
aese v4.16b,v21.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v21.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
eor v19.16b,v19.16b,v7.16b
|
||||
rev w9,w9
|
||||
aese v17.16b,v21.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v0.s[3], w9
|
||||
rev w10,w10
|
||||
aese v4.16b,v22.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v22.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
mov v1.s[3], w10
|
||||
rev w12,w8
|
||||
aese v17.16b,v22.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v18.s[3], w12
|
||||
subs x2,x2,#3
|
||||
aese v4.16b,v23.16b
|
||||
aese v5.16b,v23.16b
|
||||
aese v17.16b,v23.16b
|
||||
|
||||
eor v2.16b,v2.16b,v4.16b
|
||||
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
|
||||
st1 {v2.16b},[x1],#16
|
||||
eor v3.16b,v3.16b,v5.16b
|
||||
mov w6,w5
|
||||
st1 {v3.16b},[x1],#16
|
||||
eor v19.16b,v19.16b,v17.16b
|
||||
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
|
||||
st1 {v19.16b},[x1],#16
|
||||
b.hs .Loop3x_ctr32
|
||||
|
||||
adds x2,x2,#3
|
||||
b.eq .Lctr32_done
|
||||
cmp x2,#1
|
||||
mov x12,#16
|
||||
csel x12,xzr,x12,eq
|
||||
|
||||
.Lctr32_tail:
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v16.4s},[x7],#16
|
||||
subs w6,w6,#2
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v17.4s},[x7],#16
|
||||
b.gt .Lctr32_tail
|
||||
|
||||
aese v0.16b,v16.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
aese v0.16b,v17.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v17.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],x12
|
||||
aese v0.16b,v20.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v20.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
ld1 {v3.16b},[x0]
|
||||
aese v0.16b,v21.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v21.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
eor v2.16b,v2.16b,v7.16b
|
||||
aese v0.16b,v22.16b
|
||||
aesmc v0.16b,v0.16b
|
||||
aese v1.16b,v22.16b
|
||||
aesmc v1.16b,v1.16b
|
||||
eor v3.16b,v3.16b,v7.16b
|
||||
aese v0.16b,v23.16b
|
||||
aese v1.16b,v23.16b
|
||||
|
||||
cmp x2,#1
|
||||
eor v2.16b,v2.16b,v0.16b
|
||||
eor v3.16b,v3.16b,v1.16b
|
||||
st1 {v2.16b},[x1],#16
|
||||
b.eq .Lctr32_done
|
||||
st1 {v3.16b},[x1]
|
||||
|
||||
.Lctr32_done:
|
||||
ldr x29,[sp],#16
|
||||
ret
|
||||
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
1436
contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/armv8-mont.S
Normal file
1436
contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/armv8-mont.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,346 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl gcm_init_neon
|
||||
.hidden gcm_init_neon
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
shl v19.2d, v19.2d, #57 // 0xc2.0
|
||||
ext v3.16b, v17.16b, v17.16b, #8
|
||||
ushr v18.2d, v19.2d, #63
|
||||
dup v17.4s, v17.s[1]
|
||||
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
|
||||
ushr v18.2d, v3.2d, #63
|
||||
sshr v17.4s, v17.4s, #31 // broadcast carry bit
|
||||
and v18.16b, v18.16b, v16.16b
|
||||
shl v3.2d, v3.2d, #1
|
||||
ext v18.16b, v18.16b, v18.16b, #8
|
||||
and v16.16b, v16.16b, v17.16b
|
||||
orr v3.16b, v3.16b, v18.16b // H<<<=1
|
||||
eor v5.16b, v3.16b, v16.16b // twisted H
|
||||
st1 {v5.2d}, [x0] // store Htable[0]
|
||||
ret
|
||||
.size gcm_init_neon,.-gcm_init_neon
|
||||
|
||||
.globl gcm_gmult_neon
|
||||
.hidden gcm_gmult_neon
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, .Lmasks // load constants
|
||||
add x9, x9, :lo12:.Lmasks
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v3.16b, v3.16b // byteswap Xi
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
mov x3, #16
|
||||
b .Lgmult_neon
|
||||
.size gcm_gmult_neon,.-gcm_gmult_neon
|
||||
|
||||
.globl gcm_ghash_neon
|
||||
.hidden gcm_ghash_neon
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
adrp x9, .Lmasks // load constants
|
||||
add x9, x9, :lo12:.Lmasks
|
||||
ld1 {v24.2d, v25.2d}, [x9]
|
||||
rev64 v0.16b, v0.16b // byteswap Xi
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
|
||||
|
||||
.Loop_neon:
|
||||
ld1 {v3.16b}, [x2], #16 // load inp
|
||||
rev64 v3.16b, v3.16b // byteswap inp
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
|
||||
|
||||
.Lgmult_neon:
|
||||
// Split the input into v3 and v4. (The upper halves are unused,
|
||||
// so it is okay to leave them alone.)
|
||||
ins v4.d[0], v3.d[1]
|
||||
ext v16.8b, v5.8b, v5.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v0.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v0.8h, v5.8b, v0.8b // E = A*B1
|
||||
ext v17.8b, v5.8b, v5.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v5.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v5.8b, v5.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v0.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v0.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v0.8h, v5.8b, v0.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v0.16b // N = I + J
|
||||
pmull v19.8h, v5.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v0.8h, v5.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v0.16b, v0.16b, v16.16b
|
||||
eor v0.16b, v0.16b, v18.16b
|
||||
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
|
||||
ext v16.8b, v7.8b, v7.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v3.8b // F = A1*B
|
||||
ext v1.8b, v3.8b, v3.8b, #1 // B1
|
||||
pmull v1.8h, v7.8b, v1.8b // E = A*B1
|
||||
ext v17.8b, v7.8b, v7.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v3.8b // H = A2*B
|
||||
ext v19.8b, v3.8b, v3.8b, #2 // B2
|
||||
pmull v19.8h, v7.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v7.8b, v7.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v1.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v3.8b // J = A3*B
|
||||
ext v1.8b, v3.8b, v3.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v1.8h, v7.8b, v1.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v3.8b, v3.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v1.16b // N = I + J
|
||||
pmull v19.8h, v7.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v1.8h, v7.8b, v3.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v1.16b, v1.16b, v16.16b
|
||||
eor v1.16b, v1.16b, v18.16b
|
||||
ext v16.8b, v6.8b, v6.8b, #1 // A1
|
||||
pmull v16.8h, v16.8b, v4.8b // F = A1*B
|
||||
ext v2.8b, v4.8b, v4.8b, #1 // B1
|
||||
pmull v2.8h, v6.8b, v2.8b // E = A*B1
|
||||
ext v17.8b, v6.8b, v6.8b, #2 // A2
|
||||
pmull v17.8h, v17.8b, v4.8b // H = A2*B
|
||||
ext v19.8b, v4.8b, v4.8b, #2 // B2
|
||||
pmull v19.8h, v6.8b, v19.8b // G = A*B2
|
||||
ext v18.8b, v6.8b, v6.8b, #3 // A3
|
||||
eor v16.16b, v16.16b, v2.16b // L = E + F
|
||||
pmull v18.8h, v18.8b, v4.8b // J = A3*B
|
||||
ext v2.8b, v4.8b, v4.8b, #3 // B3
|
||||
eor v17.16b, v17.16b, v19.16b // M = G + H
|
||||
pmull v2.8h, v6.8b, v2.8b // I = A*B3
|
||||
|
||||
// Here we diverge from the 32-bit version. It computes the following
|
||||
// (instructions reordered for clarity):
|
||||
//
|
||||
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
|
||||
// vand $t0#hi, $t0#hi, $k48
|
||||
// veor $t0#lo, $t0#lo, $t0#hi
|
||||
//
|
||||
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
|
||||
// vand $t1#hi, $t1#hi, $k32
|
||||
// veor $t1#lo, $t1#lo, $t1#hi
|
||||
//
|
||||
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
|
||||
// vand $t2#hi, $t2#hi, $k16
|
||||
// veor $t2#lo, $t2#lo, $t2#hi
|
||||
//
|
||||
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
|
||||
// vmov.i64 $t3#hi, #0
|
||||
//
|
||||
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
|
||||
// upper halves of SIMD registers, so we must split each half into
|
||||
// separate registers. To compensate, we pair computations up and
|
||||
// parallelize.
|
||||
|
||||
ext v19.8b, v4.8b, v4.8b, #4 // B4
|
||||
eor v18.16b, v18.16b, v2.16b // N = I + J
|
||||
pmull v19.8h, v6.8b, v19.8b // K = A*B4
|
||||
|
||||
// This can probably be scheduled more efficiently. For now, we just
|
||||
// pair up independent instructions.
|
||||
zip1 v20.2d, v16.2d, v17.2d
|
||||
zip1 v22.2d, v18.2d, v19.2d
|
||||
zip2 v21.2d, v16.2d, v17.2d
|
||||
zip2 v23.2d, v18.2d, v19.2d
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
and v21.16b, v21.16b, v24.16b
|
||||
and v23.16b, v23.16b, v25.16b
|
||||
eor v20.16b, v20.16b, v21.16b
|
||||
eor v22.16b, v22.16b, v23.16b
|
||||
zip1 v16.2d, v20.2d, v21.2d
|
||||
zip1 v18.2d, v22.2d, v23.2d
|
||||
zip2 v17.2d, v20.2d, v21.2d
|
||||
zip2 v19.2d, v22.2d, v23.2d
|
||||
|
||||
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
|
||||
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
|
||||
pmull v2.8h, v6.8b, v4.8b // D = A*B
|
||||
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
|
||||
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
|
||||
eor v16.16b, v16.16b, v17.16b
|
||||
eor v18.16b, v18.16b, v19.16b
|
||||
eor v2.16b, v2.16b, v16.16b
|
||||
eor v2.16b, v2.16b, v18.16b
|
||||
ext v16.16b, v0.16b, v2.16b, #8
|
||||
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
|
||||
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
|
||||
// This is a no-op due to the ins instruction below.
|
||||
// ins v2.d[0], v1.d[1]
|
||||
|
||||
// equivalent of reduction_avx from ghash-x86_64.pl
|
||||
shl v17.2d, v0.2d, #57 // 1st phase
|
||||
shl v18.2d, v0.2d, #62
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
shl v17.2d, v0.2d, #63
|
||||
eor v18.16b, v18.16b, v17.16b //
|
||||
// Note Xm contains {Xl.d[1], Xh.d[0]}.
|
||||
eor v18.16b, v18.16b, v1.16b
|
||||
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
|
||||
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
|
||||
|
||||
ushr v18.2d, v0.2d, #1 // 2nd phase
|
||||
eor v2.16b, v2.16b,v0.16b
|
||||
eor v0.16b, v0.16b,v18.16b //
|
||||
ushr v18.2d, v18.2d, #6
|
||||
ushr v0.2d, v0.2d, #1 //
|
||||
eor v0.16b, v0.16b, v2.16b //
|
||||
eor v0.16b, v0.16b, v18.16b //
|
||||
|
||||
subs x3, x3, #16
|
||||
bne .Loop_neon
|
||||
|
||||
rev64 v0.16b, v0.16b // byteswap Xi and write
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
st1 {v0.16b}, [x0]
|
||||
|
||||
ret
|
||||
.size gcm_ghash_neon,.-gcm_ghash_neon
|
||||
|
||||
.section .rodata
|
||||
.align 4
|
||||
.Lmasks:
|
||||
.quad 0x0000ffffffffffff // k48
|
||||
.quad 0x00000000ffffffff // k32
|
||||
.quad 0x000000000000ffff // k16
|
||||
.quad 0x0000000000000000 // k0
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
@ -0,0 +1,252 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
.globl gcm_init_v8
|
||||
.hidden gcm_init_v8
|
||||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
ushr v18.2d,v19.2d,#63
|
||||
dup v17.4s,v17.s[1]
|
||||
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
|
||||
ushr v18.2d,v3.2d,#63
|
||||
sshr v17.4s,v17.4s,#31 //broadcast carry bit
|
||||
and v18.16b,v18.16b,v16.16b
|
||||
shl v3.2d,v3.2d,#1
|
||||
ext v18.16b,v18.16b,v18.16b,#8
|
||||
and v16.16b,v16.16b,v17.16b
|
||||
orr v3.16b,v3.16b,v18.16b //H<<<=1
|
||||
eor v20.16b,v3.16b,v16.16b //twisted H
|
||||
st1 {v20.2d},[x0],#16 //store Htable[0]
|
||||
|
||||
//calculate H^2
|
||||
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
|
||||
pmull v0.1q,v20.1d,v20.1d
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
pmull2 v2.1q,v20.2d,v20.2d
|
||||
pmull v1.1q,v16.1d,v16.1d
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v22.16b,v0.16b,v18.16b
|
||||
|
||||
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
|
||||
|
||||
ret
|
||||
.size gcm_init_v8,.-gcm_init_v8
|
||||
.globl gcm_gmult_v8
|
||||
.hidden gcm_gmult_v8
|
||||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
shl v19.2d,v19.2d,#57
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v3.16b,v17.16b,v17.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
.size gcm_gmult_v8,.-gcm_gmult_v8
|
||||
.globl gcm_ghash_v8
|
||||
.hidden gcm_ghash_v8
|
||||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
//to be rotated in order to
|
||||
//make it appear as in
|
||||
//algorithm specification
|
||||
subs x3,x3,#32 //see if x3 is 32 or larger
|
||||
mov x12,#16 //x12 is used as post-
|
||||
//increment for input pointer;
|
||||
//as loop is modulo-scheduled
|
||||
//x12 is zeroed just in time
|
||||
//to preclude overstepping
|
||||
//inp[len], which means that
|
||||
//last block[s] are actually
|
||||
//loaded twice, but last
|
||||
//copy is not processed
|
||||
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v22.2d},[x1]
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
|
||||
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
|
||||
b.lo .Lodd_tail_v8 //x3 was less than 32
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b .Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
.Loop_mod2x_v8:
|
||||
ext v18.16b,v3.16b,v3.16b,#8
|
||||
subs x3,x3,#32 //is there more data?
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
|
||||
csel x12,xzr,x12,lo //is it time to zero x12?
|
||||
|
||||
pmull v5.1q,v21.1d,v17.1d
|
||||
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
|
||||
eor v0.16b,v0.16b,v4.16b //accumulate
|
||||
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
|
||||
|
||||
eor v2.16b,v2.16b,v6.16b
|
||||
csel x12,xzr,x12,eq //is it time to zero x12?
|
||||
eor v1.16b,v1.16b,v5.16b
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
rev64 v16.16b,v16.16b
|
||||
#endif
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v17.16b,v17.16b
|
||||
#endif
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v7.16b,v17.16b,v17.16b,#8
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
|
||||
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v3.16b,v3.16b,v18.16b
|
||||
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
|
||||
eor v3.16b,v3.16b,v0.16b
|
||||
pmull2 v6.1q,v20.2d,v7.2d
|
||||
b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
|
||||
|
||||
eor v2.16b,v2.16b,v18.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
|
||||
adds x3,x3,#32 //re-construct x3
|
||||
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
|
||||
b.eq .Ldone_v8 //is x3 zero?
|
||||
.Lodd_tail_v8:
|
||||
ext v18.16b,v0.16b,v0.16b,#8
|
||||
eor v3.16b,v3.16b,v0.16b //inp^=Xi
|
||||
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
|
||||
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
|
||||
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
|
||||
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
|
||||
.Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
#endif
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
.size gcm_ghash_v8,.-gcm_ghash_v8
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
1239
contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
Normal file
1239
contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,761 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__aarch64__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
// with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
// the result of |func|. The |unwind| argument is unused.
|
||||
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
// const uint64_t *argv, size_t argc,
|
||||
// uint64_t unwind);
|
||||
.type abi_test_trampoline, %function
|
||||
.globl abi_test_trampoline
|
||||
.hidden abi_test_trampoline
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
.Labi_test_trampoline_begin:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
// Stack layout (low to high addresses)
|
||||
// x29,x30 (16 bytes)
|
||||
// d8-d15 (64 bytes)
|
||||
// x19-x28 (80 bytes)
|
||||
// x1 (8 bytes)
|
||||
// padding (8 bytes)
|
||||
stp x29, x30, [sp, #-176]!
|
||||
mov x29, sp
|
||||
|
||||
// Saved callee-saved registers and |state|.
|
||||
stp d8, d9, [sp, #16]
|
||||
stp d10, d11, [sp, #32]
|
||||
stp d12, d13, [sp, #48]
|
||||
stp d14, d15, [sp, #64]
|
||||
stp x19, x20, [sp, #80]
|
||||
stp x21, x22, [sp, #96]
|
||||
stp x23, x24, [sp, #112]
|
||||
stp x25, x26, [sp, #128]
|
||||
stp x27, x28, [sp, #144]
|
||||
str x1, [sp, #160]
|
||||
|
||||
// Load registers from |state|, with the exception of x29. x29 is the
|
||||
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
|
||||
// mandate that x29 always point to a frame. iOS64 does so, which means
|
||||
// we cannot fill x29 with entropy without violating ABI rules
|
||||
// ourselves. x29 is tested separately below.
|
||||
ldp d8, d9, [x1], #16
|
||||
ldp d10, d11, [x1], #16
|
||||
ldp d12, d13, [x1], #16
|
||||
ldp d14, d15, [x1], #16
|
||||
ldp x19, x20, [x1], #16
|
||||
ldp x21, x22, [x1], #16
|
||||
ldp x23, x24, [x1], #16
|
||||
ldp x25, x26, [x1], #16
|
||||
ldp x27, x28, [x1], #16
|
||||
|
||||
// Move parameters into temporary registers.
|
||||
mov x9, x0
|
||||
mov x10, x2
|
||||
mov x11, x3
|
||||
|
||||
// Load parameters into registers.
|
||||
cbz x11, .Largs_done
|
||||
ldr x0, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x1, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x2, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x3, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x4, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x5, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x6, [x10], #8
|
||||
subs x11, x11, #1
|
||||
b.eq .Largs_done
|
||||
ldr x7, [x10], #8
|
||||
|
||||
.Largs_done:
|
||||
blr x9
|
||||
|
||||
// Reload |state| and store registers.
|
||||
ldr x1, [sp, #160]
|
||||
stp d8, d9, [x1], #16
|
||||
stp d10, d11, [x1], #16
|
||||
stp d12, d13, [x1], #16
|
||||
stp d14, d15, [x1], #16
|
||||
stp x19, x20, [x1], #16
|
||||
stp x21, x22, [x1], #16
|
||||
stp x23, x24, [x1], #16
|
||||
stp x25, x26, [x1], #16
|
||||
stp x27, x28, [x1], #16
|
||||
|
||||
// |func| is required to preserve x29, the frame pointer. We cannot load
|
||||
// random values into x29 (see comment above), so compare it against the
|
||||
// expected value and zero the field of |state| if corrupted.
|
||||
mov x9, sp
|
||||
cmp x29, x9
|
||||
b.eq .Lx29_ok
|
||||
str xzr, [x1]
|
||||
|
||||
.Lx29_ok:
|
||||
// Restore callee-saved registers.
|
||||
ldp d8, d9, [sp, #16]
|
||||
ldp d10, d11, [sp, #32]
|
||||
ldp d12, d13, [sp, #48]
|
||||
ldp d14, d15, [sp, #64]
|
||||
ldp x19, x20, [sp, #80]
|
||||
ldp x21, x22, [sp, #96]
|
||||
ldp x23, x24, [sp, #112]
|
||||
ldp x25, x26, [sp, #128]
|
||||
ldp x27, x28, [sp, #144]
|
||||
|
||||
ldp x29, x30, [sp], #176
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size abi_test_trampoline,.-abi_test_trampoline
|
||||
.type abi_test_clobber_x0, %function
|
||||
.globl abi_test_clobber_x0
|
||||
.hidden abi_test_clobber_x0
|
||||
.align 4
|
||||
abi_test_clobber_x0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x0, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x0,.-abi_test_clobber_x0
|
||||
.type abi_test_clobber_x1, %function
|
||||
.globl abi_test_clobber_x1
|
||||
.hidden abi_test_clobber_x1
|
||||
.align 4
|
||||
abi_test_clobber_x1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x1, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x1,.-abi_test_clobber_x1
|
||||
.type abi_test_clobber_x2, %function
|
||||
.globl abi_test_clobber_x2
|
||||
.hidden abi_test_clobber_x2
|
||||
.align 4
|
||||
abi_test_clobber_x2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x2, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x2,.-abi_test_clobber_x2
|
||||
.type abi_test_clobber_x3, %function
|
||||
.globl abi_test_clobber_x3
|
||||
.hidden abi_test_clobber_x3
|
||||
.align 4
|
||||
abi_test_clobber_x3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x3, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x3,.-abi_test_clobber_x3
|
||||
.type abi_test_clobber_x4, %function
|
||||
.globl abi_test_clobber_x4
|
||||
.hidden abi_test_clobber_x4
|
||||
.align 4
|
||||
abi_test_clobber_x4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x4, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x4,.-abi_test_clobber_x4
|
||||
.type abi_test_clobber_x5, %function
|
||||
.globl abi_test_clobber_x5
|
||||
.hidden abi_test_clobber_x5
|
||||
.align 4
|
||||
abi_test_clobber_x5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x5, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x5,.-abi_test_clobber_x5
|
||||
.type abi_test_clobber_x6, %function
|
||||
.globl abi_test_clobber_x6
|
||||
.hidden abi_test_clobber_x6
|
||||
.align 4
|
||||
abi_test_clobber_x6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x6, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x6,.-abi_test_clobber_x6
|
||||
.type abi_test_clobber_x7, %function
|
||||
.globl abi_test_clobber_x7
|
||||
.hidden abi_test_clobber_x7
|
||||
.align 4
|
||||
abi_test_clobber_x7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x7, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x7,.-abi_test_clobber_x7
|
||||
.type abi_test_clobber_x8, %function
|
||||
.globl abi_test_clobber_x8
|
||||
.hidden abi_test_clobber_x8
|
||||
.align 4
|
||||
abi_test_clobber_x8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x8, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x8,.-abi_test_clobber_x8
|
||||
.type abi_test_clobber_x9, %function
|
||||
.globl abi_test_clobber_x9
|
||||
.hidden abi_test_clobber_x9
|
||||
.align 4
|
||||
abi_test_clobber_x9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x9, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x9,.-abi_test_clobber_x9
|
||||
.type abi_test_clobber_x10, %function
|
||||
.globl abi_test_clobber_x10
|
||||
.hidden abi_test_clobber_x10
|
||||
.align 4
|
||||
abi_test_clobber_x10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x10, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x10,.-abi_test_clobber_x10
|
||||
.type abi_test_clobber_x11, %function
|
||||
.globl abi_test_clobber_x11
|
||||
.hidden abi_test_clobber_x11
|
||||
.align 4
|
||||
abi_test_clobber_x11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x11, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x11,.-abi_test_clobber_x11
|
||||
.type abi_test_clobber_x12, %function
|
||||
.globl abi_test_clobber_x12
|
||||
.hidden abi_test_clobber_x12
|
||||
.align 4
|
||||
abi_test_clobber_x12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x12, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x12,.-abi_test_clobber_x12
|
||||
.type abi_test_clobber_x13, %function
|
||||
.globl abi_test_clobber_x13
|
||||
.hidden abi_test_clobber_x13
|
||||
.align 4
|
||||
abi_test_clobber_x13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x13, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x13,.-abi_test_clobber_x13
|
||||
.type abi_test_clobber_x14, %function
|
||||
.globl abi_test_clobber_x14
|
||||
.hidden abi_test_clobber_x14
|
||||
.align 4
|
||||
abi_test_clobber_x14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x14, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x14,.-abi_test_clobber_x14
|
||||
.type abi_test_clobber_x15, %function
|
||||
.globl abi_test_clobber_x15
|
||||
.hidden abi_test_clobber_x15
|
||||
.align 4
|
||||
abi_test_clobber_x15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x15, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x15,.-abi_test_clobber_x15
|
||||
.type abi_test_clobber_x16, %function
|
||||
.globl abi_test_clobber_x16
|
||||
.hidden abi_test_clobber_x16
|
||||
.align 4
|
||||
abi_test_clobber_x16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x16, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x16,.-abi_test_clobber_x16
|
||||
.type abi_test_clobber_x17, %function
|
||||
.globl abi_test_clobber_x17
|
||||
.hidden abi_test_clobber_x17
|
||||
.align 4
|
||||
abi_test_clobber_x17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x17, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x17,.-abi_test_clobber_x17
|
||||
.type abi_test_clobber_x19, %function
|
||||
.globl abi_test_clobber_x19
|
||||
.hidden abi_test_clobber_x19
|
||||
.align 4
|
||||
abi_test_clobber_x19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x19, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x19,.-abi_test_clobber_x19
|
||||
.type abi_test_clobber_x20, %function
|
||||
.globl abi_test_clobber_x20
|
||||
.hidden abi_test_clobber_x20
|
||||
.align 4
|
||||
abi_test_clobber_x20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x20, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x20,.-abi_test_clobber_x20
|
||||
.type abi_test_clobber_x21, %function
|
||||
.globl abi_test_clobber_x21
|
||||
.hidden abi_test_clobber_x21
|
||||
.align 4
|
||||
abi_test_clobber_x21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x21, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x21,.-abi_test_clobber_x21
|
||||
.type abi_test_clobber_x22, %function
|
||||
.globl abi_test_clobber_x22
|
||||
.hidden abi_test_clobber_x22
|
||||
.align 4
|
||||
abi_test_clobber_x22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x22, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x22,.-abi_test_clobber_x22
|
||||
.type abi_test_clobber_x23, %function
|
||||
.globl abi_test_clobber_x23
|
||||
.hidden abi_test_clobber_x23
|
||||
.align 4
|
||||
abi_test_clobber_x23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x23,.-abi_test_clobber_x23
|
||||
.type abi_test_clobber_x24, %function
|
||||
.globl abi_test_clobber_x24
|
||||
.hidden abi_test_clobber_x24
|
||||
.align 4
|
||||
abi_test_clobber_x24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x24,.-abi_test_clobber_x24
|
||||
.type abi_test_clobber_x25, %function
|
||||
.globl abi_test_clobber_x25
|
||||
.hidden abi_test_clobber_x25
|
||||
.align 4
|
||||
abi_test_clobber_x25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x25,.-abi_test_clobber_x25
|
||||
.type abi_test_clobber_x26, %function
|
||||
.globl abi_test_clobber_x26
|
||||
.hidden abi_test_clobber_x26
|
||||
.align 4
|
||||
abi_test_clobber_x26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x26,.-abi_test_clobber_x26
|
||||
.type abi_test_clobber_x27, %function
|
||||
.globl abi_test_clobber_x27
|
||||
.hidden abi_test_clobber_x27
|
||||
.align 4
|
||||
abi_test_clobber_x27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x27,.-abi_test_clobber_x27
|
||||
.type abi_test_clobber_x28, %function
|
||||
.globl abi_test_clobber_x28
|
||||
.hidden abi_test_clobber_x28
|
||||
.align 4
|
||||
abi_test_clobber_x28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x28,.-abi_test_clobber_x28
|
||||
.type abi_test_clobber_x29, %function
|
||||
.globl abi_test_clobber_x29
|
||||
.hidden abi_test_clobber_x29
|
||||
.align 4
|
||||
abi_test_clobber_x29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
mov x29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_x29,.-abi_test_clobber_x29
|
||||
.type abi_test_clobber_d0, %function
|
||||
.globl abi_test_clobber_d0
|
||||
.hidden abi_test_clobber_d0
|
||||
.align 4
|
||||
abi_test_clobber_d0:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d0, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d0,.-abi_test_clobber_d0
|
||||
.type abi_test_clobber_d1, %function
|
||||
.globl abi_test_clobber_d1
|
||||
.hidden abi_test_clobber_d1
|
||||
.align 4
|
||||
abi_test_clobber_d1:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d1, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d1,.-abi_test_clobber_d1
|
||||
.type abi_test_clobber_d2, %function
|
||||
.globl abi_test_clobber_d2
|
||||
.hidden abi_test_clobber_d2
|
||||
.align 4
|
||||
abi_test_clobber_d2:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d2, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d2,.-abi_test_clobber_d2
|
||||
.type abi_test_clobber_d3, %function
|
||||
.globl abi_test_clobber_d3
|
||||
.hidden abi_test_clobber_d3
|
||||
.align 4
|
||||
abi_test_clobber_d3:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d3, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d3,.-abi_test_clobber_d3
|
||||
.type abi_test_clobber_d4, %function
|
||||
.globl abi_test_clobber_d4
|
||||
.hidden abi_test_clobber_d4
|
||||
.align 4
|
||||
abi_test_clobber_d4:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d4, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d4,.-abi_test_clobber_d4
|
||||
.type abi_test_clobber_d5, %function
|
||||
.globl abi_test_clobber_d5
|
||||
.hidden abi_test_clobber_d5
|
||||
.align 4
|
||||
abi_test_clobber_d5:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d5, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d5,.-abi_test_clobber_d5
|
||||
.type abi_test_clobber_d6, %function
|
||||
.globl abi_test_clobber_d6
|
||||
.hidden abi_test_clobber_d6
|
||||
.align 4
|
||||
abi_test_clobber_d6:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d6, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d6,.-abi_test_clobber_d6
|
||||
.type abi_test_clobber_d7, %function
|
||||
.globl abi_test_clobber_d7
|
||||
.hidden abi_test_clobber_d7
|
||||
.align 4
|
||||
abi_test_clobber_d7:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d7, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d7,.-abi_test_clobber_d7
|
||||
.type abi_test_clobber_d8, %function
|
||||
.globl abi_test_clobber_d8
|
||||
.hidden abi_test_clobber_d8
|
||||
.align 4
|
||||
abi_test_clobber_d8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d8, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d8,.-abi_test_clobber_d8
|
||||
.type abi_test_clobber_d9, %function
|
||||
.globl abi_test_clobber_d9
|
||||
.hidden abi_test_clobber_d9
|
||||
.align 4
|
||||
abi_test_clobber_d9:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d9, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d9,.-abi_test_clobber_d9
|
||||
.type abi_test_clobber_d10, %function
|
||||
.globl abi_test_clobber_d10
|
||||
.hidden abi_test_clobber_d10
|
||||
.align 4
|
||||
abi_test_clobber_d10:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d10, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d10,.-abi_test_clobber_d10
|
||||
.type abi_test_clobber_d11, %function
|
||||
.globl abi_test_clobber_d11
|
||||
.hidden abi_test_clobber_d11
|
||||
.align 4
|
||||
abi_test_clobber_d11:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d11, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d11,.-abi_test_clobber_d11
|
||||
.type abi_test_clobber_d12, %function
|
||||
.globl abi_test_clobber_d12
|
||||
.hidden abi_test_clobber_d12
|
||||
.align 4
|
||||
abi_test_clobber_d12:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d12, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d12,.-abi_test_clobber_d12
|
||||
.type abi_test_clobber_d13, %function
|
||||
.globl abi_test_clobber_d13
|
||||
.hidden abi_test_clobber_d13
|
||||
.align 4
|
||||
abi_test_clobber_d13:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d13, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d13,.-abi_test_clobber_d13
|
||||
.type abi_test_clobber_d14, %function
|
||||
.globl abi_test_clobber_d14
|
||||
.hidden abi_test_clobber_d14
|
||||
.align 4
|
||||
abi_test_clobber_d14:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d14, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d14,.-abi_test_clobber_d14
|
||||
.type abi_test_clobber_d15, %function
|
||||
.globl abi_test_clobber_d15
|
||||
.hidden abi_test_clobber_d15
|
||||
.align 4
|
||||
abi_test_clobber_d15:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d15, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
.type abi_test_clobber_d16, %function
|
||||
.globl abi_test_clobber_d16
|
||||
.hidden abi_test_clobber_d16
|
||||
.align 4
|
||||
abi_test_clobber_d16:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d16, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d16,.-abi_test_clobber_d16
|
||||
.type abi_test_clobber_d17, %function
|
||||
.globl abi_test_clobber_d17
|
||||
.hidden abi_test_clobber_d17
|
||||
.align 4
|
||||
abi_test_clobber_d17:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d17, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d17,.-abi_test_clobber_d17
|
||||
.type abi_test_clobber_d18, %function
|
||||
.globl abi_test_clobber_d18
|
||||
.hidden abi_test_clobber_d18
|
||||
.align 4
|
||||
abi_test_clobber_d18:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d18, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d18,.-abi_test_clobber_d18
|
||||
.type abi_test_clobber_d19, %function
|
||||
.globl abi_test_clobber_d19
|
||||
.hidden abi_test_clobber_d19
|
||||
.align 4
|
||||
abi_test_clobber_d19:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d19, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d19,.-abi_test_clobber_d19
|
||||
.type abi_test_clobber_d20, %function
|
||||
.globl abi_test_clobber_d20
|
||||
.hidden abi_test_clobber_d20
|
||||
.align 4
|
||||
abi_test_clobber_d20:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d20, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d20,.-abi_test_clobber_d20
|
||||
.type abi_test_clobber_d21, %function
|
||||
.globl abi_test_clobber_d21
|
||||
.hidden abi_test_clobber_d21
|
||||
.align 4
|
||||
abi_test_clobber_d21:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d21, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d21,.-abi_test_clobber_d21
|
||||
.type abi_test_clobber_d22, %function
|
||||
.globl abi_test_clobber_d22
|
||||
.hidden abi_test_clobber_d22
|
||||
.align 4
|
||||
abi_test_clobber_d22:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d22, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d22,.-abi_test_clobber_d22
|
||||
.type abi_test_clobber_d23, %function
|
||||
.globl abi_test_clobber_d23
|
||||
.hidden abi_test_clobber_d23
|
||||
.align 4
|
||||
abi_test_clobber_d23:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d23, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d23,.-abi_test_clobber_d23
|
||||
.type abi_test_clobber_d24, %function
|
||||
.globl abi_test_clobber_d24
|
||||
.hidden abi_test_clobber_d24
|
||||
.align 4
|
||||
abi_test_clobber_d24:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d24, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d24,.-abi_test_clobber_d24
|
||||
.type abi_test_clobber_d25, %function
|
||||
.globl abi_test_clobber_d25
|
||||
.hidden abi_test_clobber_d25
|
||||
.align 4
|
||||
abi_test_clobber_d25:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d25, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d25,.-abi_test_clobber_d25
|
||||
.type abi_test_clobber_d26, %function
|
||||
.globl abi_test_clobber_d26
|
||||
.hidden abi_test_clobber_d26
|
||||
.align 4
|
||||
abi_test_clobber_d26:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d26, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d26,.-abi_test_clobber_d26
|
||||
.type abi_test_clobber_d27, %function
|
||||
.globl abi_test_clobber_d27
|
||||
.hidden abi_test_clobber_d27
|
||||
.align 4
|
||||
abi_test_clobber_d27:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d27, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d27,.-abi_test_clobber_d27
|
||||
.type abi_test_clobber_d28, %function
|
||||
.globl abi_test_clobber_d28
|
||||
.hidden abi_test_clobber_d28
|
||||
.align 4
|
||||
abi_test_clobber_d28:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d28, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d28,.-abi_test_clobber_d28
|
||||
.type abi_test_clobber_d29, %function
|
||||
.globl abi_test_clobber_d29
|
||||
.hidden abi_test_clobber_d29
|
||||
.align 4
|
||||
abi_test_clobber_d29:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d29, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d29,.-abi_test_clobber_d29
|
||||
.type abi_test_clobber_d30, %function
|
||||
.globl abi_test_clobber_d30
|
||||
.hidden abi_test_clobber_d30
|
||||
.align 4
|
||||
abi_test_clobber_d30:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d30, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d30,.-abi_test_clobber_d30
|
||||
.type abi_test_clobber_d31, %function
|
||||
.globl abi_test_clobber_d31
|
||||
.hidden abi_test_clobber_d31
|
||||
.align 4
|
||||
abi_test_clobber_d31:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov d31, xzr
|
||||
ret
|
||||
.size abi_test_clobber_d31,.-abi_test_clobber_d31
|
||||
.type abi_test_clobber_v8_upper, %function
|
||||
.globl abi_test_clobber_v8_upper
|
||||
.hidden abi_test_clobber_v8_upper
|
||||
.align 4
|
||||
abi_test_clobber_v8_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v8.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
|
||||
.type abi_test_clobber_v9_upper, %function
|
||||
.globl abi_test_clobber_v9_upper
|
||||
.hidden abi_test_clobber_v9_upper
|
||||
.align 4
|
||||
abi_test_clobber_v9_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v9.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
|
||||
.type abi_test_clobber_v10_upper, %function
|
||||
.globl abi_test_clobber_v10_upper
|
||||
.hidden abi_test_clobber_v10_upper
|
||||
.align 4
|
||||
abi_test_clobber_v10_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v10.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
|
||||
.type abi_test_clobber_v11_upper, %function
|
||||
.globl abi_test_clobber_v11_upper
|
||||
.hidden abi_test_clobber_v11_upper
|
||||
.align 4
|
||||
abi_test_clobber_v11_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v11.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
|
||||
.type abi_test_clobber_v12_upper, %function
|
||||
.globl abi_test_clobber_v12_upper
|
||||
.hidden abi_test_clobber_v12_upper
|
||||
.align 4
|
||||
abi_test_clobber_v12_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v12.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
|
||||
.type abi_test_clobber_v13_upper, %function
|
||||
.globl abi_test_clobber_v13_upper
|
||||
.hidden abi_test_clobber_v13_upper
|
||||
.align 4
|
||||
abi_test_clobber_v13_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v13.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
|
||||
.type abi_test_clobber_v14_upper, %function
|
||||
.globl abi_test_clobber_v14_upper
|
||||
.hidden abi_test_clobber_v14_upper
|
||||
.align 4
|
||||
abi_test_clobber_v14_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v14.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
|
||||
.type abi_test_clobber_v15_upper, %function
|
||||
.globl abi_test_clobber_v15_upper
|
||||
.hidden abi_test_clobber_v15_upper
|
||||
.align 4
|
||||
abi_test_clobber_v15_upper:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
fmov v15.d[1], xzr
|
||||
ret
|
||||
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
1493
contrib/boringssl-cmake/linux-arm/crypto/chacha/chacha-armv4.S
Normal file
1493
contrib/boringssl-cmake/linux-arm/crypto/chacha/chacha-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,781 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
.arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-)
|
||||
.fpu neon
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
.align 5
|
||||
.Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
|
||||
.text
|
||||
|
||||
.globl aes_hw_set_encrypt_key
|
||||
.hidden aes_hw_set_encrypt_key
|
||||
.type aes_hw_set_encrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_encrypt_key:
|
||||
.Lenc_key:
|
||||
mov r3,#-1
|
||||
cmp r0,#0
|
||||
beq .Lenc_key_abort
|
||||
cmp r2,#0
|
||||
beq .Lenc_key_abort
|
||||
mov r3,#-2
|
||||
cmp r1,#128
|
||||
blt .Lenc_key_abort
|
||||
cmp r1,#256
|
||||
bgt .Lenc_key_abort
|
||||
tst r1,#0x3f
|
||||
bne .Lenc_key_abort
|
||||
|
||||
adr r3,.Lrcon
|
||||
cmp r1,#192
|
||||
|
||||
veor q0,q0,q0
|
||||
vld1.8 {q3},[r0]!
|
||||
mov r1,#8 @ reuse r1
|
||||
vld1.32 {q1,q2},[r3]!
|
||||
|
||||
blt .Loop128
|
||||
beq .L192
|
||||
b .L256
|
||||
|
||||
.align 4
|
||||
.Loop128:
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
bne .Loop128
|
||||
|
||||
vld1.32 {q1},[r3]
|
||||
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
|
||||
vtbl.8 d20,{q3},d4
|
||||
vtbl.8 d21,{q3},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q3},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
veor q3,q3,q10
|
||||
vst1.32 {q3},[r2]
|
||||
add r2,r2,#0x50
|
||||
|
||||
mov r12,#10
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
.L192:
|
||||
vld1.8 {d16},[r0]!
|
||||
vmov.i8 q10,#8 @ borrow q10
|
||||
vst1.32 {q3},[r2]!
|
||||
vsub.i8 q2,q2,q10 @ adjust the mask
|
||||
|
||||
.Loop192:
|
||||
vtbl.8 d20,{q8},d4
|
||||
vtbl.8 d21,{q8},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {d16},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
|
||||
vdup.32 q9,d7[1]
|
||||
veor q9,q9,q8
|
||||
veor q10,q10,q1
|
||||
vext.8 q8,q0,q8,#12
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q8,q8,q9
|
||||
veor q3,q3,q10
|
||||
veor q8,q8,q10
|
||||
vst1.32 {q3},[r2]!
|
||||
bne .Loop192
|
||||
|
||||
mov r12,#12
|
||||
add r2,r2,#0x20
|
||||
b .Ldone
|
||||
|
||||
.align 4
|
||||
.L256:
|
||||
vld1.8 {q8},[r0]
|
||||
mov r1,#7
|
||||
mov r12,#14
|
||||
vst1.32 {q3},[r2]!
|
||||
|
||||
.Loop256:
|
||||
vtbl.8 d20,{q8},d4
|
||||
vtbl.8 d21,{q8},d5
|
||||
vext.8 q9,q0,q3,#12
|
||||
vst1.32 {q8},[r2]!
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
subs r1,r1,#1
|
||||
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q3,q3,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q10,q10,q1
|
||||
veor q3,q3,q9
|
||||
vshl.u8 q1,q1,#1
|
||||
veor q3,q3,q10
|
||||
vst1.32 {q3},[r2]!
|
||||
beq .Ldone
|
||||
|
||||
vdup.32 q10,d7[1]
|
||||
vext.8 q9,q0,q8,#12
|
||||
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
|
||||
|
||||
veor q8,q8,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q8,q8,q9
|
||||
vext.8 q9,q0,q9,#12
|
||||
veor q8,q8,q9
|
||||
|
||||
veor q8,q8,q10
|
||||
b .Loop256
|
||||
|
||||
.Ldone:
|
||||
str r12,[r2]
|
||||
mov r3,#0
|
||||
|
||||
.Lenc_key_abort:
|
||||
mov r0,r3 @ return value
|
||||
|
||||
bx lr
|
||||
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
|
||||
|
||||
.globl aes_hw_set_decrypt_key
|
||||
.hidden aes_hw_set_decrypt_key
|
||||
.type aes_hw_set_decrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_decrypt_key:
|
||||
stmdb sp!,{r4,lr}
|
||||
bl .Lenc_key
|
||||
|
||||
cmp r0,#0
|
||||
bne .Ldec_key_abort
|
||||
|
||||
sub r2,r2,#240 @ restore original r2
|
||||
mov r4,#-16
|
||||
add r0,r2,r12,lsl#4 @ end of key schedule
|
||||
|
||||
vld1.32 {q0},[r2]
|
||||
vld1.32 {q1},[r0]
|
||||
vst1.32 {q0},[r0],r4
|
||||
vst1.32 {q1},[r2]!
|
||||
|
||||
.Loop_imc:
|
||||
vld1.32 {q0},[r2]
|
||||
vld1.32 {q1},[r0]
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
vst1.32 {q0},[r0],r4
|
||||
vst1.32 {q1},[r2]!
|
||||
cmp r0,r2
|
||||
bhi .Loop_imc
|
||||
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
vst1.32 {q0},[r0]
|
||||
|
||||
eor r0,r0,r0 @ return value
|
||||
.Ldec_key_abort:
|
||||
ldmia sp!,{r4,pc}
|
||||
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
|
||||
.globl aes_hw_encrypt
|
||||
.hidden aes_hw_encrypt
|
||||
.type aes_hw_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_encrypt:
|
||||
ldr r3,[r2,#240]
|
||||
vld1.32 {q0},[r2]!
|
||||
vld1.8 {q2},[r0]
|
||||
sub r3,r3,#2
|
||||
vld1.32 {q1},[r2]!
|
||||
|
||||
.Loop_enc:
|
||||
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q0},[r2]!
|
||||
subs r3,r3,#2
|
||||
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q1},[r2]!
|
||||
bgt .Loop_enc
|
||||
|
||||
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
|
||||
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
|
||||
veor q2,q2,q0
|
||||
|
||||
vst1.8 {q2},[r1]
|
||||
bx lr
|
||||
.size aes_hw_encrypt,.-aes_hw_encrypt
|
||||
.globl aes_hw_decrypt
|
||||
.hidden aes_hw_decrypt
|
||||
.type aes_hw_decrypt,%function
|
||||
.align 5
|
||||
aes_hw_decrypt:
|
||||
ldr r3,[r2,#240]
|
||||
vld1.32 {q0},[r2]!
|
||||
vld1.8 {q2},[r0]
|
||||
sub r3,r3,#2
|
||||
vld1.32 {q1},[r2]!
|
||||
|
||||
.Loop_dec:
|
||||
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q0},[r2]!
|
||||
subs r3,r3,#2
|
||||
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q1},[r2]!
|
||||
bgt .Loop_dec
|
||||
|
||||
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
|
||||
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
|
||||
vld1.32 {q0},[r2]
|
||||
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
|
||||
veor q2,q2,q0
|
||||
|
||||
vst1.8 {q2},[r1]
|
||||
bx lr
|
||||
.size aes_hw_decrypt,.-aes_hw_decrypt
|
||||
.globl aes_hw_cbc_encrypt
|
||||
.hidden aes_hw_cbc_encrypt
|
||||
.type aes_hw_cbc_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_cbc_encrypt:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,lr}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load remaining args
|
||||
subs r2,r2,#16
|
||||
mov r8,#16
|
||||
blo .Lcbc_abort
|
||||
moveq r8,#0
|
||||
|
||||
cmp r5,#0 @ en- or decrypting?
|
||||
ldr r5,[r3,#240]
|
||||
and r2,r2,#-16
|
||||
vld1.8 {q6},[r4]
|
||||
vld1.8 {q0},[r0],r8
|
||||
|
||||
vld1.32 {q8,q9},[r3] @ load key schedule...
|
||||
sub r5,r5,#6
|
||||
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
|
||||
sub r5,r5,#2
|
||||
vld1.32 {q10,q11},[r7]!
|
||||
vld1.32 {q12,q13},[r7]!
|
||||
vld1.32 {q14,q15},[r7]!
|
||||
vld1.32 {q7},[r7]
|
||||
|
||||
add r7,r3,#32
|
||||
mov r6,r5
|
||||
beq .Lcbc_dec
|
||||
|
||||
cmp r5,#2
|
||||
veor q0,q0,q6
|
||||
veor q5,q8,q7
|
||||
beq .Lcbc_enc128
|
||||
|
||||
vld1.32 {q2,q3},[r7]
|
||||
add r7,r3,#16
|
||||
add r6,r3,#16*4
|
||||
add r12,r3,#16*5
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
add r14,r3,#16*6
|
||||
add r3,r3,#16*7
|
||||
b .Lenter_cbc_enc
|
||||
|
||||
.align 4
|
||||
.Loop_cbc_enc:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vst1.8 {q6},[r1]!
|
||||
.Lenter_cbc_enc:
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q8},[r6]
|
||||
cmp r5,#4
|
||||
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r12]
|
||||
beq .Lcbc_enc192
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q8},[r14]
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r3]
|
||||
nop
|
||||
|
||||
.Lcbc_enc192:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
subs r2,r2,#16
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
moveq r8,#0
|
||||
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.8 {q8},[r0],r8
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
veor q8,q8,q5
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
veor q6,q0,q7
|
||||
bhs .Loop_cbc_enc
|
||||
|
||||
vst1.8 {q6},[r1]!
|
||||
b .Lcbc_done
|
||||
|
||||
.align 5
|
||||
.Lcbc_enc128:
|
||||
vld1.32 {q2,q3},[r7]
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
b .Lenter_cbc_enc128
|
||||
.Loop_cbc_enc128:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vst1.8 {q6},[r1]!
|
||||
.Lenter_cbc_enc128:
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
subs r2,r2,#16
|
||||
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
moveq r8,#0
|
||||
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
vld1.8 {q8},[r0],r8
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
veor q8,q8,q5
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
veor q6,q0,q7
|
||||
bhs .Loop_cbc_enc128
|
||||
|
||||
vst1.8 {q6},[r1]!
|
||||
b .Lcbc_done
|
||||
.align 5
|
||||
.Lcbc_dec:
|
||||
vld1.8 {q10},[r0]!
|
||||
subs r2,r2,#32 @ bias
|
||||
add r6,r5,#2
|
||||
vorr q3,q0,q0
|
||||
vorr q1,q0,q0
|
||||
vorr q11,q10,q10
|
||||
blo .Lcbc_dec_tail
|
||||
|
||||
vorr q1,q10,q10
|
||||
vld1.8 {q10},[r0]!
|
||||
vorr q2,q0,q0
|
||||
vorr q3,q1,q1
|
||||
vorr q11,q10,q10
|
||||
|
||||
.Loop3x_cbc_dec:
|
||||
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt .Loop3x_cbc_dec
|
||||
|
||||
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q4,q6,q7
|
||||
subs r2,r2,#0x30
|
||||
veor q5,q2,q7
|
||||
movlo r6,r2 @ r6, r6, is zero at this point
|
||||
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q9,q3,q7
|
||||
add r0,r0,r6 @ r0 is adjusted in such way that
|
||||
@ at exit from the loop q1-q10
|
||||
@ are loaded with last "words"
|
||||
vorr q6,q11,q11
|
||||
mov r7,r3
|
||||
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q2},[r0]!
|
||||
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q3},[r0]!
|
||||
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
|
||||
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
|
||||
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.8 {q11},[r0]!
|
||||
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
|
||||
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
|
||||
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
|
||||
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
|
||||
add r6,r5,#2
|
||||
veor q4,q4,q0
|
||||
veor q5,q5,q1
|
||||
veor q10,q10,q9
|
||||
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
|
||||
vst1.8 {q4},[r1]!
|
||||
vorr q0,q2,q2
|
||||
vst1.8 {q5},[r1]!
|
||||
vorr q1,q3,q3
|
||||
vst1.8 {q10},[r1]!
|
||||
vorr q10,q11,q11
|
||||
bhs .Loop3x_cbc_dec
|
||||
|
||||
cmn r2,#0x30
|
||||
beq .Lcbc_done
|
||||
nop
|
||||
|
||||
.Lcbc_dec_tail:
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt .Lcbc_dec_tail
|
||||
|
||||
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
cmn r2,#0x20
|
||||
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q5,q6,q7
|
||||
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
|
||||
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
|
||||
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
|
||||
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
|
||||
veor q9,q3,q7
|
||||
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
|
||||
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
|
||||
beq .Lcbc_dec_one
|
||||
veor q5,q5,q1
|
||||
veor q9,q9,q10
|
||||
vorr q6,q11,q11
|
||||
vst1.8 {q5},[r1]!
|
||||
vst1.8 {q9},[r1]!
|
||||
b .Lcbc_done
|
||||
|
||||
.Lcbc_dec_one:
|
||||
veor q5,q5,q10
|
||||
vorr q6,q11,q11
|
||||
vst1.8 {q5},[r1]!
|
||||
|
||||
.Lcbc_done:
|
||||
vst1.8 {q6},[r4]
|
||||
.Lcbc_abort:
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,pc}
|
||||
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
|
||||
.globl aes_hw_ctr32_encrypt_blocks
|
||||
.hidden aes_hw_ctr32_encrypt_blocks
|
||||
.type aes_hw_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldr r4, [ip] @ load remaining arg
|
||||
ldr r5,[r3,#240]
|
||||
|
||||
ldr r8, [r4, #12]
|
||||
vld1.32 {q0},[r4]
|
||||
|
||||
vld1.32 {q8,q9},[r3] @ load key schedule...
|
||||
sub r5,r5,#4
|
||||
mov r12,#16
|
||||
cmp r2,#2
|
||||
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
|
||||
sub r5,r5,#2
|
||||
vld1.32 {q12,q13},[r7]!
|
||||
vld1.32 {q14,q15},[r7]!
|
||||
vld1.32 {q7},[r7]
|
||||
add r7,r3,#32
|
||||
mov r6,r5
|
||||
movlo r12,#0
|
||||
#ifndef __ARMEB__
|
||||
rev r8, r8
|
||||
#endif
|
||||
vorr q1,q0,q0
|
||||
add r10, r8, #1
|
||||
vorr q10,q0,q0
|
||||
add r8, r8, #2
|
||||
vorr q6,q0,q0
|
||||
rev r10, r10
|
||||
vmov.32 d3[1],r10
|
||||
bls .Lctr32_tail
|
||||
rev r12, r8
|
||||
sub r2,r2,#3 @ bias
|
||||
vmov.32 d21[1],r12
|
||||
b .Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
.Loop3x_ctr32:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt .Loop3x_ctr32
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
|
||||
vld1.8 {q2},[r0]!
|
||||
vorr q0,q6,q6
|
||||
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
|
||||
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
|
||||
vld1.8 {q3},[r0]!
|
||||
vorr q1,q6,q6
|
||||
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
vld1.8 {q11},[r0]!
|
||||
mov r7,r3
|
||||
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
|
||||
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
|
||||
vorr q10,q6,q6
|
||||
add r9,r8,#1
|
||||
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
veor q2,q2,q7
|
||||
add r10,r8,#2
|
||||
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
veor q3,q3,q7
|
||||
add r8,r8,#3
|
||||
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
veor q11,q11,q7
|
||||
rev r9,r9
|
||||
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
vmov.32 d1[1], r9
|
||||
rev r10,r10
|
||||
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
|
||||
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
|
||||
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
|
||||
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
|
||||
vmov.32 d3[1], r10
|
||||
rev r12,r8
|
||||
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
|
||||
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
|
||||
vmov.32 d21[1], r12
|
||||
subs r2,r2,#3
|
||||
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
|
||||
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
|
||||
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
|
||||
|
||||
veor q2,q2,q4
|
||||
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
|
||||
vst1.8 {q2},[r1]!
|
||||
veor q3,q3,q5
|
||||
mov r6,r5
|
||||
vst1.8 {q3},[r1]!
|
||||
veor q11,q11,q9
|
||||
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
|
||||
vst1.8 {q11},[r1]!
|
||||
bhs .Loop3x_ctr32
|
||||
|
||||
adds r2,r2,#3
|
||||
beq .Lctr32_done
|
||||
cmp r2,#1
|
||||
mov r12,#16
|
||||
moveq r12,#0
|
||||
|
||||
.Lctr32_tail:
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.32 {q8},[r7]!
|
||||
subs r6,r6,#2
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.32 {q9},[r7]!
|
||||
bgt .Lctr32_tail
|
||||
|
||||
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.8 {q2},[r0],r12
|
||||
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
vld1.8 {q3},[r0]
|
||||
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
veor q2,q2,q7
|
||||
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
|
||||
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
|
||||
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
|
||||
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
|
||||
veor q3,q3,q7
|
||||
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
|
||||
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
|
||||
|
||||
cmp r2,#1
|
||||
veor q2,q2,q0
|
||||
veor q3,q3,q1
|
||||
vst1.8 {q2},[r1]!
|
||||
beq .Lctr32_done
|
||||
vst1.8 {q3},[r1]
|
||||
|
||||
.Lctr32_done:
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
|
||||
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
977
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/armv4-mont.S
Normal file
977
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/armv4-mont.S
Normal file
@ -0,0 +1,977 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
|
||||
.arch armv7-a
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__)
|
||||
.syntax unified
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.align 5
|
||||
.LOPENSSL_armcap:
|
||||
.word OPENSSL_armcap_P-.Lbn_mul_mont
|
||||
#endif
|
||||
|
||||
.globl bn_mul_mont
|
||||
.hidden bn_mul_mont
|
||||
.type bn_mul_mont,%function
|
||||
|
||||
.align 5
|
||||
bn_mul_mont:
|
||||
.Lbn_mul_mont:
|
||||
ldr ip,[sp,#4] @ load num
|
||||
stmdb sp!,{r0,r2} @ sp points at argument block
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
tst ip,#7
|
||||
bne .Lialu
|
||||
adr r0,.Lbn_mul_mont
|
||||
ldr r2,.LOPENSSL_armcap
|
||||
ldr r0,[r0,r2]
|
||||
#ifdef __APPLE__
|
||||
ldr r0,[r0]
|
||||
#endif
|
||||
tst r0,#ARMV7_NEON @ NEON available?
|
||||
ldmia sp, {r0,r2}
|
||||
beq .Lialu
|
||||
add sp,sp,#8
|
||||
b bn_mul8x_mont_neon
|
||||
.align 4
|
||||
.Lialu:
|
||||
#endif
|
||||
cmp ip,#2
|
||||
mov r0,ip @ load num
|
||||
#ifdef __thumb2__
|
||||
ittt lt
|
||||
#endif
|
||||
movlt r0,#0
|
||||
addlt sp,sp,#2*4
|
||||
blt .Labrt
|
||||
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
|
||||
|
||||
mov r0,r0,lsl#2 @ rescale r0 for byte count
|
||||
sub sp,sp,r0 @ alloca(4*num)
|
||||
sub sp,sp,#4 @ +extra dword
|
||||
sub r0,r0,#4 @ "num=num-1"
|
||||
add r4,r2,r0 @ &bp[num-1]
|
||||
|
||||
add r0,sp,r0 @ r0 to point at &tp[num-1]
|
||||
ldr r8,[r0,#14*4] @ &n0
|
||||
ldr r2,[r2] @ bp[0]
|
||||
ldr r5,[r1],#4 @ ap[0],ap++
|
||||
ldr r6,[r3],#4 @ np[0],np++
|
||||
ldr r8,[r8] @ *n0
|
||||
str r4,[r0,#15*4] @ save &bp[num]
|
||||
|
||||
umull r10,r11,r5,r2 @ ap[0]*bp[0]
|
||||
str r8,[r0,#14*4] @ save n0 value
|
||||
mul r8,r10,r8 @ "tp[0]"*n0
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
|
||||
mov r4,sp
|
||||
|
||||
.L1st:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
mov r10,r11
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne .L1st
|
||||
|
||||
adds r12,r12,r11
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
mov r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
mov r7,sp
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
.Louter:
|
||||
sub r7,r0,r7 @ "original" r0-1 value
|
||||
sub r1,r1,r7 @ "rewind" ap to &ap[1]
|
||||
ldr r2,[r4,#4]! @ *(++bp)
|
||||
sub r3,r3,r7 @ "rewind" np to &np[1]
|
||||
ldr r5,[r1,#-4] @ ap[0]
|
||||
ldr r10,[sp] @ tp[0]
|
||||
ldr r6,[r3,#-4] @ np[0]
|
||||
ldr r7,[sp,#4] @ tp[1]
|
||||
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
|
||||
str r4,[r0,#13*4] @ save bp
|
||||
mul r8,r10,r8
|
||||
mov r12,#0
|
||||
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
|
||||
mov r4,sp
|
||||
|
||||
.Linner:
|
||||
ldr r5,[r1],#4 @ ap[j],ap++
|
||||
adds r10,r11,r7 @ +=tp[j]
|
||||
ldr r6,[r3],#4 @ np[j],np++
|
||||
mov r11,#0
|
||||
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
|
||||
mov r14,#0
|
||||
umlal r12,r14,r6,r8 @ np[j]*n0
|
||||
adc r11,r11,#0
|
||||
ldr r7,[r4,#8] @ tp[j+1]
|
||||
adds r12,r12,r10
|
||||
str r12,[r4],#4 @ tp[j-1]=,tp++
|
||||
adc r12,r14,#0
|
||||
cmp r4,r0
|
||||
bne .Linner
|
||||
|
||||
adds r12,r12,r11
|
||||
mov r14,#0
|
||||
ldr r4,[r0,#13*4] @ restore bp
|
||||
adc r14,r14,#0
|
||||
ldr r8,[r0,#14*4] @ restore n0
|
||||
adds r12,r12,r7
|
||||
ldr r7,[r0,#15*4] @ restore &bp[num]
|
||||
adc r14,r14,#0
|
||||
str r12,[r0] @ tp[num-1]=
|
||||
str r14,[r0,#4] @ tp[num]=
|
||||
|
||||
cmp r4,r7
|
||||
#ifdef __thumb2__
|
||||
itt ne
|
||||
#endif
|
||||
movne r7,sp
|
||||
bne .Louter
|
||||
|
||||
ldr r2,[r0,#12*4] @ pull rp
|
||||
mov r5,sp
|
||||
add r0,r0,#4 @ r0 to point at &tp[num]
|
||||
sub r5,r0,r5 @ "original" num value
|
||||
mov r4,sp @ "rewind" r4
|
||||
mov r1,r4 @ "borrow" r1
|
||||
sub r3,r3,r5 @ "rewind" r3 to &np[0]
|
||||
|
||||
subs r7,r7,r7 @ "clear" carry flag
|
||||
.Lsub: ldr r7,[r4],#4
|
||||
ldr r6,[r3],#4
|
||||
sbcs r7,r7,r6 @ tp[j]-np[j]
|
||||
str r7,[r2],#4 @ rp[j]=
|
||||
teq r4,r0 @ preserve carry
|
||||
bne .Lsub
|
||||
sbcs r14,r14,#0 @ upmost carry
|
||||
mov r4,sp @ "rewind" r4
|
||||
sub r2,r2,r5 @ "rewind" r2
|
||||
|
||||
.Lcopy: ldr r7,[r4] @ conditional copy
|
||||
ldr r5,[r2]
|
||||
str sp,[r4],#4 @ zap tp
|
||||
#ifdef __thumb2__
|
||||
it cc
|
||||
#endif
|
||||
movcc r5,r7
|
||||
str r5,[r2],#4
|
||||
teq r4,r0 @ preserve carry
|
||||
bne .Lcopy
|
||||
|
||||
mov sp,r0
|
||||
add sp,sp,#4 @ skip over tp[num+1]
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
|
||||
add sp,sp,#2*4 @ skip over {r0,r2}
|
||||
mov r0,#1
|
||||
.Labrt:
|
||||
#if __ARM_ARCH__>=5
|
||||
bx lr @ bx lr
|
||||
#else
|
||||
tst lr,#1
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size bn_mul_mont,.-bn_mul_mont
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
.type bn_mul8x_mont_neon,%function
|
||||
.align 5
|
||||
bn_mul8x_mont_neon:
|
||||
mov ip,sp
|
||||
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
|
||||
ldmia ip,{r4,r5} @ load rest of parameter block
|
||||
mov ip,sp
|
||||
|
||||
cmp r5,#8
|
||||
bhi .LNEON_8n
|
||||
|
||||
@ special case for r5==8, everything is in register bank...
|
||||
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
sub r7,sp,r5,lsl#4
|
||||
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
|
||||
and r7,r7,#-64
|
||||
vld1.32 {d30[0]}, [r4,:32]
|
||||
mov sp,r7 @ alloca
|
||||
vzip.16 d28,d8
|
||||
|
||||
vmull.u32 q6,d28,d0[0]
|
||||
vmull.u32 q7,d28,d0[1]
|
||||
vmull.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmull.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmull.u32 q10,d28,d2[0]
|
||||
vld1.32 {d4,d5,d6,d7}, [r3]!
|
||||
vmull.u32 q11,d28,d2[1]
|
||||
vmull.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmull.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
sub r9,r5,#1
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
b .LNEON_outer8
|
||||
|
||||
.align 4
|
||||
.LNEON_outer8:
|
||||
vld1.32 {d28[0]}, [r2,:32]!
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
vadd.u64 d12,d12,d10
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
|
||||
vadd.u64 d29,d29,d12
|
||||
veor d8,d8,d8
|
||||
subs r9,r9,#1
|
||||
vmul.u32 d29,d29,d30
|
||||
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmov q5,q6
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmov q6,q7
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmov q7,q8
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vmov q8,q9
|
||||
vmov q9,q10
|
||||
vshr.u64 d10,d10,#16
|
||||
vmov q10,q11
|
||||
vmov q11,q12
|
||||
vadd.u64 d10,d10,d11
|
||||
vmov q12,q13
|
||||
veor q13,q13
|
||||
vshr.u64 d10,d10,#16
|
||||
|
||||
bne .LNEON_outer8
|
||||
|
||||
vadd.u64 d12,d12,d10
|
||||
mov r7,sp
|
||||
vshr.u64 d10,d12,#16
|
||||
mov r8,r5
|
||||
vadd.u64 d13,d13,d10
|
||||
add r6,sp,#96
|
||||
vshr.u64 d10,d13,#16
|
||||
vzip.16 d12,d13
|
||||
|
||||
b .LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
.LNEON_8n:
|
||||
veor q6,q6,q6
|
||||
sub r7,sp,#128
|
||||
veor q7,q7,q7
|
||||
sub r7,r7,r5,lsl#4
|
||||
veor q8,q8,q8
|
||||
and r7,r7,#-64
|
||||
veor q9,q9,q9
|
||||
mov sp,r7 @ alloca
|
||||
veor q10,q10,q10
|
||||
add r7,r7,#256
|
||||
veor q11,q11,q11
|
||||
sub r8,r5,#8
|
||||
veor q12,q12,q12
|
||||
veor q13,q13,q13
|
||||
|
||||
.LNEON_8n_init:
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
subs r8,r8,#8
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12,q13},[r7,:256]!
|
||||
bne .LNEON_8n_init
|
||||
|
||||
add r6,sp,#256
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
add r10,sp,#8
|
||||
vld1.32 {d30[0]},[r4,:32]
|
||||
mov r9,r5
|
||||
b .LNEON_8n_outer
|
||||
|
||||
.align 4
|
||||
.LNEON_8n_outer:
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
veor d8,d8,d8
|
||||
vzip.16 d28,d8
|
||||
add r7,sp,#128
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vshl.i64 d29,d13,#16
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
vadd.u64 d29,d29,d12
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vadd.u64 d12,d12,d13
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vshr.u64 d12,d12,#16
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vadd.u64 d14,d14,d12
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]!
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
vshl.i64 d29,d15,#16
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vadd.u64 d29,d29,d14
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vadd.u64 d14,d14,d15
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vshr.u64 d14,d14,#16
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vadd.u64 d16,d16,d14
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]!
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
vshl.i64 d29,d17,#16
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vadd.u64 d29,d29,d16
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vadd.u64 d16,d16,d17
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vshr.u64 d16,d16,#16
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vadd.u64 d18,d18,d16
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]!
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
vshl.i64 d29,d19,#16
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vadd.u64 d29,d29,d18
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vadd.u64 d18,d18,d19
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vshr.u64 d18,d18,#16
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vadd.u64 d20,d20,d18
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]!
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
vshl.i64 d29,d21,#16
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vadd.u64 d29,d29,d20
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vadd.u64 d20,d20,d21
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vshr.u64 d20,d20,#16
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vadd.u64 d22,d22,d20
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]!
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
vshl.i64 d29,d23,#16
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vadd.u64 d29,d29,d22
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vadd.u64 d22,d22,d23
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vshr.u64 d22,d22,#16
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vadd.u64 d24,d24,d22
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]!
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
vshl.i64 d29,d25,#16
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vadd.u64 d29,d29,d24
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28[0]},[r2,:32]! @ *b++
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
veor d10,d10,d10
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vzip.16 d28,d10
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vadd.u64 d24,d24,d25
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vshr.u64 d24,d24,#16
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vadd.u64 d26,d26,d24
|
||||
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]!
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
veor d8,d8,d8
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
vshl.i64 d29,d27,#16
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vadd.u64 d29,d29,d26
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmul.u32 d29,d29,d30
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vzip.16 d29,d8
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vadd.u64 d26,d26,d27
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vshr.u64 d26,d26,#16
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
vadd.u64 d12,d12,d26
|
||||
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
|
||||
add r10,sp,#8 @ rewind
|
||||
sub r8,r5,#8
|
||||
b .LNEON_8n_inner
|
||||
|
||||
.align 4
|
||||
.LNEON_8n_inner:
|
||||
subs r8,r8,#8
|
||||
vmlal.u32 q6,d28,d0[0]
|
||||
vld1.64 {q13},[r6,:128]
|
||||
vmlal.u32 q7,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
|
||||
vmlal.u32 q8,d28,d1[0]
|
||||
vld1.32 {d4,d5,d6,d7},[r3]!
|
||||
vmlal.u32 q9,d28,d1[1]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d2[0]
|
||||
vmlal.u32 q11,d28,d2[1]
|
||||
vmlal.u32 q12,d28,d3[0]
|
||||
vmlal.u32 q13,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
|
||||
vmlal.u32 q6,d29,d4[0]
|
||||
vmlal.u32 q7,d29,d4[1]
|
||||
vmlal.u32 q8,d29,d5[0]
|
||||
vmlal.u32 q9,d29,d5[1]
|
||||
vmlal.u32 q10,d29,d6[0]
|
||||
vmlal.u32 q11,d29,d6[1]
|
||||
vmlal.u32 q12,d29,d7[0]
|
||||
vmlal.u32 q13,d29,d7[1]
|
||||
vst1.64 {q6},[r7,:128]!
|
||||
vmlal.u32 q7,d28,d0[0]
|
||||
vld1.64 {q6},[r6,:128]
|
||||
vmlal.u32 q8,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
|
||||
vmlal.u32 q9,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q10,d28,d1[1]
|
||||
vmlal.u32 q11,d28,d2[0]
|
||||
vmlal.u32 q12,d28,d2[1]
|
||||
vmlal.u32 q13,d28,d3[0]
|
||||
vmlal.u32 q6,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
|
||||
vmlal.u32 q7,d29,d4[0]
|
||||
vmlal.u32 q8,d29,d4[1]
|
||||
vmlal.u32 q9,d29,d5[0]
|
||||
vmlal.u32 q10,d29,d5[1]
|
||||
vmlal.u32 q11,d29,d6[0]
|
||||
vmlal.u32 q12,d29,d6[1]
|
||||
vmlal.u32 q13,d29,d7[0]
|
||||
vmlal.u32 q6,d29,d7[1]
|
||||
vst1.64 {q7},[r7,:128]!
|
||||
vmlal.u32 q8,d28,d0[0]
|
||||
vld1.64 {q7},[r6,:128]
|
||||
vmlal.u32 q9,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
|
||||
vmlal.u32 q10,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q11,d28,d1[1]
|
||||
vmlal.u32 q12,d28,d2[0]
|
||||
vmlal.u32 q13,d28,d2[1]
|
||||
vmlal.u32 q6,d28,d3[0]
|
||||
vmlal.u32 q7,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
|
||||
vmlal.u32 q8,d29,d4[0]
|
||||
vmlal.u32 q9,d29,d4[1]
|
||||
vmlal.u32 q10,d29,d5[0]
|
||||
vmlal.u32 q11,d29,d5[1]
|
||||
vmlal.u32 q12,d29,d6[0]
|
||||
vmlal.u32 q13,d29,d6[1]
|
||||
vmlal.u32 q6,d29,d7[0]
|
||||
vmlal.u32 q7,d29,d7[1]
|
||||
vst1.64 {q8},[r7,:128]!
|
||||
vmlal.u32 q9,d28,d0[0]
|
||||
vld1.64 {q8},[r6,:128]
|
||||
vmlal.u32 q10,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
|
||||
vmlal.u32 q11,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q12,d28,d1[1]
|
||||
vmlal.u32 q13,d28,d2[0]
|
||||
vmlal.u32 q6,d28,d2[1]
|
||||
vmlal.u32 q7,d28,d3[0]
|
||||
vmlal.u32 q8,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
|
||||
vmlal.u32 q9,d29,d4[0]
|
||||
vmlal.u32 q10,d29,d4[1]
|
||||
vmlal.u32 q11,d29,d5[0]
|
||||
vmlal.u32 q12,d29,d5[1]
|
||||
vmlal.u32 q13,d29,d6[0]
|
||||
vmlal.u32 q6,d29,d6[1]
|
||||
vmlal.u32 q7,d29,d7[0]
|
||||
vmlal.u32 q8,d29,d7[1]
|
||||
vst1.64 {q9},[r7,:128]!
|
||||
vmlal.u32 q10,d28,d0[0]
|
||||
vld1.64 {q9},[r6,:128]
|
||||
vmlal.u32 q11,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
|
||||
vmlal.u32 q12,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q13,d28,d1[1]
|
||||
vmlal.u32 q6,d28,d2[0]
|
||||
vmlal.u32 q7,d28,d2[1]
|
||||
vmlal.u32 q8,d28,d3[0]
|
||||
vmlal.u32 q9,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
|
||||
vmlal.u32 q10,d29,d4[0]
|
||||
vmlal.u32 q11,d29,d4[1]
|
||||
vmlal.u32 q12,d29,d5[0]
|
||||
vmlal.u32 q13,d29,d5[1]
|
||||
vmlal.u32 q6,d29,d6[0]
|
||||
vmlal.u32 q7,d29,d6[1]
|
||||
vmlal.u32 q8,d29,d7[0]
|
||||
vmlal.u32 q9,d29,d7[1]
|
||||
vst1.64 {q10},[r7,:128]!
|
||||
vmlal.u32 q11,d28,d0[0]
|
||||
vld1.64 {q10},[r6,:128]
|
||||
vmlal.u32 q12,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
|
||||
vmlal.u32 q13,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q6,d28,d1[1]
|
||||
vmlal.u32 q7,d28,d2[0]
|
||||
vmlal.u32 q8,d28,d2[1]
|
||||
vmlal.u32 q9,d28,d3[0]
|
||||
vmlal.u32 q10,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
|
||||
vmlal.u32 q11,d29,d4[0]
|
||||
vmlal.u32 q12,d29,d4[1]
|
||||
vmlal.u32 q13,d29,d5[0]
|
||||
vmlal.u32 q6,d29,d5[1]
|
||||
vmlal.u32 q7,d29,d6[0]
|
||||
vmlal.u32 q8,d29,d6[1]
|
||||
vmlal.u32 q9,d29,d7[0]
|
||||
vmlal.u32 q10,d29,d7[1]
|
||||
vst1.64 {q11},[r7,:128]!
|
||||
vmlal.u32 q12,d28,d0[0]
|
||||
vld1.64 {q11},[r6,:128]
|
||||
vmlal.u32 q13,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
|
||||
vmlal.u32 q6,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q7,d28,d1[1]
|
||||
vmlal.u32 q8,d28,d2[0]
|
||||
vmlal.u32 q9,d28,d2[1]
|
||||
vmlal.u32 q10,d28,d3[0]
|
||||
vmlal.u32 q11,d28,d3[1]
|
||||
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
|
||||
vmlal.u32 q12,d29,d4[0]
|
||||
vmlal.u32 q13,d29,d4[1]
|
||||
vmlal.u32 q6,d29,d5[0]
|
||||
vmlal.u32 q7,d29,d5[1]
|
||||
vmlal.u32 q8,d29,d6[0]
|
||||
vmlal.u32 q9,d29,d6[1]
|
||||
vmlal.u32 q10,d29,d7[0]
|
||||
vmlal.u32 q11,d29,d7[1]
|
||||
vst1.64 {q12},[r7,:128]!
|
||||
vmlal.u32 q13,d28,d0[0]
|
||||
vld1.64 {q12},[r6,:128]
|
||||
vmlal.u32 q6,d28,d0[1]
|
||||
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
|
||||
vmlal.u32 q7,d28,d1[0]
|
||||
it ne
|
||||
addne r6,r6,#16 @ don't advance in last iteration
|
||||
vmlal.u32 q8,d28,d1[1]
|
||||
vmlal.u32 q9,d28,d2[0]
|
||||
vmlal.u32 q10,d28,d2[1]
|
||||
vmlal.u32 q11,d28,d3[0]
|
||||
vmlal.u32 q12,d28,d3[1]
|
||||
it eq
|
||||
subeq r1,r1,r5,lsl#2 @ rewind
|
||||
vmlal.u32 q13,d29,d4[0]
|
||||
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
|
||||
vmlal.u32 q6,d29,d4[1]
|
||||
vld1.32 {d0,d1,d2,d3},[r1]!
|
||||
vmlal.u32 q7,d29,d5[0]
|
||||
add r10,sp,#8 @ rewind
|
||||
vmlal.u32 q8,d29,d5[1]
|
||||
vmlal.u32 q9,d29,d6[0]
|
||||
vmlal.u32 q10,d29,d6[1]
|
||||
vmlal.u32 q11,d29,d7[0]
|
||||
vst1.64 {q13},[r7,:128]!
|
||||
vmlal.u32 q12,d29,d7[1]
|
||||
|
||||
bne .LNEON_8n_inner
|
||||
add r6,sp,#128
|
||||
vst1.64 {q6,q7},[r7,:256]!
|
||||
veor q2,q2,q2 @ d4-d5
|
||||
vst1.64 {q8,q9},[r7,:256]!
|
||||
veor q3,q3,q3 @ d6-d7
|
||||
vst1.64 {q10,q11},[r7,:256]!
|
||||
vst1.64 {q12},[r7,:128]
|
||||
|
||||
subs r9,r9,#8
|
||||
vld1.64 {q6,q7},[r6,:256]!
|
||||
vld1.64 {q8,q9},[r6,:256]!
|
||||
vld1.64 {q10,q11},[r6,:256]!
|
||||
vld1.64 {q12,q13},[r6,:256]!
|
||||
|
||||
itt ne
|
||||
subne r3,r3,r5,lsl#2 @ rewind
|
||||
bne .LNEON_8n_outer
|
||||
|
||||
add r7,sp,#128
|
||||
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
|
||||
vshr.u64 d10,d12,#16
|
||||
vst1.64 {q2,q3},[sp,:256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vst1.64 {q2,q3}, [sp,:256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
mov r8,r5
|
||||
b .LNEON_tail_entry
|
||||
|
||||
.align 4
|
||||
.LNEON_tail:
|
||||
vadd.u64 d12,d12,d10
|
||||
vshr.u64 d10,d12,#16
|
||||
vld1.64 {q8,q9}, [r6, :256]!
|
||||
vadd.u64 d13,d13,d10
|
||||
vld1.64 {q10,q11}, [r6, :256]!
|
||||
vshr.u64 d10,d13,#16
|
||||
vld1.64 {q12,q13}, [r6, :256]!
|
||||
vzip.16 d12,d13
|
||||
|
||||
.LNEON_tail_entry:
|
||||
vadd.u64 d14,d14,d10
|
||||
vst1.32 {d12[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d14,#16
|
||||
vadd.u64 d15,d15,d10
|
||||
vshr.u64 d10,d15,#16
|
||||
vzip.16 d14,d15
|
||||
vadd.u64 d16,d16,d10
|
||||
vst1.32 {d14[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d16,#16
|
||||
vadd.u64 d17,d17,d10
|
||||
vshr.u64 d10,d17,#16
|
||||
vzip.16 d16,d17
|
||||
vadd.u64 d18,d18,d10
|
||||
vst1.32 {d16[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d18,#16
|
||||
vadd.u64 d19,d19,d10
|
||||
vshr.u64 d10,d19,#16
|
||||
vzip.16 d18,d19
|
||||
vadd.u64 d20,d20,d10
|
||||
vst1.32 {d18[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d20,#16
|
||||
vadd.u64 d21,d21,d10
|
||||
vshr.u64 d10,d21,#16
|
||||
vzip.16 d20,d21
|
||||
vadd.u64 d22,d22,d10
|
||||
vst1.32 {d20[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d22,#16
|
||||
vadd.u64 d23,d23,d10
|
||||
vshr.u64 d10,d23,#16
|
||||
vzip.16 d22,d23
|
||||
vadd.u64 d24,d24,d10
|
||||
vst1.32 {d22[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d24,#16
|
||||
vadd.u64 d25,d25,d10
|
||||
vshr.u64 d10,d25,#16
|
||||
vzip.16 d24,d25
|
||||
vadd.u64 d26,d26,d10
|
||||
vst1.32 {d24[0]}, [r7, :32]!
|
||||
vshr.u64 d10,d26,#16
|
||||
vadd.u64 d27,d27,d10
|
||||
vshr.u64 d10,d27,#16
|
||||
vzip.16 d26,d27
|
||||
vld1.64 {q6,q7}, [r6, :256]!
|
||||
subs r8,r8,#8
|
||||
vst1.32 {d26[0]}, [r7, :32]!
|
||||
bne .LNEON_tail
|
||||
|
||||
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
|
||||
sub r3,r3,r5,lsl#2 @ rewind r3
|
||||
subs r1,sp,#0 @ clear carry flag
|
||||
add r2,sp,r5,lsl#2
|
||||
|
||||
.LNEON_sub:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r3!, {r8,r9,r10,r11}
|
||||
sbcs r8, r4,r8
|
||||
sbcs r9, r5,r9
|
||||
sbcs r10,r6,r10
|
||||
sbcs r11,r7,r11
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne .LNEON_sub
|
||||
|
||||
ldr r10, [r1] @ load top-most bit
|
||||
mov r11,sp
|
||||
veor q0,q0,q0
|
||||
sub r11,r2,r11 @ this is num*4
|
||||
veor q1,q1,q1
|
||||
mov r1,sp
|
||||
sub r0,r0,r11 @ rewind r0
|
||||
mov r3,r2 @ second 3/4th of frame
|
||||
sbcs r10,r10,#0 @ result is carry flag
|
||||
|
||||
.LNEON_copy_n_zap:
|
||||
ldmia r1!, {r4,r5,r6,r7}
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
ldmia r1, {r4,r5,r6,r7}
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
sub r1,r1,#16
|
||||
ldmia r0, {r8,r9,r10,r11}
|
||||
it cc
|
||||
movcc r8, r4
|
||||
vst1.64 {q0,q1}, [r1,:256]! @ wipe
|
||||
itt cc
|
||||
movcc r9, r5
|
||||
movcc r10,r6
|
||||
vst1.64 {q0,q1}, [r3,:256]! @ wipe
|
||||
it cc
|
||||
movcc r11,r7
|
||||
teq r1,r2 @ preserves carry
|
||||
stmia r0!, {r8,r9,r10,r11}
|
||||
bne .LNEON_copy_n_zap
|
||||
|
||||
mov sp,ip
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
bx lr @ bx lr
|
||||
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
|
||||
#endif
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
1529
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/bsaes-armv7.S
Normal file
1529
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/bsaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,255 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
|
||||
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
|
||||
@ instructions are in aesv8-armx.pl.)
|
||||
.arch armv7-a
|
||||
|
||||
.text
|
||||
#if defined(__thumb2__) || defined(__clang__)
|
||||
.syntax unified
|
||||
#define ldrplb ldrbpl
|
||||
#define ldrneb ldrbne
|
||||
#endif
|
||||
#if defined(__thumb2__)
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
#endif
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.arch armv7-a
|
||||
.fpu neon
|
||||
|
||||
.globl gcm_init_neon
|
||||
.hidden gcm_init_neon
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
vld1.64 d7,[r1]! @ load H
|
||||
vmov.i8 q8,#0xe1
|
||||
vld1.64 d6,[r1]
|
||||
vshl.i64 d17,#57
|
||||
vshr.u64 d16,#63 @ t0=0xc2....01
|
||||
vdup.8 q9,d7[7]
|
||||
vshr.u64 d26,d6,#63
|
||||
vshr.s8 q9,#7 @ broadcast carry bit
|
||||
vshl.i64 q3,q3,#1
|
||||
vand q8,q8,q9
|
||||
vorr d7,d26 @ H<<<=1
|
||||
veor q3,q3,q8 @ twisted H
|
||||
vstmia r0,{q3}
|
||||
|
||||
bx lr @ bx lr
|
||||
.size gcm_init_neon,.-gcm_init_neon
|
||||
|
||||
.globl gcm_gmult_neon
|
||||
.hidden gcm_gmult_neon
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
vld1.64 d7,[r0]! @ load Xi
|
||||
vld1.64 d6,[r0]!
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
mov r3,#16
|
||||
b .Lgmult_neon
|
||||
.size gcm_gmult_neon,.-gcm_gmult_neon
|
||||
|
||||
.globl gcm_ghash_neon
|
||||
.hidden gcm_ghash_neon
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
gcm_ghash_neon:
|
||||
vld1.64 d1,[r0]! @ load Xi
|
||||
vld1.64 d0,[r0]!
|
||||
vmov.i64 d29,#0x0000ffffffffffff
|
||||
vldmia r1,{d26,d27} @ load twisted H
|
||||
vmov.i64 d30,#0x00000000ffffffff
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vmov.i64 d31,#0x000000000000ffff
|
||||
veor d28,d26,d27 @ Karatsuba pre-processing
|
||||
|
||||
.Loop_neon:
|
||||
vld1.64 d7,[r2]! @ load inp
|
||||
vld1.64 d6,[r2]!
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q3,q3
|
||||
#endif
|
||||
veor q3,q0 @ inp^=Xi
|
||||
.Lgmult_neon:
|
||||
vext.8 d16, d26, d26, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d0, d6, d6, #1 @ B1
|
||||
vmull.p8 q0, d26, d0 @ E = A*B1
|
||||
vext.8 d18, d26, d26, #2 @ A2
|
||||
vmull.p8 q9, d18, d6 @ H = A2*B
|
||||
vext.8 d22, d6, d6, #2 @ B2
|
||||
vmull.p8 q11, d26, d22 @ G = A*B2
|
||||
vext.8 d20, d26, d26, #3 @ A3
|
||||
veor q8, q8, q0 @ L = E + F
|
||||
vmull.p8 q10, d20, d6 @ J = A3*B
|
||||
vext.8 d0, d6, d6, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q0, d26, d0 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d6, d6, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d26, d22 @ K = A*B4
|
||||
veor q10, q10, q0 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q0, d26, d6 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q0, q0, q8
|
||||
veor q0, q0, q10
|
||||
veor d6,d6,d7 @ Karatsuba pre-processing
|
||||
vext.8 d16, d28, d28, #1 @ A1
|
||||
vmull.p8 q8, d16, d6 @ F = A1*B
|
||||
vext.8 d2, d6, d6, #1 @ B1
|
||||
vmull.p8 q1, d28, d2 @ E = A*B1
|
||||
vext.8 d18, d28, d28, #2 @ A2
|
||||
vmull.p8 q9, d18, d6 @ H = A2*B
|
||||
vext.8 d22, d6, d6, #2 @ B2
|
||||
vmull.p8 q11, d28, d22 @ G = A*B2
|
||||
vext.8 d20, d28, d28, #3 @ A3
|
||||
veor q8, q8, q1 @ L = E + F
|
||||
vmull.p8 q10, d20, d6 @ J = A3*B
|
||||
vext.8 d2, d6, d6, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q1, d28, d2 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d6, d6, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d28, d22 @ K = A*B4
|
||||
veor q10, q10, q1 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q1, d28, d6 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q1, q1, q8
|
||||
veor q1, q1, q10
|
||||
vext.8 d16, d27, d27, #1 @ A1
|
||||
vmull.p8 q8, d16, d7 @ F = A1*B
|
||||
vext.8 d4, d7, d7, #1 @ B1
|
||||
vmull.p8 q2, d27, d4 @ E = A*B1
|
||||
vext.8 d18, d27, d27, #2 @ A2
|
||||
vmull.p8 q9, d18, d7 @ H = A2*B
|
||||
vext.8 d22, d7, d7, #2 @ B2
|
||||
vmull.p8 q11, d27, d22 @ G = A*B2
|
||||
vext.8 d20, d27, d27, #3 @ A3
|
||||
veor q8, q8, q2 @ L = E + F
|
||||
vmull.p8 q10, d20, d7 @ J = A3*B
|
||||
vext.8 d4, d7, d7, #3 @ B3
|
||||
veor q9, q9, q11 @ M = G + H
|
||||
vmull.p8 q2, d27, d4 @ I = A*B3
|
||||
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
|
||||
vand d17, d17, d29
|
||||
vext.8 d22, d7, d7, #4 @ B4
|
||||
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
|
||||
vand d19, d19, d30
|
||||
vmull.p8 q11, d27, d22 @ K = A*B4
|
||||
veor q10, q10, q2 @ N = I + J
|
||||
veor d16, d16, d17
|
||||
veor d18, d18, d19
|
||||
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
|
||||
vand d21, d21, d31
|
||||
vext.8 q8, q8, q8, #15
|
||||
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
|
||||
vmov.i64 d23, #0
|
||||
vext.8 q9, q9, q9, #14
|
||||
veor d20, d20, d21
|
||||
vmull.p8 q2, d27, d7 @ D = A*B
|
||||
vext.8 q11, q11, q11, #12
|
||||
vext.8 q10, q10, q10, #13
|
||||
veor q8, q8, q9
|
||||
veor q10, q10, q11
|
||||
veor q2, q2, q8
|
||||
veor q2, q2, q10
|
||||
veor q1,q1,q0 @ Karatsuba post-processing
|
||||
veor q1,q1,q2
|
||||
veor d1,d1,d2
|
||||
veor d4,d4,d3 @ Xh|Xl - 256-bit result
|
||||
|
||||
@ equivalent of reduction_avx from ghash-x86_64.pl
|
||||
vshl.i64 q9,q0,#57 @ 1st phase
|
||||
vshl.i64 q10,q0,#62
|
||||
veor q10,q10,q9 @
|
||||
vshl.i64 q9,q0,#63
|
||||
veor q10, q10, q9 @
|
||||
veor d1,d1,d20 @
|
||||
veor d4,d4,d21
|
||||
|
||||
vshr.u64 q10,q0,#1 @ 2nd phase
|
||||
veor q2,q2,q0
|
||||
veor q0,q0,q10 @
|
||||
vshr.u64 q10,q10,#6
|
||||
vshr.u64 q0,q0,#1 @
|
||||
veor q0,q0,q2 @
|
||||
veor q0,q0,q10 @
|
||||
|
||||
subs r3,#16
|
||||
bne .Loop_neon
|
||||
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
sub r0,#16
|
||||
vst1.64 d1,[r0]! @ write out Xi
|
||||
vst1.64 d0,[r0]
|
||||
|
||||
bx lr @ bx lr
|
||||
.size gcm_ghash_neon,.-gcm_ghash_neon
|
||||
#endif
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
@ -0,0 +1,253 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
.fpu neon
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
.globl gcm_init_v8
|
||||
.hidden gcm_init_v8
|
||||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
gcm_init_v8:
|
||||
vld1.64 {q9},[r1] @ load input H
|
||||
vmov.i8 q11,#0xe1
|
||||
vshl.i64 q11,q11,#57 @ 0xc2.0
|
||||
vext.8 q3,q9,q9,#8
|
||||
vshr.u64 q10,q11,#63
|
||||
vdup.32 q9,d18[1]
|
||||
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
|
||||
vshr.u64 q10,q3,#63
|
||||
vshr.s32 q9,q9,#31 @ broadcast carry bit
|
||||
vand q10,q10,q8
|
||||
vshl.i64 q3,q3,#1
|
||||
vext.8 q10,q10,q10,#8
|
||||
vand q8,q8,q9
|
||||
vorr q3,q3,q10 @ H<<<=1
|
||||
veor q12,q3,q8 @ twisted H
|
||||
vst1.64 {q12},[r0]! @ store Htable[0]
|
||||
|
||||
@ calculate H^2
|
||||
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
|
||||
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
|
||||
veor q8,q8,q12
|
||||
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
|
||||
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q14,q0,q10
|
||||
|
||||
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
|
||||
veor q9,q9,q14
|
||||
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
|
||||
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
|
||||
|
||||
bx lr
|
||||
.size gcm_init_v8,.-gcm_init_v8
|
||||
.globl gcm_gmult_v8
|
||||
.hidden gcm_gmult_v8
|
||||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
gcm_gmult_v8:
|
||||
vld1.64 {q9},[r0] @ load Xi
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q12,q13},[r1] @ load twisted H, ...
|
||||
vshl.u64 q11,q11,#57
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vext.8 q3,q9,q9,#8
|
||||
|
||||
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
|
||||
veor q9,q9,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
|
||||
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q0,q0,q0,#8
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
bx lr
|
||||
.size gcm_gmult_v8,.-gcm_gmult_v8
|
||||
.globl gcm_ghash_v8
|
||||
.hidden gcm_ghash_v8
|
||||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
gcm_ghash_v8:
|
||||
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
vld1.64 {q0},[r0] @ load [rotated] Xi
|
||||
@ "[rotated]" means that
|
||||
@ loaded value would have
|
||||
@ to be rotated in order to
|
||||
@ make it appear as in
|
||||
@ algorithm specification
|
||||
subs r3,r3,#32 @ see if r3 is 32 or larger
|
||||
mov r12,#16 @ r12 is used as post-
|
||||
@ increment for input pointer;
|
||||
@ as loop is modulo-scheduled
|
||||
@ r12 is zeroed just in time
|
||||
@ to preclude overstepping
|
||||
@ inp[len], which means that
|
||||
@ last block[s] are actually
|
||||
@ loaded twice, but last
|
||||
@ copy is not processed
|
||||
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
|
||||
vmov.i8 q11,#0xe1
|
||||
vld1.64 {q14},[r1]
|
||||
moveq r12,#0 @ is it time to zero r12?
|
||||
vext.8 q0,q0,q0,#8 @ rotate Xi
|
||||
vld1.64 {q8},[r2]! @ load [rotated] I[0]
|
||||
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q8,q8
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q3,q8,q8,#8 @ rotate I[0]
|
||||
blo .Lodd_tail_v8 @ r3 was less than 32
|
||||
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vext.8 q7,q9,q9,#8
|
||||
veor q3,q3,q0 @ I[i]^=Xi
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
b .Loop_mod2x_v8
|
||||
|
||||
.align 4
|
||||
.Loop_mod2x_v8:
|
||||
vext.8 q10,q3,q3,#8
|
||||
subs r3,r3,#32 @ is there more data?
|
||||
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
|
||||
movlo r12,#0 @ is it time to zero r12?
|
||||
|
||||
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
|
||||
veor q10,q10,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
|
||||
veor q0,q0,q4 @ accumulate
|
||||
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
|
||||
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
|
||||
|
||||
veor q2,q2,q6
|
||||
moveq r12,#0 @ is it time to zero r12?
|
||||
veor q1,q1,q5
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q8,q8
|
||||
#endif
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q9,q9
|
||||
#endif
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
vext.8 q7,q9,q9,#8
|
||||
vext.8 q3,q8,q8,#8
|
||||
veor q0,q1,q10
|
||||
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
|
||||
veor q3,q3,q2 @ accumulate q3 early
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q3,q3,q10
|
||||
veor q9,q9,q7 @ Karatsuba pre-processing
|
||||
veor q3,q3,q0
|
||||
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
|
||||
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
|
||||
|
||||
veor q2,q2,q10
|
||||
vext.8 q3,q8,q8,#8 @ re-construct q3
|
||||
adds r3,r3,#32 @ re-construct r3
|
||||
veor q0,q0,q2 @ re-construct q0
|
||||
beq .Ldone_v8 @ is r3 zero?
|
||||
.Lodd_tail_v8:
|
||||
vext.8 q10,q0,q0,#8
|
||||
veor q3,q3,q0 @ inp^=Xi
|
||||
veor q9,q8,q10 @ q9 is rotated inp^Xi
|
||||
|
||||
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
|
||||
veor q9,q9,q3 @ Karatsuba pre-processing
|
||||
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
|
||||
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
||||
|
||||
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
|
||||
veor q10,q0,q2
|
||||
veor q1,q1,q9
|
||||
veor q1,q1,q10
|
||||
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
|
||||
|
||||
vmov d4,d3 @ Xh|Xm - 256-bit result
|
||||
vmov d3,d0 @ Xm is rotated Xl
|
||||
veor q0,q1,q10
|
||||
|
||||
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
|
||||
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
|
||||
veor q10,q10,q2
|
||||
veor q0,q0,q10
|
||||
|
||||
.Ldone_v8:
|
||||
#ifndef __ARMEB__
|
||||
vrev64.8 q0,q0
|
||||
#endif
|
||||
vext.8 q0,q0,q0,#8
|
||||
vst1.64 {q0},[r0] @ write out Xi
|
||||
|
||||
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
|
||||
bx lr
|
||||
.size gcm_ghash_v8,.-gcm_ghash_v8
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
File diff suppressed because it is too large
Load Diff
2839
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha256-armv4.S
Normal file
2839
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha256-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
1894
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha512-armv4.S
Normal file
1894
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha512-armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
1236
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
1236
contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/vpaes-armv7.S
Normal file
File diff suppressed because it is too large
Load Diff
379
contrib/boringssl-cmake/linux-arm/crypto/test/trampoline-armv4.S
Normal file
379
contrib/boringssl-cmake/linux-arm/crypto/test/trampoline-armv4.S
Normal file
@ -0,0 +1,379 @@
|
||||
// This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
// source tree. Do not edit by hand.
|
||||
|
||||
#if !defined(__has_feature)
|
||||
#define __has_feature(x) 0
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM)
|
||||
#if defined(__arm__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.syntax unified
|
||||
|
||||
.arch armv7-a
|
||||
.fpu vfp
|
||||
|
||||
.text
|
||||
|
||||
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
|
||||
@ with |argv|, then saves the callee-saved registers into |state|. It returns
|
||||
@ the result of |func|. The |unwind| argument is unused.
|
||||
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
|
||||
@ const uint32_t *argv, size_t argc,
|
||||
@ int unwind);
|
||||
.type abi_test_trampoline, %function
|
||||
.globl abi_test_trampoline
|
||||
.hidden abi_test_trampoline
|
||||
.align 4
|
||||
abi_test_trampoline:
|
||||
@ Save parameters and all callee-saved registers. For convenience, we
|
||||
@ save r9 on iOS even though it's volatile.
|
||||
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
||||
|
||||
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
|
||||
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
|
||||
sub sp, sp, #28
|
||||
|
||||
@ Every register in AAPCS is either non-volatile or a parameter (except
|
||||
@ r9 on iOS), so this code, by the actual call, loses all its scratch
|
||||
@ registers. First fill in stack parameters while there are registers
|
||||
@ to spare.
|
||||
cmp r3, #4
|
||||
bls .Lstack_args_done
|
||||
mov r4, sp @ r4 is the output pointer.
|
||||
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
|
||||
add r2, r2, #16 @ Skip four arguments.
|
||||
.Lstack_args_loop:
|
||||
ldr r6, [r2], #4
|
||||
cmp r2, r5
|
||||
str r6, [r4], #4
|
||||
bne .Lstack_args_loop
|
||||
|
||||
.Lstack_args_done:
|
||||
@ Load registers from |r1|.
|
||||
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Load register parameters. This uses up our remaining registers, so we
|
||||
@ repurpose lr as scratch space.
|
||||
ldr r3, [sp, #40] @ Reload argc.
|
||||
ldr lr, [sp, #36] @ .Load argv into lr.
|
||||
cmp r3, #3
|
||||
bhi .Larg_r3
|
||||
beq .Larg_r2
|
||||
cmp r3, #1
|
||||
bhi .Larg_r1
|
||||
beq .Larg_r0
|
||||
b .Largs_done
|
||||
|
||||
.Larg_r3:
|
||||
ldr r3, [lr, #12] @ argv[3]
|
||||
.Larg_r2:
|
||||
ldr r2, [lr, #8] @ argv[2]
|
||||
.Larg_r1:
|
||||
ldr r1, [lr, #4] @ argv[1]
|
||||
.Larg_r0:
|
||||
ldr r0, [lr] @ argv[0]
|
||||
.Largs_done:
|
||||
|
||||
@ With every other register in use, load the function pointer into lr
|
||||
@ and call the function.
|
||||
ldr lr, [sp, #28]
|
||||
blx lr
|
||||
|
||||
@ r1-r3 are free for use again. The trampoline only supports
|
||||
@ single-return functions. Pass r4-r11 to the caller.
|
||||
ldr r1, [sp, #32]
|
||||
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
#if defined(__APPLE__)
|
||||
@ r9 is not volatile on iOS.
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
|
||||
#else
|
||||
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
|
||||
#endif
|
||||
|
||||
@ Unwind the stack and restore registers.
|
||||
add sp, sp, #44 @ 44 = 28+16
|
||||
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
|
||||
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
|
||||
|
||||
bx lr
|
||||
.size abi_test_trampoline,.-abi_test_trampoline
|
||||
.type abi_test_clobber_r0, %function
|
||||
.globl abi_test_clobber_r0
|
||||
.hidden abi_test_clobber_r0
|
||||
.align 4
|
||||
abi_test_clobber_r0:
|
||||
mov r0, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r0,.-abi_test_clobber_r0
|
||||
.type abi_test_clobber_r1, %function
|
||||
.globl abi_test_clobber_r1
|
||||
.hidden abi_test_clobber_r1
|
||||
.align 4
|
||||
abi_test_clobber_r1:
|
||||
mov r1, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r1,.-abi_test_clobber_r1
|
||||
.type abi_test_clobber_r2, %function
|
||||
.globl abi_test_clobber_r2
|
||||
.hidden abi_test_clobber_r2
|
||||
.align 4
|
||||
abi_test_clobber_r2:
|
||||
mov r2, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r2,.-abi_test_clobber_r2
|
||||
.type abi_test_clobber_r3, %function
|
||||
.globl abi_test_clobber_r3
|
||||
.hidden abi_test_clobber_r3
|
||||
.align 4
|
||||
abi_test_clobber_r3:
|
||||
mov r3, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r3,.-abi_test_clobber_r3
|
||||
.type abi_test_clobber_r4, %function
|
||||
.globl abi_test_clobber_r4
|
||||
.hidden abi_test_clobber_r4
|
||||
.align 4
|
||||
abi_test_clobber_r4:
|
||||
mov r4, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r4,.-abi_test_clobber_r4
|
||||
.type abi_test_clobber_r5, %function
|
||||
.globl abi_test_clobber_r5
|
||||
.hidden abi_test_clobber_r5
|
||||
.align 4
|
||||
abi_test_clobber_r5:
|
||||
mov r5, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r5,.-abi_test_clobber_r5
|
||||
.type abi_test_clobber_r6, %function
|
||||
.globl abi_test_clobber_r6
|
||||
.hidden abi_test_clobber_r6
|
||||
.align 4
|
||||
abi_test_clobber_r6:
|
||||
mov r6, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r6,.-abi_test_clobber_r6
|
||||
.type abi_test_clobber_r7, %function
|
||||
.globl abi_test_clobber_r7
|
||||
.hidden abi_test_clobber_r7
|
||||
.align 4
|
||||
abi_test_clobber_r7:
|
||||
mov r7, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r7,.-abi_test_clobber_r7
|
||||
.type abi_test_clobber_r8, %function
|
||||
.globl abi_test_clobber_r8
|
||||
.hidden abi_test_clobber_r8
|
||||
.align 4
|
||||
abi_test_clobber_r8:
|
||||
mov r8, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r8,.-abi_test_clobber_r8
|
||||
.type abi_test_clobber_r9, %function
|
||||
.globl abi_test_clobber_r9
|
||||
.hidden abi_test_clobber_r9
|
||||
.align 4
|
||||
abi_test_clobber_r9:
|
||||
mov r9, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r9,.-abi_test_clobber_r9
|
||||
.type abi_test_clobber_r10, %function
|
||||
.globl abi_test_clobber_r10
|
||||
.hidden abi_test_clobber_r10
|
||||
.align 4
|
||||
abi_test_clobber_r10:
|
||||
mov r10, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r10,.-abi_test_clobber_r10
|
||||
.type abi_test_clobber_r11, %function
|
||||
.globl abi_test_clobber_r11
|
||||
.hidden abi_test_clobber_r11
|
||||
.align 4
|
||||
abi_test_clobber_r11:
|
||||
mov r11, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r11,.-abi_test_clobber_r11
|
||||
.type abi_test_clobber_r12, %function
|
||||
.globl abi_test_clobber_r12
|
||||
.hidden abi_test_clobber_r12
|
||||
.align 4
|
||||
abi_test_clobber_r12:
|
||||
mov r12, #0
|
||||
bx lr
|
||||
.size abi_test_clobber_r12,.-abi_test_clobber_r12
|
||||
.type abi_test_clobber_d0, %function
|
||||
.globl abi_test_clobber_d0
|
||||
.hidden abi_test_clobber_d0
|
||||
.align 4
|
||||
abi_test_clobber_d0:
|
||||
mov r0, #0
|
||||
vmov s0, r0
|
||||
vmov s1, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d0,.-abi_test_clobber_d0
|
||||
.type abi_test_clobber_d1, %function
|
||||
.globl abi_test_clobber_d1
|
||||
.hidden abi_test_clobber_d1
|
||||
.align 4
|
||||
abi_test_clobber_d1:
|
||||
mov r0, #0
|
||||
vmov s2, r0
|
||||
vmov s3, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d1,.-abi_test_clobber_d1
|
||||
.type abi_test_clobber_d2, %function
|
||||
.globl abi_test_clobber_d2
|
||||
.hidden abi_test_clobber_d2
|
||||
.align 4
|
||||
abi_test_clobber_d2:
|
||||
mov r0, #0
|
||||
vmov s4, r0
|
||||
vmov s5, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d2,.-abi_test_clobber_d2
|
||||
.type abi_test_clobber_d3, %function
|
||||
.globl abi_test_clobber_d3
|
||||
.hidden abi_test_clobber_d3
|
||||
.align 4
|
||||
abi_test_clobber_d3:
|
||||
mov r0, #0
|
||||
vmov s6, r0
|
||||
vmov s7, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d3,.-abi_test_clobber_d3
|
||||
.type abi_test_clobber_d4, %function
|
||||
.globl abi_test_clobber_d4
|
||||
.hidden abi_test_clobber_d4
|
||||
.align 4
|
||||
abi_test_clobber_d4:
|
||||
mov r0, #0
|
||||
vmov s8, r0
|
||||
vmov s9, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d4,.-abi_test_clobber_d4
|
||||
.type abi_test_clobber_d5, %function
|
||||
.globl abi_test_clobber_d5
|
||||
.hidden abi_test_clobber_d5
|
||||
.align 4
|
||||
abi_test_clobber_d5:
|
||||
mov r0, #0
|
||||
vmov s10, r0
|
||||
vmov s11, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d5,.-abi_test_clobber_d5
|
||||
.type abi_test_clobber_d6, %function
|
||||
.globl abi_test_clobber_d6
|
||||
.hidden abi_test_clobber_d6
|
||||
.align 4
|
||||
abi_test_clobber_d6:
|
||||
mov r0, #0
|
||||
vmov s12, r0
|
||||
vmov s13, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d6,.-abi_test_clobber_d6
|
||||
.type abi_test_clobber_d7, %function
|
||||
.globl abi_test_clobber_d7
|
||||
.hidden abi_test_clobber_d7
|
||||
.align 4
|
||||
abi_test_clobber_d7:
|
||||
mov r0, #0
|
||||
vmov s14, r0
|
||||
vmov s15, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d7,.-abi_test_clobber_d7
|
||||
.type abi_test_clobber_d8, %function
|
||||
.globl abi_test_clobber_d8
|
||||
.hidden abi_test_clobber_d8
|
||||
.align 4
|
||||
abi_test_clobber_d8:
|
||||
mov r0, #0
|
||||
vmov s16, r0
|
||||
vmov s17, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d8,.-abi_test_clobber_d8
|
||||
.type abi_test_clobber_d9, %function
|
||||
.globl abi_test_clobber_d9
|
||||
.hidden abi_test_clobber_d9
|
||||
.align 4
|
||||
abi_test_clobber_d9:
|
||||
mov r0, #0
|
||||
vmov s18, r0
|
||||
vmov s19, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d9,.-abi_test_clobber_d9
|
||||
.type abi_test_clobber_d10, %function
|
||||
.globl abi_test_clobber_d10
|
||||
.hidden abi_test_clobber_d10
|
||||
.align 4
|
||||
abi_test_clobber_d10:
|
||||
mov r0, #0
|
||||
vmov s20, r0
|
||||
vmov s21, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d10,.-abi_test_clobber_d10
|
||||
.type abi_test_clobber_d11, %function
|
||||
.globl abi_test_clobber_d11
|
||||
.hidden abi_test_clobber_d11
|
||||
.align 4
|
||||
abi_test_clobber_d11:
|
||||
mov r0, #0
|
||||
vmov s22, r0
|
||||
vmov s23, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d11,.-abi_test_clobber_d11
|
||||
.type abi_test_clobber_d12, %function
|
||||
.globl abi_test_clobber_d12
|
||||
.hidden abi_test_clobber_d12
|
||||
.align 4
|
||||
abi_test_clobber_d12:
|
||||
mov r0, #0
|
||||
vmov s24, r0
|
||||
vmov s25, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d12,.-abi_test_clobber_d12
|
||||
.type abi_test_clobber_d13, %function
|
||||
.globl abi_test_clobber_d13
|
||||
.hidden abi_test_clobber_d13
|
||||
.align 4
|
||||
abi_test_clobber_d13:
|
||||
mov r0, #0
|
||||
vmov s26, r0
|
||||
vmov s27, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d13,.-abi_test_clobber_d13
|
||||
.type abi_test_clobber_d14, %function
|
||||
.globl abi_test_clobber_d14
|
||||
.hidden abi_test_clobber_d14
|
||||
.align 4
|
||||
abi_test_clobber_d14:
|
||||
mov r0, #0
|
||||
vmov s28, r0
|
||||
vmov s29, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d14,.-abi_test_clobber_d14
|
||||
.type abi_test_clobber_d15, %function
|
||||
.globl abi_test_clobber_d15
|
||||
.hidden abi_test_clobber_d15
|
||||
.align 4
|
||||
abi_test_clobber_d15:
|
||||
mov r0, #0
|
||||
vmov s30, r0
|
||||
vmov s31, r0
|
||||
bx lr
|
||||
.size abi_test_clobber_d15,.-abi_test_clobber_d15
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
3670
contrib/boringssl-cmake/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
Normal file
3670
contrib/boringssl-cmake/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,587 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
|
||||
#define OPENSSL_NO_ASM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__)
|
||||
.machine "any"
|
||||
|
||||
.abiversion 2
|
||||
.text
|
||||
|
||||
.globl gcm_init_p8
|
||||
.type gcm_init_p8,@function
|
||||
.align 5
|
||||
gcm_init_p8:
|
||||
.localentry gcm_init_p8,0
|
||||
|
||||
li 0,-4096
|
||||
li 8,0x10
|
||||
li 12,-1
|
||||
li 9,0x20
|
||||
or 0,0,0
|
||||
li 10,0x30
|
||||
.long 0x7D202699
|
||||
|
||||
vspltisb 8,-16
|
||||
vspltisb 5,1
|
||||
vaddubm 8,8,8
|
||||
vxor 4,4,4
|
||||
vor 8,8,5
|
||||
vsldoi 8,8,4,15
|
||||
vsldoi 6,4,5,1
|
||||
vaddubm 8,8,8
|
||||
vspltisb 7,7
|
||||
vor 8,8,6
|
||||
vspltb 6,9,0
|
||||
vsl 9,9,5
|
||||
vsrab 6,6,7
|
||||
vand 6,6,8
|
||||
vxor 3,9,6
|
||||
|
||||
vsldoi 9,3,3,8
|
||||
vsldoi 8,4,8,8
|
||||
vsldoi 11,4,9,8
|
||||
vsldoi 10,9,4,8
|
||||
|
||||
.long 0x7D001F99
|
||||
.long 0x7D681F99
|
||||
li 8,0x40
|
||||
.long 0x7D291F99
|
||||
li 9,0x50
|
||||
.long 0x7D4A1F99
|
||||
li 10,0x60
|
||||
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 16,0,6
|
||||
|
||||
vsldoi 17,16,16,8
|
||||
vsldoi 19,4,17,8
|
||||
vsldoi 18,17,4,8
|
||||
|
||||
.long 0x7E681F99
|
||||
li 8,0x70
|
||||
.long 0x7E291F99
|
||||
li 9,0x80
|
||||
.long 0x7E4A1F99
|
||||
li 10,0x90
|
||||
.long 0x10039CC8
|
||||
.long 0x11B09CC8
|
||||
.long 0x10238CC8
|
||||
.long 0x11D08CC8
|
||||
.long 0x104394C8
|
||||
.long 0x11F094C8
|
||||
|
||||
.long 0x10E044C8
|
||||
.long 0x114D44C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vsldoi 11,14,4,8
|
||||
vsldoi 9,4,14,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
vxor 13,13,11
|
||||
vxor 15,15,9
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vsldoi 13,13,13,8
|
||||
vxor 0,0,7
|
||||
vxor 13,13,10
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
vsldoi 9,13,13,8
|
||||
.long 0x100044C8
|
||||
.long 0x11AD44C8
|
||||
vxor 6,6,2
|
||||
vxor 9,9,15
|
||||
vxor 0,0,6
|
||||
vxor 13,13,9
|
||||
|
||||
vsldoi 9,0,0,8
|
||||
vsldoi 17,13,13,8
|
||||
vsldoi 11,4,9,8
|
||||
vsldoi 10,9,4,8
|
||||
vsldoi 19,4,17,8
|
||||
vsldoi 18,17,4,8
|
||||
|
||||
.long 0x7D681F99
|
||||
li 8,0xa0
|
||||
.long 0x7D291F99
|
||||
li 9,0xb0
|
||||
.long 0x7D4A1F99
|
||||
li 10,0xc0
|
||||
.long 0x7E681F99
|
||||
.long 0x7E291F99
|
||||
.long 0x7E4A1F99
|
||||
|
||||
or 12,12,12
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size gcm_init_p8,.-gcm_init_p8
|
||||
.globl gcm_gmult_p8
|
||||
.type gcm_gmult_p8,@function
|
||||
.align 5
|
||||
gcm_gmult_p8:
|
||||
.localentry gcm_gmult_p8,0
|
||||
|
||||
lis 0,0xfff8
|
||||
li 8,0x10
|
||||
li 12,-1
|
||||
li 9,0x20
|
||||
or 0,0,0
|
||||
li 10,0x30
|
||||
.long 0x7C601E99
|
||||
|
||||
.long 0x7D682699
|
||||
lvsl 12,0,0
|
||||
.long 0x7D292699
|
||||
vspltisb 5,0x07
|
||||
.long 0x7D4A2699
|
||||
vxor 12,12,5
|
||||
.long 0x7D002699
|
||||
vperm 3,3,3,12
|
||||
vxor 4,4,4
|
||||
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 0,0,6
|
||||
|
||||
vperm 0,0,0,12
|
||||
.long 0x7C001F99
|
||||
|
||||
or 12,12,12
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,2,0
|
||||
.long 0
|
||||
.size gcm_gmult_p8,.-gcm_gmult_p8
|
||||
|
||||
.globl gcm_ghash_p8
|
||||
.type gcm_ghash_p8,@function
|
||||
.align 5
|
||||
gcm_ghash_p8:
|
||||
.localentry gcm_ghash_p8,0
|
||||
|
||||
li 0,-4096
|
||||
li 8,0x10
|
||||
li 12,-1
|
||||
li 9,0x20
|
||||
or 0,0,0
|
||||
li 10,0x30
|
||||
.long 0x7C001E99
|
||||
|
||||
.long 0x7D682699
|
||||
li 8,0x40
|
||||
lvsl 12,0,0
|
||||
.long 0x7D292699
|
||||
li 9,0x50
|
||||
vspltisb 5,0x07
|
||||
.long 0x7D4A2699
|
||||
li 10,0x60
|
||||
vxor 12,12,5
|
||||
.long 0x7D002699
|
||||
vperm 0,0,0,12
|
||||
vxor 4,4,4
|
||||
|
||||
cmpldi 6,64
|
||||
bge .Lgcm_ghash_p8_4x
|
||||
|
||||
.long 0x7C602E99
|
||||
addi 5,5,16
|
||||
subic. 6,6,16
|
||||
vperm 3,3,3,12
|
||||
vxor 3,3,0
|
||||
beq .Lshort
|
||||
|
||||
.long 0x7E682699
|
||||
li 8,16
|
||||
.long 0x7E292699
|
||||
add 9,5,6
|
||||
.long 0x7E4A2699
|
||||
|
||||
|
||||
.align 5
|
||||
.Loop_2x:
|
||||
.long 0x7E002E99
|
||||
vperm 16,16,16,12
|
||||
|
||||
subic 6,6,32
|
||||
.long 0x10039CC8
|
||||
.long 0x11B05CC8
|
||||
subfe 0,0,0
|
||||
.long 0x10238CC8
|
||||
.long 0x11D04CC8
|
||||
and 0,0,6
|
||||
.long 0x104394C8
|
||||
.long 0x11F054C8
|
||||
add 5,5,0
|
||||
|
||||
vxor 0,0,13
|
||||
vxor 1,1,14
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 2,2,15
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
.long 0x7C682E99
|
||||
addi 5,5,32
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vperm 3,3,3,12
|
||||
vxor 6,6,2
|
||||
vxor 3,3,6
|
||||
vxor 3,3,0
|
||||
cmpld 9,5
|
||||
bgt .Loop_2x
|
||||
|
||||
cmplwi 6,0
|
||||
bne .Leven
|
||||
|
||||
.Lshort:
|
||||
.long 0x10035CC8
|
||||
.long 0x10234CC8
|
||||
.long 0x104354C8
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
|
||||
.Leven:
|
||||
vxor 0,0,6
|
||||
vperm 0,0,0,12
|
||||
.long 0x7C001F99
|
||||
|
||||
or 12,12,12
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x14,0,0,0,4,0
|
||||
.long 0
|
||||
.align 5
|
||||
.gcm_ghash_p8_4x:
|
||||
.Lgcm_ghash_p8_4x:
|
||||
stdu 1,-256(1)
|
||||
li 10,63
|
||||
li 11,79
|
||||
stvx 20,10,1
|
||||
addi 10,10,32
|
||||
stvx 21,11,1
|
||||
addi 11,11,32
|
||||
stvx 22,10,1
|
||||
addi 10,10,32
|
||||
stvx 23,11,1
|
||||
addi 11,11,32
|
||||
stvx 24,10,1
|
||||
addi 10,10,32
|
||||
stvx 25,11,1
|
||||
addi 11,11,32
|
||||
stvx 26,10,1
|
||||
addi 10,10,32
|
||||
stvx 27,11,1
|
||||
addi 11,11,32
|
||||
stvx 28,10,1
|
||||
addi 10,10,32
|
||||
stvx 29,11,1
|
||||
addi 11,11,32
|
||||
stvx 30,10,1
|
||||
li 10,0x60
|
||||
stvx 31,11,1
|
||||
li 0,-1
|
||||
stw 12,252(1)
|
||||
or 0,0,0
|
||||
|
||||
lvsl 5,0,8
|
||||
|
||||
li 8,0x70
|
||||
.long 0x7E292699
|
||||
li 9,0x80
|
||||
vspltisb 6,8
|
||||
|
||||
li 10,0x90
|
||||
.long 0x7EE82699
|
||||
li 8,0xa0
|
||||
.long 0x7F092699
|
||||
li 9,0xb0
|
||||
.long 0x7F2A2699
|
||||
li 10,0xc0
|
||||
.long 0x7FA82699
|
||||
li 8,0x10
|
||||
.long 0x7FC92699
|
||||
li 9,0x20
|
||||
.long 0x7FEA2699
|
||||
li 10,0x30
|
||||
|
||||
vsldoi 7,4,6,8
|
||||
vaddubm 18,5,7
|
||||
vaddubm 19,6,18
|
||||
|
||||
srdi 6,6,4
|
||||
|
||||
.long 0x7C602E99
|
||||
.long 0x7E082E99
|
||||
subic. 6,6,8
|
||||
.long 0x7EC92E99
|
||||
.long 0x7F8A2E99
|
||||
addi 5,5,0x40
|
||||
vperm 3,3,3,12
|
||||
vperm 16,16,16,12
|
||||
vperm 22,22,22,12
|
||||
vperm 28,28,28,12
|
||||
|
||||
vxor 2,3,0
|
||||
|
||||
.long 0x11B0BCC8
|
||||
.long 0x11D0C4C8
|
||||
.long 0x11F0CCC8
|
||||
|
||||
vperm 11,17,9,18
|
||||
vperm 5,22,28,19
|
||||
vperm 10,17,9,19
|
||||
vperm 6,22,28,18
|
||||
.long 0x12B68CC8
|
||||
.long 0x12855CC8
|
||||
.long 0x137C4CC8
|
||||
.long 0x134654C8
|
||||
|
||||
vxor 21,21,14
|
||||
vxor 20,20,13
|
||||
vxor 27,27,21
|
||||
vxor 26,26,15
|
||||
|
||||
blt .Ltail_4x
|
||||
|
||||
.Loop_4x:
|
||||
.long 0x7C602E99
|
||||
.long 0x7E082E99
|
||||
subic. 6,6,4
|
||||
.long 0x7EC92E99
|
||||
.long 0x7F8A2E99
|
||||
addi 5,5,0x40
|
||||
vperm 16,16,16,12
|
||||
vperm 22,22,22,12
|
||||
vperm 28,28,28,12
|
||||
vperm 3,3,3,12
|
||||
|
||||
.long 0x1002ECC8
|
||||
.long 0x1022F4C8
|
||||
.long 0x1042FCC8
|
||||
.long 0x11B0BCC8
|
||||
.long 0x11D0C4C8
|
||||
.long 0x11F0CCC8
|
||||
|
||||
vxor 0,0,20
|
||||
vxor 1,1,27
|
||||
vxor 2,2,26
|
||||
vperm 5,22,28,19
|
||||
vperm 6,22,28,18
|
||||
|
||||
.long 0x10E044C8
|
||||
.long 0x12855CC8
|
||||
.long 0x134654C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x12B68CC8
|
||||
.long 0x137C4CC8
|
||||
.long 0x100044C8
|
||||
|
||||
vxor 20,20,13
|
||||
vxor 26,26,15
|
||||
vxor 2,2,3
|
||||
vxor 21,21,14
|
||||
vxor 2,2,6
|
||||
vxor 27,27,21
|
||||
vxor 2,2,0
|
||||
bge .Loop_4x
|
||||
|
||||
.Ltail_4x:
|
||||
.long 0x1002ECC8
|
||||
.long 0x1022F4C8
|
||||
.long 0x1042FCC8
|
||||
|
||||
vxor 0,0,20
|
||||
vxor 1,1,27
|
||||
|
||||
.long 0x10E044C8
|
||||
|
||||
vsldoi 5,1,4,8
|
||||
vsldoi 6,4,1,8
|
||||
vxor 2,2,26
|
||||
vxor 0,0,5
|
||||
vxor 2,2,6
|
||||
|
||||
vsldoi 0,0,0,8
|
||||
vxor 0,0,7
|
||||
|
||||
vsldoi 6,0,0,8
|
||||
.long 0x100044C8
|
||||
vxor 6,6,2
|
||||
vxor 0,0,6
|
||||
|
||||
addic. 6,6,4
|
||||
beq .Ldone_4x
|
||||
|
||||
.long 0x7C602E99
|
||||
cmpldi 6,2
|
||||
li 6,-4
|
||||
blt .Lone
|
||||
.long 0x7E082E99
|
||||
beq .Ltwo
|
||||
|
||||
.Lthree:
|
||||
.long 0x7EC92E99
|
||||
vperm 3,3,3,12
|
||||
vperm 16,16,16,12
|
||||
vperm 22,22,22,12
|
||||
|
||||
vxor 2,3,0
|
||||
vor 29,23,23
|
||||
vor 30, 24, 24
|
||||
vor 31,25,25
|
||||
|
||||
vperm 5,16,22,19
|
||||
vperm 6,16,22,18
|
||||
.long 0x12B08CC8
|
||||
.long 0x13764CC8
|
||||
.long 0x12855CC8
|
||||
.long 0x134654C8
|
||||
|
||||
vxor 27,27,21
|
||||
b .Ltail_4x
|
||||
|
||||
.align 4
|
||||
.Ltwo:
|
||||
vperm 3,3,3,12
|
||||
vperm 16,16,16,12
|
||||
|
||||
vxor 2,3,0
|
||||
vperm 5,4,16,19
|
||||
vperm 6,4,16,18
|
||||
|
||||
vsldoi 29,4,17,8
|
||||
vor 30, 17, 17
|
||||
vsldoi 31,17,4,8
|
||||
|
||||
.long 0x12855CC8
|
||||
.long 0x13704CC8
|
||||
.long 0x134654C8
|
||||
|
||||
b .Ltail_4x
|
||||
|
||||
.align 4
|
||||
.Lone:
|
||||
vperm 3,3,3,12
|
||||
|
||||
vsldoi 29,4,9,8
|
||||
vor 30, 9, 9
|
||||
vsldoi 31,9,4,8
|
||||
|
||||
vxor 2,3,0
|
||||
vxor 20,20,20
|
||||
vxor 27,27,27
|
||||
vxor 26,26,26
|
||||
|
||||
b .Ltail_4x
|
||||
|
||||
.Ldone_4x:
|
||||
vperm 0,0,0,12
|
||||
.long 0x7C001F99
|
||||
|
||||
li 10,63
|
||||
li 11,79
|
||||
or 12,12,12
|
||||
lvx 20,10,1
|
||||
addi 10,10,32
|
||||
lvx 21,11,1
|
||||
addi 11,11,32
|
||||
lvx 22,10,1
|
||||
addi 10,10,32
|
||||
lvx 23,11,1
|
||||
addi 11,11,32
|
||||
lvx 24,10,1
|
||||
addi 10,10,32
|
||||
lvx 25,11,1
|
||||
addi 11,11,32
|
||||
lvx 26,10,1
|
||||
addi 10,10,32
|
||||
lvx 27,11,1
|
||||
addi 11,11,32
|
||||
lvx 28,10,1
|
||||
addi 10,10,32
|
||||
lvx 29,11,1
|
||||
addi 11,11,32
|
||||
lvx 30,10,1
|
||||
lvx 31,11,1
|
||||
addi 1,1,256
|
||||
blr
|
||||
.long 0
|
||||
.byte 0,12,0x04,0,0x80,0,4,0
|
||||
.long 0
|
||||
.size gcm_ghash_p8,.-gcm_ghash_p8
|
||||
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif // !OPENSSL_NO_ASM && __powerpc64__
|
||||
.section .note.GNU-stack,"",@progbits
|
1410
contrib/boringssl-cmake/linux-ppc64le/crypto/test/trampoline-ppc.S
Normal file
1410
contrib/boringssl-cmake/linux-ppc64le/crypto/test/trampoline-ppc.S
Normal file
File diff suppressed because it is too large
Load Diff
975
contrib/boringssl-cmake/linux-x86/crypto/chacha/chacha-x86.S
Normal file
975
contrib/boringssl-cmake/linux-x86/crypto/chacha/chacha-x86.S
Normal file
@ -0,0 +1,975 @@
|
||||
# This file is generated from a similarly-named Perl script in the BoringSSL
|
||||
# source tree. Do not edit by hand.
|
||||
|
||||
#if defined(__i386__)
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
.text
|
||||
.globl ChaCha20_ctr32
|
||||
.hidden ChaCha20_ctr32
|
||||
.type ChaCha20_ctr32,@function
|
||||
.align 16
|
||||
ChaCha20_ctr32:
|
||||
.L_ChaCha20_ctr32_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
xorl %eax,%eax
|
||||
cmpl 28(%esp),%eax
|
||||
je .L000no_data
|
||||
call .Lpic_point
|
||||
.Lpic_point:
|
||||
popl %eax
|
||||
leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
|
||||
testl $16777216,(%ebp)
|
||||
jz .L001x86
|
||||
testl $512,4(%ebp)
|
||||
jz .L001x86
|
||||
jmp .Lssse3_shortcut
|
||||
.L001x86:
|
||||
movl 32(%esp),%esi
|
||||
movl 36(%esp),%edi
|
||||
subl $132,%esp
|
||||
movl (%esi),%eax
|
||||
movl 4(%esi),%ebx
|
||||
movl 8(%esi),%ecx
|
||||
movl 12(%esi),%edx
|
||||
movl %eax,80(%esp)
|
||||
movl %ebx,84(%esp)
|
||||
movl %ecx,88(%esp)
|
||||
movl %edx,92(%esp)
|
||||
movl 16(%esi),%eax
|
||||
movl 20(%esi),%ebx
|
||||
movl 24(%esi),%ecx
|
||||
movl 28(%esi),%edx
|
||||
movl %eax,96(%esp)
|
||||
movl %ebx,100(%esp)
|
||||
movl %ecx,104(%esp)
|
||||
movl %edx,108(%esp)
|
||||
movl (%edi),%eax
|
||||
movl 4(%edi),%ebx
|
||||
movl 8(%edi),%ecx
|
||||
movl 12(%edi),%edx
|
||||
subl $1,%eax
|
||||
movl %eax,112(%esp)
|
||||
movl %ebx,116(%esp)
|
||||
movl %ecx,120(%esp)
|
||||
movl %edx,124(%esp)
|
||||
jmp .L002entry
|
||||
.align 16
|
||||
.L003outer_loop:
|
||||
movl %ebx,156(%esp)
|
||||
movl %eax,152(%esp)
|
||||
movl %ecx,160(%esp)
|
||||
.L002entry:
|
||||
movl $1634760805,%eax
|
||||
movl $857760878,4(%esp)
|
||||
movl $2036477234,8(%esp)
|
||||
movl $1797285236,12(%esp)
|
||||
movl 84(%esp),%ebx
|
||||
movl 88(%esp),%ebp
|
||||
movl 104(%esp),%ecx
|
||||
movl 108(%esp),%esi
|
||||
movl 116(%esp),%edx
|
||||
movl 120(%esp),%edi
|
||||
movl %ebx,20(%esp)
|
||||
movl %ebp,24(%esp)
|
||||
movl %ecx,40(%esp)
|
||||
movl %esi,44(%esp)
|
||||
movl %edx,52(%esp)
|
||||
movl %edi,56(%esp)
|
||||
movl 92(%esp),%ebx
|
||||
movl 124(%esp),%edi
|
||||
movl 112(%esp),%edx
|
||||
movl 80(%esp),%ebp
|
||||
movl 96(%esp),%ecx
|
||||
movl 100(%esp),%esi
|
||||
addl $1,%edx
|
||||
movl %ebx,28(%esp)
|
||||
movl %edi,60(%esp)
|
||||
movl %edx,112(%esp)
|
||||
movl $10,%ebx
|
||||
jmp .L004loop
|
||||
.align 16
|
||||
.L004loop:
|
||||
addl %ebp,%eax
|
||||
movl %ebx,128(%esp)
|
||||
movl %ebp,%ebx
|
||||
xorl %eax,%edx
|
||||
roll $16,%edx
|
||||
addl %edx,%ecx
|
||||
xorl %ecx,%ebx
|
||||
movl 52(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 20(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,(%esp)
|
||||
roll $8,%edx
|
||||
movl 4(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,48(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
movl %ecx,32(%esp)
|
||||
roll $16,%edi
|
||||
movl %ebx,16(%esp)
|
||||
addl %edi,%esi
|
||||
movl 40(%esp),%ecx
|
||||
xorl %esi,%ebp
|
||||
movl 56(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 24(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,4(%esp)
|
||||
roll $8,%edi
|
||||
movl 8(%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,52(%esp)
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
movl %esi,36(%esp)
|
||||
roll $16,%edx
|
||||
movl %ebp,20(%esp)
|
||||
addl %edx,%ecx
|
||||
movl 44(%esp),%esi
|
||||
xorl %ecx,%ebx
|
||||
movl 60(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 28(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,8(%esp)
|
||||
roll $8,%edx
|
||||
movl 12(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,56(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
roll $16,%edi
|
||||
movl %ebx,24(%esp)
|
||||
addl %edi,%esi
|
||||
xorl %esi,%ebp
|
||||
roll $12,%ebp
|
||||
movl 20(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,12(%esp)
|
||||
roll $8,%edi
|
||||
movl (%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,%edx
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
roll $16,%edx
|
||||
movl %ebp,28(%esp)
|
||||
addl %edx,%ecx
|
||||
xorl %ecx,%ebx
|
||||
movl 48(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 24(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,(%esp)
|
||||
roll $8,%edx
|
||||
movl 4(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,60(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
movl %ecx,40(%esp)
|
||||
roll $16,%edi
|
||||
movl %ebx,20(%esp)
|
||||
addl %edi,%esi
|
||||
movl 32(%esp),%ecx
|
||||
xorl %esi,%ebp
|
||||
movl 52(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 28(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,4(%esp)
|
||||
roll $8,%edi
|
||||
movl 8(%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,48(%esp)
|
||||
xorl %esi,%ebp
|
||||
addl %ebx,%eax
|
||||
roll $7,%ebp
|
||||
xorl %eax,%edx
|
||||
movl %esi,44(%esp)
|
||||
roll $16,%edx
|
||||
movl %ebp,24(%esp)
|
||||
addl %edx,%ecx
|
||||
movl 36(%esp),%esi
|
||||
xorl %ecx,%ebx
|
||||
movl 56(%esp),%edi
|
||||
roll $12,%ebx
|
||||
movl 16(%esp),%ebp
|
||||
addl %ebx,%eax
|
||||
xorl %eax,%edx
|
||||
movl %eax,8(%esp)
|
||||
roll $8,%edx
|
||||
movl 12(%esp),%eax
|
||||
addl %edx,%ecx
|
||||
movl %edx,52(%esp)
|
||||
xorl %ecx,%ebx
|
||||
addl %ebp,%eax
|
||||
roll $7,%ebx
|
||||
xorl %eax,%edi
|
||||
roll $16,%edi
|
||||
movl %ebx,28(%esp)
|
||||
addl %edi,%esi
|
||||
xorl %esi,%ebp
|
||||
movl 48(%esp),%edx
|
||||
roll $12,%ebp
|
||||
movl 128(%esp),%ebx
|
||||
addl %ebp,%eax
|
||||
xorl %eax,%edi
|
||||
movl %eax,12(%esp)
|
||||
roll $8,%edi
|
||||
movl (%esp),%eax
|
||||
addl %edi,%esi
|
||||
movl %edi,56(%esp)
|
||||
xorl %esi,%ebp
|
||||
roll $7,%ebp
|
||||
decl %ebx
|
||||
jnz .L004loop
|
||||
movl 160(%esp),%ebx
|
||||
addl $1634760805,%eax
|
||||
addl 80(%esp),%ebp
|
||||
addl 96(%esp),%ecx
|
||||
addl 100(%esp),%esi
|
||||
cmpl $64,%ebx
|
||||
jb .L005tail
|
||||
movl 156(%esp),%ebx
|
||||
addl 112(%esp),%edx
|
||||
addl 120(%esp),%edi
|
||||
xorl (%ebx),%eax
|
||||
xorl 16(%ebx),%ebp
|
||||
movl %eax,(%esp)
|
||||
movl 152(%esp),%eax
|
||||
xorl 32(%ebx),%ecx
|
||||
xorl 36(%ebx),%esi
|
||||
xorl 48(%ebx),%edx
|
||||
xorl 56(%ebx),%edi
|
||||
movl %ebp,16(%eax)
|
||||
movl %ecx,32(%eax)
|
||||
movl %esi,36(%eax)
|
||||
movl %edx,48(%eax)
|
||||
movl %edi,56(%eax)
|
||||
movl 4(%esp),%ebp
|
||||
movl 8(%esp),%ecx
|
||||
movl 12(%esp),%esi
|
||||
movl 20(%esp),%edx
|
||||
movl 24(%esp),%edi
|
||||
addl $857760878,%ebp
|
||||
addl $2036477234,%ecx
|
||||
addl $1797285236,%esi
|
||||
addl 84(%esp),%edx
|
||||
addl 88(%esp),%edi
|
||||
xorl 4(%ebx),%ebp
|
||||
xorl 8(%ebx),%ecx
|
||||
xorl 12(%ebx),%esi
|
||||
xorl 20(%ebx),%edx
|
||||
xorl 24(%ebx),%edi
|
||||
movl %ebp,4(%eax)
|
||||
movl %ecx,8(%eax)
|
||||
movl %esi,12(%eax)
|
||||
movl %edx,20(%eax)
|
||||
movl %edi,24(%eax)
|
||||
movl 28(%esp),%ebp
|
||||
movl 40(%esp),%ecx
|
||||
movl 44(%esp),%esi
|
||||
movl 52(%esp),%edx
|
||||
movl 60(%esp),%edi
|
||||
addl 92(%esp),%ebp
|
||||
addl 104(%esp),%ecx
|
||||
addl 108(%esp),%esi
|
||||
addl 116(%esp),%edx
|
||||
addl 124(%esp),%edi
|
||||
xorl 28(%ebx),%ebp
|
||||
xorl 40(%ebx),%ecx
|
||||
xorl 44(%ebx),%esi
|
||||
xorl 52(%ebx),%edx
|
||||
xorl 60(%ebx),%edi
|
||||
leal 64(%ebx),%ebx
|
||||
movl %ebp,28(%eax)
|
||||
movl (%esp),%ebp
|
||||
movl %ecx,40(%eax)
|
||||
movl 160(%esp),%ecx
|
||||
movl %esi,44(%eax)
|
||||
movl %edx,52(%eax)
|
||||
movl %edi,60(%eax)
|
||||
movl %ebp,(%eax)
|
||||
leal 64(%eax),%eax
|
||||
subl $64,%ecx
|
||||
jnz .L003outer_loop
|
||||
jmp .L006done
|
||||
.L005tail:
|
||||
addl 112(%esp),%edx
|
||||
addl 120(%esp),%edi
|
||||
movl %eax,(%esp)
|
||||
movl %ebp,16(%esp)
|
||||
movl %ecx,32(%esp)
|
||||
movl %esi,36(%esp)
|
||||
movl %edx,48(%esp)
|
||||
movl %edi,56(%esp)
|
||||
movl 4(%esp),%ebp
|
||||
movl 8(%esp),%ecx
|
||||
movl 12(%esp),%esi
|
||||
movl 20(%esp),%edx
|
||||
movl 24(%esp),%edi
|
||||
addl $857760878,%ebp
|
||||
addl $2036477234,%ecx
|
||||
addl $1797285236,%esi
|
||||
addl 84(%esp),%edx
|
||||
addl 88(%esp),%edi
|
||||
movl %ebp,4(%esp)
|
||||
movl %ecx,8(%esp)
|
||||
movl %esi,12(%esp)
|
||||
movl %edx,20(%esp)
|
||||
movl %edi,24(%esp)
|
||||
movl 28(%esp),%ebp
|
||||
movl 40(%esp),%ecx
|
||||
movl 44(%esp),%esi
|
||||
movl 52(%esp),%edx
|
||||
movl 60(%esp),%edi
|
||||
addl 92(%esp),%ebp
|
||||
addl 104(%esp),%ecx
|
||||
addl 108(%esp),%esi
|
||||
addl 116(%esp),%edx
|
||||
addl 124(%esp),%edi
|
||||
movl %ebp,28(%esp)
|
||||
movl 156(%esp),%ebp
|
||||
movl %ecx,40(%esp)
|
||||
movl 152(%esp),%ecx
|
||||
movl %esi,44(%esp)
|
||||
xorl %esi,%esi
|
||||
movl %edx,52(%esp)
|
||||
movl %edi,60(%esp)
|
||||
xorl %eax,%eax
|
||||
xorl %edx,%edx
|
||||
.L007tail_loop:
|
||||
movb (%esi,%ebp,1),%al
|
||||
movb (%esp,%esi,1),%dl
|
||||
leal 1(%esi),%esi
|
||||
xorb %dl,%al
|
||||
movb %al,-1(%ecx,%esi,1)
|
||||
decl %ebx
|
||||
jnz .L007tail_loop
|
||||
.L006done:
|
||||
addl $132,%esp
|
||||
.L000no_data:
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
|
||||
.globl ChaCha20_ssse3
|
||||
.hidden ChaCha20_ssse3
|
||||
.type ChaCha20_ssse3,@function
|
||||
.align 16
|
||||
ChaCha20_ssse3:
|
||||
.L_ChaCha20_ssse3_begin:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
.Lssse3_shortcut:
|
||||
movl 20(%esp),%edi
|
||||
movl 24(%esp),%esi
|
||||
movl 28(%esp),%ecx
|
||||
movl 32(%esp),%edx
|
||||
movl 36(%esp),%ebx
|
||||
movl %esp,%ebp
|
||||
subl $524,%esp
|
||||
andl $-64,%esp
|
||||
movl %ebp,512(%esp)
|
||||
leal .Lssse3_data-.Lpic_point(%eax),%eax
|
||||
movdqu (%ebx),%xmm3
|
||||
cmpl $256,%ecx
|
||||
jb .L0081x
|
||||
movl %edx,516(%esp)
|
||||
movl %ebx,520(%esp)
|
||||
subl $256,%ecx
|
||||
leal 384(%esp),%ebp
|
||||
movdqu (%edx),%xmm7
|
||||
pshufd $0,%xmm3,%xmm0
|
||||
pshufd $85,%xmm3,%xmm1
|
||||
pshufd $170,%xmm3,%xmm2
|
||||
pshufd $255,%xmm3,%xmm3
|
||||
paddd 48(%eax),%xmm0
|
||||
pshufd $0,%xmm7,%xmm4
|
||||
pshufd $85,%xmm7,%xmm5
|
||||
psubd 64(%eax),%xmm0
|
||||
pshufd $170,%xmm7,%xmm6
|
||||
pshufd $255,%xmm7,%xmm7
|
||||
movdqa %xmm0,64(%ebp)
|
||||
movdqa %xmm1,80(%ebp)
|
||||
movdqa %xmm2,96(%ebp)
|
||||
movdqa %xmm3,112(%ebp)
|
||||
movdqu 16(%edx),%xmm3
|
||||
movdqa %xmm4,-64(%ebp)
|
||||
movdqa %xmm5,-48(%ebp)
|
||||
movdqa %xmm6,-32(%ebp)
|
||||
movdqa %xmm7,-16(%ebp)
|
||||
movdqa 32(%eax),%xmm7
|
||||
leal 128(%esp),%ebx
|
||||
pshufd $0,%xmm3,%xmm0
|
||||
pshufd $85,%xmm3,%xmm1
|
||||
pshufd $170,%xmm3,%xmm2
|
||||
pshufd $255,%xmm3,%xmm3
|
||||
pshufd $0,%xmm7,%xmm4
|
||||
pshufd $85,%xmm7,%xmm5
|
||||
pshufd $170,%xmm7,%xmm6
|
||||
pshufd $255,%xmm7,%xmm7
|
||||
movdqa %xmm0,(%ebp)
|
||||
movdqa %xmm1,16(%ebp)
|
||||
movdqa %xmm2,32(%ebp)
|
||||
movdqa %xmm3,48(%ebp)
|
||||
movdqa %xmm4,-128(%ebp)
|
||||
movdqa %xmm5,-112(%ebp)
|
||||
movdqa %xmm6,-96(%ebp)
|
||||
movdqa %xmm7,-80(%ebp)
|
||||
leal 128(%esi),%esi
|
||||
leal 128(%edi),%edi
|
||||
jmp .L009outer_loop
|
||||
.align 16
|
||||
.L009outer_loop:
|
||||
movdqa -112(%ebp),%xmm1
|
||||
movdqa -96(%ebp),%xmm2
|
||||
movdqa -80(%ebp),%xmm3
|
||||
movdqa -48(%ebp),%xmm5
|
||||
movdqa -32(%ebp),%xmm6
|
||||
movdqa -16(%ebp),%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
movdqa %xmm2,-96(%ebx)
|
||||
movdqa %xmm3,-80(%ebx)
|
||||
movdqa %xmm5,-48(%ebx)
|
||||
movdqa %xmm6,-32(%ebx)
|
||||
movdqa %xmm7,-16(%ebx)
|
||||
movdqa 32(%ebp),%xmm2
|
||||
movdqa 48(%ebp),%xmm3
|
||||
movdqa 64(%ebp),%xmm4
|
||||
movdqa 80(%ebp),%xmm5
|
||||
movdqa 96(%ebp),%xmm6
|
||||
movdqa 112(%ebp),%xmm7
|
||||
paddd 64(%eax),%xmm4
|
||||
movdqa %xmm2,32(%ebx)
|
||||
movdqa %xmm3,48(%ebx)
|
||||
movdqa %xmm4,64(%ebx)
|
||||
movdqa %xmm5,80(%ebx)
|
||||
movdqa %xmm6,96(%ebx)
|
||||
movdqa %xmm7,112(%ebx)
|
||||
movdqa %xmm4,64(%ebp)
|
||||
movdqa -128(%ebp),%xmm0
|
||||
movdqa %xmm4,%xmm6
|
||||
movdqa -64(%ebp),%xmm3
|
||||
movdqa (%ebp),%xmm4
|
||||
movdqa 16(%ebp),%xmm5
|
||||
movl $10,%edx
|
||||
nop
|
||||
.align 16
|
||||
.L010loop:
|
||||
paddd %xmm3,%xmm0
|
||||
movdqa %xmm3,%xmm2
|
||||
pxor %xmm0,%xmm6
|
||||
pshufb (%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -48(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -112(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 80(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-128(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,64(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
movdqa %xmm4,(%ebx)
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-64(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa 32(%ebx),%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -32(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -96(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 96(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,80(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
movdqa %xmm5,16(%ebx)
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-48(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa 48(%ebx),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -16(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -80(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 112(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-96(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,96(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-32(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -48(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -128(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-80(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,%xmm6
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-16(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -32(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -112(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 64(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-128(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,112(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
movdqa %xmm4,32(%ebx)
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-48(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa (%ebx),%xmm4
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa -16(%ebx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -96(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 80(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-112(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,64(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
pxor %xmm0,%xmm6
|
||||
por %xmm1,%xmm3
|
||||
movdqa %xmm5,48(%ebx)
|
||||
pshufb (%eax),%xmm6
|
||||
movdqa %xmm3,-32(%ebx)
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa 16(%ebx),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
movdqa -64(%ebx),%xmm3
|
||||
movdqa %xmm2,%xmm1
|
||||
pslld $12,%xmm2
|
||||
psrld $20,%xmm1
|
||||
por %xmm1,%xmm2
|
||||
movdqa -80(%ebx),%xmm1
|
||||
paddd %xmm2,%xmm0
|
||||
movdqa 96(%ebx),%xmm7
|
||||
pxor %xmm0,%xmm6
|
||||
movdqa %xmm0,-96(%ebx)
|
||||
pshufb 16(%eax),%xmm6
|
||||
paddd %xmm6,%xmm4
|
||||
movdqa %xmm6,80(%ebx)
|
||||
pxor %xmm4,%xmm2
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm0
|
||||
pslld $7,%xmm2
|
||||
psrld $25,%xmm0
|
||||
pxor %xmm1,%xmm7
|
||||
por %xmm0,%xmm2
|
||||
pshufb (%eax),%xmm7
|
||||
movdqa %xmm2,-16(%ebx)
|
||||
paddd %xmm7,%xmm5
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa %xmm3,%xmm0
|
||||
pslld $12,%xmm3
|
||||
psrld $20,%xmm0
|
||||
por %xmm0,%xmm3
|
||||
movdqa -128(%ebx),%xmm0
|
||||
paddd %xmm3,%xmm1
|
||||
movdqa 64(%ebx),%xmm6
|
||||
pxor %xmm1,%xmm7
|
||||
movdqa %xmm1,-80(%ebx)
|
||||
pshufb 16(%eax),%xmm7
|
||||
paddd %xmm7,%xmm5
|
||||
movdqa %xmm7,96(%ebx)
|
||||
pxor %xmm5,%xmm3
|
||||
movdqa %xmm3,%xmm1
|
||||
pslld $7,%xmm3
|
||||
psrld $25,%xmm1
|
||||
por %xmm1,%xmm3
|
||||
decl %edx
|
||||
jnz .L010loop
|
||||
movdqa %xmm3,-64(%ebx)
|
||||
movdqa %xmm4,(%ebx)
|
||||
movdqa %xmm5,16(%ebx)
|
||||
movdqa %xmm6,64(%ebx)
|
||||
movdqa %xmm7,96(%ebx)
|
||||
movdqa -112(%ebx),%xmm1
|
||||
movdqa -96(%ebx),%xmm2
|
||||
movdqa -80(%ebx),%xmm3
|
||||
paddd -128(%ebp),%xmm0
|
||||
paddd -112(%ebp),%xmm1
|
||||
paddd -96(%ebp),%xmm2
|
||||
paddd -80(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa -64(%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa -48(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa -32(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa -16(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd -64(%ebp),%xmm0
|
||||
paddd -48(%ebp),%xmm1
|
||||
paddd -32(%ebp),%xmm2
|
||||
paddd -16(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa (%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa 16(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa 32(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa 48(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd (%ebp),%xmm0
|
||||
paddd 16(%ebp),%xmm1
|
||||
paddd 32(%ebp),%xmm2
|
||||
paddd 48(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 16(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
movdqa 64(%ebx),%xmm0
|
||||
pxor %xmm1,%xmm5
|
||||
movdqa 80(%ebx),%xmm1
|
||||
pxor %xmm2,%xmm6
|
||||
movdqa 96(%ebx),%xmm2
|
||||
pxor %xmm3,%xmm7
|
||||
movdqa 112(%ebx),%xmm3
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 16(%edi),%edi
|
||||
paddd 64(%ebp),%xmm0
|
||||
paddd 80(%ebp),%xmm1
|
||||
paddd 96(%ebp),%xmm2
|
||||
paddd 112(%ebp),%xmm3
|
||||
movdqa %xmm0,%xmm6
|
||||
punpckldq %xmm1,%xmm0
|
||||
movdqa %xmm2,%xmm7
|
||||
punpckldq %xmm3,%xmm2
|
||||
punpckhdq %xmm1,%xmm6
|
||||
punpckhdq %xmm3,%xmm7
|
||||
movdqa %xmm0,%xmm1
|
||||
punpcklqdq %xmm2,%xmm0
|
||||
movdqa %xmm6,%xmm3
|
||||
punpcklqdq %xmm7,%xmm6
|
||||
punpckhqdq %xmm2,%xmm1
|
||||
punpckhqdq %xmm7,%xmm3
|
||||
movdqu -128(%esi),%xmm4
|
||||
movdqu -64(%esi),%xmm5
|
||||
movdqu (%esi),%xmm2
|
||||
movdqu 64(%esi),%xmm7
|
||||
leal 208(%esi),%esi
|
||||
pxor %xmm0,%xmm4
|
||||
pxor %xmm1,%xmm5
|
||||
pxor %xmm2,%xmm6
|
||||
pxor %xmm3,%xmm7
|
||||
movdqu %xmm4,-128(%edi)
|
||||
movdqu %xmm5,-64(%edi)
|
||||
movdqu %xmm6,(%edi)
|
||||
movdqu %xmm7,64(%edi)
|
||||
leal 208(%edi),%edi
|
||||
subl $256,%ecx
|
||||
jnc .L009outer_loop
|
||||
addl $256,%ecx
|
||||
jz .L011done
|
||||
movl 520(%esp),%ebx
|
||||
leal -128(%esi),%esi
|
||||
movl 516(%esp),%edx
|
||||
leal -128(%edi),%edi
|
||||
movd 64(%ebp),%xmm2
|
||||
movdqu (%ebx),%xmm3
|
||||
paddd 96(%eax),%xmm2
|
||||
pand 112(%eax),%xmm3
|
||||
por %xmm2,%xmm3
|
||||
.L0081x:
|
||||
movdqa 32(%eax),%xmm0
|
||||
movdqu (%edx),%xmm1
|
||||
movdqu 16(%edx),%xmm2
|
||||
movdqa (%eax),%xmm6
|
||||
movdqa 16(%eax),%xmm7
|
||||
movl %ebp,48(%esp)
|
||||
movdqa %xmm0,(%esp)
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm2,32(%esp)
|
||||
movdqa %xmm3,48(%esp)
|
||||
movl $10,%edx
|
||||
jmp .L012loop1x
|
||||
.align 16
|
||||
.L013outer1x:
|
||||
movdqa 80(%eax),%xmm3
|
||||
movdqa (%esp),%xmm0
|
||||
movdqa 16(%esp),%xmm1
|
||||
movdqa 32(%esp),%xmm2
|
||||
paddd 48(%esp),%xmm3
|
||||
movl $10,%edx
|
||||
movdqa %xmm3,48(%esp)
|
||||
jmp .L012loop1x
|
||||
.align 16
|
||||
.L012loop1x:
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,222
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $20,%xmm1
|
||||
pslld $12,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,223
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $25,%xmm1
|
||||
pslld $7,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $57,%xmm1,%xmm1
|
||||
pshufd $147,%xmm3,%xmm3
|
||||
nop
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,222
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $20,%xmm1
|
||||
pslld $12,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
paddd %xmm1,%xmm0
|
||||
pxor %xmm0,%xmm3
|
||||
.byte 102,15,56,0,223
|
||||
paddd %xmm3,%xmm2
|
||||
pxor %xmm2,%xmm1
|
||||
movdqa %xmm1,%xmm4
|
||||
psrld $25,%xmm1
|
||||
pslld $7,%xmm4
|
||||
por %xmm4,%xmm1
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
pshufd $147,%xmm1,%xmm1
|
||||
pshufd $57,%xmm3,%xmm3
|
||||
decl %edx
|
||||
jnz .L012loop1x
|
||||
paddd (%esp),%xmm0
|
||||
paddd 16(%esp),%xmm1
|
||||
paddd 32(%esp),%xmm2
|
||||
paddd 48(%esp),%xmm3
|
||||
cmpl $64,%ecx
|
||||
jb .L014tail
|
||||
movdqu (%esi),%xmm4
|
||||
movdqu 16(%esi),%xmm5
|
||||
pxor %xmm4,%xmm0
|
||||
movdqu 32(%esi),%xmm4
|
||||
pxor %xmm5,%xmm1
|
||||
movdqu 48(%esi),%xmm5
|
||||
pxor %xmm4,%xmm2
|
||||
pxor %xmm5,%xmm3
|
||||
leal 64(%esi),%esi
|
||||
movdqu %xmm0,(%edi)
|
||||
movdqu %xmm1,16(%edi)
|
||||
movdqu %xmm2,32(%edi)
|
||||
movdqu %xmm3,48(%edi)
|
||||
leal 64(%edi),%edi
|
||||
subl $64,%ecx
|
||||
jnz .L013outer1x
|
||||
jmp .L011done
|
||||
.L014tail:
|
||||
movdqa %xmm0,(%esp)
|
||||
movdqa %xmm1,16(%esp)
|
||||
movdqa %xmm2,32(%esp)
|
||||
movdqa %xmm3,48(%esp)
|
||||
xorl %eax,%eax
|
||||
xorl %edx,%edx
|
||||
xorl %ebp,%ebp
|
||||
.L015tail_loop:
|
||||
movb (%esp,%ebp,1),%al
|
||||
movb (%esi,%ebp,1),%dl
|
||||
leal 1(%ebp),%ebp
|
||||
xorb %dl,%al
|
||||
movb %al,-1(%edi,%ebp,1)
|
||||
decl %ecx
|
||||
jnz .L015tail_loop
|
||||
.L011done:
|
||||
movl 512(%esp),%esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
|
||||
.align 64
|
||||
.Lssse3_data:
|
||||
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
|
||||
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
|
||||
.long 1634760805,857760878,2036477234,1797285236
|
||||
.long 0,1,2,3
|
||||
.long 4,4,4,4
|
||||
.long 1,0,0,0
|
||||
.long 4,0,0,0
|
||||
.long 0,-1,-1,-1
|
||||
.align 64
|
||||
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
|
||||
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
|
||||
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
|
||||
.byte 114,103,62,0
|
||||
#endif
|
||||
.section .note.GNU-stack,"",@progbits
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user