Merge branch 'master' into keeper-parallel-storage

This commit is contained in:
Antonio Andelic 2024-04-29 15:13:19 +02:00
commit b1d53f0472
2240 changed files with 118341 additions and 264980 deletions

View File

@ -96,7 +96,6 @@ Checks: [
'-modernize-use-default-member-init',
'-modernize-use-emplace',
'-modernize-use-nodiscard',
'-modernize-use-override',
'-modernize-use-trailing-return-type',
'-performance-inefficient-string-concatenation',
@ -120,7 +119,6 @@ Checks: [
'-readability-named-parameter',
'-readability-redundant-declaration',
'-readability-simplify-boolean-expr',
'-readability-static-accessed-through-instance',
'-readability-suspicious-call-argument',
'-readability-uppercase-literal-suffix',
'-readability-use-anyofallof',

View File

@ -44,22 +44,35 @@ At a minimum, the following information should be added (but add more as needed)
---
### Modify your CI run:
**NOTE:** If your merge the PR with modified CI you **MUST KNOW** what you are doing
**NOTE:** Set desired options before CI starts or re-push after updates
**NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step
#### Run only:
- [ ] <!---ci_set_integration--> Integration tests
- [ ] <!---ci_set_arm--> Integration tests (arm64)
- [ ] <!---ci_set_stateless--> Stateless tests (release)
- [ ] <!---ci_set_stateless_asan--> Stateless tests (asan)
- [ ] <!---ci_set_stateful--> Stateful tests (release)
- [ ] <!---ci_set_stateful_asan--> Stateful tests (asan)
- [ ] <!---ci_set_reduced--> No sanitizers
- [ ] <!---ci_set_analyzer--> Tests with analyzer
- [ ] <!---ci_set_fast--> Fast tests
- [ ] <!---job_package_debug--> Only package_debug build
- [ ] <!---PLACE_YOUR_TAG_CONFIGURED_IN_ci_config.py_FILE_HERE--> Add your CI variant description here
#### Include tests (required builds will be added automatically):
- [ ] <!---ci_include_fast--> Fast test
- [ ] <!---ci_include_integration--> Integration Tests
- [ ] <!---ci_include_stateless--> Stateless tests
- [ ] <!---ci_include_stateful--> Stateful tests
- [ ] <!---ci_include_unit--> Unit tests
- [ ] <!---ci_include_performance--> Performance tests
- [ ] <!---ci_include_asan--> All with ASAN
- [ ] <!---ci_include_tsan--> All with TSAN
- [ ] <!---ci_include_analyzer--> All with Analyzer
- [ ] <!---ci_include_KEYWORD--> Add your option here
#### CI options:
#### Exclude tests:
- [ ] <!---ci_exclude_fast--> Fast test
- [ ] <!---ci_exclude_integration--> Integration Tests
- [ ] <!---ci_exclude_stateless--> Stateless tests
- [ ] <!---ci_exclude_stateful--> Stateful tests
- [ ] <!---ci_exclude_performance--> Performance tests
- [ ] <!---ci_exclude_asan--> All with ASAN
- [ ] <!---ci_exclude_tsan--> All with TSAN
- [ ] <!---ci_exclude_msan--> All with MSAN
- [ ] <!---ci_exclude_ubsan--> All with UBSAN
- [ ] <!---ci_exclude_coverage--> All with Coverage
- [ ] <!---ci_exclude_aarch64--> All with Aarch64
- [ ] <!---ci_exclude_KEYWORD--> Add your option here
#### Extra options:
- [ ] <!---do_not_test--> do not test (only style check)
- [ ] <!---no_merge_commit--> disable merge-commit (no merge from master before tests)
- [ ] <!---no_ci_cache--> disable CI cache (job reuse)

View File

@ -23,6 +23,10 @@ jobs:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get version
filter: tree:0
- name: Merge sync PR
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 sync_pr.py --merge || :
- name: Python unit tests
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
@ -55,16 +59,17 @@ jobs:
uses: ./.github/workflows/reusable_docker.yml
with:
data: ${{ needs.RunConfig.outputs.data }}
StyleCheck:
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Style check
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 style_check.py --no-push
# Tested in MQ
# StyleCheck:
# needs: [RunConfig, BuildDockers]
# if: ${{ !failure() && !cancelled() }}
# uses: ./.github/workflows/reusable_test.yml
# with:
# test_name: Style check
# runner_type: style-checker
# data: ${{ needs.RunConfig.outputs.data }}
# run_command: |
# python3 style_check.py --no-push
CompatibilityCheckX86:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
@ -234,14 +239,15 @@ jobs:
build_name: binary_riscv64
data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0
BuilderBinS390X:
needs: [RunConfig, BuilderDebRelease]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_build.yml
with:
build_name: binary_s390x
data: ${{ needs.RunConfig.outputs.data }}
checkout_depth: 0
# disabled because s390x refused to build in the migration to OpenSSL
# BuilderBinS390X:
# needs: [RunConfig, BuilderDebRelease]
# if: ${{ !failure() && !cancelled() }}
# uses: ./.github/workflows/reusable_build.yml
# with:
# build_name: binary_s390x
# data: ${{ needs.RunConfig.outputs.data }}
# checkout_depth: 0
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -292,7 +298,7 @@ jobs:
- BuilderBinFreeBSD
- BuilderBinPPC64
- BuilderBinRISCV64
- BuilderBinS390X
# - BuilderBinS390X # disabled because s390x refused to build in the migration to OpenSSL
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
@ -374,7 +380,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (release, analyzer, s3, DatabaseReplicated)
test_name: Stateless tests (release, old analyzer, s3, DatabaseReplicated)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestS3Debug:
@ -442,6 +448,14 @@ jobs:
test_name: Stateless tests (debug)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
FunctionalStatelessTestAsanAzure:
needs: [RunConfig, BuilderDebAsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stateless tests (azure, asan)
runner_type: func-tester
data: ${{ needs.RunConfig.outputs.data }}
##############################################################################################
############################ FUNCTIONAl STATEFUL TESTS #######################################
##############################################################################################
@ -592,6 +606,14 @@ jobs:
test_name: Stress test (tsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestTsanAzure:
needs: [RunConfig, BuilderDebTsan]
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Stress test (azure, tsan)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
StressTestMsan:
needs: [RunConfig, BuilderDebMsan]
if: ${{ !failure() && !cancelled() }}
@ -632,7 +654,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests (asan, analyzer)
test_name: Integration tests (asan, old analyzer)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
IntegrationTestsTsan:

View File

@ -6,6 +6,7 @@ env:
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
merge_group:
pull_request:
types:
- synchronize
@ -29,6 +30,7 @@ jobs:
fetch-depth: 0 # to get version
filter: tree:0
- name: Labels check
if: ${{ github.event_name != 'merge_group' }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 run_check.py
@ -56,16 +58,9 @@ jobs:
echo 'EOF'
} >> "$GITHUB_OUTPUT"
- name: Re-create GH statuses for skipped jobs if any
if: ${{ github.event_name != 'merge_group' }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses
- name: Style check early
# hack to run style check before the docker build job if possible (style-check image not changed)
if: contains(fromJson(steps.runconfig.outputs.CI_DATA).jobs_data.jobs_to_do, 'Style check early')
run: |
DOCKER_TAG=$(echo '${{ toJson(fromJson(steps.runconfig.outputs.CI_DATA).docker_data.images) }}' | tr -d '\n')
export DOCKER_TAG=$DOCKER_TAG
python3 ./tests/ci/style_check.py --no-push
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check'
BuildDockers:
needs: [RunConfig]
if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }}
@ -88,7 +83,7 @@ jobs:
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
RCSK
FastTest:
needs: [RunConfig, StyleCheck]
needs: [RunConfig, BuildDockers]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Fast test') }}
uses: ./.github/workflows/reusable_test.yml
with:
@ -102,7 +97,7 @@ jobs:
# for main CI chain
#
Builds_1:
needs: [RunConfig, FastTest]
needs: [RunConfig, StyleCheck, FastTest]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }}
# using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab
uses: ./.github/workflows/reusable_build_stage.yml
@ -163,15 +158,24 @@ jobs:
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
needs: [Tests_1, Tests_2]
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Check sync status
if: ${{ github.event_name == 'merge_group' }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 sync_pr.py --status
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 finish_check.py ${{ (contains(needs.*.result, 'failure') && github.event_name == 'merge_group') && '--pipeline-failure' || '' }}
- name: Auto merge if approved
if: ${{ github.event_name != 'merge_group' }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 merge_pr.py --check-approved

View File

@ -436,7 +436,7 @@ jobs:
if: ${{ !failure() && !cancelled() }}
uses: ./.github/workflows/reusable_test.yml
with:
test_name: Integration tests (asan, analyzer)
test_name: Integration tests (asan, old analyzer)
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
IntegrationTestsTsan:

3
.gitignore vendored
View File

@ -164,6 +164,9 @@ tests/queries/0_stateless/*.generated-expect
tests/queries/0_stateless/*.expect.history
tests/integration/**/_gen
# pytest --pdb history
.pdb_history
# rust
/rust/**/target*
# It is autogenerated from *.in

View File

@ -16,7 +16,7 @@
#ci_set_reduced
#ci_set_arm
#ci_set_integration
#ci_set_analyzer
#ci_set_old_analyzer
## To run specified job in CI:
#job_<JOB NAME>

21
.gitmodules vendored
View File

@ -22,9 +22,6 @@
[submodule "contrib/capnproto"]
path = contrib/capnproto
url = https://github.com/ClickHouse/capnproto
[submodule "contrib/double-conversion"]
path = contrib/double-conversion
url = https://github.com/google/double-conversion
[submodule "contrib/re2"]
path = contrib/re2
url = https://github.com/google/re2
@ -176,9 +173,6 @@
[submodule "contrib/libpq"]
path = contrib/libpq
url = https://github.com/ClickHouse/libpq
[submodule "contrib/boringssl"]
path = contrib/boringssl
url = https://github.com/ClickHouse/boringssl
[submodule "contrib/NuRaft"]
path = contrib/NuRaft
url = https://github.com/ClickHouse/NuRaft
@ -278,9 +272,6 @@
[submodule "contrib/crc32-s390x"]
path = contrib/crc32-s390x
url = https://github.com/linux-on-ibm-z/crc32-s390x
[submodule "contrib/openssl"]
path = contrib/openssl
url = https://github.com/openssl/openssl
[submodule "contrib/google-benchmark"]
path = contrib/google-benchmark
url = https://github.com/google/benchmark
@ -326,6 +317,9 @@
[submodule "contrib/crc32-vpmsum"]
path = contrib/crc32-vpmsum
url = https://github.com/antonblanchard/crc32-vpmsum.git
[submodule "contrib/expected"]
path = contrib/expected
url = https://github.com/TartanLlama/expected
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing
@ -369,3 +363,12 @@
[submodule "contrib/idna"]
path = contrib/idna
url = https://github.com/ada-url/idna.git
[submodule "contrib/rust_vendor"]
path = contrib/rust_vendor
url = https://github.com/ClickHouse/rust_vendor.git
[submodule "contrib/openssl"]
path = contrib/openssl
url = https://github.com/ClickHouse/openssl.git
[submodule "contrib/double-conversion"]
path = contrib/double-conversion
url = https://github.com/ClickHouse/double-conversion.git

View File

@ -1,10 +1,183 @@
### Table of Contents
**[ClickHouse release v24.3 LTS, 2024-03-26](#243)**<br/>
**[ClickHouse release v24.2, 2024-02-29](#242)**<br/>
**[ClickHouse release v24.1, 2024-01-30](#241)**<br/>
**[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**<br/>
# 2024 Changelog
### <a id="243"></a> ClickHouse release 24.3 LTS, 2024-03-27
#### Upgrade Notes
* The setting `allow_experimental_analyzer` is enabled by default and it switches the query analysis to a new implementation, which has better compatibility and feature completeness. The feature "analyzer" is considered beta instead of experimental. You can turn the old behavior by setting the `compatibility` to `24.2` or disabling the `allow_experimental_analyzer` setting. Watch the [video on YouTube](https://www.youtube.com/watch?v=zhrOYQpgvkk).
* ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. This is controlled by the settings, `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`, `output_format_arrow_string_as_string`. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases. Parquet/ORC/Arrow supports many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools lack support for the faster `lz4` compression method, that's why we set `zstd` by default. This is controlled by the settings `output_format_parquet_compression_method`, `output_format_orc_compression_method`, and `output_format_arrow_compression_method`. We changed the default to `zstd` for Parquet and ORC, but not Arrow (it is emphasized for low-level usages). [#61817](https://github.com/ClickHouse/ClickHouse/pull/61817) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* In the new ClickHouse version, the functions `geoDistance`, `greatCircleDistance`, and `greatCircleAngle` will use 64-bit double precision floating point data type for internal calculations and return type if all the arguments are Float64. This closes [#58476](https://github.com/ClickHouse/ClickHouse/issues/58476). In previous versions, the function always used Float32. You can switch to the old behavior by setting `geo_distance_returns_float64_on_float64_arguments` to `false` or setting `compatibility` to `24.2` or earlier. [#61848](https://github.com/ClickHouse/ClickHouse/pull/61848) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Co-authored with [Geet Patel](https://github.com/geetptl).
* The obsolete in-memory data parts have been deprecated since version 23.5 and have not been supported since version 23.10. Now the remaining code is removed. Continuation of [#55186](https://github.com/ClickHouse/ClickHouse/issues/55186) and [#45409](https://github.com/ClickHouse/ClickHouse/issues/45409). It is unlikely that you have used in-memory data parts because they were available only before version 23.5 and only when you enabled them manually by specifying the corresponding SETTINGS for a MergeTree table. To check if you have in-memory data parts, run the following query: `SELECT part_type, count() FROM system.parts GROUP BY part_type ORDER BY part_type`. To disable the usage of in-memory data parts, do `ALTER TABLE ... MODIFY SETTING min_bytes_for_compact_part = DEFAULT, min_rows_for_compact_part = DEFAULT`. Before upgrading from old ClickHouse releases, first check that you don't have in-memory data parts. If there are in-memory data parts, disable them first, then wait while there are no in-memory data parts and continue the upgrade. [#61127](https://github.com/ClickHouse/ClickHouse/pull/61127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Changed the column name from `duration_ms` to `duration_microseconds` in the `system.zookeeper` table to reflect the reality that the duration is in the microsecond resolution. [#60774](https://github.com/ClickHouse/ClickHouse/pull/60774) ([Duc Canh Le](https://github.com/canhld94)).
* Reject incoming INSERT queries in case when query-level settings `async_insert` and `deduplicate_blocks_in_dependent_materialized_views` are enabled together. This behaviour is controlled by a setting `throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert` and enabled by default. This is a continuation of https://github.com/ClickHouse/ClickHouse/pull/59699 needed to unblock https://github.com/ClickHouse/ClickHouse/pull/59915. [#60888](https://github.com/ClickHouse/ClickHouse/pull/60888) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Utility `clickhouse-copier` is moved to a separate repository on GitHub: https://github.com/ClickHouse/copier. It is no longer included in the bundle but is still available as a separate download. This closes: [#60734](https://github.com/ClickHouse/ClickHouse/issues/60734) This closes: [#60540](https://github.com/ClickHouse/ClickHouse/issues/60540) This closes: [#60250](https://github.com/ClickHouse/ClickHouse/issues/60250) This closes: [#52917](https://github.com/ClickHouse/ClickHouse/issues/52917) This closes: [#51140](https://github.com/ClickHouse/ClickHouse/issues/51140) This closes: [#47517](https://github.com/ClickHouse/ClickHouse/issues/47517) This closes: [#47189](https://github.com/ClickHouse/ClickHouse/issues/47189) This closes: [#46598](https://github.com/ClickHouse/ClickHouse/issues/46598) This closes: [#40257](https://github.com/ClickHouse/ClickHouse/issues/40257) This closes: [#36504](https://github.com/ClickHouse/ClickHouse/issues/36504) This closes: [#35485](https://github.com/ClickHouse/ClickHouse/issues/35485) This closes: [#33702](https://github.com/ClickHouse/ClickHouse/issues/33702) This closes: [#26702](https://github.com/ClickHouse/ClickHouse/issues/26702).
* To increase compatibility with MySQL, the compatibility alias `locate` now accepts arguments `(needle, haystack[, start_pos])` by default. The previous behavior `(haystack, needle, [, start_pos])` can be restored by setting `function_locate_has_mysql_compatible_argument_order = 0`. [#61092](https://github.com/ClickHouse/ClickHouse/pull/61092) ([Robert Schulze](https://github.com/rschu1ze)).
* Forbid `SimpleAggregateFunction` in `ORDER BY` of `MergeTree` tables (like `AggregateFunction` is forbidden, but they are forbidden because they are not comparable) by default (use `allow_suspicious_primary_key` to allow them). [#61399](https://github.com/ClickHouse/ClickHouse/pull/61399) ([Azat Khuzhin](https://github.com/azat)).
* The `Ordinary` database engine is deprecated. You will receive a warning in clickhouse-client if your server is using it. This closes [#52229](https://github.com/ClickHouse/ClickHouse/issues/52229). [#56942](https://github.com/ClickHouse/ClickHouse/pull/56942) ([shabroo](https://github.com/shabroo)).
#### New Feature
* Support reading and writing backups as `tar` (in addition to `zip`). [#59535](https://github.com/ClickHouse/ClickHouse/pull/59535) ([josh-hildred](https://github.com/josh-hildred)).
* Implemented support for S3 Express buckets. [#59965](https://github.com/ClickHouse/ClickHouse/pull/59965) ([Nikita Taranov](https://github.com/nickitat)).
* Allow to attach parts from a different disk (using copy instead of hard link). [#60112](https://github.com/ClickHouse/ClickHouse/pull/60112) ([Unalian](https://github.com/Unalian)).
* Size-capped `Memory` tables: controlled by their settings, `min_bytes_to_keep, max_bytes_to_keep, min_rows_to_keep` and `max_rows_to_keep`. [#60612](https://github.com/ClickHouse/ClickHouse/pull/60612) ([Jake Bamrah](https://github.com/JakeBamrah)).
* Separate limits on number of waiting and executing queries. Added new server setting `max_waiting_queries` that limits the number of queries waiting due to `async_load_databases`. Existing limits on number of executing queries no longer count waiting queries. [#61053](https://github.com/ClickHouse/ClickHouse/pull/61053) ([Sergei Trifonov](https://github.com/serxa)).
* Added a table `system.keywords` which contains all the keywords from parser. Mostly needed and will be used for better fuzzing and syntax highlighting. [#51808](https://github.com/ClickHouse/ClickHouse/pull/51808) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Add support for `ATTACH PARTITION ALL`. [#61107](https://github.com/ClickHouse/ClickHouse/pull/61107) ([Kirill Nikiforov](https://github.com/allmazz)).
* Add a new function, `getClientHTTPHeader`. This closes [#54665](https://github.com/ClickHouse/ClickHouse/issues/54665). Co-authored with @lingtaolf. [#61820](https://github.com/ClickHouse/ClickHouse/pull/61820) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add `generate_series` as a table function (compatibility alias for PostgreSQL to the existing `numbers` function). This function generates table with an arithmetic progression with natural numbers. [#59390](https://github.com/ClickHouse/ClickHouse/pull/59390) ([divanik](https://github.com/divanik)).
* A mode for `topK`/`topkWeighed` support mode, which return count of values and its error. [#54508](https://github.com/ClickHouse/ClickHouse/pull/54508) ([UnamedRus](https://github.com/UnamedRus)).
* Added function `toMillisecond` which returns the millisecond component for values of type`DateTime` or `DateTime64`. [#60281](https://github.com/ClickHouse/ClickHouse/pull/60281) ([Shaun Struwig](https://github.com/Blargian)).
* Allow configuring HTTP redirect handlers for clickhouse-server. For example, you can make `/` redirect to the Play UI. [#60390](https://github.com/ClickHouse/ClickHouse/pull/60390) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Performance Improvement
* Optimized function `dotProduct` to omit unnecessary and expensive memory copies. [#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)).
* 30x faster printing for 256-bit integers. [#61100](https://github.com/ClickHouse/ClickHouse/pull/61100) ([Raúl Marín](https://github.com/Algunenano)).
* If the table's primary key contains mostly useless columns, don't keep them in memory. This is controlled by a new setting `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` with the value `0.9` by default, which means: for a composite primary key, if a column changes its value for at least 0.9 of all the times, the next columns after it will be not loaded. [#60255](https://github.com/ClickHouse/ClickHouse/pull/60255) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve the performance of serialized aggregation methods when involving multiple `Nullable` columns. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)).
* Lazy builds JSON's output to improve performance of ALL JOIN. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)).
* Make HTTP/HTTPs connections with external services, such as AWS S3 reusable for all use cases. Even when the response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)).
* Improvements to aggregate functions `argMin` / `argMax` / `any` / `anyLast` / `anyHeavy`, as well as `ORDER BY {u8/u16/u32/u64/i8/i16/u32/i64) LIMIT 1` queries. [#58640](https://github.com/ClickHouse/ClickHouse/pull/58640) ([Raúl Marín](https://github.com/Algunenano)).
* Trivial optimization for column's filter. Peak memory can be reduced to 44% of the original in some cases. [#59698](https://github.com/ClickHouse/ClickHouse/pull/59698) ([李扬](https://github.com/taiyang-li)).
* Execute `multiIf` function in a columnar fashion when the result type's underlying type is a number. [#60384](https://github.com/ClickHouse/ClickHouse/pull/60384) ([李扬](https://github.com/taiyang-li)).
* Faster (almost 2x) mutexes. [#60823](https://github.com/ClickHouse/ClickHouse/pull/60823) ([Azat Khuzhin](https://github.com/azat)).
* Drain multiple connections in parallel when a distributed query is finishing. [#60845](https://github.com/ClickHouse/ClickHouse/pull/60845) ([lizhuoyu5](https://github.com/lzydmxy)).
* Optimize data movement between columns of a Nullable number or a Nullable string, which improves some micro-benchmarks. [#60846](https://github.com/ClickHouse/ClickHouse/pull/60846) ([李扬](https://github.com/taiyang-li)).
* Operations with the filesystem cache will suffer less from the lock contention. [#61066](https://github.com/ClickHouse/ClickHouse/pull/61066) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Optimize array join and other JOINs by preventing a wrong compiler's optimization. Close [#61074](https://github.com/ClickHouse/ClickHouse/issues/61074). [#61075](https://github.com/ClickHouse/ClickHouse/pull/61075) ([李扬](https://github.com/taiyang-li)).
* If a query with a syntax error contained the `COLUMNS` matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error. The compiled regexp was put to AST. But the letter A in AST means "abstract" which means it should not contain heavyweight objects. Parts of AST can be created and discarded during parsing, including a large number of backtracking. This leads to slowness on the parsing side and consequently allows DoS by a readonly user. But the main problem is that it prevents progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a new analyzer pass to optimize the IN operator for a single value. [#61564](https://github.com/ClickHouse/ClickHouse/pull/61564) ([LiuNeng](https://github.com/liuneng1994)).
* DNSResolver shuffles set of resolved IPs which is needed to uniformly utilize multiple endpoints of AWS S3. [#60965](https://github.com/ClickHouse/ClickHouse/pull/60965) ([Sema Checherinda](https://github.com/CheSema)).
#### Experimental Feature
* Support parallel reading for Azure blob storage. This improves the performance of the experimental Azure object storage. [#61503](https://github.com/ClickHouse/ClickHouse/pull/61503) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Add asynchronous WriteBuffer for Azure blob storage similar to S3. This improves the performance of the experimental Azure object storage. [#59929](https://github.com/ClickHouse/ClickHouse/pull/59929) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Use managed identity for backups IO when using Azure Blob Storage. Add a setting to prevent ClickHouse from attempting to create a non-existent container, which requires permissions at the storage account level. [#61785](https://github.com/ClickHouse/ClickHouse/pull/61785) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Add a setting `parallel_replicas_allow_in_with_subquery = 1` which allows subqueries for IN work with parallel replicas. [#60950](https://github.com/ClickHouse/ClickHouse/pull/60950) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* A change for the "zero-copy" replication: all zero copy locks related to a table have to be dropped when the table is dropped. The directory which contains these locks has to be removed also. [#57575](https://github.com/ClickHouse/ClickHouse/pull/57575) ([Sema Checherinda](https://github.com/CheSema)).
#### Improvement
* Use `MergeTree` as a default table engine. [#60524](https://github.com/ClickHouse/ClickHouse/pull/60524) ([Alexey Milovidov](https://github.com/alexey-milovidov))
* Enable `output_format_pretty_row_numbers` by default. It is better for usability. [#61791](https://github.com/ClickHouse/ClickHouse/pull/61791) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* In the previous version, some numbers in Pretty formats were not pretty enough. [#61794](https://github.com/ClickHouse/ClickHouse/pull/61794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* A long value in Pretty formats won't be cut if it is the single value in the resultset, such as in the result of the `SHOW CREATE TABLE` query. [#61795](https://github.com/ClickHouse/ClickHouse/pull/61795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Similarly to `clickhouse-local`, `clickhouse-client` will accept the `--output-format` option as a synonym to the `--format` option. This closes [#59848](https://github.com/ClickHouse/ClickHouse/issues/59848). [#61797](https://github.com/ClickHouse/ClickHouse/pull/61797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* If stdout is a terminal and the output format is not specified, `clickhouse-client` and similar tools will use `PrettyCompact` by default, similarly to the interactive mode. `clickhouse-client` and `clickhouse-local` will handle command line arguments for input and output formats in a unified fashion. This closes [#61272](https://github.com/ClickHouse/ClickHouse/issues/61272). [#61800](https://github.com/ClickHouse/ClickHouse/pull/61800) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Underscore digit groups in Pretty formats for better readability. This is controlled by a new setting, `output_format_pretty_highlight_digit_groups`. [#61802](https://github.com/ClickHouse/ClickHouse/pull/61802) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add ability to override initial INSERT settings via `SYSTEM FLUSH DISTRIBUTED`. [#61832](https://github.com/ClickHouse/ClickHouse/pull/61832) ([Azat Khuzhin](https://github.com/azat)).
* Enable processors profiling (time spent/in and out bytes for sorting, aggregation, ...) by default. [#61096](https://github.com/ClickHouse/ClickHouse/pull/61096) ([Azat Khuzhin](https://github.com/azat)).
* Support files without format extension in Filesystem database. [#60795](https://github.com/ClickHouse/ClickHouse/pull/60795) ([Kruglov Pavel](https://github.com/Avogar)).
* Make all format names case insensitive, like Tsv, or TSV, or tsv, or even rowbinary. [#60420](https://github.com/ClickHouse/ClickHouse/pull/60420) ([豪肥肥](https://github.com/HowePa)). I appreciate if you will continue to write it correctly, e.g., `JSON` 😇, not `Json` 🤮, but we don't mind if you spell it as you prefer.
* Added `none_only_active` mode for `distributed_ddl_output_mode` setting. [#60340](https://github.com/ClickHouse/ClickHouse/pull/60340) ([Alexander Tokmakov](https://github.com/tavplubix)).
* The advanced dashboard has slightly better colors for multi-line graphs. [#60391](https://github.com/ClickHouse/ClickHouse/pull/60391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The Advanced dashboard now has controls always visible on scrolling. This allows you to add a new chart without scrolling up. [#60692](https://github.com/ClickHouse/ClickHouse/pull/60692) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* While running the `MODIFY COLUMN` query for materialized views, check the inner table's structure to ensure every column exists. [#47427](https://github.com/ClickHouse/ClickHouse/pull/47427) ([sunny](https://github.com/sunny19930321)).
* String types and Enums can be used in the same context, such as: arrays, UNION queries, conditional expressions. This closes [#60726](https://github.com/ClickHouse/ClickHouse/issues/60726). [#60727](https://github.com/ClickHouse/ClickHouse/pull/60727) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow declaring Enums in the structure of external data for query processing (this is an immediate temporary table that you can provide for your query). [#57857](https://github.com/ClickHouse/ClickHouse/pull/57857) ([Duc Canh Le](https://github.com/canhld94)).
* Consider lightweight deleted rows when selecting parts to merge, so the disk size of the resulting part will be estimated better. [#58223](https://github.com/ClickHouse/ClickHouse/pull/58223) ([Zhuo Qiu](https://github.com/jewelzqiu)).
* Added comments for columns for more system tables. Continuation of https://github.com/ClickHouse/ClickHouse/pull/58356. [#59016](https://github.com/ClickHouse/ClickHouse/pull/59016) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Now we can use virtual columns in PREWHERE. It's worthwhile for non-const virtual columns like `_part_offset`. [#59033](https://github.com/ClickHouse/ClickHouse/pull/59033) ([Amos Bird](https://github.com/amosbird)). Improved overall usability of virtual columns. Now it is allowed to use virtual columns in `PREWHERE` (it's worthwhile for non-const virtual columns like `_part_offset`). Now a builtin documentation is available for virtual columns as a comment of column in `DESCRIBE` query with enabled setting `describe_include_virtual_columns`. [#60205](https://github.com/ClickHouse/ClickHouse/pull/60205) ([Anton Popov](https://github.com/CurtizJ)).
* Instead of using a constant key, now object storage generates key for determining remove objects capability. [#59495](https://github.com/ClickHouse/ClickHouse/pull/59495) ([Sema Checherinda](https://github.com/CheSema)).
* Allow "local" as object storage type instead of "local_blob_storage". [#60165](https://github.com/ClickHouse/ClickHouse/pull/60165) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Parallel flush of pending INSERT blocks of Distributed engine on `DETACH`/server shutdown and `SYSTEM FLUSH DISTRIBUTED` (Parallelism will work only if you have multi-disk policy for a table (like everything in the Distributed engine right now)). [#60225](https://github.com/ClickHouse/ClickHouse/pull/60225) ([Azat Khuzhin](https://github.com/azat)).
* Add a setting to force read-through cache for merges. [#60308](https://github.com/ClickHouse/ClickHouse/pull/60308) ([Kseniia Sumarokova](https://github.com/kssenii)).
* An improvement for the MySQL compatibility protocol. The issue [#57598](https://github.com/ClickHouse/ClickHouse/issues/57598) mentions a variant behaviour regarding transaction handling. An issued COMMIT/ROLLBACK when no transaction is active is reported as an error contrary to MySQL behaviour. [#60338](https://github.com/ClickHouse/ClickHouse/pull/60338) ([PapaToemmsn](https://github.com/PapaToemmsn)).
* Function `substring` now has a new alias `byteSlice`. [#60494](https://github.com/ClickHouse/ClickHouse/pull/60494) ([Robert Schulze](https://github.com/rschu1ze)).
* Renamed server setting `dns_cache_max_size` to `dns_cache_max_entries` to reduce ambiguity. [#60500](https://github.com/ClickHouse/ClickHouse/pull/60500) ([Kirill Nikiforov](https://github.com/allmazz)).
* `SHOW INDEX | INDEXES | INDICES | KEYS` no longer sorts by the primary key columns (which was unintuitive). [#60514](https://github.com/ClickHouse/ClickHouse/pull/60514) ([Robert Schulze](https://github.com/rschu1ze)).
* Keeper improvement: abort during startup if an invalid snapshot is detected to avoid data loss. [#60537](https://github.com/ClickHouse/ClickHouse/pull/60537) ([Antonio Andelic](https://github.com/antonio2368)).
* Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
* Keeper improvement: support `leadership_expiry_ms` in Keeper's settings. [#60806](https://github.com/ClickHouse/ClickHouse/pull/60806) ([Brokenice0415](https://github.com/Brokenice0415)).
* Always infer exponential numbers in JSON formats regardless of the setting `input_format_try_infer_exponent_floats`. Add setting `input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects` that allows to use String type for ambiguous paths instead of an exception during named Tuples inference from JSON objects. [#60808](https://github.com/ClickHouse/ClickHouse/pull/60808) ([Kruglov Pavel](https://github.com/Avogar)).
* Add support for `START TRANSACTION` syntax typically used in MySQL syntax, resolving https://github.com/ClickHouse/ClickHouse/discussions/60865. [#60886](https://github.com/ClickHouse/ClickHouse/pull/60886) ([Zach Naimon](https://github.com/ArctypeZach)).
* Add a flag for the full-sorting merge join algorithm to treat null as biggest/smallest. So the behavior can be compitable with other SQL systems, like Apache Spark. [#60896](https://github.com/ClickHouse/ClickHouse/pull/60896) ([loudongfeng](https://github.com/loudongfeng)).
* Support detect output format by file exctension in `clickhouse-client` and `clickhouse-local`. [#61036](https://github.com/ClickHouse/ClickHouse/pull/61036) ([豪肥肥](https://github.com/HowePa)).
* Update memory limit in runtime when Linux's CGroups value changed. [#61049](https://github.com/ClickHouse/ClickHouse/pull/61049) ([Han Fei](https://github.com/hanfei1991)).
* Add the function `toUInt128OrZero`, which was missed by mistake (the mistake is related to https://github.com/ClickHouse/ClickHouse/pull/945). The compatibility aliases `FROM_UNIXTIME` and `DATE_FORMAT` (they are not ClickHouse-native and only exist for MySQL compatibility) have been made case insensitive, as expected for SQL-compatibility aliases. [#61114](https://github.com/ClickHouse/ClickHouse/pull/61114) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improvements for the access checks, allowing to revoke of unpossessed rights in case the target user doesn't have the revoking grants either. Example: `GRANT SELECT ON *.* TO user1; REVOKE SELECT ON system.* FROM user1;`. [#61115](https://github.com/ClickHouse/ClickHouse/pull/61115) ([pufit](https://github.com/pufit)).
* Fix `has()` function with `Nullable` column (fixes [#60214](https://github.com/ClickHouse/ClickHouse/issues/60214)). [#61249](https://github.com/ClickHouse/ClickHouse/pull/61249) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Now it's possible to specify the attribute `merge="true"` in config substitutions for subtrees `<include from_zk="/path" merge="true">`. In case this attribute specified, clickhouse will merge subtree with existing configuration, otherwise default behavior is append new content to configuration. [#61299](https://github.com/ClickHouse/ClickHouse/pull/61299) ([alesapin](https://github.com/alesapin)).
* Add async metrics for virtual memory mappings: `VMMaxMapCount` & `VMNumMaps`. Closes [#60662](https://github.com/ClickHouse/ClickHouse/issues/60662). [#61354](https://github.com/ClickHouse/ClickHouse/pull/61354) ([Tuan Pham Anh](https://github.com/tuanpavn)).
* Use `temporary_files_codec` setting in all places where we create temporary data, for example external memory sorting and external memory GROUP BY. Before it worked only in `partial_merge` JOIN algorithm. [#61456](https://github.com/ClickHouse/ClickHouse/pull/61456) ([Maksim Kita](https://github.com/kitaisreal)).
* Add a new setting `max_parser_backtracks` which allows to limit the complexity of query parsing. [#61502](https://github.com/ClickHouse/ClickHouse/pull/61502) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Less contention during dynamic resize of the filesystem cache. [#61524](https://github.com/ClickHouse/ClickHouse/pull/61524) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Disallow sharded mode of StorageS3 queue, because it will be rewritten. [#61537](https://github.com/ClickHouse/ClickHouse/pull/61537) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed typo: from `use_leagcy_max_level` to `use_legacy_max_level`. [#61545](https://github.com/ClickHouse/ClickHouse/pull/61545) ([William Schoeffel](https://github.com/wiledusc)).
* Remove some duplicate entries in `system.blob_storage_log`. [#61622](https://github.com/ClickHouse/ClickHouse/pull/61622) ([YenchangChan](https://github.com/YenchangChan)).
* Added `current_user` function as a compatibility alias for MySQL. [#61770](https://github.com/ClickHouse/ClickHouse/pull/61770) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix inconsistent floating point aggregate function states in mixed x86-64 / ARM clusters [#60610](https://github.com/ClickHouse/ClickHouse/pull/60610) ([Harry Lee](https://github.com/HarryLeeIBM)).
#### Build/Testing/Packaging Improvement
* The real-time query profiler now works on AArch64. In previous versions, it worked only when a program didn't spend time inside a syscall. [#60807](https://github.com/ClickHouse/ClickHouse/pull/60807) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* ClickHouse version has been added to docker labels. Closes [#54224](https://github.com/ClickHouse/ClickHouse/issues/54224). [#60949](https://github.com/ClickHouse/ClickHouse/pull/60949) ([Nikolay Monkov](https://github.com/nikmonkov)).
* Upgrade `prqlc` to 0.11.3. [#60616](https://github.com/ClickHouse/ClickHouse/pull/60616) ([Maximilian Roos](https://github.com/max-sixty)).
* Add generic query text fuzzer in `clickhouse-local`. [#61508](https://github.com/ClickHouse/ClickHouse/pull/61508) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix finished_mutations_to_keep=0 for MergeTree (as docs says 0 is to keep everything) [#60031](https://github.com/ClickHouse/ClickHouse/pull/60031) ([Azat Khuzhin](https://github.com/azat)).
* Something was wrong with the FINAL optimization, here is how the author describes it: "PartsSplitter invalid ranges for the same part". [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
* Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)).
* An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)).
* Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)).
* Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Something was wrong with experimental KQL (Kusto) support: fix `max_query_size_for_kql_compound_operator`: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Keeper fix: add timeouts when waiting for commit logs [#60544](https://github.com/ClickHouse/ClickHouse/pull/60544) ([Antonio Andelic](https://github.com/antonio2368)).
* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
* Fix reading from MergeTree with non-deterministic functions in filter [#60586](https://github.com/ClickHouse/ClickHouse/pull/60586) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix logical error on bad compatibility setting value type [#60596](https://github.com/ClickHouse/ClickHouse/pull/60596) ([Kruglov Pavel](https://github.com/Avogar)).
* fix(prql): Robust panic handler [#60615](https://github.com/ClickHouse/ClickHouse/pull/60615) ([Maximilian Roos](https://github.com/max-sixty)).
* Fix `intDiv` for decimal and date arguments [#60672](https://github.com/ClickHouse/ClickHouse/pull/60672) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix: expand CTE in alter modify query [#60682](https://github.com/ClickHouse/ClickHouse/pull/60682) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix system.parts for non-Atomic/Ordinary database engine (i.e. Memory) [#60689](https://github.com/ClickHouse/ClickHouse/pull/60689) ([Azat Khuzhin](https://github.com/azat)).
* Fix "Invalid storage definition in metadata file" for parameterized views [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove wrong assertion in aggregate function quantileGK [#60740](https://github.com/ClickHouse/ClickHouse/pull/60740) ([李扬](https://github.com/taiyang-li)).
* Fix insert-select + insert_deduplication_token bug by setting streams to 1 [#60745](https://github.com/ClickHouse/ClickHouse/pull/60745) ([Jordi Villar](https://github.com/jrdi)).
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Fix toStartOfInterval [#60763](https://github.com/ClickHouse/ClickHouse/pull/60763) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix possible stuck on error in HashedDictionaryParallelLoader [#60926](https://github.com/ClickHouse/ClickHouse/pull/60926) ([vdimir](https://github.com/vdimir)).
* Fix async RESTORE with Replicated database (experimental feature) [#60934](https://github.com/ClickHouse/ClickHouse/pull/60934) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix deadlock in async inserts to `Log` tables via native protocol [#61055](https://github.com/ClickHouse/ClickHouse/pull/61055) ([Anton Popov](https://github.com/CurtizJ)).
* Fix lazy execution of default argument in dictGetOrDefault for RangeHashedDictionary [#61196](https://github.com/ClickHouse/ClickHouse/pull/61196) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix usage of session_token in S3 engine [#61234](https://github.com/ClickHouse/ClickHouse/pull/61234) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)).
* Fix bugs in show database [#61269](https://github.com/ClickHouse/ClickHouse/pull/61269) ([Raúl Marín](https://github.com/Algunenano)).
* Fix logical error in RabbitMQ storage with MATERIALIZED columns [#61320](https://github.com/ClickHouse/ClickHouse/pull/61320) ([vdimir](https://github.com/vdimir)).
* Fix CREATE OR REPLACE DICTIONARY [#61356](https://github.com/ClickHouse/ClickHouse/pull/61356) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)).
* fix issue of actions dag split [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)).
* Fix finishing a failed RESTORE [#61466](https://github.com/ClickHouse/ClickHouse/pull/61466) ([Vitaly Baranov](https://github.com/vitlibar)).
* Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)).
* Allow queuing in restore pool [#61475](https://github.com/ClickHouse/ClickHouse/pull/61475) ([Nikita Taranov](https://github.com/nickitat)).
* Fix an inconsistency when reading system.parts using UUID. [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)).
* Fix ALTER QUERY MODIFY SQL SECURITY [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)).
* Fix a crash in window view (experimental feature) [#61526](https://github.com/ClickHouse/ClickHouse/pull/61526) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `repeat` with non-native integers [#61527](https://github.com/ClickHouse/ClickHouse/pull/61527) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix client's `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix crash in arrayPartialReverseSort [#61539](https://github.com/ClickHouse/ClickHouse/pull/61539) ([Raúl Marín](https://github.com/Algunenano)).
* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix addDays cause an error when used DateTime64 [#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)).
* Disallow LowCardinality input type for JSONExtract [#61617](https://github.com/ClickHouse/ClickHouse/pull/61617) ([Julia Kartseva](https://github.com/jkartseva)).
* Fix `system.part_log` for async insert with deduplication [#61620](https://github.com/ClickHouse/ClickHouse/pull/61620) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix a `Non-ready set` exception for system.parts. [#61666](https://github.com/ClickHouse/ClickHouse/pull/61666) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix actual_part_name for REPLACE_RANGE (`Entry actual part isn't empty yet`) [#61675](https://github.com/ClickHouse/ClickHouse/pull/61675) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a sanitizer report in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).
* Fix an observation that the RANGE frame is not supported for Nullable columns. [#61766](https://github.com/ClickHouse/ClickHouse/pull/61766) ([YuanLiu](https://github.com/ditgittube)).
### <a id="242"></a> ClickHouse release 24.2, 2024-02-29
#### Backward Incompatible Change

View File

@ -455,8 +455,6 @@ endif ()
enable_testing() # Enable for tests without binary
option(ENABLE_OPENSSL "This option performs a build with OpenSSL. NOTE! This option is insecure and should never be used. By default, ClickHouse uses and only supports BoringSSL" OFF)
if (ARCH_S390X)
set(ENABLE_OPENSSL_DYNAMIC_DEFAULT ON)
else ()

View File

@ -28,7 +28,6 @@ curl https://clickhouse.com/ | sh
* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev.
* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Monthly Release & Community Call
@ -40,15 +39,8 @@ Every month we get together with the community (users, contributors, customers,
## Upcoming Events
Keep an eye out for upcoming meetups and eventsaround the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
* [ClickHouse Meetup in Bellevue](https://www.meetup.com/clickhouse-seattle-user-group/events/298650371/) - Mar 11
* [ClickHouse Meetup at Ramp's Offices in NYC](https://www.meetup.com/clickhouse-new-york-user-group/events/298640542/) - Mar 19
* [ClickHouse Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/299479750/) - Mar 20
* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/298997115/) - Mar 21
* [ClickHouse Meetup in Bengaluru](https://www.meetup.com/clickhouse-bangalore-user-group/events/299479850/) - Mar 23
* [ClickHouse Meetup in Zurich](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/299628922/) - Apr 16
* [ClickHouse Meetup in Copenhagen](https://www.meetup.com/clickhouse-denmark-meetup-group/events/299629133/) - Apr 23
* [ClickHouse Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28

View File

@ -13,18 +13,16 @@ The following versions of ClickHouse server are currently being supported with s
| Version | Supported |
|:-|:-|
| 24.3 | ✔️ |
| 24.2 | ✔️ |
| 24.1 | ✔️ |
| 23.12 | ✔️ |
| 23.11 | ❌ |
| 23.10 | ❌ |
| 23.9 | ❌ |
| 23.* | ❌ |
| 23.8 | ✔️ |
| 23.7 | ❌ |
| 23.6 | ❌ |
| 23.5 | ❌ |
| 23.4 | ❌ |
| 23.3 | ✔️ |
| 23.3 | |
| 23.2 | ❌ |
| 23.1 | ❌ |
| 22.* | ❌ |

View File

@ -29,11 +29,13 @@ public:
requires std::is_convertible_v<G, F>
constexpr BasicScopeGuard & operator=(BasicScopeGuard<G> && src) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations)
{
if (this != &src)
if constexpr (std::is_same_v<G, F>)
{
invoke();
function = src.release();
if (this == &src)
return *this;
}
invoke();
function = src.release();
return *this;
}

View File

@ -13,8 +13,6 @@
#include <tuple>
#include <limits>
#include <boost/math/special_functions/fpclassify.hpp>
// NOLINTBEGIN(*)
/// Use same extended double for all platforms
@ -22,6 +20,7 @@
#define CONSTEXPR_FROM_DOUBLE constexpr
using FromDoubleIntermediateType = long double;
#else
#include <boost/math/special_functions/fpclassify.hpp>
#include <boost/multiprecision/cpp_bin_float.hpp>
/// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended`
#define CONSTEXPR_FROM_DOUBLE
@ -309,6 +308,13 @@ struct integer<Bits, Signed>::_impl
constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max();
static_assert(std::is_same_v<T, double> || std::is_same_v<T, FromDoubleIntermediateType>);
/// Implementation specific behaviour on overflow (if we don't check here, stack overflow will triggered in bigint_cast).
#if (LDBL_MANT_DIG == 64)
if (!std::isfinite(t))
{
self = 0;
return;
}
#else
if constexpr (std::is_same_v<T, double>)
{
if (!std::isfinite(t))
@ -325,6 +331,7 @@ struct integer<Bits, Signed>::_impl
return;
}
}
#endif
const T alpha = t / static_cast<T>(max_int);

View File

@ -23,6 +23,9 @@
#include <openssl/conf.h>
#endif
#if __has_feature(address_sanitizer)
#include <sanitizer/lsan_interface.h>
#endif
using Poco::RandomInputStream;
using Poco::Thread;
@ -67,12 +70,18 @@ void OpenSSLInitializer::initialize()
SSL_library_init();
SSL_load_error_strings();
OpenSSL_add_all_algorithms();
char seed[SEEDSIZE];
RandomInputStream rnd;
rnd.read(seed, sizeof(seed));
RAND_seed(seed, SEEDSIZE);
{
# if __has_feature(address_sanitizer)
/// Leak sanitizer (part of address sanitizer) thinks that a few bytes of memory in OpenSSL are allocated during but never released.
__lsan::ScopedDisabler lsan_disabler;
#endif
RAND_seed(seed, SEEDSIZE);
}
int nMutexes = CRYPTO_num_locks();
_mutexes = new Poco::FastMutex[nMutexes];
CRYPTO_set_locking_callback(&OpenSSLInitializer::lock);
@ -80,8 +89,8 @@ void OpenSSLInitializer::initialize()
// https://sourceforge.net/p/poco/bugs/110/
//
// From http://www.openssl.org/docs/crypto/threads.html :
// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(),
// then a default implementation is used - on Windows and BeOS this uses the system's
// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(),
// then a default implementation is used - on Windows and BeOS this uses the system's
// default thread identifying APIs"
CRYPTO_set_id_callback(&OpenSSLInitializer::id);
CRYPTO_set_dynlock_create_callback(&OpenSSLInitializer::dynlockCreate);
@ -100,7 +109,7 @@ void OpenSSLInitializer::uninitialize()
CRYPTO_set_locking_callback(0);
CRYPTO_set_id_callback(0);
delete [] _mutexes;
CONF_modules_free();
}
}

View File

@ -66,9 +66,11 @@ public:
/// The thread and process ids are set.
Message(
const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {});
const std::string & source, const std::string & text, Priority prio, const char * file, int line,
std::string_view fmt_str = {}, const std::vector<std::string> & fmt_str_args = {});
Message(
std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str);
std::string && source, std::string && text, Priority prio, const char * file, int line,
std::string_view fmt_str, std::vector<std::string> && fmt_str_args);
/// Creates a Message with the given source, text, priority,
/// source file path and line.
///
@ -161,6 +163,9 @@ public:
std::string_view getFormatString() const;
void setFormatString(std::string_view fmt_str);
const std::vector<std::string> & getFormatStringArgs() const;
void setFormatStringArgs(const std::vector<std::string> & fmt_str_args);
int getSourceLine() const;
/// Returns the source file line of the statement
/// generating the log message. May be 0
@ -210,6 +215,7 @@ private:
int _line;
StringMap * _pMap;
std::string_view _fmt_str;
std::vector<std::string> _fmt_str_args;
};

View File

@ -46,7 +46,9 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
}
Message::Message(const std::string& source, const std::string& text, Priority prio, const char* file, int line, std::string_view fmt_str):
Message::Message(
const std::string& source, const std::string& text, Priority prio, const char* file, int line,
std::string_view fmt_str, const std::vector<std::string>& fmt_str_args):
_source(source),
_text(text),
_prio(prio),
@ -54,13 +56,16 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
_file(file),
_line(line),
_pMap(0),
_fmt_str(fmt_str)
_fmt_str(fmt_str),
_fmt_str_args(fmt_str_args)
{
init();
}
Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str):
Message::Message(
std::string && source, std::string && text, Priority prio, const char * file, int line,
std::string_view fmt_str, std::vector<std::string> && fmt_str_args):
_source(std::move(source)),
_text(std::move(text)),
_prio(prio),
@ -68,7 +73,8 @@ Message::Message(std::string && source, std::string && text, Priority prio, cons
_file(file),
_line(line),
_pMap(0),
_fmt_str(fmt_str)
_fmt_str(fmt_str),
_fmt_str_args(std::move(fmt_str_args))
{
init();
}
@ -83,7 +89,8 @@ Message::Message(const Message& msg):
_pid(msg._pid),
_file(msg._file),
_line(msg._line),
_fmt_str(msg._fmt_str)
_fmt_str(msg._fmt_str),
_fmt_str_args(msg._fmt_str_args)
{
if (msg._pMap)
_pMap = new StringMap(*msg._pMap);
@ -102,7 +109,8 @@ Message::Message(const Message& msg, const std::string& text):
_pid(msg._pid),
_file(msg._file),
_line(msg._line),
_fmt_str(msg._fmt_str)
_fmt_str(msg._fmt_str),
_fmt_str_args(msg._fmt_str_args)
{
if (msg._pMap)
_pMap = new StringMap(*msg._pMap);
@ -154,6 +162,7 @@ void Message::swap(Message& msg)
swap(_line, msg._line);
swap(_pMap, msg._pMap);
swap(_fmt_str, msg._fmt_str);
swap(_fmt_str_args, msg._fmt_str_args);
}
@ -227,6 +236,17 @@ void Message::setFormatString(std::string_view fmt_str)
}
const std::vector<std::string>& Message::getFormatStringArgs() const
{
return _fmt_str_args;
}
void Message::setFormatStringArgs(const std::vector<std::string>& fmt_str_args)
{
_fmt_str_args = fmt_str_args;
}
bool Message::has(const std::string& param) const
{
return _pMap && (_pMap->find(param) != _pMap->end());

View File

@ -4835,7 +4835,7 @@ for (;; ptr++)
If the class contains characters outside the 0-255 range, a different
opcode is compiled. It may optionally have a bit map for characters < 256,
but those above are are explicitly listed afterwards. A flag byte tells
but those above are explicitly listed afterwards. A flag byte tells
whether the bitmap is present, and whether this is a negated class or not.
In JavaScript compatibility mode, an isolated ']' causes an error. In

View File

@ -314,13 +314,13 @@ static int read_unicode(json_stream *json)
if (l < 0xdc00 || l > 0xdfff) {
json_error(json, "invalid surrogate pair continuation \\u%04lx out "
"of range (dc00-dfff)", l);
"of range (dc00-dfff)", (unsigned long)l);
return -1;
}
cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000);
} else if (cp >= 0xdc00 && cp <= 0xdfff) {
json_error(json, "dangling surrogate \\u%04lx", cp);
json_error(json, "dangling surrogate \\u%04lx", (unsigned long)cp);
return -1;
}

View File

@ -213,6 +213,19 @@ namespace Net
Poco::Timespan getKeepAliveTimeout() const;
/// Returns the connection timeout for HTTP connections.
void setKeepAliveMaxRequests(int max_requests);
int getKeepAliveMaxRequests() const;
int getKeepAliveRequest() const;
bool isKeepAliveExpired(double reliability = 1.0) const;
/// Returns if the connection is expired with some margin as fraction of timeout as reliability
double getKeepAliveReliability() const;
/// Returns the current fraction of keep alive timeout when connection is considered safe to use
/// It helps to avoid situation when a client uses nearly expired connection and receives NoMessageException
virtual std::ostream & sendRequest(HTTPRequest & request);
/// Sends the header for the given HTTP request to
/// the server.
@ -345,6 +358,8 @@ namespace Net
void assign(HTTPClientSession & session);
void setKeepAliveRequest(int request);
HTTPSessionFactory _proxySessionFactory;
/// Factory to create HTTPClientSession to proxy.
private:
@ -353,6 +368,8 @@ namespace Net
Poco::UInt16 _port;
ProxyConfig _proxyConfig;
Poco::Timespan _keepAliveTimeout;
int _keepAliveCurrentRequest = 0;
int _keepAliveMaxRequests = 1000;
Poco::Timestamp _lastRequest;
bool _reconnect;
bool _mustReconnect;
@ -361,6 +378,7 @@ namespace Net
Poco::SharedPtr<std::ostream> _pRequestStream;
Poco::SharedPtr<std::istream> _pResponseStream;
static const double _defaultKeepAliveReliabilityLevel;
static ProxyConfig _globalProxyConfig;
HTTPClientSession(const HTTPClientSession &);
@ -450,9 +468,19 @@ namespace Net
return _lastRequest;
}
inline void HTTPClientSession::setLastRequest(Poco::Timestamp time)
inline double HTTPClientSession::getKeepAliveReliability() const
{
_lastRequest = time;
return _defaultKeepAliveReliabilityLevel;
}
inline int HTTPClientSession::getKeepAliveMaxRequests() const
{
return _keepAliveMaxRequests;
}
inline int HTTPClientSession::getKeepAliveRequest() const
{
return _keepAliveCurrentRequest;
}
}

View File

@ -120,6 +120,10 @@ namespace Net
/// The value is set to "Keep-Alive" if keepAlive is
/// true, or to "Close" otherwise.
void setKeepAliveTimeout(int timeout, int max_requests);
int getKeepAliveTimeout() const;
int getKeepAliveMaxRequests() const;
bool getKeepAlive() const;
/// Returns true if
/// * the message has a Connection header field and its value is "Keep-Alive"

View File

@ -44,7 +44,7 @@ namespace Net
/// - timeout: 60 seconds
/// - keepAlive: true
/// - maxKeepAliveRequests: 0
/// - keepAliveTimeout: 10 seconds
/// - keepAliveTimeout: 15 seconds
void setServerName(const std::string & serverName);
/// Sets the name and port (name:port) that the server uses to identify itself.

View File

@ -56,6 +56,8 @@ namespace Net
SocketAddress serverAddress();
/// Returns the server's address.
void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout);
private:
bool _firstRequest;
Poco::Timespan _keepAliveTimeout;

View File

@ -79,6 +79,11 @@ namespace Net
/// Returns the value of the first name-value pair with the given name.
/// If no value with the given name has been found, the defaultValue is returned.
const std::vector<std::reference_wrapper<const std::string>> getAll(const std::string & name) const;
/// Returns all values of all name-value pairs with the given name.
///
/// Returns an empty vector if there are no name-value pairs with the given name.
bool has(const std::string & name) const;
/// Returns true if there is at least one name-value pair
/// with the given name.

View File

@ -37,6 +37,7 @@ namespace Net {
HTTPClientSession::ProxyConfig HTTPClientSession::_globalProxyConfig;
const double HTTPClientSession::_defaultKeepAliveReliabilityLevel = 0.9;
HTTPClientSession::HTTPClientSession():
@ -220,7 +221,41 @@ void HTTPClientSession::setGlobalProxyConfig(const ProxyConfig& config)
void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout)
{
_keepAliveTimeout = timeout;
if (connected())
{
throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection, "
"That value is managed privately after connection is established.");
}
_keepAliveTimeout = timeout;
}
void HTTPClientSession::setKeepAliveMaxRequests(int max_requests)
{
if (connected())
{
throw Poco::IllegalStateException("cannot change keep alive max requests on initiated connection, "
"That value is managed privately after connection is established.");
}
_keepAliveMaxRequests = max_requests;
}
void HTTPClientSession::setKeepAliveRequest(int request)
{
_keepAliveCurrentRequest = request;
}
void HTTPClientSession::setLastRequest(Poco::Timestamp time)
{
if (connected())
{
throw Poco::IllegalStateException("cannot change last request on initiated connection, "
"That value is managed privately after connection is established.");
}
_lastRequest = time;
}
@ -231,6 +266,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
clearException();
_responseReceived = false;
_keepAliveCurrentRequest += 1;
bool keepAlive = getKeepAlive();
if (((connected() && !keepAlive) || mustReconnect()) && !_host.empty())
{
@ -241,8 +278,10 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
{
if (!connected())
reconnect();
if (!keepAlive)
request.setKeepAlive(false);
if (!request.has(HTTPMessage::CONNECTION))
request.setKeepAlive(keepAlive);
if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0)
request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds(), _keepAliveMaxRequests);
if (!request.has(HTTPRequest::HOST) && !_host.empty())
request.setHost(_host, _port);
if (!_proxyConfig.host.empty() && !bypassProxy())
@ -324,6 +363,17 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response)
_mustReconnect = getKeepAlive() && !response.getKeepAlive();
if (!_mustReconnect)
{
/// when server sends its keep alive timeout, client has to follow that value
auto timeout = response.getKeepAliveTimeout();
if (timeout > 0)
_keepAliveTimeout = std::min(_keepAliveTimeout, Poco::Timespan(timeout, 0));
auto max_requests = response.getKeepAliveMaxRequests();
if (max_requests > 0)
_keepAliveMaxRequests = std::min(_keepAliveMaxRequests, max_requests);
}
if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED)
_pResponseStream = new HTTPFixedLengthInputStream(*this, 0);
else if (response.getChunkedTransferEncoding())
@ -430,15 +480,18 @@ std::string HTTPClientSession::proxyRequestPrefix() const
return result;
}
bool HTTPClientSession::isKeepAliveExpired(double reliability) const
{
Poco::Timestamp now;
return Timespan(Timestamp::TimeDiff(reliability *_keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest
|| _keepAliveCurrentRequest > _keepAliveMaxRequests;
}
bool HTTPClientSession::mustReconnect() const
{
if (!_mustReconnect)
{
Poco::Timestamp now;
return _keepAliveTimeout <= now - _lastRequest;
}
else return true;
return isKeepAliveExpired(_defaultKeepAliveReliabilityLevel);
return true;
}
@ -511,14 +564,21 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session)
if (buffered())
throw Poco::LogicException("assign to a session with not empty buffered data");
attachSocket(session.detachSocket());
setLastRequest(session.getLastRequest());
poco_assert(!connected());
setResolvedHost(session.getResolvedHost());
setKeepAlive(session.getKeepAlive());
setProxyConfig(session.getProxyConfig());
setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout());
setKeepAlive(session.getKeepAlive());
setLastRequest(session.getLastRequest());
setKeepAliveTimeout(session.getKeepAliveTimeout());
setProxyConfig(session.getProxyConfig());
_keepAliveMaxRequests = session._keepAliveMaxRequests;
_keepAliveCurrentRequest = session._keepAliveCurrentRequest;
attachSocket(session.detachSocket());
session.reset();
}

View File

@ -17,6 +17,7 @@
#include "Poco/NumberFormatter.h"
#include "Poco/NumberParser.h"
#include "Poco/String.h"
#include <format>
using Poco::NumberFormatter;
@ -179,4 +180,51 @@ bool HTTPMessage::getKeepAlive() const
}
void HTTPMessage::setKeepAliveTimeout(int timeout, int max_requests)
{
add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max={}", timeout, max_requests));
}
int parseFromHeaderValues(const std::string_view header_value, const std::string_view param_name)
{
auto param_value_pos = header_value.find(param_name);
if (param_value_pos == std::string::npos)
param_value_pos = header_value.size();
if (param_value_pos != header_value.size())
param_value_pos += param_name.size();
auto param_value_end = header_value.find(',', param_value_pos);
if (param_value_end == std::string::npos)
param_value_end = header_value.size();
auto timeout_value_substr = header_value.substr(param_value_pos, param_value_end - param_value_pos);
if (timeout_value_substr.empty())
return -1;
int value = 0;
auto [ptr, ec] = std::from_chars(timeout_value_substr.begin(), timeout_value_substr.end(), value);
if (ec == std::errc())
return value;
return -1;
}
int HTTPMessage::getKeepAliveTimeout() const
{
const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY);
static const std::string_view timeout_param = "timeout=";
return parseFromHeaderValues(ka_header, timeout_param);
}
int HTTPMessage::getKeepAliveMaxRequests() const
{
const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY);
static const std::string_view timeout_param = "max=";
return parseFromHeaderValues(ka_header, timeout_param);
}
} } // namespace Poco::Net

View File

@ -88,7 +88,18 @@ void HTTPServerConnection::run()
pHandler->handleRequest(request, response);
session.setKeepAlive(_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive());
}
/// all that fuzz is all about to make session close with less timeout than 15s (set in HTTPServerParams c-tor)
if (_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive())
{
int value = response.getKeepAliveTimeout();
if (value < 0)
value = request.getKeepAliveTimeout();
if (value > 0)
session.setKeepAliveTimeout(Poco::Timespan(value, 0));
}
}
else sendErrorResponse(session, HTTPResponse::HTTP_NOT_IMPLEMENTED);
}
catch (Poco::Exception&)

View File

@ -33,6 +33,12 @@ HTTPServerSession::~HTTPServerSession()
{
}
void HTTPServerSession::setKeepAliveTimeout(Poco::Timespan keepAliveTimeout)
{
_keepAliveTimeout = keepAliveTimeout;
}
bool HTTPServerSession::hasMoreRequests()
{

View File

@ -15,6 +15,7 @@
#include "Poco/Net/NameValueCollection.h"
#include "Poco/Exception.h"
#include <algorithm>
#include <functional>
using Poco::NotFoundException;
@ -55,7 +56,7 @@ void NameValueCollection::swap(NameValueCollection& nvc)
std::swap(_map, nvc._map);
}
const std::string& NameValueCollection::operator [] (const std::string& name) const
{
ConstIterator it = _map.find(name);
@ -65,8 +66,8 @@ const std::string& NameValueCollection::operator [] (const std::string& name) co
throw NotFoundException(name);
}
void NameValueCollection::set(const std::string& name, const std::string& value)
void NameValueCollection::set(const std::string& name, const std::string& value)
{
Iterator it = _map.find(name);
if (it != _map.end())
@ -75,13 +76,13 @@ void NameValueCollection::set(const std::string& name, const std::string& value)
_map.insert(HeaderMap::ValueType(name, value));
}
void NameValueCollection::add(const std::string& name, const std::string& value)
{
_map.insert(HeaderMap::ValueType(name, value));
}
const std::string& NameValueCollection::get(const std::string& name) const
{
ConstIterator it = _map.find(name);
@ -101,6 +102,15 @@ const std::string& NameValueCollection::get(const std::string& name, const std::
return defaultValue;
}
const std::vector<std::reference_wrapper<const std::string>> NameValueCollection::getAll(const std::string& name) const
{
std::vector<std::reference_wrapper<const std::string>> values;
for (ConstIterator it = _map.find(name); it != _map.end(); it++)
if (it->first == name)
values.push_back(it->second);
return values;
}
bool NameValueCollection::has(const std::string& name) const
{
@ -113,19 +123,19 @@ NameValueCollection::ConstIterator NameValueCollection::find(const std::string&
return _map.find(name);
}
NameValueCollection::ConstIterator NameValueCollection::begin() const
{
return _map.begin();
}
NameValueCollection::ConstIterator NameValueCollection::end() const
{
return _map.end();
}
bool NameValueCollection::empty() const
{
return _map.empty();

View File

@ -592,6 +592,7 @@ void Context::createSSLContext()
SSL_CTX_set_default_passwd_cb(_pSSLContext, &SSLManager::privateKeyPassphraseCallback);
Utility::clearErrorStack();
SSL_CTX_set_options(_pSSLContext, SSL_OP_ALL);
SSL_CTX_set_options(_pSSLContext, SSL_OP_IGNORE_UNEXPECTED_EOF);
}

View File

@ -125,7 +125,7 @@ void SSLManager::initializeClient(PrivateKeyPassphraseHandlerPtr ptrPassphraseHa
Context::Ptr SSLManager::defaultServerContext()
{
Poco::FastMutex::ScopedLock lock(_mutex);
if (!_ptrDefaultServerContext)
initDefaultContext(true);
@ -150,7 +150,7 @@ Context::Ptr SSLManager::defaultClientContext()
_ptrDefaultClientContext->disableProtocols(Context::PROTO_SSLV2 | Context::PROTO_SSLV3);
}
}
return _ptrDefaultClientContext;
}
@ -256,7 +256,7 @@ void SSLManager::initDefaultContext(bool server)
Context::Params params;
// mandatory options
params.privateKeyFile = config.getString(prefix + CFG_PRIV_KEY_FILE, "");
params.certificateFile = config.getString(prefix + CFG_CERTIFICATE_FILE, params.privateKeyFile);
params.certificateFile = config.getString(prefix + CFG_CERTIFICATE_FILE, params.privateKeyFile);
params.caLocation = config.getString(prefix + CFG_CA_LOCATION, "");
if (server && params.certificateFile.empty() && params.privateKeyFile.empty())
@ -283,7 +283,7 @@ void SSLManager::initDefaultContext(bool server)
params.ecdhCurve = config.getString(prefix + CFG_ECDH_CURVE, "");
Context::Usage usage;
if (server)
{
if (requireTLSv1_2)
@ -308,7 +308,7 @@ void SSLManager::initDefaultContext(bool server)
usage = Context::CLIENT_USE;
_ptrDefaultClientContext = new Context(usage, params);
}
std::string disabledProtocolsList = config.getString(prefix + CFG_DISABLE_PROTOCOLS, "");
Poco::StringTokenizer dpTok(disabledProtocolsList, ";,", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
int disabledProtocols = 0;
@ -329,27 +329,28 @@ void SSLManager::initDefaultContext(bool server)
_ptrDefaultServerContext->disableProtocols(disabledProtocols);
else
_ptrDefaultClientContext->disableProtocols(disabledProtocols);
bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false);
if (server)
{
std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", ""));
_ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext);
if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE))
{
int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE);
_ptrDefaultServerContext->setSessionCacheSize(cacheSize);
}
if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT))
{
int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT);
_ptrDefaultServerContext->setSessionTimeout(timeout);
}
}
else
{
_ptrDefaultClientContext->enableSessionCache(cacheSessions);
}
/// Temporarily disabled during the transition from boringssl to OpenSSL due to tsan issues.
/// bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false);
/// if (server)
/// {
/// std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", ""));
/// _ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext);
/// if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE))
/// {
/// int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE);
/// _ptrDefaultServerContext->setSessionCacheSize(cacheSize);
/// }
/// if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT))
/// {
/// int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT);
/// _ptrDefaultServerContext->setSessionTimeout(timeout);
/// }
/// }
/// else
/// {
/// _ptrDefaultClientContext->enableSessionCache(cacheSessions);
/// }
bool extendedVerification = config.getBool(prefix + CFG_EXTENDED_VERIFICATION, false);
if (server)
_ptrDefaultServerContext->enableExtendedCertificateVerification(extendedVerification);
@ -378,7 +379,7 @@ void SSLManager::initPassphraseHandler(bool server)
{
if (server && _ptrServerPassphraseHandler) return;
if (!server && _ptrClientPassphraseHandler) return;
std::string prefix = server ? CFG_SERVER_PREFIX : CFG_CLIENT_PREFIX;
Poco::Util::AbstractConfiguration& config = appConfig();
@ -399,7 +400,7 @@ void SSLManager::initPassphraseHandler(bool server)
}
else throw Poco::Util::UnknownOptionException(std::string("No passphrase handler known with the name ") + className);
}
void SSLManager::initCertificateHandler(bool server)
{

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54484)
SET(VERSION_REVISION 54485)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 3)
SET(VERSION_MINOR 4)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 891689a41506d00aa169548f5b4a8774351242c4)
SET(VERSION_DESCRIBE v24.3.1.1-testing)
SET(VERSION_STRING 24.3.1.1)
SET(VERSION_GITHASH 2c5c589a882ceec35439650337b92db3e76f0081)
SET(VERSION_DESCRIBE v24.4.1.1-testing)
SET(VERSION_STRING 24.4.1.1)
# end of autochange

View File

@ -8,9 +8,6 @@ option (SANITIZE "Enable one of the code sanitizers" "")
set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
# It's possible to pass an ignore list to sanitizers (-fsanitize-ignorelist). Intentionally not doing this because
# 1. out-of-source suppressions are awkward 2. it seems ignore lists don't work after the Clang v16 upgrade (#49829)
if (SANITIZE)
if (SANITIZE STREQUAL "address")
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")

View File

@ -37,11 +37,7 @@ function(add_contrib cmake_folder)
message(STATUS "Adding contrib module ${base_folders} (configuring with ${cmake_folder})")
add_subdirectory (${cmake_folder})
endfunction()
if (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)
add_contrib (openssl-cmake openssl)
else ()
add_contrib (boringssl-cmake boringssl)
endif ()
add_contrib (openssl-cmake openssl)
add_contrib (miniselect-cmake miniselect)
add_contrib (pdqsort-cmake pdqsort)
add_contrib (pocketfft-cmake pocketfft)
@ -223,6 +219,8 @@ endif ()
add_contrib (xxHash-cmake xxHash)
add_contrib (expected-cmake expected)
add_contrib (libbcrypt-cmake libbcrypt)
add_contrib (google-benchmark-cmake google-benchmark)

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 4a12f99dfc9d47c687ff7700b927cc76856225d1
Subproject commit cb5dc3c906e80f253e9ce9535807caef827cc2e0

2
contrib/arrow vendored

@ -1 +1 @@
Subproject commit ba5c67934e8274d649befcffab56731632dc5253
Subproject commit 8f36d71d18587f1f315ec832f424183cb6519cbb

View File

@ -59,12 +59,3 @@ target_link_libraries (_avrocpp PRIVATE boost::headers_only boost::iostreams)
target_compile_definitions (_avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
target_include_directories (_avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy)
# create a symlink to include headers with <avro/...>
set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
ADD_CUSTOM_TARGET(avro_symlink_headers ALL
COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
)
add_dependencies(_avrocpp avro_symlink_headers)
target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit 32870e234cac03e0ac46370c26858b0ffdf14200
Subproject commit 2e12d7c6dafa81311ee3d73ac6a178550ffa75be

2
contrib/aws-c-cal vendored

@ -1 +1 @@
Subproject commit 9453687ff5493ba94eaccf8851200565c4364c77
Subproject commit 1586846816e6d7d5ff744a2db943107a3a74a082

2
contrib/azure vendored

@ -1 +1 @@
Subproject commit e71395e44f309f97b5a486f5c2c59b82f85dd2d2
Subproject commit b90fd3c6ef3185f5be3408056567bca0854129b6

View File

@ -10,6 +10,7 @@ set(AZURE_SDK_LIBRARY_DIR "${AZURE_DIR}/sdk")
file(GLOB AZURE_SDK_SRC
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/credentials/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/cryptography/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/*.cpp"
"${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.cpp"

1
contrib/boringssl vendored

@ -1 +0,0 @@
Subproject commit aa6d2f865a2eab01cf94f197e11e36b6de47b5b4

View File

@ -1,799 +0,0 @@
# Needed for:
# - securely connecting to an external server, e.g. clickhouse-client --host ... --secure
# - lots of thirdparty libraries
# Actually, so many 3rd party libraries + unit tests need SSL that we cannot disable it
# without breaking the build ...
option(ENABLE_SSL "Enable ssl" ON) # breaks if OFF
# TODO: Making SSL dependent on ENABLE_LIBRARIES is desirable but needs fixing dependent libs + tests.
# option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES})
if(NOT ENABLE_SSL)
message(STATUS "Not using openssl")
return()
endif()
# Copyright (c) 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This file is created by generate_build_files.py and edited accordingly.
cmake_minimum_required(VERSION 3.5)
project(BoringSSL LANGUAGES C CXX)
set(BORINGSSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/boringssl")
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CLANG 1)
endif()
if(CMAKE_COMPILER_IS_GNUCXX OR CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-common -fno-exceptions -fno-rtti")
if(APPLE AND CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common -std=c11")
endif()
# pthread_rwlock_t requires a feature flag.
if(NOT WIN32)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_XOPEN_SOURCE=700")
endif()
if(WIN32)
add_definitions(-D_HAS_EXCEPTIONS=0)
add_definitions(-DWIN32_LEAN_AND_MEAN)
add_definitions(-DNOMINMAX)
# Allow use of fopen.
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
# VS 2017 and higher supports STL-only warning suppressions.
# A bug in CMake < 3.13.0 may cause the space in this value to
# cause issues when building with NASM. In that case, update CMake.
add_definitions("-D_STL_EXTRA_DISABLED_WARNINGS=4774 4987")
endif()
add_definitions(-DBORINGSSL_IMPLEMENTATION)
# CMake's iOS support uses Apple's multiple-architecture toolchain. It takes an
# architecture list from CMAKE_OSX_ARCHITECTURES, leaves CMAKE_SYSTEM_PROCESSOR
# alone, and expects all architecture-specific logic to be conditioned within
# the source files rather than the build. This does not work for our assembly
# files, so we fix CMAKE_SYSTEM_PROCESSOR and only support single-architecture
# builds.
if(NOT OPENSSL_NO_ASM AND CMAKE_OSX_ARCHITECTURES)
list(LENGTH CMAKE_OSX_ARCHITECTURES NUM_ARCHES)
if(NOT NUM_ARCHES EQUAL 1)
message(FATAL_ERROR "Universal binaries not supported.")
endif()
list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR)
endif()
if(OPENSSL_NO_ASM)
add_definitions(-DOPENSSL_NO_ASM)
set(ARCH "generic")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
set(ARCH "x86_64")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64")
set(ARCH "x86_64")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
# cmake reports AMD64 on Windows, but we might be building for 32-bit.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(ARCH "x86_64")
else()
set(ARCH "x86")
endif()
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86")
set(ARCH "x86")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386")
# cmake uses `uname -p` to set the system processor, but Solaris
# systems support multiple architectures.
if((${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") AND CMAKE_SIZEOF_VOID_P EQUAL 8)
set(ARCH "x86_64")
else()
set(ARCH "x86")
endif()
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686")
set(ARCH "x86")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
set(ARCH "aarch64")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64")
set(ARCH "aarch64")
# Apple A12 Bionic chipset which is added in iPhone XS/XS Max/XR uses arm64e architecture.
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64e")
set(ARCH "aarch64")
elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm*")
set(ARCH "arm")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips")
# Just to avoid the unknown processor error.
set(ARCH "generic")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc64le")
set(ARCH "ppc64le")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
set(ARCH "riscv64")
else()
message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
endif()
if(NOT OPENSSL_NO_ASM)
if(UNIX)
enable_language(ASM)
# Clang's integerated assembler does not support debug symbols.
if(NOT CMAKE_ASM_COMPILER_ID MATCHES "Clang")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,-g")
endif()
# CMake does not add -isysroot and -arch flags to assembly.
if(APPLE)
if(CMAKE_OSX_SYSROOT)
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -isysroot \"${CMAKE_OSX_SYSROOT}\"")
endif()
foreach(arch ${CMAKE_OSX_ARCHITECTURES})
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -arch ${arch}")
endforeach()
endif()
else()
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -gcv8")
enable_language(ASM_NASM)
endif()
endif()
set(
CRYPTO_ios_aarch64_SOURCES
ios-aarch64/crypto/chacha/chacha-armv8.S
ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
ios-aarch64/crypto/fipsmodule/armv8-mont.S
ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
ios-aarch64/crypto/fipsmodule/sha1-armv8.S
ios-aarch64/crypto/fipsmodule/sha256-armv8.S
ios-aarch64/crypto/fipsmodule/sha512-armv8.S
ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
ios-aarch64/crypto/test/trampoline-armv8.S
)
set(
CRYPTO_ios_arm_SOURCES
ios-arm/crypto/chacha/chacha-armv4.S
ios-arm/crypto/fipsmodule/aesv8-armx32.S
ios-arm/crypto/fipsmodule/armv4-mont.S
ios-arm/crypto/fipsmodule/bsaes-armv7.S
ios-arm/crypto/fipsmodule/ghash-armv4.S
ios-arm/crypto/fipsmodule/ghashv8-armx32.S
ios-arm/crypto/fipsmodule/sha1-armv4-large.S
ios-arm/crypto/fipsmodule/sha256-armv4.S
ios-arm/crypto/fipsmodule/sha512-armv4.S
ios-arm/crypto/fipsmodule/vpaes-armv7.S
ios-arm/crypto/test/trampoline-armv4.S
)
set(
CRYPTO_linux_aarch64_SOURCES
linux-aarch64/crypto/chacha/chacha-armv8.S
linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
linux-aarch64/crypto/fipsmodule/armv8-mont.S
linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
linux-aarch64/crypto/fipsmodule/sha1-armv8.S
linux-aarch64/crypto/fipsmodule/sha256-armv8.S
linux-aarch64/crypto/fipsmodule/sha512-armv8.S
linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
linux-aarch64/crypto/test/trampoline-armv8.S
)
set(
CRYPTO_linux_arm_SOURCES
linux-arm/crypto/chacha/chacha-armv4.S
linux-arm/crypto/fipsmodule/aesv8-armx32.S
linux-arm/crypto/fipsmodule/armv4-mont.S
linux-arm/crypto/fipsmodule/bsaes-armv7.S
linux-arm/crypto/fipsmodule/ghash-armv4.S
linux-arm/crypto/fipsmodule/ghashv8-armx32.S
linux-arm/crypto/fipsmodule/sha1-armv4-large.S
linux-arm/crypto/fipsmodule/sha256-armv4.S
linux-arm/crypto/fipsmodule/sha512-armv4.S
linux-arm/crypto/fipsmodule/vpaes-armv7.S
linux-arm/crypto/test/trampoline-armv4.S
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/asm/x25519-asm-arm.S"
"${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm_asm.S"
)
set(
CRYPTO_linux_ppc64le_SOURCES
linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
linux-ppc64le/crypto/test/trampoline-ppc.S
)
set(
CRYPTO_linux_x86_SOURCES
linux-x86/crypto/chacha/chacha-x86.S
linux-x86/crypto/fipsmodule/aesni-x86.S
linux-x86/crypto/fipsmodule/bn-586.S
linux-x86/crypto/fipsmodule/co-586.S
linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S
linux-x86/crypto/fipsmodule/ghash-x86.S
linux-x86/crypto/fipsmodule/md5-586.S
linux-x86/crypto/fipsmodule/sha1-586.S
linux-x86/crypto/fipsmodule/sha256-586.S
linux-x86/crypto/fipsmodule/sha512-586.S
linux-x86/crypto/fipsmodule/vpaes-x86.S
linux-x86/crypto/fipsmodule/x86-mont.S
linux-x86/crypto/test/trampoline-x86.S
)
set(
CRYPTO_linux_x86_64_SOURCES
linux-x86_64/crypto/chacha/chacha-x86_64.S
linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
linux-x86_64/crypto/fipsmodule/md5-x86_64.S
linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
linux-x86_64/crypto/fipsmodule/sha256-x86_64.S
linux-x86_64/crypto/fipsmodule/sha512-x86_64.S
linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
linux-x86_64/crypto/fipsmodule/x86_64-mont.S
linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
linux-x86_64/crypto/test/trampoline-x86_64.S
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S"
)
set(
CRYPTO_mac_x86_SOURCES
mac-x86/crypto/chacha/chacha-x86.S
mac-x86/crypto/fipsmodule/aesni-x86.S
mac-x86/crypto/fipsmodule/bn-586.S
mac-x86/crypto/fipsmodule/co-586.S
mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S
mac-x86/crypto/fipsmodule/ghash-x86.S
mac-x86/crypto/fipsmodule/md5-586.S
mac-x86/crypto/fipsmodule/sha1-586.S
mac-x86/crypto/fipsmodule/sha256-586.S
mac-x86/crypto/fipsmodule/sha512-586.S
mac-x86/crypto/fipsmodule/vpaes-x86.S
mac-x86/crypto/fipsmodule/x86-mont.S
mac-x86/crypto/test/trampoline-x86.S
)
set(
CRYPTO_mac_x86_64_SOURCES
mac-x86_64/crypto/chacha/chacha-x86_64.S
mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
mac-x86_64/crypto/fipsmodule/ghash-x86_64.S
mac-x86_64/crypto/fipsmodule/md5-x86_64.S
mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S
mac-x86_64/crypto/fipsmodule/rsaz-avx2.S
mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
mac-x86_64/crypto/fipsmodule/sha256-x86_64.S
mac-x86_64/crypto/fipsmodule/sha512-x86_64.S
mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
mac-x86_64/crypto/fipsmodule/x86_64-mont.S
mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
mac-x86_64/crypto/test/trampoline-x86_64.S
)
set(
CRYPTO_win_aarch64_SOURCES
win-aarch64/crypto/chacha/chacha-armv8.S
win-aarch64/crypto/fipsmodule/aesv8-armx64.S
win-aarch64/crypto/fipsmodule/armv8-mont.S
win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
win-aarch64/crypto/fipsmodule/ghashv8-armx64.S
win-aarch64/crypto/fipsmodule/sha1-armv8.S
win-aarch64/crypto/fipsmodule/sha256-armv8.S
win-aarch64/crypto/fipsmodule/sha512-armv8.S
win-aarch64/crypto/fipsmodule/vpaes-armv8.S
win-aarch64/crypto/test/trampoline-armv8.S
)
set(
CRYPTO_win_x86_SOURCES
win-x86/crypto/chacha/chacha-x86.asm
win-x86/crypto/fipsmodule/aesni-x86.asm
win-x86/crypto/fipsmodule/bn-586.asm
win-x86/crypto/fipsmodule/co-586.asm
win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm
win-x86/crypto/fipsmodule/ghash-x86.asm
win-x86/crypto/fipsmodule/md5-586.asm
win-x86/crypto/fipsmodule/sha1-586.asm
win-x86/crypto/fipsmodule/sha256-586.asm
win-x86/crypto/fipsmodule/sha512-586.asm
win-x86/crypto/fipsmodule/vpaes-x86.asm
win-x86/crypto/fipsmodule/x86-mont.asm
win-x86/crypto/test/trampoline-x86.asm
)
set(
CRYPTO_win_x86_64_SOURCES
win-x86_64/crypto/chacha/chacha-x86_64.asm
win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
win-x86_64/crypto/fipsmodule/md5-x86_64.asm
win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm
win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm
win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm
win-x86_64/crypto/fipsmodule/rsaz-avx2.asm
win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
win-x86_64/crypto/fipsmodule/sha256-x86_64.asm
win-x86_64/crypto/fipsmodule/sha512-x86_64.asm
win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
win-x86_64/crypto/fipsmodule/x86_64-mont.asm
win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
win-x86_64/crypto/test/trampoline-x86_64.asm
)
if(APPLE AND ARCH STREQUAL "aarch64")
set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_aarch64_SOURCES})
elseif(APPLE AND ARCH STREQUAL "arm")
set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_arm_SOURCES})
elseif(APPLE)
set(CRYPTO_ARCH_SOURCES ${CRYPTO_mac_${ARCH}_SOURCES})
elseif(UNIX)
set(CRYPTO_ARCH_SOURCES ${CRYPTO_linux_${ARCH}_SOURCES})
elseif(WIN32)
set(CRYPTO_ARCH_SOURCES ${CRYPTO_win_${ARCH}_SOURCES})
endif()
add_library(
_crypto
${CRYPTO_ARCH_SOURCES}
err_data.c
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bitstr.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bool.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_d2i_fp.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_dup.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_enum.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_gentm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_i2d_fp.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_int.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_mbstr.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utctm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utf8.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_lib.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_par.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn_pack.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_enum.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_int.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_string.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_dec.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_enc.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_fre.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_new.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_typ.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_utl.c"
"${BORINGSSL_SOURCE_DIR}/crypto/asn1/time_support.c"
"${BORINGSSL_SOURCE_DIR}/crypto/base64/base64.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/bio.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/bio_mem.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/connect.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/fd.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/file.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/hexdump.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/pair.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c"
"${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c"
"${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bytestring/asn1_compat.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bytestring/ber.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbb.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbs.c"
"${BORINGSSL_SOURCE_DIR}/crypto/bytestring/unicode.c"
"${BORINGSSL_SOURCE_DIR}/crypto/chacha/chacha.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/cipher_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/derive_key.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesccm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesctrhmac.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesgcmsiv.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_chacha20poly1305.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_null.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc2.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc4.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_tls.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/tls_cbc.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cmac/cmac.c"
"${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c"
"${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c"
"${BORINGSSL_SOURCE_DIR}/crypto/crypto.c"
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c"
"${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c"
"${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c"
"${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c"
"${BORINGSSL_SOURCE_DIR}/crypto/err/err.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/digestsign.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/evp.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_ctx.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/p_dsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/pbkdf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/print.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/scrypt.c"
"${BORINGSSL_SOURCE_DIR}/crypto/evp/sign.c"
"${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c"
"${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c"
"${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c"
"${BORINGSSL_SOURCE_DIR}/crypto/mem.c"
"${BORINGSSL_SOURCE_DIR}/crypto/obj/obj.c"
"${BORINGSSL_SOURCE_DIR}/crypto/obj/obj_xref.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_all.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_info.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_lib.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_oth.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pk8.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pkey.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_x509.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_xaux.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7_x509.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/p5_pbev2.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8_x509.c"
"${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305.c"
"${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_vec.c"
"${BORINGSSL_SOURCE_DIR}/crypto/pool/pool.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c"
"${BORINGSSL_SOURCE_DIR}/crypto/refcount_c11.c"
"${BORINGSSL_SOURCE_DIR}/crypto/refcount_lock.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_asn1.c"
"${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_print.c"
"${BORINGSSL_SOURCE_DIR}/crypto/siphash/siphash.c"
"${BORINGSSL_SOURCE_DIR}/crypto/stack/stack.c"
"${BORINGSSL_SOURCE_DIR}/crypto/thread.c"
"${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c"
"${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c"
"${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c"
"${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509a.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_att.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_cmp.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_d2.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_def.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_ext.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_lu.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_obj.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_req.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_set.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_trs.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_txt.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_v3.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vfy.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vpm.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509cset.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509name.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509rset.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509spki.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_algor.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_all.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_attrib.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_crl.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_exten.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_info.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_name.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pkey.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pubkey.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_req.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_sig.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_spki.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_val.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509a.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_cache.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_data.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_lib.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_map.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_node.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_tree.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akey.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akeya.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_alt.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bcons.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bitst.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_conf.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_cpols.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_crld.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_enum.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_extku.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_genn.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ia5.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_info.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_int.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_lib.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ncons.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ocsp.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c"
"${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c"
)
add_library(
_ssl
"${BORINGSSL_SOURCE_DIR}/ssl/bio_ssl.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/d1_both.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/d1_lib.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/d1_pkt.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/handshake_server.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/s3_both.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/s3_lib.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/s3_pkt.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_aead_ctx.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_asn1.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_buffer.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_cert.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_cipher.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_file.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_key_share.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_lib.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_privkey.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_session.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_stat.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_transcript.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls13_server.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls_method.cc"
"${BORINGSSL_SOURCE_DIR}/ssl/tls_record.cc"
"${BORINGSSL_SOURCE_DIR}/decrepit/ssl/ssl_decrepit.c"
"${BORINGSSL_SOURCE_DIR}/decrepit/cfb/cfb.c"
"${BORINGSSL_SOURCE_DIR}/decrepit/bio/base64_bio.c"
)
add_executable(
bssl
"${BORINGSSL_SOURCE_DIR}/tool/args.cc"
"${BORINGSSL_SOURCE_DIR}/tool/ciphers.cc"
"${BORINGSSL_SOURCE_DIR}/tool/client.cc"
"${BORINGSSL_SOURCE_DIR}/tool/const.cc"
"${BORINGSSL_SOURCE_DIR}/tool/digest.cc"
"${BORINGSSL_SOURCE_DIR}/tool/fd.cc"
"${BORINGSSL_SOURCE_DIR}/tool/file.cc"
"${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc"
"${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc"
"${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc"
"${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc"
"${BORINGSSL_SOURCE_DIR}/tool/rand.cc"
"${BORINGSSL_SOURCE_DIR}/tool/server.cc"
"${BORINGSSL_SOURCE_DIR}/tool/sign.cc"
"${BORINGSSL_SOURCE_DIR}/tool/speed.cc"
"${BORINGSSL_SOURCE_DIR}/tool/tool.cc"
"${BORINGSSL_SOURCE_DIR}/tool/transport_common.cc"
)
target_link_libraries(_ssl _crypto)
target_link_libraries(bssl _ssl)
if(NOT WIN32 AND NOT ANDROID)
target_link_libraries(_crypto pthread)
endif()
# NOTE: that ClickHouse does not support WIN32 anyway.
if(WIN32)
target_link_libraries(bssl ws2_32)
endif()
target_include_directories(_crypto SYSTEM PUBLIC "${BORINGSSL_SOURCE_DIR}/include")
target_include_directories(_ssl SYSTEM PUBLIC "${BORINGSSL_SOURCE_DIR}/include")
target_compile_options(_crypto PRIVATE -Wno-gnu-anonymous-struct)
add_library(OpenSSL::Crypto ALIAS _crypto)
add_library(OpenSSL::SSL ALIAS _ssl)
# Helper function used in the populate_openssl_vars function below
function(from_hex HEX DEC)
string(TOUPPER "${HEX}" HEX)
set(_res 0)
string(LENGTH "${HEX}" _strlen)
while (_strlen GREATER 0)
math(EXPR _res "${_res} * 16")
string(SUBSTRING "${HEX}" 0 1 NIBBLE)
string(SUBSTRING "${HEX}" 1 -1 HEX)
if (NIBBLE STREQUAL "A")
math(EXPR _res "${_res} + 10")
elseif (NIBBLE STREQUAL "B")
math(EXPR _res "${_res} + 11")
elseif (NIBBLE STREQUAL "C")
math(EXPR _res "${_res} + 12")
elseif (NIBBLE STREQUAL "D")
math(EXPR _res "${_res} + 13")
elseif (NIBBLE STREQUAL "E")
math(EXPR _res "${_res} + 14")
elseif (NIBBLE STREQUAL "F")
math(EXPR _res "${_res} + 15")
else ()
math(EXPR _res "${_res} + ${NIBBLE}")
endif ()
string(LENGTH "${HEX}" _strlen)
endwhile ()
set(${DEC} ${_res} PARENT_SCOPE)
endfunction()
# ClickHouse uses BoringSSL which is a fork of OpenSSL.
# This populates CMAKE var OPENSSL_VERSION from the OPENSSL_VERSION_NUMBER defined
# in contrib/boringssl/include/openssl/base.h. It also sets the CMAKE var OPENSSL_IS_BORING_SSL
# if it's defined in the file. Both OPENSSL_VERSION and OPENSSL_IS_BORING_SSL variables will be
# used to populate flags in the `system.build_options` table for more context on ssl version used.
# This cmake script is adopted from FindOpenSSL cmake module and slightly modified for this use-case .
if (EXISTS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h")
file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" openssl_version_str
REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" openssl_is_boringssl
REGEX "^#[\t ]*define[\t ]+OPENSSL_IS_BORINGSSL.*")
# Set to true if OPENSSL_IS_BORING_SSL is defined
if (openssl_is_boringssl)
set(OPENSSL_IS_BORING_SSL 1)
endif ()
# If openssl_version_str is defined extrapolate and set OPENSSL_VERSION
if (openssl_version_str)
# The version number is encoded as 0xMNNFFPPS: major minor fix patch status
# The status gives if this is a developer or prerelease and is ignored here.
# Major, minor, and fix directly translate into the version numbers shown in
# the string. The patch field translates to the single character suffix that
# indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
# on.
string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
"\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
# 96 is the ASCII code of 'a' minus 1
math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
unset(_tmp)
# Once anyone knows how OpenSSL would call the patch versions beyond 'z'
# this should be updated to handle that, too. This has not happened yet
# so it is simply ignored here for now.
string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
endif ()
set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
else ()
# Since OpenSSL 3.0.0, the new version format is MAJOR.MINOR.PATCH and
# a new OPENSSL_VERSION_STR macro contains exactly that
file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" OPENSSL_VERSION_STR
REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_STR[\t ]+\"([0-9])+\\.([0-9])+\\.([0-9])+\".*")
string(REGEX REPLACE "^.*OPENSSL_VERSION_STR[\t ]+\"([0-9]+\\.[0-9]+\\.[0-9]+)\".*$"
"\\1" OPENSSL_VERSION_STR "${OPENSSL_VERSION_STR}")
set(OPENSSL_VERSION "${OPENSSL_VERSION_STR}")
# Setting OPENSSL_VERSION_MAJOR OPENSSL_VERSION_MINOR and OPENSSL_VERSION_FIX
string(REGEX MATCHALL "([0-9])+" OPENSSL_VERSION_NUMBER "${OPENSSL_VERSION}")
list(POP_FRONT OPENSSL_VERSION_NUMBER
OPENSSL_VERSION_MAJOR
OPENSSL_VERSION_MINOR
OPENSSL_VERSION_FIX)
unset(OPENSSL_VERSION_NUMBER)
unset(OPENSSL_VERSION_STR)
endif ()
endif ()
# Set CMAKE variables so that they can be referenced properly from everywhere
set(OPENSSL_VERSION "${OPENSSL_VERSION}" CACHE INTERNAL "")
set(OPENSSL_IS_BORING_SSL "${OPENSSL_IS_BORING_SSL}" CACHE INTERNAL 0)

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -1,782 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.section __TEXT,__const
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
cmp x0,#0
b.eq Lenc_key_abort
cmp x2,#0
b.eq Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt Lenc_key_abort
cmp w1,#256
b.gt Lenc_key_abort
tst w1,#0x3f
b.ne Lenc_key_abort
adrp x3,Lrcon@PAGE
add x3,x3,Lrcon@PAGEOFF
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32
b.lt Loop128
b.eq L192
b L256
.align 4
Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne Loop128
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
add x2,x2,#0x50
mov w12,#10
b Ldone
.align 4
L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne Loop192
mov w12,#12
add x2,x2,#0x20
b Ldone
.align 4
L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b Loop256
Ldone:
str w12,[x2]
mov x3,#0
Lenc_key_abort:
mov x0,x3 // return value
ldr x29,[sp],#16
ret
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
.align 5
_aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl Lenc_key
cmp x0,#0
b.ne Ldec_key_abort
sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi Loop_imc
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
.align 5
_aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
.align 5
_aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
.align 5
_aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo Lcbc_abort
csel x8,xzr,x8,eq
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
b.eq Lcbc_dec
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
eor v5.16b,v16.16b,v7.16b
b.eq Lcbc_enc128
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b Lenter_cbc_enc
.align 4
Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc
st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b Lenter_cbc_enc128
Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
b.hs Loop3x_cbc_dec
cmn x2,#0x30
b.eq Lcbc_done
nop
Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b Lcbc_done
Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
Lcbc_done:
st1 {v6.16b},[x4]
Lcbc_abort:
ldr x29,[sp],#16
ret
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
.align 5
_aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
ldr w8, [x4, #12]
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo
#ifndef __ARMEB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
b.ls Lctr32_tail
rev w12, w8
sub x2,x2,#3 // bias
mov v18.s[3],w12
b Loop3x_ctr32
.align 4
Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_ctr32
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
eor v19.16b,v19.16b,v7.16b
rev w9,w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs Loop3x_ctr32
adds x2,x2,#3
b.eq Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq Lctr32_done
st1 {v3.16b},[x1]
Lctr32_done:
ldr x29,[sp],#16
ret
#endif
#endif // !OPENSSL_NO_ASM

View File

@ -1,343 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl _gcm_init_neon
.private_extern _gcm_init_neon
.align 4
_gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
.align 4
_gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, Lmasks@PAGE // load constants
add x9, x9, Lmasks@PAGEOFF
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
mov x3, #16
b Lgmult_neon
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
.align 4
_gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, Lmasks@PAGE // load constants
add x9, x9, Lmasks@PAGEOFF
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
subs x3, x3, #16
bne Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
.section __TEXT,__const
.align 4
Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

View File

@ -1,249 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl _gcm_init_v8
.private_extern _gcm_init_v8
.align 4
_gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
.align 4
_gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
.align 4
_gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
b.hs Loop_mod2x_v8 //there was at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq Ldone_v8 //is x3 zero?
Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

View File

@ -1,758 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
// with |argv|, then saves the callee-saved registers into |state|. It returns
// the result of |func|. The |unwind| argument is unused.
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
// const uint64_t *argv, size_t argc,
// uint64_t unwind);
.globl _abi_test_trampoline
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
// x19-x28 (80 bytes)
// x1 (8 bytes)
// padding (8 bytes)
stp x29, x30, [sp, #-176]!
mov x29, sp
// Saved callee-saved registers and |state|.
stp d8, d9, [sp, #16]
stp d10, d11, [sp, #32]
stp d12, d13, [sp, #48]
stp d14, d15, [sp, #64]
stp x19, x20, [sp, #80]
stp x21, x22, [sp, #96]
stp x23, x24, [sp, #112]
stp x25, x26, [sp, #128]
stp x27, x28, [sp, #144]
str x1, [sp, #160]
// Load registers from |state|, with the exception of x29. x29 is the
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
// mandate that x29 always point to a frame. iOS64 does so, which means
// we cannot fill x29 with entropy without violating ABI rules
// ourselves. x29 is tested separately below.
ldp d8, d9, [x1], #16
ldp d10, d11, [x1], #16
ldp d12, d13, [x1], #16
ldp d14, d15, [x1], #16
ldp x19, x20, [x1], #16
ldp x21, x22, [x1], #16
ldp x23, x24, [x1], #16
ldp x25, x26, [x1], #16
ldp x27, x28, [x1], #16
// Move parameters into temporary registers.
mov x9, x0
mov x10, x2
mov x11, x3
// Load parameters into registers.
cbz x11, Largs_done
ldr x0, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x1, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x2, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x3, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x4, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x5, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x6, [x10], #8
subs x11, x11, #1
b.eq Largs_done
ldr x7, [x10], #8
Largs_done:
blr x9
// Reload |state| and store registers.
ldr x1, [sp, #160]
stp d8, d9, [x1], #16
stp d10, d11, [x1], #16
stp d12, d13, [x1], #16
stp d14, d15, [x1], #16
stp x19, x20, [x1], #16
stp x21, x22, [x1], #16
stp x23, x24, [x1], #16
stp x25, x26, [x1], #16
stp x27, x28, [x1], #16
// |func| is required to preserve x29, the frame pointer. We cannot load
// random values into x29 (see comment above), so compare it against the
// expected value and zero the field of |state| if corrupted.
mov x9, sp
cmp x29, x9
b.eq Lx29_ok
str xzr, [x1]
Lx29_ok:
// Restore callee-saved registers.
ldp d8, d9, [sp, #16]
ldp d10, d11, [sp, #32]
ldp d12, d13, [sp, #48]
ldp d14, d15, [sp, #64]
ldp x19, x20, [sp, #80]
ldp x21, x22, [sp, #96]
ldp x23, x24, [sp, #112]
ldp x25, x26, [sp, #128]
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret
.globl _abi_test_clobber_x0
.private_extern _abi_test_clobber_x0
.align 4
_abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr
ret
.globl _abi_test_clobber_x1
.private_extern _abi_test_clobber_x1
.align 4
_abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr
ret
.globl _abi_test_clobber_x2
.private_extern _abi_test_clobber_x2
.align 4
_abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr
ret
.globl _abi_test_clobber_x3
.private_extern _abi_test_clobber_x3
.align 4
_abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr
ret
.globl _abi_test_clobber_x4
.private_extern _abi_test_clobber_x4
.align 4
_abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr
ret
.globl _abi_test_clobber_x5
.private_extern _abi_test_clobber_x5
.align 4
_abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr
ret
.globl _abi_test_clobber_x6
.private_extern _abi_test_clobber_x6
.align 4
_abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr
ret
.globl _abi_test_clobber_x7
.private_extern _abi_test_clobber_x7
.align 4
_abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr
ret
.globl _abi_test_clobber_x8
.private_extern _abi_test_clobber_x8
.align 4
_abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr
ret
.globl _abi_test_clobber_x9
.private_extern _abi_test_clobber_x9
.align 4
_abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr
ret
.globl _abi_test_clobber_x10
.private_extern _abi_test_clobber_x10
.align 4
_abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr
ret
.globl _abi_test_clobber_x11
.private_extern _abi_test_clobber_x11
.align 4
_abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr
ret
.globl _abi_test_clobber_x12
.private_extern _abi_test_clobber_x12
.align 4
_abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr
ret
.globl _abi_test_clobber_x13
.private_extern _abi_test_clobber_x13
.align 4
_abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr
ret
.globl _abi_test_clobber_x14
.private_extern _abi_test_clobber_x14
.align 4
_abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr
ret
.globl _abi_test_clobber_x15
.private_extern _abi_test_clobber_x15
.align 4
_abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr
ret
.globl _abi_test_clobber_x16
.private_extern _abi_test_clobber_x16
.align 4
_abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr
ret
.globl _abi_test_clobber_x17
.private_extern _abi_test_clobber_x17
.align 4
_abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr
ret
.globl _abi_test_clobber_x19
.private_extern _abi_test_clobber_x19
.align 4
_abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr
ret
.globl _abi_test_clobber_x20
.private_extern _abi_test_clobber_x20
.align 4
_abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr
ret
.globl _abi_test_clobber_x21
.private_extern _abi_test_clobber_x21
.align 4
_abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr
ret
.globl _abi_test_clobber_x22
.private_extern _abi_test_clobber_x22
.align 4
_abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr
ret
.globl _abi_test_clobber_x23
.private_extern _abi_test_clobber_x23
.align 4
_abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr
ret
.globl _abi_test_clobber_x24
.private_extern _abi_test_clobber_x24
.align 4
_abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr
ret
.globl _abi_test_clobber_x25
.private_extern _abi_test_clobber_x25
.align 4
_abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr
ret
.globl _abi_test_clobber_x26
.private_extern _abi_test_clobber_x26
.align 4
_abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr
ret
.globl _abi_test_clobber_x27
.private_extern _abi_test_clobber_x27
.align 4
_abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr
ret
.globl _abi_test_clobber_x28
.private_extern _abi_test_clobber_x28
.align 4
_abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr
ret
.globl _abi_test_clobber_x29
.private_extern _abi_test_clobber_x29
.align 4
_abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr
ret
.globl _abi_test_clobber_d0
.private_extern _abi_test_clobber_d0
.align 4
_abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr
ret
.globl _abi_test_clobber_d1
.private_extern _abi_test_clobber_d1
.align 4
_abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr
ret
.globl _abi_test_clobber_d2
.private_extern _abi_test_clobber_d2
.align 4
_abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr
ret
.globl _abi_test_clobber_d3
.private_extern _abi_test_clobber_d3
.align 4
_abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr
ret
.globl _abi_test_clobber_d4
.private_extern _abi_test_clobber_d4
.align 4
_abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr
ret
.globl _abi_test_clobber_d5
.private_extern _abi_test_clobber_d5
.align 4
_abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr
ret
.globl _abi_test_clobber_d6
.private_extern _abi_test_clobber_d6
.align 4
_abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr
ret
.globl _abi_test_clobber_d7
.private_extern _abi_test_clobber_d7
.align 4
_abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr
ret
.globl _abi_test_clobber_d8
.private_extern _abi_test_clobber_d8
.align 4
_abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr
ret
.globl _abi_test_clobber_d9
.private_extern _abi_test_clobber_d9
.align 4
_abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr
ret
.globl _abi_test_clobber_d10
.private_extern _abi_test_clobber_d10
.align 4
_abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr
ret
.globl _abi_test_clobber_d11
.private_extern _abi_test_clobber_d11
.align 4
_abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr
ret
.globl _abi_test_clobber_d12
.private_extern _abi_test_clobber_d12
.align 4
_abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr
ret
.globl _abi_test_clobber_d13
.private_extern _abi_test_clobber_d13
.align 4
_abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr
ret
.globl _abi_test_clobber_d14
.private_extern _abi_test_clobber_d14
.align 4
_abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr
ret
.globl _abi_test_clobber_d15
.private_extern _abi_test_clobber_d15
.align 4
_abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr
ret
.globl _abi_test_clobber_d16
.private_extern _abi_test_clobber_d16
.align 4
_abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr
ret
.globl _abi_test_clobber_d17
.private_extern _abi_test_clobber_d17
.align 4
_abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr
ret
.globl _abi_test_clobber_d18
.private_extern _abi_test_clobber_d18
.align 4
_abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr
ret
.globl _abi_test_clobber_d19
.private_extern _abi_test_clobber_d19
.align 4
_abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr
ret
.globl _abi_test_clobber_d20
.private_extern _abi_test_clobber_d20
.align 4
_abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr
ret
.globl _abi_test_clobber_d21
.private_extern _abi_test_clobber_d21
.align 4
_abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr
ret
.globl _abi_test_clobber_d22
.private_extern _abi_test_clobber_d22
.align 4
_abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr
ret
.globl _abi_test_clobber_d23
.private_extern _abi_test_clobber_d23
.align 4
_abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr
ret
.globl _abi_test_clobber_d24
.private_extern _abi_test_clobber_d24
.align 4
_abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr
ret
.globl _abi_test_clobber_d25
.private_extern _abi_test_clobber_d25
.align 4
_abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr
ret
.globl _abi_test_clobber_d26
.private_extern _abi_test_clobber_d26
.align 4
_abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr
ret
.globl _abi_test_clobber_d27
.private_extern _abi_test_clobber_d27
.align 4
_abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr
ret
.globl _abi_test_clobber_d28
.private_extern _abi_test_clobber_d28
.align 4
_abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr
ret
.globl _abi_test_clobber_d29
.private_extern _abi_test_clobber_d29
.align 4
_abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr
ret
.globl _abi_test_clobber_d30
.private_extern _abi_test_clobber_d30
.align 4
_abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr
ret
.globl _abi_test_clobber_d31
.private_extern _abi_test_clobber_d31
.align 4
_abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr
ret
.globl _abi_test_clobber_v8_upper
.private_extern _abi_test_clobber_v8_upper
.align 4
_abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr
ret
.globl _abi_test_clobber_v9_upper
.private_extern _abi_test_clobber_v9_upper
.align 4
_abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr
ret
.globl _abi_test_clobber_v10_upper
.private_extern _abi_test_clobber_v10_upper
.align 4
_abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr
ret
.globl _abi_test_clobber_v11_upper
.private_extern _abi_test_clobber_v11_upper
.align 4
_abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr
ret
.globl _abi_test_clobber_v12_upper
.private_extern _abi_test_clobber_v12_upper
.align 4
_abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr
ret
.globl _abi_test_clobber_v13_upper
.private_extern _abi_test_clobber_v13_upper
.align 4
_abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr
ret
.globl _abi_test_clobber_v14_upper
.private_extern _abi_test_clobber_v14_upper
.align 4
_abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr
ret
.globl _abi_test_clobber_v15_upper
.private_extern _abi_test_clobber_v15_upper
.align 4
_abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr
ret
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

View File

@ -1,790 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.code 32
#undef __thumb2__
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_encrypt_key
#endif
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
mov r3,#-1
cmp r0,#0
beq Lenc_key_abort
cmp r2,#0
beq Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt Lenc_key_abort
cmp r1,#256
bgt Lenc_key_abort
tst r1,#0x3f
bne Lenc_key_abort
adr r3,Lrcon
cmp r1,#192
veor q0,q0,q0
vld1.8 {q3},[r0]!
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!
blt Loop128
beq L192
b L256
.align 4
Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne Loop128
vld1.32 {q1},[r3]
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
add r2,r2,#0x50
mov r12,#10
b Ldone
.align 4
L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vdup.32 q9,d7[1]
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne Loop192
mov r12,#12
add r2,r2,#0x20
b Ldone
.align 4
L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!
Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq Ldone
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
veor q8,q8,q10
b Loop256
Ldone:
str r12,[r2]
mov r3,#0
Lenc_key_abort:
mov r0,r3 @ return value
bx lr
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_decrypt_key
#endif
.align 5
_aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl Lenc_key
cmp r0,#0
bne Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16
add r0,r2,r12,lsl#4 @ end of key schedule
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi Loop_imc
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
Ldec_key_abort:
ldmia sp!,{r4,pc}
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_encrypt
#endif
.align 5
_aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
#ifdef __thumb2__
.thumb_func _aes_hw_decrypt
#endif
.align 5
_aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_cbc_encrypt
#endif
.align 5
_aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo Lcbc_abort
moveq r8,#0
cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240]
and r2,r2,#-16
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
beq Lcbc_dec
cmp r5,#2
veor q0,q0,q6
veor q5,q8,q7
beq Lcbc_enc128
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b Lenter_cbc_enc
.align 4
Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc
vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b Lenter_cbc_enc128
Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc128
vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo Lcbc_dec_tail
vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10
Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7
subs r2,r2,#0x30
veor q5,q2,q7
movlo r6,r2 @ r6, r6, is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
vorr q6,q11,q11
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs Loop3x_cbc_dec
cmn r2,#0x30
beq Lcbc_done
nop
Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b Lcbc_done
Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
Lcbc_done:
vst1.8 {q6},[r4]
Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func _aes_hw_ctr32_encrypt_blocks
#endif
.align 5
_aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
ldr r5,[r3,#240]
ldr r8, [r4, #12]
vld1.32 {q0},[r4]
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
bls Lctr32_tail
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b Loop3x_ctr32
.align 4
Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_ctr32
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]!
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
veor q2,q2,q4
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs Loop3x_ctr32
adds r2,r2,#3
beq Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0
Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq Lctr32_done
vst1.8 {q3},[r1]
Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#endif
#endif // !OPENSSL_NO_ASM

View File

@ -1,982 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.align 5
LOPENSSL_armcap:
.word OPENSSL_armcap_P-Lbn_mul_mont
#endif
.globl _bn_mul_mont
.private_extern _bn_mul_mont
#ifdef __thumb2__
.thumb_func _bn_mul_mont
#endif
.align 5
_bn_mul_mont:
Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne Lialu
adr r0,Lbn_mul_mont
ldr r2,LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7
#ifdef __thumb2__
.thumb_func bn_mul8x_mont_neon
#endif
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
cmp r5,#8
bhi LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b LNEON_outer8
.align 4
LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b LNEON_tail_entry
.align 4
LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne LNEON_8n_init
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b LNEON_8n_outer
.align 4
LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b LNEON_8n_inner
.align 4
LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne LNEON_8n_inner
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne LNEON_8n_outer
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b LNEON_tail_entry
.align 4
LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_copy_n_zap
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm _OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol _OPENSSL_armcap_P
.long 0
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

View File

@ -1,258 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.globl _gcm_init_neon
.private_extern _gcm_init_neon
#ifdef __thumb2__
.thumb_func _gcm_init_neon
#endif
.align 4
_gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
vdup.8 q9,d7[7]
vshr.u64 d26,d6,#63
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
vand q8,q8,q9
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
#ifdef __thumb2__
.thumb_func _gcm_gmult_neon
#endif
.align 4
_gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b Lgmult_neon
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
#ifdef __thumb2__
.thumb_func _gcm_ghash_neon
#endif
.align 4
_gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
Lgmult_neon:
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
subs r3,#16
bne Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
sub r0,#16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

View File

@ -1,256 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.code 32
#undef __thumb2__
.globl _gcm_init_v8
.private_extern _gcm_init_v8
#ifdef __thumb2__
.thumb_func _gcm_init_v8
#endif
.align 4
_gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
bx lr
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
#ifdef __thumb2__
.thumb_func _gcm_gmult_v8
#endif
.align 4
_gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
bx lr
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
#ifdef __thumb2__
.thumb_func _gcm_ghash_v8
#endif
.align 4
_gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
bhs Loop_mod2x_v8 @ there was at least 32 more bytes
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq Ldone_v8 @ is r3 zero?
Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM

File diff suppressed because it is too large Load Diff

View File

@ -1,376 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.syntax unified
.text
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ with |argv|, then saves the callee-saved registers into |state|. It returns
@ the result of |func|. The |unwind| argument is unused.
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
@ const uint32_t *argv, size_t argc,
@ int unwind);
.globl _abi_test_trampoline
.private_extern _abi_test_trampoline
.align 4
_abi_test_trampoline:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
sub sp, sp, #28
@ Every register in AAPCS is either non-volatile or a parameter (except
@ r9 on iOS), so this code, by the actual call, loses all its scratch
@ registers. First fill in stack parameters while there are registers
@ to spare.
cmp r3, #4
bls Lstack_args_done
mov r4, sp @ r4 is the output pointer.
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
add r2, r2, #16 @ Skip four arguments.
Lstack_args_loop:
ldr r6, [r2], #4
cmp r2, r5
str r6, [r4], #4
bne Lstack_args_loop
Lstack_args_done:
@ Load registers from |r1|.
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Load register parameters. This uses up our remaining registers, so we
@ repurpose lr as scratch space.
ldr r3, [sp, #40] @ Reload argc.
ldr lr, [sp, #36] @ Load argv into lr.
cmp r3, #3
bhi Larg_r3
beq Larg_r2
cmp r3, #1
bhi Larg_r1
beq Larg_r0
b Largs_done
Larg_r3:
ldr r3, [lr, #12] @ argv[3]
Larg_r2:
ldr r2, [lr, #8] @ argv[2]
Larg_r1:
ldr r1, [lr, #4] @ argv[1]
Larg_r0:
ldr r0, [lr] @ argv[0]
Largs_done:
@ With every other register in use, load the function pointer into lr
@ and call the function.
ldr lr, [sp, #28]
blx lr
@ r1-r3 are free for use again. The trampoline only supports
@ single-return functions. Pass r4-r11 to the caller.
ldr r1, [sp, #32]
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Unwind the stack and restore registers.
add sp, sp, #44 @ 44 = 28+16
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
bx lr
.globl _abi_test_clobber_r0
.private_extern _abi_test_clobber_r0
.align 4
_abi_test_clobber_r0:
mov r0, #0
bx lr
.globl _abi_test_clobber_r1
.private_extern _abi_test_clobber_r1
.align 4
_abi_test_clobber_r1:
mov r1, #0
bx lr
.globl _abi_test_clobber_r2
.private_extern _abi_test_clobber_r2
.align 4
_abi_test_clobber_r2:
mov r2, #0
bx lr
.globl _abi_test_clobber_r3
.private_extern _abi_test_clobber_r3
.align 4
_abi_test_clobber_r3:
mov r3, #0
bx lr
.globl _abi_test_clobber_r4
.private_extern _abi_test_clobber_r4
.align 4
_abi_test_clobber_r4:
mov r4, #0
bx lr
.globl _abi_test_clobber_r5
.private_extern _abi_test_clobber_r5
.align 4
_abi_test_clobber_r5:
mov r5, #0
bx lr
.globl _abi_test_clobber_r6
.private_extern _abi_test_clobber_r6
.align 4
_abi_test_clobber_r6:
mov r6, #0
bx lr
.globl _abi_test_clobber_r7
.private_extern _abi_test_clobber_r7
.align 4
_abi_test_clobber_r7:
mov r7, #0
bx lr
.globl _abi_test_clobber_r8
.private_extern _abi_test_clobber_r8
.align 4
_abi_test_clobber_r8:
mov r8, #0
bx lr
.globl _abi_test_clobber_r9
.private_extern _abi_test_clobber_r9
.align 4
_abi_test_clobber_r9:
mov r9, #0
bx lr
.globl _abi_test_clobber_r10
.private_extern _abi_test_clobber_r10
.align 4
_abi_test_clobber_r10:
mov r10, #0
bx lr
.globl _abi_test_clobber_r11
.private_extern _abi_test_clobber_r11
.align 4
_abi_test_clobber_r11:
mov r11, #0
bx lr
.globl _abi_test_clobber_r12
.private_extern _abi_test_clobber_r12
.align 4
_abi_test_clobber_r12:
mov r12, #0
bx lr
.globl _abi_test_clobber_d0
.private_extern _abi_test_clobber_d0
.align 4
_abi_test_clobber_d0:
mov r0, #0
vmov s0, r0
vmov s1, r0
bx lr
.globl _abi_test_clobber_d1
.private_extern _abi_test_clobber_d1
.align 4
_abi_test_clobber_d1:
mov r0, #0
vmov s2, r0
vmov s3, r0
bx lr
.globl _abi_test_clobber_d2
.private_extern _abi_test_clobber_d2
.align 4
_abi_test_clobber_d2:
mov r0, #0
vmov s4, r0
vmov s5, r0
bx lr
.globl _abi_test_clobber_d3
.private_extern _abi_test_clobber_d3
.align 4
_abi_test_clobber_d3:
mov r0, #0
vmov s6, r0
vmov s7, r0
bx lr
.globl _abi_test_clobber_d4
.private_extern _abi_test_clobber_d4
.align 4
_abi_test_clobber_d4:
mov r0, #0
vmov s8, r0
vmov s9, r0
bx lr
.globl _abi_test_clobber_d5
.private_extern _abi_test_clobber_d5
.align 4
_abi_test_clobber_d5:
mov r0, #0
vmov s10, r0
vmov s11, r0
bx lr
.globl _abi_test_clobber_d6
.private_extern _abi_test_clobber_d6
.align 4
_abi_test_clobber_d6:
mov r0, #0
vmov s12, r0
vmov s13, r0
bx lr
.globl _abi_test_clobber_d7
.private_extern _abi_test_clobber_d7
.align 4
_abi_test_clobber_d7:
mov r0, #0
vmov s14, r0
vmov s15, r0
bx lr
.globl _abi_test_clobber_d8
.private_extern _abi_test_clobber_d8
.align 4
_abi_test_clobber_d8:
mov r0, #0
vmov s16, r0
vmov s17, r0
bx lr
.globl _abi_test_clobber_d9
.private_extern _abi_test_clobber_d9
.align 4
_abi_test_clobber_d9:
mov r0, #0
vmov s18, r0
vmov s19, r0
bx lr
.globl _abi_test_clobber_d10
.private_extern _abi_test_clobber_d10
.align 4
_abi_test_clobber_d10:
mov r0, #0
vmov s20, r0
vmov s21, r0
bx lr
.globl _abi_test_clobber_d11
.private_extern _abi_test_clobber_d11
.align 4
_abi_test_clobber_d11:
mov r0, #0
vmov s22, r0
vmov s23, r0
bx lr
.globl _abi_test_clobber_d12
.private_extern _abi_test_clobber_d12
.align 4
_abi_test_clobber_d12:
mov r0, #0
vmov s24, r0
vmov s25, r0
bx lr
.globl _abi_test_clobber_d13
.private_extern _abi_test_clobber_d13
.align 4
_abi_test_clobber_d13:
mov r0, #0
vmov s26, r0
vmov s27, r0
bx lr
.globl _abi_test_clobber_d14
.private_extern _abi_test_clobber_d14
.align 4
_abi_test_clobber_d14:
mov r0, #0
vmov s28, r0
vmov s29, r0
bx lr
.globl _abi_test_clobber_d15
.private_extern _abi_test_clobber_d15
.align 4
_abi_test_clobber_d15:
mov r0, #0
vmov s30, r0
vmov s31, r0
bx lr
#endif // !OPENSSL_NO_ASM

View File

@ -1,785 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv8-a+crypto
.section .rodata
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
cmp x0,#0
b.eq .Lenc_key_abort
cmp x2,#0
b.eq .Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt .Lenc_key_abort
cmp w1,#256
b.gt .Lenc_key_abort
tst w1,#0x3f
b.ne .Lenc_key_abort
adrp x3,.Lrcon
add x3,x3,:lo12:.Lrcon
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32
b.lt .Loop128
b.eq .L192
b .L256
.align 4
.Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne .Loop128
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
add x2,x2,#0x50
mov w12,#10
b .Ldone
.align 4
.L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
.Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne .Loop192
mov w12,#12
add x2,x2,#0x20
b .Ldone
.align 4
.L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
.Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq .Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b .Loop256
.Ldone:
str w12,[x2]
mov x3,#0
.Lenc_key_abort:
mov x0,x3 // return value
ldr x29,[sp],#16
ret
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
cmp x0,#0
b.ne .Ldec_key_abort
sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
.Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi .Loop_imc
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
.Ldec_key_abort:
ldp x29,x30,[sp],#16
AARCH64_VALIDATE_LINK_REGISTER
ret
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
.Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
AARCH64_VALID_CALL_TARGET
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
.Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo .Lcbc_abort
csel x8,xzr,x8,eq
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
b.eq .Lcbc_dec
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
eor v5.16b,v16.16b,v7.16b
b.eq .Lcbc_enc128
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq .Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
.Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
.Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
.Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo .Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
.Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
b.hs .Loop3x_cbc_dec
cmn x2,#0x30
b.eq .Lcbc_done
nop
.Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq .Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b .Lcbc_done
.Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
.Lcbc_done:
st1 {v6.16b},[x4]
.Lcbc_abort:
ldr x29,[sp],#16
ret
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
AARCH64_VALID_CALL_TARGET
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
ldr w8, [x4, #12]
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo
#ifndef __ARMEB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
b.ls .Lctr32_tail
rev w12, w8
sub x2,x2,#3 // bias
mov v18.s[3],w12
b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_ctr32
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
eor v19.16b,v19.16b,v7.16b
rev w9,w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs .Loop3x_ctr32
adds x2,x2,#3
b.eq .Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
.Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt .Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq .Lctr32_done
st1 {v3.16b},[x1]
.Lctr32_done:
ldr x29,[sp],#16
ret
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1,346 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
AARCH64_VALID_CALL_TARGET
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
.size gcm_init_neon,.-gcm_init_neon
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
mov x3, #16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
AARCH64_VALID_CALL_TARGET
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
.Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
.Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
subs x3, x3, #16
bne .Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
.size gcm_ghash_neon,.-gcm_ghash_neon
.section .rodata
.align 4
.Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1,252 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.arch armv8-a+crypto
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
.size gcm_init_v8,.-gcm_init_v8
.globl gcm_gmult_v8
.hidden gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_gmult_v8,.-gcm_gmult_v8
.globl gcm_ghash_v8
.hidden gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
AARCH64_VALID_CALL_TARGET
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo .Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b .Loop_mod2x_v8
.align 4
.Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq .Ldone_v8 //is x3 zero?
.Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
.Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1,761 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
// with |argv|, then saves the callee-saved registers into |state|. It returns
// the result of |func|. The |unwind| argument is unused.
// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
// const uint64_t *argv, size_t argc,
// uint64_t unwind);
.type abi_test_trampoline, %function
.globl abi_test_trampoline
.hidden abi_test_trampoline
.align 4
abi_test_trampoline:
.Labi_test_trampoline_begin:
AARCH64_SIGN_LINK_REGISTER
// Stack layout (low to high addresses)
// x29,x30 (16 bytes)
// d8-d15 (64 bytes)
// x19-x28 (80 bytes)
// x1 (8 bytes)
// padding (8 bytes)
stp x29, x30, [sp, #-176]!
mov x29, sp
// Saved callee-saved registers and |state|.
stp d8, d9, [sp, #16]
stp d10, d11, [sp, #32]
stp d12, d13, [sp, #48]
stp d14, d15, [sp, #64]
stp x19, x20, [sp, #80]
stp x21, x22, [sp, #96]
stp x23, x24, [sp, #112]
stp x25, x26, [sp, #128]
stp x27, x28, [sp, #144]
str x1, [sp, #160]
// Load registers from |state|, with the exception of x29. x29 is the
// frame pointer and also callee-saved, but AAPCS64 allows platforms to
// mandate that x29 always point to a frame. iOS64 does so, which means
// we cannot fill x29 with entropy without violating ABI rules
// ourselves. x29 is tested separately below.
ldp d8, d9, [x1], #16
ldp d10, d11, [x1], #16
ldp d12, d13, [x1], #16
ldp d14, d15, [x1], #16
ldp x19, x20, [x1], #16
ldp x21, x22, [x1], #16
ldp x23, x24, [x1], #16
ldp x25, x26, [x1], #16
ldp x27, x28, [x1], #16
// Move parameters into temporary registers.
mov x9, x0
mov x10, x2
mov x11, x3
// Load parameters into registers.
cbz x11, .Largs_done
ldr x0, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x1, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x2, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x3, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x4, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x5, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x6, [x10], #8
subs x11, x11, #1
b.eq .Largs_done
ldr x7, [x10], #8
.Largs_done:
blr x9
// Reload |state| and store registers.
ldr x1, [sp, #160]
stp d8, d9, [x1], #16
stp d10, d11, [x1], #16
stp d12, d13, [x1], #16
stp d14, d15, [x1], #16
stp x19, x20, [x1], #16
stp x21, x22, [x1], #16
stp x23, x24, [x1], #16
stp x25, x26, [x1], #16
stp x27, x28, [x1], #16
// |func| is required to preserve x29, the frame pointer. We cannot load
// random values into x29 (see comment above), so compare it against the
// expected value and zero the field of |state| if corrupted.
mov x9, sp
cmp x29, x9
b.eq .Lx29_ok
str xzr, [x1]
.Lx29_ok:
// Restore callee-saved registers.
ldp d8, d9, [sp, #16]
ldp d10, d11, [sp, #32]
ldp d12, d13, [sp, #48]
ldp d14, d15, [sp, #64]
ldp x19, x20, [sp, #80]
ldp x21, x22, [sp, #96]
ldp x23, x24, [sp, #112]
ldp x25, x26, [sp, #128]
ldp x27, x28, [sp, #144]
ldp x29, x30, [sp], #176
AARCH64_VALIDATE_LINK_REGISTER
ret
.size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_x0, %function
.globl abi_test_clobber_x0
.hidden abi_test_clobber_x0
.align 4
abi_test_clobber_x0:
AARCH64_VALID_CALL_TARGET
mov x0, xzr
ret
.size abi_test_clobber_x0,.-abi_test_clobber_x0
.type abi_test_clobber_x1, %function
.globl abi_test_clobber_x1
.hidden abi_test_clobber_x1
.align 4
abi_test_clobber_x1:
AARCH64_VALID_CALL_TARGET
mov x1, xzr
ret
.size abi_test_clobber_x1,.-abi_test_clobber_x1
.type abi_test_clobber_x2, %function
.globl abi_test_clobber_x2
.hidden abi_test_clobber_x2
.align 4
abi_test_clobber_x2:
AARCH64_VALID_CALL_TARGET
mov x2, xzr
ret
.size abi_test_clobber_x2,.-abi_test_clobber_x2
.type abi_test_clobber_x3, %function
.globl abi_test_clobber_x3
.hidden abi_test_clobber_x3
.align 4
abi_test_clobber_x3:
AARCH64_VALID_CALL_TARGET
mov x3, xzr
ret
.size abi_test_clobber_x3,.-abi_test_clobber_x3
.type abi_test_clobber_x4, %function
.globl abi_test_clobber_x4
.hidden abi_test_clobber_x4
.align 4
abi_test_clobber_x4:
AARCH64_VALID_CALL_TARGET
mov x4, xzr
ret
.size abi_test_clobber_x4,.-abi_test_clobber_x4
.type abi_test_clobber_x5, %function
.globl abi_test_clobber_x5
.hidden abi_test_clobber_x5
.align 4
abi_test_clobber_x5:
AARCH64_VALID_CALL_TARGET
mov x5, xzr
ret
.size abi_test_clobber_x5,.-abi_test_clobber_x5
.type abi_test_clobber_x6, %function
.globl abi_test_clobber_x6
.hidden abi_test_clobber_x6
.align 4
abi_test_clobber_x6:
AARCH64_VALID_CALL_TARGET
mov x6, xzr
ret
.size abi_test_clobber_x6,.-abi_test_clobber_x6
.type abi_test_clobber_x7, %function
.globl abi_test_clobber_x7
.hidden abi_test_clobber_x7
.align 4
abi_test_clobber_x7:
AARCH64_VALID_CALL_TARGET
mov x7, xzr
ret
.size abi_test_clobber_x7,.-abi_test_clobber_x7
.type abi_test_clobber_x8, %function
.globl abi_test_clobber_x8
.hidden abi_test_clobber_x8
.align 4
abi_test_clobber_x8:
AARCH64_VALID_CALL_TARGET
mov x8, xzr
ret
.size abi_test_clobber_x8,.-abi_test_clobber_x8
.type abi_test_clobber_x9, %function
.globl abi_test_clobber_x9
.hidden abi_test_clobber_x9
.align 4
abi_test_clobber_x9:
AARCH64_VALID_CALL_TARGET
mov x9, xzr
ret
.size abi_test_clobber_x9,.-abi_test_clobber_x9
.type abi_test_clobber_x10, %function
.globl abi_test_clobber_x10
.hidden abi_test_clobber_x10
.align 4
abi_test_clobber_x10:
AARCH64_VALID_CALL_TARGET
mov x10, xzr
ret
.size abi_test_clobber_x10,.-abi_test_clobber_x10
.type abi_test_clobber_x11, %function
.globl abi_test_clobber_x11
.hidden abi_test_clobber_x11
.align 4
abi_test_clobber_x11:
AARCH64_VALID_CALL_TARGET
mov x11, xzr
ret
.size abi_test_clobber_x11,.-abi_test_clobber_x11
.type abi_test_clobber_x12, %function
.globl abi_test_clobber_x12
.hidden abi_test_clobber_x12
.align 4
abi_test_clobber_x12:
AARCH64_VALID_CALL_TARGET
mov x12, xzr
ret
.size abi_test_clobber_x12,.-abi_test_clobber_x12
.type abi_test_clobber_x13, %function
.globl abi_test_clobber_x13
.hidden abi_test_clobber_x13
.align 4
abi_test_clobber_x13:
AARCH64_VALID_CALL_TARGET
mov x13, xzr
ret
.size abi_test_clobber_x13,.-abi_test_clobber_x13
.type abi_test_clobber_x14, %function
.globl abi_test_clobber_x14
.hidden abi_test_clobber_x14
.align 4
abi_test_clobber_x14:
AARCH64_VALID_CALL_TARGET
mov x14, xzr
ret
.size abi_test_clobber_x14,.-abi_test_clobber_x14
.type abi_test_clobber_x15, %function
.globl abi_test_clobber_x15
.hidden abi_test_clobber_x15
.align 4
abi_test_clobber_x15:
AARCH64_VALID_CALL_TARGET
mov x15, xzr
ret
.size abi_test_clobber_x15,.-abi_test_clobber_x15
.type abi_test_clobber_x16, %function
.globl abi_test_clobber_x16
.hidden abi_test_clobber_x16
.align 4
abi_test_clobber_x16:
AARCH64_VALID_CALL_TARGET
mov x16, xzr
ret
.size abi_test_clobber_x16,.-abi_test_clobber_x16
.type abi_test_clobber_x17, %function
.globl abi_test_clobber_x17
.hidden abi_test_clobber_x17
.align 4
abi_test_clobber_x17:
AARCH64_VALID_CALL_TARGET
mov x17, xzr
ret
.size abi_test_clobber_x17,.-abi_test_clobber_x17
.type abi_test_clobber_x19, %function
.globl abi_test_clobber_x19
.hidden abi_test_clobber_x19
.align 4
abi_test_clobber_x19:
AARCH64_VALID_CALL_TARGET
mov x19, xzr
ret
.size abi_test_clobber_x19,.-abi_test_clobber_x19
.type abi_test_clobber_x20, %function
.globl abi_test_clobber_x20
.hidden abi_test_clobber_x20
.align 4
abi_test_clobber_x20:
AARCH64_VALID_CALL_TARGET
mov x20, xzr
ret
.size abi_test_clobber_x20,.-abi_test_clobber_x20
.type abi_test_clobber_x21, %function
.globl abi_test_clobber_x21
.hidden abi_test_clobber_x21
.align 4
abi_test_clobber_x21:
AARCH64_VALID_CALL_TARGET
mov x21, xzr
ret
.size abi_test_clobber_x21,.-abi_test_clobber_x21
.type abi_test_clobber_x22, %function
.globl abi_test_clobber_x22
.hidden abi_test_clobber_x22
.align 4
abi_test_clobber_x22:
AARCH64_VALID_CALL_TARGET
mov x22, xzr
ret
.size abi_test_clobber_x22,.-abi_test_clobber_x22
.type abi_test_clobber_x23, %function
.globl abi_test_clobber_x23
.hidden abi_test_clobber_x23
.align 4
abi_test_clobber_x23:
AARCH64_VALID_CALL_TARGET
mov x23, xzr
ret
.size abi_test_clobber_x23,.-abi_test_clobber_x23
.type abi_test_clobber_x24, %function
.globl abi_test_clobber_x24
.hidden abi_test_clobber_x24
.align 4
abi_test_clobber_x24:
AARCH64_VALID_CALL_TARGET
mov x24, xzr
ret
.size abi_test_clobber_x24,.-abi_test_clobber_x24
.type abi_test_clobber_x25, %function
.globl abi_test_clobber_x25
.hidden abi_test_clobber_x25
.align 4
abi_test_clobber_x25:
AARCH64_VALID_CALL_TARGET
mov x25, xzr
ret
.size abi_test_clobber_x25,.-abi_test_clobber_x25
.type abi_test_clobber_x26, %function
.globl abi_test_clobber_x26
.hidden abi_test_clobber_x26
.align 4
abi_test_clobber_x26:
AARCH64_VALID_CALL_TARGET
mov x26, xzr
ret
.size abi_test_clobber_x26,.-abi_test_clobber_x26
.type abi_test_clobber_x27, %function
.globl abi_test_clobber_x27
.hidden abi_test_clobber_x27
.align 4
abi_test_clobber_x27:
AARCH64_VALID_CALL_TARGET
mov x27, xzr
ret
.size abi_test_clobber_x27,.-abi_test_clobber_x27
.type abi_test_clobber_x28, %function
.globl abi_test_clobber_x28
.hidden abi_test_clobber_x28
.align 4
abi_test_clobber_x28:
AARCH64_VALID_CALL_TARGET
mov x28, xzr
ret
.size abi_test_clobber_x28,.-abi_test_clobber_x28
.type abi_test_clobber_x29, %function
.globl abi_test_clobber_x29
.hidden abi_test_clobber_x29
.align 4
abi_test_clobber_x29:
AARCH64_VALID_CALL_TARGET
mov x29, xzr
ret
.size abi_test_clobber_x29,.-abi_test_clobber_x29
.type abi_test_clobber_d0, %function
.globl abi_test_clobber_d0
.hidden abi_test_clobber_d0
.align 4
abi_test_clobber_d0:
AARCH64_VALID_CALL_TARGET
fmov d0, xzr
ret
.size abi_test_clobber_d0,.-abi_test_clobber_d0
.type abi_test_clobber_d1, %function
.globl abi_test_clobber_d1
.hidden abi_test_clobber_d1
.align 4
abi_test_clobber_d1:
AARCH64_VALID_CALL_TARGET
fmov d1, xzr
ret
.size abi_test_clobber_d1,.-abi_test_clobber_d1
.type abi_test_clobber_d2, %function
.globl abi_test_clobber_d2
.hidden abi_test_clobber_d2
.align 4
abi_test_clobber_d2:
AARCH64_VALID_CALL_TARGET
fmov d2, xzr
ret
.size abi_test_clobber_d2,.-abi_test_clobber_d2
.type abi_test_clobber_d3, %function
.globl abi_test_clobber_d3
.hidden abi_test_clobber_d3
.align 4
abi_test_clobber_d3:
AARCH64_VALID_CALL_TARGET
fmov d3, xzr
ret
.size abi_test_clobber_d3,.-abi_test_clobber_d3
.type abi_test_clobber_d4, %function
.globl abi_test_clobber_d4
.hidden abi_test_clobber_d4
.align 4
abi_test_clobber_d4:
AARCH64_VALID_CALL_TARGET
fmov d4, xzr
ret
.size abi_test_clobber_d4,.-abi_test_clobber_d4
.type abi_test_clobber_d5, %function
.globl abi_test_clobber_d5
.hidden abi_test_clobber_d5
.align 4
abi_test_clobber_d5:
AARCH64_VALID_CALL_TARGET
fmov d5, xzr
ret
.size abi_test_clobber_d5,.-abi_test_clobber_d5
.type abi_test_clobber_d6, %function
.globl abi_test_clobber_d6
.hidden abi_test_clobber_d6
.align 4
abi_test_clobber_d6:
AARCH64_VALID_CALL_TARGET
fmov d6, xzr
ret
.size abi_test_clobber_d6,.-abi_test_clobber_d6
.type abi_test_clobber_d7, %function
.globl abi_test_clobber_d7
.hidden abi_test_clobber_d7
.align 4
abi_test_clobber_d7:
AARCH64_VALID_CALL_TARGET
fmov d7, xzr
ret
.size abi_test_clobber_d7,.-abi_test_clobber_d7
.type abi_test_clobber_d8, %function
.globl abi_test_clobber_d8
.hidden abi_test_clobber_d8
.align 4
abi_test_clobber_d8:
AARCH64_VALID_CALL_TARGET
fmov d8, xzr
ret
.size abi_test_clobber_d8,.-abi_test_clobber_d8
.type abi_test_clobber_d9, %function
.globl abi_test_clobber_d9
.hidden abi_test_clobber_d9
.align 4
abi_test_clobber_d9:
AARCH64_VALID_CALL_TARGET
fmov d9, xzr
ret
.size abi_test_clobber_d9,.-abi_test_clobber_d9
.type abi_test_clobber_d10, %function
.globl abi_test_clobber_d10
.hidden abi_test_clobber_d10
.align 4
abi_test_clobber_d10:
AARCH64_VALID_CALL_TARGET
fmov d10, xzr
ret
.size abi_test_clobber_d10,.-abi_test_clobber_d10
.type abi_test_clobber_d11, %function
.globl abi_test_clobber_d11
.hidden abi_test_clobber_d11
.align 4
abi_test_clobber_d11:
AARCH64_VALID_CALL_TARGET
fmov d11, xzr
ret
.size abi_test_clobber_d11,.-abi_test_clobber_d11
.type abi_test_clobber_d12, %function
.globl abi_test_clobber_d12
.hidden abi_test_clobber_d12
.align 4
abi_test_clobber_d12:
AARCH64_VALID_CALL_TARGET
fmov d12, xzr
ret
.size abi_test_clobber_d12,.-abi_test_clobber_d12
.type abi_test_clobber_d13, %function
.globl abi_test_clobber_d13
.hidden abi_test_clobber_d13
.align 4
abi_test_clobber_d13:
AARCH64_VALID_CALL_TARGET
fmov d13, xzr
ret
.size abi_test_clobber_d13,.-abi_test_clobber_d13
.type abi_test_clobber_d14, %function
.globl abi_test_clobber_d14
.hidden abi_test_clobber_d14
.align 4
abi_test_clobber_d14:
AARCH64_VALID_CALL_TARGET
fmov d14, xzr
ret
.size abi_test_clobber_d14,.-abi_test_clobber_d14
.type abi_test_clobber_d15, %function
.globl abi_test_clobber_d15
.hidden abi_test_clobber_d15
.align 4
abi_test_clobber_d15:
AARCH64_VALID_CALL_TARGET
fmov d15, xzr
ret
.size abi_test_clobber_d15,.-abi_test_clobber_d15
.type abi_test_clobber_d16, %function
.globl abi_test_clobber_d16
.hidden abi_test_clobber_d16
.align 4
abi_test_clobber_d16:
AARCH64_VALID_CALL_TARGET
fmov d16, xzr
ret
.size abi_test_clobber_d16,.-abi_test_clobber_d16
.type abi_test_clobber_d17, %function
.globl abi_test_clobber_d17
.hidden abi_test_clobber_d17
.align 4
abi_test_clobber_d17:
AARCH64_VALID_CALL_TARGET
fmov d17, xzr
ret
.size abi_test_clobber_d17,.-abi_test_clobber_d17
.type abi_test_clobber_d18, %function
.globl abi_test_clobber_d18
.hidden abi_test_clobber_d18
.align 4
abi_test_clobber_d18:
AARCH64_VALID_CALL_TARGET
fmov d18, xzr
ret
.size abi_test_clobber_d18,.-abi_test_clobber_d18
.type abi_test_clobber_d19, %function
.globl abi_test_clobber_d19
.hidden abi_test_clobber_d19
.align 4
abi_test_clobber_d19:
AARCH64_VALID_CALL_TARGET
fmov d19, xzr
ret
.size abi_test_clobber_d19,.-abi_test_clobber_d19
.type abi_test_clobber_d20, %function
.globl abi_test_clobber_d20
.hidden abi_test_clobber_d20
.align 4
abi_test_clobber_d20:
AARCH64_VALID_CALL_TARGET
fmov d20, xzr
ret
.size abi_test_clobber_d20,.-abi_test_clobber_d20
.type abi_test_clobber_d21, %function
.globl abi_test_clobber_d21
.hidden abi_test_clobber_d21
.align 4
abi_test_clobber_d21:
AARCH64_VALID_CALL_TARGET
fmov d21, xzr
ret
.size abi_test_clobber_d21,.-abi_test_clobber_d21
.type abi_test_clobber_d22, %function
.globl abi_test_clobber_d22
.hidden abi_test_clobber_d22
.align 4
abi_test_clobber_d22:
AARCH64_VALID_CALL_TARGET
fmov d22, xzr
ret
.size abi_test_clobber_d22,.-abi_test_clobber_d22
.type abi_test_clobber_d23, %function
.globl abi_test_clobber_d23
.hidden abi_test_clobber_d23
.align 4
abi_test_clobber_d23:
AARCH64_VALID_CALL_TARGET
fmov d23, xzr
ret
.size abi_test_clobber_d23,.-abi_test_clobber_d23
.type abi_test_clobber_d24, %function
.globl abi_test_clobber_d24
.hidden abi_test_clobber_d24
.align 4
abi_test_clobber_d24:
AARCH64_VALID_CALL_TARGET
fmov d24, xzr
ret
.size abi_test_clobber_d24,.-abi_test_clobber_d24
.type abi_test_clobber_d25, %function
.globl abi_test_clobber_d25
.hidden abi_test_clobber_d25
.align 4
abi_test_clobber_d25:
AARCH64_VALID_CALL_TARGET
fmov d25, xzr
ret
.size abi_test_clobber_d25,.-abi_test_clobber_d25
.type abi_test_clobber_d26, %function
.globl abi_test_clobber_d26
.hidden abi_test_clobber_d26
.align 4
abi_test_clobber_d26:
AARCH64_VALID_CALL_TARGET
fmov d26, xzr
ret
.size abi_test_clobber_d26,.-abi_test_clobber_d26
.type abi_test_clobber_d27, %function
.globl abi_test_clobber_d27
.hidden abi_test_clobber_d27
.align 4
abi_test_clobber_d27:
AARCH64_VALID_CALL_TARGET
fmov d27, xzr
ret
.size abi_test_clobber_d27,.-abi_test_clobber_d27
.type abi_test_clobber_d28, %function
.globl abi_test_clobber_d28
.hidden abi_test_clobber_d28
.align 4
abi_test_clobber_d28:
AARCH64_VALID_CALL_TARGET
fmov d28, xzr
ret
.size abi_test_clobber_d28,.-abi_test_clobber_d28
.type abi_test_clobber_d29, %function
.globl abi_test_clobber_d29
.hidden abi_test_clobber_d29
.align 4
abi_test_clobber_d29:
AARCH64_VALID_CALL_TARGET
fmov d29, xzr
ret
.size abi_test_clobber_d29,.-abi_test_clobber_d29
.type abi_test_clobber_d30, %function
.globl abi_test_clobber_d30
.hidden abi_test_clobber_d30
.align 4
abi_test_clobber_d30:
AARCH64_VALID_CALL_TARGET
fmov d30, xzr
ret
.size abi_test_clobber_d30,.-abi_test_clobber_d30
.type abi_test_clobber_d31, %function
.globl abi_test_clobber_d31
.hidden abi_test_clobber_d31
.align 4
abi_test_clobber_d31:
AARCH64_VALID_CALL_TARGET
fmov d31, xzr
ret
.size abi_test_clobber_d31,.-abi_test_clobber_d31
.type abi_test_clobber_v8_upper, %function
.globl abi_test_clobber_v8_upper
.hidden abi_test_clobber_v8_upper
.align 4
abi_test_clobber_v8_upper:
AARCH64_VALID_CALL_TARGET
fmov v8.d[1], xzr
ret
.size abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
.type abi_test_clobber_v9_upper, %function
.globl abi_test_clobber_v9_upper
.hidden abi_test_clobber_v9_upper
.align 4
abi_test_clobber_v9_upper:
AARCH64_VALID_CALL_TARGET
fmov v9.d[1], xzr
ret
.size abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
.type abi_test_clobber_v10_upper, %function
.globl abi_test_clobber_v10_upper
.hidden abi_test_clobber_v10_upper
.align 4
abi_test_clobber_v10_upper:
AARCH64_VALID_CALL_TARGET
fmov v10.d[1], xzr
ret
.size abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
.type abi_test_clobber_v11_upper, %function
.globl abi_test_clobber_v11_upper
.hidden abi_test_clobber_v11_upper
.align 4
abi_test_clobber_v11_upper:
AARCH64_VALID_CALL_TARGET
fmov v11.d[1], xzr
ret
.size abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
.type abi_test_clobber_v12_upper, %function
.globl abi_test_clobber_v12_upper
.hidden abi_test_clobber_v12_upper
.align 4
abi_test_clobber_v12_upper:
AARCH64_VALID_CALL_TARGET
fmov v12.d[1], xzr
ret
.size abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
.type abi_test_clobber_v13_upper, %function
.globl abi_test_clobber_v13_upper
.hidden abi_test_clobber_v13_upper
.align 4
abi_test_clobber_v13_upper:
AARCH64_VALID_CALL_TARGET
fmov v13.d[1], xzr
ret
.size abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
.type abi_test_clobber_v14_upper, %function
.globl abi_test_clobber_v14_upper
.hidden abi_test_clobber_v14_upper
.align 4
abi_test_clobber_v14_upper:
AARCH64_VALID_CALL_TARGET
fmov v14.d[1], xzr
ret
.size abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
.type abi_test_clobber_v15_upper, %function
.globl abi_test_clobber_v15_upper
.hidden abi_test_clobber_v15_upper
.align 4
abi_test_clobber_v15_upper:
AARCH64_VALID_CALL_TARGET
fmov v15.d[1], xzr
ret
.size abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

File diff suppressed because it is too large Load Diff

View File

@ -1,781 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-)
.fpu neon
.code 32
#undef __thumb2__
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
mov r3,#-1
cmp r0,#0
beq .Lenc_key_abort
cmp r2,#0
beq .Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt .Lenc_key_abort
cmp r1,#256
bgt .Lenc_key_abort
tst r1,#0x3f
bne .Lenc_key_abort
adr r3,.Lrcon
cmp r1,#192
veor q0,q0,q0
vld1.8 {q3},[r0]!
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!
blt .Loop128
beq .L192
b .L256
.align 4
.Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne .Loop128
vld1.32 {q1},[r3]
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
add r2,r2,#0x50
mov r12,#10
b .Ldone
.align 4
.L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
.Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vdup.32 q9,d7[1]
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne .Loop192
mov r12,#12
add r2,r2,#0x20
b .Ldone
.align 4
.L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!
.Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq .Ldone
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
veor q8,q8,q10
b .Loop256
.Ldone:
str r12,[r2]
mov r3,#0
.Lenc_key_abort:
mov r0,r3 @ return value
bx lr
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl .Lenc_key
cmp r0,#0
bne .Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16
add r0,r2,r12,lsl#4 @ end of key schedule
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
.Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi .Loop_imc
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
.Ldec_key_abort:
ldmia sp!,{r4,pc}
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
.Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
.Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo .Lcbc_abort
moveq r8,#0
cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240]
and r2,r2,#-16
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
beq .Lcbc_dec
cmp r5,#2
veor q0,q0,q6
veor q5,q8,q7
beq .Lcbc_enc128
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
.Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq .Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
.Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc
vst1.8 {q6},[r1]!
b .Lcbc_done
.align 5
.Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
.Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc128
vst1.8 {q6},[r1]!
b .Lcbc_done
.align 5
.Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo .Lcbc_dec_tail
vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10
.Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7
subs r2,r2,#0x30
veor q5,q2,q7
movlo r6,r2 @ r6, r6, is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
vorr q6,q11,q11
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs .Loop3x_cbc_dec
cmn r2,#0x30
beq .Lcbc_done
nop
.Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq .Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b .Lcbc_done
.Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
.Lcbc_done:
vst1.8 {q6},[r4]
.Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
ldr r5,[r3,#240]
ldr r8, [r4, #12]
vld1.32 {q0},[r4]
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
bls .Lctr32_tail
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_ctr32
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]!
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
veor q2,q2,q4
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs .Loop3x_ctr32
adds r2,r2,#3
beq .Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0
.Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt .Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq .Lctr32_done
vst1.8 {q3},[r1]
.Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1,977 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.Lbn_mul_mont
#endif
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
.Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,.Lbn_mul_mont
ldr r2,.LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq .Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
.Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
.L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
.Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
.Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne .Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
.Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne .Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
.Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne .Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
.Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont,.-bn_mul_mont
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
cmp r5,#8
bhi .LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b .LNEON_outer8
.align 4
.LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne .LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b .LNEON_tail_entry
.align 4
.LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
.LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne .LNEON_8n_init
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b .LNEON_8n_outer
.align 4
.LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b .LNEON_8n_inner
.align 4
.LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne .LNEON_8n_inner
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne .LNEON_8n_outer
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b .LNEON_tail_entry
.align 4
.LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
.LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne .LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
.LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
.LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_copy_n_zap
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1,255 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.arch armv7-a
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
vdup.8 q9,d7[7]
vshr.u64 d26,d6,#63
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
vand q8,q8,q9
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
.size gcm_init_neon,.-gcm_init_neon
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
.Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
.Lgmult_neon:
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
subs r3,#16
bne .Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
sub r0,#16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1,253 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>
.text
.fpu neon
.code 32
#undef __thumb2__
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
bx lr
.size gcm_init_v8,.-gcm_init_v8
.globl gcm_gmult_v8
.hidden gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
bx lr
.size gcm_gmult_v8,.-gcm_gmult_v8
.globl gcm_ghash_v8
.hidden gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo .Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b .Loop_mod2x_v8
.align 4
.Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq .Ldone_v8 @ is r3 zero?
.Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
.Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1,379 +0,0 @@
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.syntax unified
.arch armv7-a
.fpu vfp
.text
@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@ with |argv|, then saves the callee-saved registers into |state|. It returns
@ the result of |func|. The |unwind| argument is unused.
@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
@ const uint32_t *argv, size_t argc,
@ int unwind);
.type abi_test_trampoline, %function
.globl abi_test_trampoline
.hidden abi_test_trampoline
.align 4
abi_test_trampoline:
@ Save parameters and all callee-saved registers. For convenience, we
@ save r9 on iOS even though it's volatile.
vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
sub sp, sp, #28
@ Every register in AAPCS is either non-volatile or a parameter (except
@ r9 on iOS), so this code, by the actual call, loses all its scratch
@ registers. First fill in stack parameters while there are registers
@ to spare.
cmp r3, #4
bls .Lstack_args_done
mov r4, sp @ r4 is the output pointer.
add r5, r2, r3, lsl #2 @ Set r5 to the end of argv.
add r2, r2, #16 @ Skip four arguments.
.Lstack_args_loop:
ldr r6, [r2], #4
cmp r2, r5
str r6, [r4], #4
bne .Lstack_args_loop
.Lstack_args_done:
@ Load registers from |r1|.
vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
ldmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Load register parameters. This uses up our remaining registers, so we
@ repurpose lr as scratch space.
ldr r3, [sp, #40] @ Reload argc.
ldr lr, [sp, #36] @ .Load argv into lr.
cmp r3, #3
bhi .Larg_r3
beq .Larg_r2
cmp r3, #1
bhi .Larg_r1
beq .Larg_r0
b .Largs_done
.Larg_r3:
ldr r3, [lr, #12] @ argv[3]
.Larg_r2:
ldr r2, [lr, #8] @ argv[2]
.Larg_r1:
ldr r1, [lr, #4] @ argv[1]
.Larg_r0:
ldr r0, [lr] @ argv[0]
.Largs_done:
@ With every other register in use, load the function pointer into lr
@ and call the function.
ldr lr, [sp, #28]
blx lr
@ r1-r3 are free for use again. The trampoline only supports
@ single-return functions. Pass r4-r11 to the caller.
ldr r1, [sp, #32]
vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
#if defined(__APPLE__)
@ r9 is not volatile on iOS.
stmia r1!, {r4,r5,r6,r7,r8,r10-r11}
#else
stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
#endif
@ Unwind the stack and restore registers.
add sp, sp, #44 @ 44 = 28+16
ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above).
vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
bx lr
.size abi_test_trampoline,.-abi_test_trampoline
.type abi_test_clobber_r0, %function
.globl abi_test_clobber_r0
.hidden abi_test_clobber_r0
.align 4
abi_test_clobber_r0:
mov r0, #0
bx lr
.size abi_test_clobber_r0,.-abi_test_clobber_r0
.type abi_test_clobber_r1, %function
.globl abi_test_clobber_r1
.hidden abi_test_clobber_r1
.align 4
abi_test_clobber_r1:
mov r1, #0
bx lr
.size abi_test_clobber_r1,.-abi_test_clobber_r1
.type abi_test_clobber_r2, %function
.globl abi_test_clobber_r2
.hidden abi_test_clobber_r2
.align 4
abi_test_clobber_r2:
mov r2, #0
bx lr
.size abi_test_clobber_r2,.-abi_test_clobber_r2
.type abi_test_clobber_r3, %function
.globl abi_test_clobber_r3
.hidden abi_test_clobber_r3
.align 4
abi_test_clobber_r3:
mov r3, #0
bx lr
.size abi_test_clobber_r3,.-abi_test_clobber_r3
.type abi_test_clobber_r4, %function
.globl abi_test_clobber_r4
.hidden abi_test_clobber_r4
.align 4
abi_test_clobber_r4:
mov r4, #0
bx lr
.size abi_test_clobber_r4,.-abi_test_clobber_r4
.type abi_test_clobber_r5, %function
.globl abi_test_clobber_r5
.hidden abi_test_clobber_r5
.align 4
abi_test_clobber_r5:
mov r5, #0
bx lr
.size abi_test_clobber_r5,.-abi_test_clobber_r5
.type abi_test_clobber_r6, %function
.globl abi_test_clobber_r6
.hidden abi_test_clobber_r6
.align 4
abi_test_clobber_r6:
mov r6, #0
bx lr
.size abi_test_clobber_r6,.-abi_test_clobber_r6
.type abi_test_clobber_r7, %function
.globl abi_test_clobber_r7
.hidden abi_test_clobber_r7
.align 4
abi_test_clobber_r7:
mov r7, #0
bx lr
.size abi_test_clobber_r7,.-abi_test_clobber_r7
.type abi_test_clobber_r8, %function
.globl abi_test_clobber_r8
.hidden abi_test_clobber_r8
.align 4
abi_test_clobber_r8:
mov r8, #0
bx lr
.size abi_test_clobber_r8,.-abi_test_clobber_r8
.type abi_test_clobber_r9, %function
.globl abi_test_clobber_r9
.hidden abi_test_clobber_r9
.align 4
abi_test_clobber_r9:
mov r9, #0
bx lr
.size abi_test_clobber_r9,.-abi_test_clobber_r9
.type abi_test_clobber_r10, %function
.globl abi_test_clobber_r10
.hidden abi_test_clobber_r10
.align 4
abi_test_clobber_r10:
mov r10, #0
bx lr
.size abi_test_clobber_r10,.-abi_test_clobber_r10
.type abi_test_clobber_r11, %function
.globl abi_test_clobber_r11
.hidden abi_test_clobber_r11
.align 4
abi_test_clobber_r11:
mov r11, #0
bx lr
.size abi_test_clobber_r11,.-abi_test_clobber_r11
.type abi_test_clobber_r12, %function
.globl abi_test_clobber_r12
.hidden abi_test_clobber_r12
.align 4
abi_test_clobber_r12:
mov r12, #0
bx lr
.size abi_test_clobber_r12,.-abi_test_clobber_r12
.type abi_test_clobber_d0, %function
.globl abi_test_clobber_d0
.hidden abi_test_clobber_d0
.align 4
abi_test_clobber_d0:
mov r0, #0
vmov s0, r0
vmov s1, r0
bx lr
.size abi_test_clobber_d0,.-abi_test_clobber_d0
.type abi_test_clobber_d1, %function
.globl abi_test_clobber_d1
.hidden abi_test_clobber_d1
.align 4
abi_test_clobber_d1:
mov r0, #0
vmov s2, r0
vmov s3, r0
bx lr
.size abi_test_clobber_d1,.-abi_test_clobber_d1
.type abi_test_clobber_d2, %function
.globl abi_test_clobber_d2
.hidden abi_test_clobber_d2
.align 4
abi_test_clobber_d2:
mov r0, #0
vmov s4, r0
vmov s5, r0
bx lr
.size abi_test_clobber_d2,.-abi_test_clobber_d2
.type abi_test_clobber_d3, %function
.globl abi_test_clobber_d3
.hidden abi_test_clobber_d3
.align 4
abi_test_clobber_d3:
mov r0, #0
vmov s6, r0
vmov s7, r0
bx lr
.size abi_test_clobber_d3,.-abi_test_clobber_d3
.type abi_test_clobber_d4, %function
.globl abi_test_clobber_d4
.hidden abi_test_clobber_d4
.align 4
abi_test_clobber_d4:
mov r0, #0
vmov s8, r0
vmov s9, r0
bx lr
.size abi_test_clobber_d4,.-abi_test_clobber_d4
.type abi_test_clobber_d5, %function
.globl abi_test_clobber_d5
.hidden abi_test_clobber_d5
.align 4
abi_test_clobber_d5:
mov r0, #0
vmov s10, r0
vmov s11, r0
bx lr
.size abi_test_clobber_d5,.-abi_test_clobber_d5
.type abi_test_clobber_d6, %function
.globl abi_test_clobber_d6
.hidden abi_test_clobber_d6
.align 4
abi_test_clobber_d6:
mov r0, #0
vmov s12, r0
vmov s13, r0
bx lr
.size abi_test_clobber_d6,.-abi_test_clobber_d6
.type abi_test_clobber_d7, %function
.globl abi_test_clobber_d7
.hidden abi_test_clobber_d7
.align 4
abi_test_clobber_d7:
mov r0, #0
vmov s14, r0
vmov s15, r0
bx lr
.size abi_test_clobber_d7,.-abi_test_clobber_d7
.type abi_test_clobber_d8, %function
.globl abi_test_clobber_d8
.hidden abi_test_clobber_d8
.align 4
abi_test_clobber_d8:
mov r0, #0
vmov s16, r0
vmov s17, r0
bx lr
.size abi_test_clobber_d8,.-abi_test_clobber_d8
.type abi_test_clobber_d9, %function
.globl abi_test_clobber_d9
.hidden abi_test_clobber_d9
.align 4
abi_test_clobber_d9:
mov r0, #0
vmov s18, r0
vmov s19, r0
bx lr
.size abi_test_clobber_d9,.-abi_test_clobber_d9
.type abi_test_clobber_d10, %function
.globl abi_test_clobber_d10
.hidden abi_test_clobber_d10
.align 4
abi_test_clobber_d10:
mov r0, #0
vmov s20, r0
vmov s21, r0
bx lr
.size abi_test_clobber_d10,.-abi_test_clobber_d10
.type abi_test_clobber_d11, %function
.globl abi_test_clobber_d11
.hidden abi_test_clobber_d11
.align 4
abi_test_clobber_d11:
mov r0, #0
vmov s22, r0
vmov s23, r0
bx lr
.size abi_test_clobber_d11,.-abi_test_clobber_d11
.type abi_test_clobber_d12, %function
.globl abi_test_clobber_d12
.hidden abi_test_clobber_d12
.align 4
abi_test_clobber_d12:
mov r0, #0
vmov s24, r0
vmov s25, r0
bx lr
.size abi_test_clobber_d12,.-abi_test_clobber_d12
.type abi_test_clobber_d13, %function
.globl abi_test_clobber_d13
.hidden abi_test_clobber_d13
.align 4
abi_test_clobber_d13:
mov r0, #0
vmov s26, r0
vmov s27, r0
bx lr
.size abi_test_clobber_d13,.-abi_test_clobber_d13
.type abi_test_clobber_d14, %function
.globl abi_test_clobber_d14
.hidden abi_test_clobber_d14
.align 4
abi_test_clobber_d14:
mov r0, #0
vmov s28, r0
vmov s29, r0
bx lr
.size abi_test_clobber_d14,.-abi_test_clobber_d14
.type abi_test_clobber_d15, %function
.globl abi_test_clobber_d15
.hidden abi_test_clobber_d15
.align 4
abi_test_clobber_d15:
mov r0, #0
vmov s30, r0
vmov s31, r0
bx lr
.size abi_test_clobber_d15,.-abi_test_clobber_d15
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits

View File

@ -1,587 +0,0 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__)
.machine "any"
.abiversion 2
.text
.globl gcm_init_p8
.type gcm_init_p8,@function
.align 5
gcm_init_p8:
.localentry gcm_init_p8,0
li 0,-4096
li 8,0x10
li 12,-1
li 9,0x20
or 0,0,0
li 10,0x30
.long 0x7D202699
vspltisb 8,-16
vspltisb 5,1
vaddubm 8,8,8
vxor 4,4,4
vor 8,8,5
vsldoi 8,8,4,15
vsldoi 6,4,5,1
vaddubm 8,8,8
vspltisb 7,7
vor 8,8,6
vspltb 6,9,0
vsl 9,9,5
vsrab 6,6,7
vand 6,6,8
vxor 3,9,6
vsldoi 9,3,3,8
vsldoi 8,4,8,8
vsldoi 11,4,9,8
vsldoi 10,9,4,8
.long 0x7D001F99
.long 0x7D681F99
li 8,0x40
.long 0x7D291F99
li 9,0x50
.long 0x7D4A1F99
li 10,0x60
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 16,0,6
vsldoi 17,16,16,8
vsldoi 19,4,17,8
vsldoi 18,17,4,8
.long 0x7E681F99
li 8,0x70
.long 0x7E291F99
li 9,0x80
.long 0x7E4A1F99
li 10,0x90
.long 0x10039CC8
.long 0x11B09CC8
.long 0x10238CC8
.long 0x11D08CC8
.long 0x104394C8
.long 0x11F094C8
.long 0x10E044C8
.long 0x114D44C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vsldoi 11,14,4,8
vsldoi 9,4,14,8
vxor 0,0,5
vxor 2,2,6
vxor 13,13,11
vxor 15,15,9
vsldoi 0,0,0,8
vsldoi 13,13,13,8
vxor 0,0,7
vxor 13,13,10
vsldoi 6,0,0,8
vsldoi 9,13,13,8
.long 0x100044C8
.long 0x11AD44C8
vxor 6,6,2
vxor 9,9,15
vxor 0,0,6
vxor 13,13,9
vsldoi 9,0,0,8
vsldoi 17,13,13,8
vsldoi 11,4,9,8
vsldoi 10,9,4,8
vsldoi 19,4,17,8
vsldoi 18,17,4,8
.long 0x7D681F99
li 8,0xa0
.long 0x7D291F99
li 9,0xb0
.long 0x7D4A1F99
li 10,0xc0
.long 0x7E681F99
.long 0x7E291F99
.long 0x7E4A1F99
or 12,12,12
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size gcm_init_p8,.-gcm_init_p8
.globl gcm_gmult_p8
.type gcm_gmult_p8,@function
.align 5
gcm_gmult_p8:
.localentry gcm_gmult_p8,0
lis 0,0xfff8
li 8,0x10
li 12,-1
li 9,0x20
or 0,0,0
li 10,0x30
.long 0x7C601E99
.long 0x7D682699
lvsl 12,0,0
.long 0x7D292699
vspltisb 5,0x07
.long 0x7D4A2699
vxor 12,12,5
.long 0x7D002699
vperm 3,3,3,12
vxor 4,4,4
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 0,0,6
vperm 0,0,0,12
.long 0x7C001F99
or 12,12,12
blr
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size gcm_gmult_p8,.-gcm_gmult_p8
.globl gcm_ghash_p8
.type gcm_ghash_p8,@function
.align 5
gcm_ghash_p8:
.localentry gcm_ghash_p8,0
li 0,-4096
li 8,0x10
li 12,-1
li 9,0x20
or 0,0,0
li 10,0x30
.long 0x7C001E99
.long 0x7D682699
li 8,0x40
lvsl 12,0,0
.long 0x7D292699
li 9,0x50
vspltisb 5,0x07
.long 0x7D4A2699
li 10,0x60
vxor 12,12,5
.long 0x7D002699
vperm 0,0,0,12
vxor 4,4,4
cmpldi 6,64
bge .Lgcm_ghash_p8_4x
.long 0x7C602E99
addi 5,5,16
subic. 6,6,16
vperm 3,3,3,12
vxor 3,3,0
beq .Lshort
.long 0x7E682699
li 8,16
.long 0x7E292699
add 9,5,6
.long 0x7E4A2699
.align 5
.Loop_2x:
.long 0x7E002E99
vperm 16,16,16,12
subic 6,6,32
.long 0x10039CC8
.long 0x11B05CC8
subfe 0,0,0
.long 0x10238CC8
.long 0x11D04CC8
and 0,0,6
.long 0x104394C8
.long 0x11F054C8
add 5,5,0
vxor 0,0,13
vxor 1,1,14
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 2,2,15
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
.long 0x7C682E99
addi 5,5,32
vsldoi 6,0,0,8
.long 0x100044C8
vperm 3,3,3,12
vxor 6,6,2
vxor 3,3,6
vxor 3,3,0
cmpld 9,5
bgt .Loop_2x
cmplwi 6,0
bne .Leven
.Lshort:
.long 0x10035CC8
.long 0x10234CC8
.long 0x104354C8
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
.Leven:
vxor 0,0,6
vperm 0,0,0,12
.long 0x7C001F99
or 12,12,12
blr
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.align 5
.gcm_ghash_p8_4x:
.Lgcm_ghash_p8_4x:
stdu 1,-256(1)
li 10,63
li 11,79
stvx 20,10,1
addi 10,10,32
stvx 21,11,1
addi 11,11,32
stvx 22,10,1
addi 10,10,32
stvx 23,11,1
addi 11,11,32
stvx 24,10,1
addi 10,10,32
stvx 25,11,1
addi 11,11,32
stvx 26,10,1
addi 10,10,32
stvx 27,11,1
addi 11,11,32
stvx 28,10,1
addi 10,10,32
stvx 29,11,1
addi 11,11,32
stvx 30,10,1
li 10,0x60
stvx 31,11,1
li 0,-1
stw 12,252(1)
or 0,0,0
lvsl 5,0,8
li 8,0x70
.long 0x7E292699
li 9,0x80
vspltisb 6,8
li 10,0x90
.long 0x7EE82699
li 8,0xa0
.long 0x7F092699
li 9,0xb0
.long 0x7F2A2699
li 10,0xc0
.long 0x7FA82699
li 8,0x10
.long 0x7FC92699
li 9,0x20
.long 0x7FEA2699
li 10,0x30
vsldoi 7,4,6,8
vaddubm 18,5,7
vaddubm 19,6,18
srdi 6,6,4
.long 0x7C602E99
.long 0x7E082E99
subic. 6,6,8
.long 0x7EC92E99
.long 0x7F8A2E99
addi 5,5,0x40
vperm 3,3,3,12
vperm 16,16,16,12
vperm 22,22,22,12
vperm 28,28,28,12
vxor 2,3,0
.long 0x11B0BCC8
.long 0x11D0C4C8
.long 0x11F0CCC8
vperm 11,17,9,18
vperm 5,22,28,19
vperm 10,17,9,19
vperm 6,22,28,18
.long 0x12B68CC8
.long 0x12855CC8
.long 0x137C4CC8
.long 0x134654C8
vxor 21,21,14
vxor 20,20,13
vxor 27,27,21
vxor 26,26,15
blt .Ltail_4x
.Loop_4x:
.long 0x7C602E99
.long 0x7E082E99
subic. 6,6,4
.long 0x7EC92E99
.long 0x7F8A2E99
addi 5,5,0x40
vperm 16,16,16,12
vperm 22,22,22,12
vperm 28,28,28,12
vperm 3,3,3,12
.long 0x1002ECC8
.long 0x1022F4C8
.long 0x1042FCC8
.long 0x11B0BCC8
.long 0x11D0C4C8
.long 0x11F0CCC8
vxor 0,0,20
vxor 1,1,27
vxor 2,2,26
vperm 5,22,28,19
vperm 6,22,28,18
.long 0x10E044C8
.long 0x12855CC8
.long 0x134654C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x12B68CC8
.long 0x137C4CC8
.long 0x100044C8
vxor 20,20,13
vxor 26,26,15
vxor 2,2,3
vxor 21,21,14
vxor 2,2,6
vxor 27,27,21
vxor 2,2,0
bge .Loop_4x
.Ltail_4x:
.long 0x1002ECC8
.long 0x1022F4C8
.long 0x1042FCC8
vxor 0,0,20
vxor 1,1,27
.long 0x10E044C8
vsldoi 5,1,4,8
vsldoi 6,4,1,8
vxor 2,2,26
vxor 0,0,5
vxor 2,2,6
vsldoi 0,0,0,8
vxor 0,0,7
vsldoi 6,0,0,8
.long 0x100044C8
vxor 6,6,2
vxor 0,0,6
addic. 6,6,4
beq .Ldone_4x
.long 0x7C602E99
cmpldi 6,2
li 6,-4
blt .Lone
.long 0x7E082E99
beq .Ltwo
.Lthree:
.long 0x7EC92E99
vperm 3,3,3,12
vperm 16,16,16,12
vperm 22,22,22,12
vxor 2,3,0
vor 29,23,23
vor 30, 24, 24
vor 31,25,25
vperm 5,16,22,19
vperm 6,16,22,18
.long 0x12B08CC8
.long 0x13764CC8
.long 0x12855CC8
.long 0x134654C8
vxor 27,27,21
b .Ltail_4x
.align 4
.Ltwo:
vperm 3,3,3,12
vperm 16,16,16,12
vxor 2,3,0
vperm 5,4,16,19
vperm 6,4,16,18
vsldoi 29,4,17,8
vor 30, 17, 17
vsldoi 31,17,4,8
.long 0x12855CC8
.long 0x13704CC8
.long 0x134654C8
b .Ltail_4x
.align 4
.Lone:
vperm 3,3,3,12
vsldoi 29,4,9,8
vor 30, 9, 9
vsldoi 31,9,4,8
vxor 2,3,0
vxor 20,20,20
vxor 27,27,27
vxor 26,26,26
b .Ltail_4x
.Ldone_4x:
vperm 0,0,0,12
.long 0x7C001F99
li 10,63
li 11,79
or 12,12,12
lvx 20,10,1
addi 10,10,32
lvx 21,11,1
addi 11,11,32
lvx 22,10,1
addi 10,10,32
lvx 23,11,1
addi 11,11,32
lvx 24,10,1
addi 10,10,32
lvx 25,11,1
addi 11,11,32
lvx 26,10,1
addi 10,10,32
lvx 27,11,1
addi 11,11,32
lvx 28,10,1
addi 10,10,32
lvx 29,11,1
addi 11,11,32
lvx 30,10,1
lvx 31,11,1
addi 1,1,256
blr
.long 0
.byte 0,12,0x04,0,0x80,0,4,0
.long 0
.size gcm_ghash_p8,.-gcm_ghash_p8
.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM && __powerpc64__
.section .note.GNU-stack,"",@progbits

View File

@ -1,975 +0,0 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl ChaCha20_ctr32
.hidden ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
cmpl 28(%esp),%eax
je .L000no_data
call .Lpic_point
.Lpic_point:
popl %eax
leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
testl $16777216,(%ebp)
jz .L001x86
testl $512,4(%ebp)
jz .L001x86
jmp .Lssse3_shortcut
.L001x86:
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
subl $1,%eax
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp .L002entry
.align 16
.L003outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
.L002entry:
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
jmp .L004loop
.align 16
.L004loop:
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz .L004loop
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb .L005tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz .L003outer_loop
jmp .L006done
.L005tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
.L007tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz .L007tail_loop
.L006done:
addl $132,%esp
.L000no_data:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.hidden ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lssse3_shortcut:
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
cmpl $256,%ecx
jb .L0081x
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
movdqu (%edx),%xmm7
pshufd $0,%xmm3,%xmm0
pshufd $85,%xmm3,%xmm1
pshufd $170,%xmm3,%xmm2
pshufd $255,%xmm3,%xmm3
paddd 48(%eax),%xmm0
pshufd $0,%xmm7,%xmm4
pshufd $85,%xmm7,%xmm5
psubd 64(%eax),%xmm0
pshufd $170,%xmm7,%xmm6
pshufd $255,%xmm7,%xmm7
movdqa %xmm0,64(%ebp)
movdqa %xmm1,80(%ebp)
movdqa %xmm2,96(%ebp)
movdqa %xmm3,112(%ebp)
movdqu 16(%edx),%xmm3
movdqa %xmm4,-64(%ebp)
movdqa %xmm5,-48(%ebp)
movdqa %xmm6,-32(%ebp)
movdqa %xmm7,-16(%ebp)
movdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
pshufd $0,%xmm3,%xmm0
pshufd $85,%xmm3,%xmm1
pshufd $170,%xmm3,%xmm2
pshufd $255,%xmm3,%xmm3
pshufd $0,%xmm7,%xmm4
pshufd $85,%xmm7,%xmm5
pshufd $170,%xmm7,%xmm6
pshufd $255,%xmm7,%xmm7
movdqa %xmm0,(%ebp)
movdqa %xmm1,16(%ebp)
movdqa %xmm2,32(%ebp)
movdqa %xmm3,48(%ebp)
movdqa %xmm4,-128(%ebp)
movdqa %xmm5,-112(%ebp)
movdqa %xmm6,-96(%ebp)
movdqa %xmm7,-80(%ebp)
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp .L009outer_loop
.align 16
.L009outer_loop:
movdqa -112(%ebp),%xmm1
movdqa -96(%ebp),%xmm2
movdqa -80(%ebp),%xmm3
movdqa -48(%ebp),%xmm5
movdqa -32(%ebp),%xmm6
movdqa -16(%ebp),%xmm7
movdqa %xmm1,-112(%ebx)
movdqa %xmm2,-96(%ebx)
movdqa %xmm3,-80(%ebx)
movdqa %xmm5,-48(%ebx)
movdqa %xmm6,-32(%ebx)
movdqa %xmm7,-16(%ebx)
movdqa 32(%ebp),%xmm2
movdqa 48(%ebp),%xmm3
movdqa 64(%ebp),%xmm4
movdqa 80(%ebp),%xmm5
movdqa 96(%ebp),%xmm6
movdqa 112(%ebp),%xmm7
paddd 64(%eax),%xmm4
movdqa %xmm2,32(%ebx)
movdqa %xmm3,48(%ebx)
movdqa %xmm4,64(%ebx)
movdqa %xmm5,80(%ebx)
movdqa %xmm6,96(%ebx)
movdqa %xmm7,112(%ebx)
movdqa %xmm4,64(%ebp)
movdqa -128(%ebp),%xmm0
movdqa %xmm4,%xmm6
movdqa -64(%ebp),%xmm3
movdqa (%ebp),%xmm4
movdqa 16(%ebp),%xmm5
movl $10,%edx
nop
.align 16
.L010loop:
paddd %xmm3,%xmm0
movdqa %xmm3,%xmm2
pxor %xmm0,%xmm6
pshufb (%eax),%xmm6
paddd %xmm6,%xmm4
pxor %xmm4,%xmm2
movdqa -48(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -112(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 80(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-128(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,64(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
movdqa %xmm4,(%ebx)
pshufb (%eax),%xmm7
movdqa %xmm2,-64(%ebx)
paddd %xmm7,%xmm5
movdqa 32(%ebx),%xmm4
pxor %xmm5,%xmm3
movdqa -32(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -96(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 96(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-112(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,80(%ebx)
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
movdqa %xmm5,16(%ebx)
pshufb (%eax),%xmm6
movdqa %xmm3,-48(%ebx)
paddd %xmm6,%xmm4
movdqa 48(%ebx),%xmm5
pxor %xmm4,%xmm2
movdqa -16(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -80(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 112(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-96(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,96(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
pshufb (%eax),%xmm7
movdqa %xmm2,-32(%ebx)
paddd %xmm7,%xmm5
pxor %xmm5,%xmm3
movdqa -48(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -128(%ebx),%xmm0
paddd %xmm3,%xmm1
pxor %xmm1,%xmm7
movdqa %xmm1,-80(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,%xmm6
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
pshufb (%eax),%xmm6
movdqa %xmm3,-16(%ebx)
paddd %xmm6,%xmm4
pxor %xmm4,%xmm2
movdqa -32(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -112(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 64(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-128(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,112(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
movdqa %xmm4,32(%ebx)
pshufb (%eax),%xmm7
movdqa %xmm2,-48(%ebx)
paddd %xmm7,%xmm5
movdqa (%ebx),%xmm4
pxor %xmm5,%xmm3
movdqa -16(%ebx),%xmm2
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -96(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 80(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-112(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,64(%ebx)
pxor %xmm5,%xmm3
paddd %xmm2,%xmm0
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
pxor %xmm0,%xmm6
por %xmm1,%xmm3
movdqa %xmm5,48(%ebx)
pshufb (%eax),%xmm6
movdqa %xmm3,-32(%ebx)
paddd %xmm6,%xmm4
movdqa 16(%ebx),%xmm5
pxor %xmm4,%xmm2
movdqa -64(%ebx),%xmm3
movdqa %xmm2,%xmm1
pslld $12,%xmm2
psrld $20,%xmm1
por %xmm1,%xmm2
movdqa -80(%ebx),%xmm1
paddd %xmm2,%xmm0
movdqa 96(%ebx),%xmm7
pxor %xmm0,%xmm6
movdqa %xmm0,-96(%ebx)
pshufb 16(%eax),%xmm6
paddd %xmm6,%xmm4
movdqa %xmm6,80(%ebx)
pxor %xmm4,%xmm2
paddd %xmm3,%xmm1
movdqa %xmm2,%xmm0
pslld $7,%xmm2
psrld $25,%xmm0
pxor %xmm1,%xmm7
por %xmm0,%xmm2
pshufb (%eax),%xmm7
movdqa %xmm2,-16(%ebx)
paddd %xmm7,%xmm5
pxor %xmm5,%xmm3
movdqa %xmm3,%xmm0
pslld $12,%xmm3
psrld $20,%xmm0
por %xmm0,%xmm3
movdqa -128(%ebx),%xmm0
paddd %xmm3,%xmm1
movdqa 64(%ebx),%xmm6
pxor %xmm1,%xmm7
movdqa %xmm1,-80(%ebx)
pshufb 16(%eax),%xmm7
paddd %xmm7,%xmm5
movdqa %xmm7,96(%ebx)
pxor %xmm5,%xmm3
movdqa %xmm3,%xmm1
pslld $7,%xmm3
psrld $25,%xmm1
por %xmm1,%xmm3
decl %edx
jnz .L010loop
movdqa %xmm3,-64(%ebx)
movdqa %xmm4,(%ebx)
movdqa %xmm5,16(%ebx)
movdqa %xmm6,64(%ebx)
movdqa %xmm7,96(%ebx)
movdqa -112(%ebx),%xmm1
movdqa -96(%ebx),%xmm2
movdqa -80(%ebx),%xmm3
paddd -128(%ebp),%xmm0
paddd -112(%ebp),%xmm1
paddd -96(%ebp),%xmm2
paddd -80(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa -64(%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa -48(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa -32(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa -16(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd -64(%ebp),%xmm0
paddd -48(%ebp),%xmm1
paddd -32(%ebp),%xmm2
paddd -16(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa (%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa 16(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa 32(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa 48(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd (%ebp),%xmm0
paddd 16(%ebp),%xmm1
paddd 32(%ebp),%xmm2
paddd 48(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 16(%esi),%esi
pxor %xmm0,%xmm4
movdqa 64(%ebx),%xmm0
pxor %xmm1,%xmm5
movdqa 80(%ebx),%xmm1
pxor %xmm2,%xmm6
movdqa 96(%ebx),%xmm2
pxor %xmm3,%xmm7
movdqa 112(%ebx),%xmm3
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 16(%edi),%edi
paddd 64(%ebp),%xmm0
paddd 80(%ebp),%xmm1
paddd 96(%ebp),%xmm2
paddd 112(%ebp),%xmm3
movdqa %xmm0,%xmm6
punpckldq %xmm1,%xmm0
movdqa %xmm2,%xmm7
punpckldq %xmm3,%xmm2
punpckhdq %xmm1,%xmm6
punpckhdq %xmm3,%xmm7
movdqa %xmm0,%xmm1
punpcklqdq %xmm2,%xmm0
movdqa %xmm6,%xmm3
punpcklqdq %xmm7,%xmm6
punpckhqdq %xmm2,%xmm1
punpckhqdq %xmm7,%xmm3
movdqu -128(%esi),%xmm4
movdqu -64(%esi),%xmm5
movdqu (%esi),%xmm2
movdqu 64(%esi),%xmm7
leal 208(%esi),%esi
pxor %xmm0,%xmm4
pxor %xmm1,%xmm5
pxor %xmm2,%xmm6
pxor %xmm3,%xmm7
movdqu %xmm4,-128(%edi)
movdqu %xmm5,-64(%edi)
movdqu %xmm6,(%edi)
movdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc .L009outer_loop
addl $256,%ecx
jz .L011done
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
movd 64(%ebp),%xmm2
movdqu (%ebx),%xmm3
paddd 96(%eax),%xmm2
pand 112(%eax),%xmm3
por %xmm2,%xmm3
.L0081x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L012loop1x
.align 16
.L013outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp .L012loop1x
.align 16
.L012loop1x:
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz .L012loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb .L014tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L013outer1x
jmp .L011done
.L014tail:
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L015tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L015tail_loop
.L011done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -1,997 +0,0 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl bn_mul_add_words
.hidden bn_mul_add_words
.type bn_mul_add_words,@function
.align 16
bn_mul_add_words:
.L_bn_mul_add_words_begin:
call .L000PIC_me_up
.L000PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L001maw_non_sse2
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
movd 16(%esp),%mm0
pxor %mm1,%mm1
jmp .L002maw_sse2_entry
.align 16
.L003maw_sse2_unrolled:
movd (%eax),%mm3
paddq %mm3,%mm1
movd (%edx),%mm2
pmuludq %mm0,%mm2
movd 4(%edx),%mm4
pmuludq %mm0,%mm4
movd 8(%edx),%mm6
pmuludq %mm0,%mm6
movd 12(%edx),%mm7
pmuludq %mm0,%mm7
paddq %mm2,%mm1
movd 4(%eax),%mm3
paddq %mm4,%mm3
movd 8(%eax),%mm5
paddq %mm6,%mm5
movd 12(%eax),%mm4
paddq %mm4,%mm7
movd %mm1,(%eax)
movd 16(%edx),%mm2
pmuludq %mm0,%mm2
psrlq $32,%mm1
movd 20(%edx),%mm4
pmuludq %mm0,%mm4
paddq %mm3,%mm1
movd 24(%edx),%mm6
pmuludq %mm0,%mm6
movd %mm1,4(%eax)
psrlq $32,%mm1
movd 28(%edx),%mm3
addl $32,%edx
pmuludq %mm0,%mm3
paddq %mm5,%mm1
movd 16(%eax),%mm5
paddq %mm5,%mm2
movd %mm1,8(%eax)
psrlq $32,%mm1
paddq %mm7,%mm1
movd 20(%eax),%mm5
paddq %mm5,%mm4
movd %mm1,12(%eax)
psrlq $32,%mm1
paddq %mm2,%mm1
movd 24(%eax),%mm5
paddq %mm5,%mm6
movd %mm1,16(%eax)
psrlq $32,%mm1
paddq %mm4,%mm1
movd 28(%eax),%mm5
paddq %mm5,%mm3
movd %mm1,20(%eax)
psrlq $32,%mm1
paddq %mm6,%mm1
movd %mm1,24(%eax)
psrlq $32,%mm1
paddq %mm3,%mm1
movd %mm1,28(%eax)
leal 32(%eax),%eax
psrlq $32,%mm1
subl $8,%ecx
jz .L004maw_sse2_exit
.L002maw_sse2_entry:
testl $4294967288,%ecx
jnz .L003maw_sse2_unrolled
.align 4
.L005maw_sse2_loop:
movd (%edx),%mm2
movd (%eax),%mm3
pmuludq %mm0,%mm2
leal 4(%edx),%edx
paddq %mm3,%mm1
paddq %mm2,%mm1
movd %mm1,(%eax)
subl $1,%ecx
psrlq $32,%mm1
leal 4(%eax),%eax
jnz .L005maw_sse2_loop
.L004maw_sse2_exit:
movd %mm1,%eax
emms
ret
.align 16
.L001maw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %esi,%esi
movl 20(%esp),%edi
movl 28(%esp),%ecx
movl 24(%esp),%ebx
andl $4294967288,%ecx
movl 32(%esp),%ebp
pushl %ecx
jz .L006maw_finish
.align 16
.L007maw_loop:
movl (%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl (%edi),%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
movl 4(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 4(%edi),%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
movl 8(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 8(%edi),%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
movl 12(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 12(%edi),%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
movl 16(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 16(%edi),%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
movl 20(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 20(%edi),%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
movl 24(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 24(%edi),%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
movl 28(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 28(%edi),%eax
adcl $0,%edx
movl %eax,28(%edi)
movl %edx,%esi
subl $8,%ecx
leal 32(%ebx),%ebx
leal 32(%edi),%edi
jnz .L007maw_loop
.L006maw_finish:
movl 32(%esp),%ecx
andl $7,%ecx
jnz .L008maw_finish2
jmp .L009maw_end
.L008maw_finish2:
movl (%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl (%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,(%edi)
movl %edx,%esi
jz .L009maw_end
movl 4(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 4(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,4(%edi)
movl %edx,%esi
jz .L009maw_end
movl 8(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 8(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,8(%edi)
movl %edx,%esi
jz .L009maw_end
movl 12(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 12(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,12(%edi)
movl %edx,%esi
jz .L009maw_end
movl 16(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 16(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,16(%edi)
movl %edx,%esi
jz .L009maw_end
movl 20(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 20(%edi),%eax
adcl $0,%edx
decl %ecx
movl %eax,20(%edi)
movl %edx,%esi
jz .L009maw_end
movl 24(%ebx),%eax
mull %ebp
addl %esi,%eax
adcl $0,%edx
addl 24(%edi),%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
.L009maw_end:
movl %esi,%eax
popl %ecx
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_add_words,.-.L_bn_mul_add_words_begin
.globl bn_mul_words
.hidden bn_mul_words
.type bn_mul_words,@function
.align 16
bn_mul_words:
.L_bn_mul_words_begin:
call .L010PIC_me_up
.L010PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L011mw_non_sse2
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
movd 16(%esp),%mm0
pxor %mm1,%mm1
.align 16
.L012mw_sse2_loop:
movd (%edx),%mm2
pmuludq %mm0,%mm2
leal 4(%edx),%edx
paddq %mm2,%mm1
movd %mm1,(%eax)
subl $1,%ecx
psrlq $32,%mm1
leal 4(%eax),%eax
jnz .L012mw_sse2_loop
movd %mm1,%eax
emms
ret
.align 16
.L011mw_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %esi,%esi
movl 20(%esp),%edi
movl 24(%esp),%ebx
movl 28(%esp),%ebp
movl 32(%esp),%ecx
andl $4294967288,%ebp
jz .L013mw_finish
.L014mw_loop:
movl (%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
movl 4(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
movl 8(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
movl 12(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
movl 16(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
movl 20(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
movl 24(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
movl 28(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,28(%edi)
movl %edx,%esi
addl $32,%ebx
addl $32,%edi
subl $8,%ebp
jz .L013mw_finish
jmp .L014mw_loop
.L013mw_finish:
movl 28(%esp),%ebp
andl $7,%ebp
jnz .L015mw_finish2
jmp .L016mw_end
.L015mw_finish2:
movl (%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 4(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,4(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 8(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,8(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 12(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,12(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 16(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,16(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 20(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,20(%edi)
movl %edx,%esi
decl %ebp
jz .L016mw_end
movl 24(%ebx),%eax
mull %ecx
addl %esi,%eax
adcl $0,%edx
movl %eax,24(%edi)
movl %edx,%esi
.L016mw_end:
movl %esi,%eax
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_mul_words,.-.L_bn_mul_words_begin
.globl bn_sqr_words
.hidden bn_sqr_words
.type bn_sqr_words,@function
.align 16
bn_sqr_words:
.L_bn_sqr_words_begin:
call .L017PIC_me_up
.L017PIC_me_up:
popl %eax
leal OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax
btl $26,(%eax)
jnc .L018sqr_non_sse2
movl 4(%esp),%eax
movl 8(%esp),%edx
movl 12(%esp),%ecx
.align 16
.L019sqr_sse2_loop:
movd (%edx),%mm0
pmuludq %mm0,%mm0
leal 4(%edx),%edx
movq %mm0,(%eax)
subl $1,%ecx
leal 8(%eax),%eax
jnz .L019sqr_sse2_loop
emms
ret
.align 16
.L018sqr_non_sse2:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%ebx
andl $4294967288,%ebx
jz .L020sw_finish
.L021sw_loop:
movl (%edi),%eax
mull %eax
movl %eax,(%esi)
movl %edx,4(%esi)
movl 4(%edi),%eax
mull %eax
movl %eax,8(%esi)
movl %edx,12(%esi)
movl 8(%edi),%eax
mull %eax
movl %eax,16(%esi)
movl %edx,20(%esi)
movl 12(%edi),%eax
mull %eax
movl %eax,24(%esi)
movl %edx,28(%esi)
movl 16(%edi),%eax
mull %eax
movl %eax,32(%esi)
movl %edx,36(%esi)
movl 20(%edi),%eax
mull %eax
movl %eax,40(%esi)
movl %edx,44(%esi)
movl 24(%edi),%eax
mull %eax
movl %eax,48(%esi)
movl %edx,52(%esi)
movl 28(%edi),%eax
mull %eax
movl %eax,56(%esi)
movl %edx,60(%esi)
addl $32,%edi
addl $64,%esi
subl $8,%ebx
jnz .L021sw_loop
.L020sw_finish:
movl 28(%esp),%ebx
andl $7,%ebx
jz .L022sw_end
movl (%edi),%eax
mull %eax
movl %eax,(%esi)
decl %ebx
movl %edx,4(%esi)
jz .L022sw_end
movl 4(%edi),%eax
mull %eax
movl %eax,8(%esi)
decl %ebx
movl %edx,12(%esi)
jz .L022sw_end
movl 8(%edi),%eax
mull %eax
movl %eax,16(%esi)
decl %ebx
movl %edx,20(%esi)
jz .L022sw_end
movl 12(%edi),%eax
mull %eax
movl %eax,24(%esi)
decl %ebx
movl %edx,28(%esi)
jz .L022sw_end
movl 16(%edi),%eax
mull %eax
movl %eax,32(%esi)
decl %ebx
movl %edx,36(%esi)
jz .L022sw_end
movl 20(%edi),%eax
mull %eax
movl %eax,40(%esi)
decl %ebx
movl %edx,44(%esi)
jz .L022sw_end
movl 24(%edi),%eax
mull %eax
movl %eax,48(%esi)
movl %edx,52(%esi)
.L022sw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sqr_words,.-.L_bn_sqr_words_begin
.globl bn_div_words
.hidden bn_div_words
.type bn_div_words,@function
.align 16
bn_div_words:
.L_bn_div_words_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
movl 12(%esp),%ecx
divl %ecx
ret
.size bn_div_words,.-.L_bn_div_words_begin
.globl bn_add_words
.hidden bn_add_words
.type bn_add_words,@function
.align 16
bn_add_words:
.L_bn_add_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz .L023aw_finish
.L024aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl 4(%esi),%ecx
movl 4(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl 8(%esi),%ecx
movl 8(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl 12(%esi),%ecx
movl 12(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl 16(%esi),%ecx
movl 16(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl 20(%esi),%ecx
movl 20(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl 24(%esi),%ecx
movl 24(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl 28(%esi),%ecx
movl 28(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L024aw_loop
.L023aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
jz .L025aw_end
movl (%esi),%ecx
movl (%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz .L025aw_end
movl 4(%esi),%ecx
movl 4(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz .L025aw_end
movl 8(%esi),%ecx
movl 8(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz .L025aw_end
movl 12(%esi),%ecx
movl 12(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz .L025aw_end
movl 16(%esi),%ecx
movl 16(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz .L025aw_end
movl 20(%esi),%ecx
movl 20(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz .L025aw_end
movl 24(%esi),%ecx
movl 24(%edi),%edx
addl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
addl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
.L025aw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_add_words,.-.L_bn_add_words_begin
.globl bn_sub_words
.hidden bn_sub_words
.type bn_sub_words,@function
.align 16
bn_sub_words:
.L_bn_sub_words_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%ebx
movl 24(%esp),%esi
movl 28(%esp),%edi
movl 32(%esp),%ebp
xorl %eax,%eax
andl $4294967288,%ebp
jz .L026aw_finish
.L027aw_loop:
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,(%ebx)
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,4(%ebx)
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,8(%ebx)
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,12(%ebx)
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,16(%ebx)
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,20(%ebx)
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
movl 28(%esi),%ecx
movl 28(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,28(%ebx)
addl $32,%esi
addl $32,%edi
addl $32,%ebx
subl $8,%ebp
jnz .L027aw_loop
.L026aw_finish:
movl 32(%esp),%ebp
andl $7,%ebp
jz .L028aw_end
movl (%esi),%ecx
movl (%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,(%ebx)
jz .L028aw_end
movl 4(%esi),%ecx
movl 4(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,4(%ebx)
jz .L028aw_end
movl 8(%esi),%ecx
movl 8(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,8(%ebx)
jz .L028aw_end
movl 12(%esi),%ecx
movl 12(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,12(%ebx)
jz .L028aw_end
movl 16(%esi),%ecx
movl 16(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,16(%ebx)
jz .L028aw_end
movl 20(%esi),%ecx
movl 20(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
decl %ebp
movl %ecx,20(%ebx)
jz .L028aw_end
movl 24(%esi),%ecx
movl 24(%edi),%edx
subl %eax,%ecx
movl $0,%eax
adcl %eax,%eax
subl %edx,%ecx
adcl $0,%eax
movl %ecx,24(%ebx)
.L028aw_end:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size bn_sub_words,.-.L_bn_sub_words_begin
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

View File

@ -1,294 +0,0 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.type gcm_gmult_ssse3,@function
.align 16
gcm_gmult_ssse3:
.L_gcm_gmult_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movdqu (%edi),%xmm0
call .L000pic_point
.L000pic_point:
popl %eax
movdqa .Lreverse_bytes-.L000pic_point(%eax),%xmm7
movdqa .Llow4_mask-.L000pic_point(%eax),%xmm2
.byte 102,15,56,0,199
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L001loop_row_1:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L001loop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L002loop_row_2:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L002loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L003loop_row_3:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L003loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,0,215
movdqu %xmm2,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.type gcm_ghash_ssse3,@function
.align 16
gcm_ghash_ssse3:
.L_gcm_ghash_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%edx
movl 32(%esp),%ecx
movdqu (%edi),%xmm0
call .L004pic_point
.L004pic_point:
popl %ebx
movdqa .Lreverse_bytes-.L004pic_point(%ebx),%xmm7
andl $-16,%ecx
.byte 102,15,56,0,199
pxor %xmm3,%xmm3
.L005loop_ghash:
movdqa .Llow4_mask-.L004pic_point(%ebx),%xmm2
movdqu (%edx),%xmm1
.byte 102,15,56,0,207
pxor %xmm1,%xmm0
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
movl $5,%eax
.L006loop_row_4:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L006loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L007loop_row_5:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L007loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L008loop_row_6:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L008loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leal -256(%esi),%esi
leal 16(%edx),%edx
subl $16,%ecx
jnz .L005loop_ghash
.byte 102,15,56,0,199
movdqu %xmm0,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin
.align 16
.Lreverse_bytes:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.align 16
.Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,330 +0,0 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L000pic
.L000pic:
popl %ecx
leal .Lbswap-.L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%edx)
ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L001pic
.L001pic:
popl %ecx
leal .Lbswap-.L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call .L002pic
.L002pic:
popl %ecx
leal .Lbswap-.L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz .L003odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe .L004even_tail
jmp .L005mod_loop
.align 32
.L005mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
.byte 102,15,56,0,253
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
.byte 102,15,56,0,245
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja .L005mod_loop
.L004even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L006done
movups (%edx),%xmm2
.L003odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.L006done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
.align 64
.Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif
.section .note.GNU-stack,"",@progbits

View File

@ -1,688 +0,0 @@
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
.align 16
md5_block_asm_data_order:
.L_md5_block_asm_data_order_begin:
pushl %esi
pushl %edi
movl 12(%esp),%edi
movl 16(%esp),%esi
movl 20(%esp),%ecx
pushl %ebp
shll $6,%ecx
pushl %ebx
addl %esi,%ecx
subl $64,%ecx
movl (%edi),%eax
pushl %ecx
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
.L000start:
movl %ecx,%edi
movl (%esi),%ebp
xorl %edx,%edi
andl %ebx,%edi
leal 3614090360(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 4(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 3905402710(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 8(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 606105819(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 12(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 3250441966(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 16(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 4118548399(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 20(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 1200080426(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 24(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2821735955(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 28(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 4249261313(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 32(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1770035416(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 36(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 2336552879(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 40(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 4294925233(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 44(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 2304563134(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 48(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1804603682(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 52(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 4254626195(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 56(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2792965006(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 60(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 1236535329(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 4(%esi),%ebp
addl %ecx,%ebx
leal 4129170786(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 24(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3225465664(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 44(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 643717713(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl (%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3921069994(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 3593408605(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 40(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 38016083(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 60(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 3634488961(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 16(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3889429448(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 36(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 568446438(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 56(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3275163606(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 12(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 4107603335(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 32(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 1163531501(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 52(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 2850285829(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 8(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 4243563512(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 28(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 1735328473(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 48(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 2368359562(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 4294588738(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 32(%esi),%ebp
movl %ebx,%edi
leal 2272392833(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 44(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 1839030562(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 56(%esi),%ebp
movl %edx,%edi
leal 4259657740(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 4(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 2763975236(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 16(%esi),%ebp
movl %ebx,%edi
leal 1272893353(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 28(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 4139469664(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 40(%esi),%ebp
movl %edx,%edi
leal 3200236656(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 52(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 681279174(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl (%esi),%ebp
movl %ebx,%edi
leal 3936430074(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 12(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 3572445317(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 24(%esi),%ebp
movl %edx,%edi
leal 76029189(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 36(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 3654602809(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 48(%esi),%ebp
movl %ebx,%edi
leal 3873151461(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 60(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 530742520(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 8(%esi),%ebp
movl %edx,%edi
leal 3299628645(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl (%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
orl %ebx,%edi
leal 4096336452(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 28(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 1126891415(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 56(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2878612391(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 20(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 4237533241(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 48(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1700485571(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 12(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 2399980690(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 40(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 4293915773(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 4(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 2240044497(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 32(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1873313359(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 60(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 4264355552(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 24(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2734768916(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 52(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 1309151649(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 16(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 4149444226(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 44(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 3174756917(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 8(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 718787259(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 36(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 3951481745(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 24(%esp),%ebp
addl %edi,%ebx
addl $64,%esi
roll $21,%ebx
movl (%ebp),%edi
addl %ecx,%ebx
addl %edi,%eax
movl 4(%ebp),%edi
addl %edi,%ebx
movl 8(%ebp),%edi
addl %edi,%ecx
movl 12(%ebp),%edi
addl %edi,%edx
movl %eax,(%ebp)
movl %ebx,4(%ebp)
movl (%esp),%edi
movl %ecx,8(%ebp)
movl %edx,12(%ebp)
cmpl %esi,%edi
jae .L000start
popl %eax
popl %ebx
popl %ebp
popl %edi
popl %esi
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More