Merge branch 'master' into keeper-parallel-storage

2024-09-19 16:20:50 +00:00 · 2024-04-29 15:13:19 +02:00 · 2024-04-29 15:13:19 +02:00 · b1d53f0472
commit b1d53f0472
parent bc3cfb008e dffcc51b83
2240 changed files with 118341 additions and 264980 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@ -96,7 +96,6 @@ Checks: [
  '-modernize-use-default-member-init',
  '-modernize-use-emplace',
  '-modernize-use-nodiscard',
-  '-modernize-use-override',
  '-modernize-use-trailing-return-type',

  '-performance-inefficient-string-concatenation',
@ -120,7 +119,6 @@ Checks: [
  '-readability-named-parameter',
  '-readability-redundant-declaration',
  '-readability-simplify-boolean-expr',
-  '-readability-static-accessed-through-instance',
  '-readability-suspicious-call-argument',
  '-readability-uppercase-literal-suffix',
  '-readability-use-anyofallof',
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -44,22 +44,35 @@ At a minimum, the following information should be added (but add more as needed)
 ---
 ### Modify your CI run:
 **NOTE:** If your merge the PR with modified CI you **MUST KNOW** what you are doing
-**NOTE:** Set desired options before CI starts or re-push after updates
+**NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step

-#### Run only:
- [ ] <!---ci_set_integration--> Integration tests
- [ ] <!---ci_set_arm--> Integration tests (arm64)
- [ ] <!---ci_set_stateless--> Stateless tests (release)
- [ ] <!---ci_set_stateless_asan--> Stateless tests (asan)
- [ ] <!---ci_set_stateful--> Stateful tests (release)
- [ ] <!---ci_set_stateful_asan--> Stateful tests (asan)
- [ ] <!---ci_set_reduced--> No sanitizers
- [ ] <!---ci_set_analyzer--> Tests with analyzer
- [ ] <!---ci_set_fast--> Fast tests
- [ ] <!---job_package_debug--> Only package_debug build
- [ ] <!---PLACE_YOUR_TAG_CONFIGURED_IN_ci_config.py_FILE_HERE--> Add your CI variant description here
+#### Include tests (required builds will be added automatically):
+- [ ] <!---ci_include_fast--> Fast test
+- [ ] <!---ci_include_integration--> Integration Tests
+- [ ] <!---ci_include_stateless--> Stateless tests
+- [ ] <!---ci_include_stateful--> Stateful tests
+- [ ] <!---ci_include_unit--> Unit tests
+- [ ] <!---ci_include_performance--> Performance tests
+- [ ] <!---ci_include_asan--> All with ASAN
+- [ ] <!---ci_include_tsan--> All with TSAN
+- [ ] <!---ci_include_analyzer--> All with Analyzer
+- [ ] <!---ci_include_KEYWORD--> Add your option here

-#### CI options:
+#### Exclude tests:
+- [ ] <!---ci_exclude_fast--> Fast test
+- [ ] <!---ci_exclude_integration--> Integration Tests
+- [ ] <!---ci_exclude_stateless--> Stateless tests
+- [ ] <!---ci_exclude_stateful--> Stateful tests
+- [ ] <!---ci_exclude_performance--> Performance tests
+- [ ] <!---ci_exclude_asan--> All with ASAN
+- [ ] <!---ci_exclude_tsan--> All with TSAN
+- [ ] <!---ci_exclude_msan--> All with MSAN
+- [ ] <!---ci_exclude_ubsan--> All with UBSAN
+- [ ] <!---ci_exclude_coverage--> All with Coverage
+- [ ] <!---ci_exclude_aarch64--> All with Aarch64
+- [ ] <!---ci_exclude_KEYWORD--> Add your option here
+
+#### Extra options:
 - [ ] <!---do_not_test--> do not test (only style check)
 - [ ] <!---no_merge_commit--> disable merge-commit (no merge from master before tests)
 - [ ] <!---no_ci_cache--> disable CI cache (job reuse)
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@ -23,6 +23,10 @@ jobs:
          clear-repository: true # to ensure correct digests
          fetch-depth: 0 # to get version
          filter: tree:0
+      - name: Merge sync PR
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 sync_pr.py --merge || :
      - name: Python unit tests
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
@ -55,16 +59,17 @@ jobs:
    uses: ./.github/workflows/reusable_docker.yml
    with:
      data: ${{ needs.RunConfig.outputs.data }}
-  StyleCheck:
-    needs: [RunConfig, BuildDockers]
-    if: ${{ !failure() && !cancelled() }}
-    uses: ./.github/workflows/reusable_test.yml
-    with:
-      test_name: Style check
-      runner_type: style-checker
-      data: ${{ needs.RunConfig.outputs.data }}
-      run_command: |
-          python3 style_check.py --no-push
+  # Tested in MQ
+  # StyleCheck:
+  #   needs: [RunConfig, BuildDockers]
+  #   if: ${{ !failure() && !cancelled() }}
+  #   uses: ./.github/workflows/reusable_test.yml
+  #   with:
+  #     test_name: Style check
+  #     runner_type: style-checker
+  #     data: ${{ needs.RunConfig.outputs.data }}
+  #     run_command: |
+  #         python3 style_check.py --no-push
  CompatibilityCheckX86:
    needs: [RunConfig, BuilderDebRelease]
    if: ${{ !failure() && !cancelled() }}
@ -234,14 +239,15 @@ jobs:
      build_name: binary_riscv64
      data: ${{ needs.RunConfig.outputs.data }}
      checkout_depth: 0
-  BuilderBinS390X:
-    needs: [RunConfig, BuilderDebRelease]
-    if: ${{ !failure() && !cancelled() }}
-    uses: ./.github/workflows/reusable_build.yml
-    with:
-      build_name: binary_s390x
-      data: ${{ needs.RunConfig.outputs.data }}
-      checkout_depth: 0
+  # disabled because s390x refused to build in the migration to OpenSSL
+  # BuilderBinS390X:
+  #   needs: [RunConfig, BuilderDebRelease]
+  #   if: ${{ !failure() && !cancelled() }}
+  #   uses: ./.github/workflows/reusable_build.yml
+  #   with:
+  #     build_name: binary_s390x
+  #     data: ${{ needs.RunConfig.outputs.data }}
+  #     checkout_depth: 0
 ############################################################################################
 ##################################### Docker images  #######################################
 ############################################################################################
@ -292,7 +298,7 @@ jobs:
      - BuilderBinFreeBSD
      - BuilderBinPPC64
      - BuilderBinRISCV64
-      - BuilderBinS390X
+      # - BuilderBinS390X # disabled because s390x refused to build in the migration to OpenSSL
      - BuilderBinAmd64Compat
      - BuilderBinAarch64V80Compat
      - BuilderBinClangTidy
@ -374,7 +380,7 @@ jobs:
    if: ${{ !failure() && !cancelled() }}
    uses: ./.github/workflows/reusable_test.yml
    with:
-      test_name: Stateless tests (release, analyzer, s3, DatabaseReplicated)
+      test_name: Stateless tests (release, old analyzer, s3, DatabaseReplicated)
      runner_type: func-tester
      data: ${{ needs.RunConfig.outputs.data }}
  FunctionalStatelessTestS3Debug:
@ -442,6 +448,14 @@ jobs:
      test_name: Stateless tests (debug)
      runner_type: func-tester
      data: ${{ needs.RunConfig.outputs.data }}
+  FunctionalStatelessTestAsanAzure:
+    needs: [RunConfig, BuilderDebAsan]
+    if: ${{ !failure() && !cancelled() }}
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Stateless tests (azure, asan)
+      runner_type: func-tester
+      data: ${{ needs.RunConfig.outputs.data }}
 ##############################################################################################
 ############################ FUNCTIONAl STATEFUL TESTS #######################################
 ##############################################################################################
@ -592,6 +606,14 @@ jobs:
      test_name: Stress test (tsan)
      runner_type: stress-tester
      data: ${{ needs.RunConfig.outputs.data }}
+  StressTestTsanAzure:
+    needs: [RunConfig, BuilderDebTsan]
+    if: ${{ !failure() && !cancelled() }}
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Stress test (azure, tsan)
+      runner_type: stress-tester
+      data: ${{ needs.RunConfig.outputs.data }}
  StressTestMsan:
    needs: [RunConfig, BuilderDebMsan]
    if: ${{ !failure() && !cancelled() }}
@ -632,7 +654,7 @@ jobs:
    if: ${{ !failure() && !cancelled() }}
    uses: ./.github/workflows/reusable_test.yml
    with:
-      test_name: Integration tests (asan, analyzer)
+      test_name: Integration tests (asan, old analyzer)
      runner_type: stress-tester
      data: ${{ needs.RunConfig.outputs.data }}
  IntegrationTestsTsan:
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@ -6,6 +6,7 @@ env:
  PYTHONUNBUFFERED: 1

 on:  # yamllint disable-line rule:truthy
+  merge_group:
  pull_request:
    types:
      - synchronize
@ -29,6 +30,7 @@ jobs:
          fetch-depth: 0 # to get version
          filter: tree:0
      - name: Labels check
+        if: ${{ github.event_name != 'merge_group' }}
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 run_check.py
@ -56,16 +58,9 @@ jobs:
              echo 'EOF'
            } >> "$GITHUB_OUTPUT"
      - name: Re-create GH statuses for skipped jobs if any
+        if: ${{ github.event_name != 'merge_group' }}
        run: |
            python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses
-      - name: Style check early
-        # hack to run style check before the docker build job if possible (style-check image not changed)
-        if: contains(fromJson(steps.runconfig.outputs.CI_DATA).jobs_data.jobs_to_do, 'Style check early')
-        run: |
-          DOCKER_TAG=$(echo '${{ toJson(fromJson(steps.runconfig.outputs.CI_DATA).docker_data.images) }}' | tr -d '\n')
-          export DOCKER_TAG=$DOCKER_TAG
-          python3 ./tests/ci/style_check.py --no-push
-          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check'
  BuildDockers:
    needs: [RunConfig]
    if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }}
@ -88,7 +83,7 @@ jobs:
        ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
        RCSK
  FastTest:
-    needs: [RunConfig, StyleCheck]
+    needs: [RunConfig, BuildDockers]
    if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Fast test') }}
    uses: ./.github/workflows/reusable_test.yml
    with:
@ -102,7 +97,7 @@ jobs:
  # for main CI chain
  #
  Builds_1:
-    needs: [RunConfig, FastTest]
+    needs: [RunConfig, StyleCheck, FastTest]
    if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }}
    # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab
    uses: ./.github/workflows/reusable_build_stage.yml
@ -163,15 +158,24 @@ jobs:
  #
  FinishCheck:
    if: ${{ !failure() && !cancelled() }}
-    needs: [Tests_1, Tests_2]
+    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2]
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
+      - name: Check sync status
+        if: ${{ github.event_name == 'merge_group' }}
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 sync_pr.py --status
      - name: Finish label
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
-          python3 finish_check.py
+          python3 finish_check.py ${{ (contains(needs.*.result, 'failure') && github.event_name == 'merge_group') && '--pipeline-failure' || '' }}
+      - name: Auto merge if approved
+        if: ${{ github.event_name != 'merge_group' }}
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 merge_pr.py --check-approved


--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@ -436,7 +436,7 @@ jobs:
    if: ${{ !failure() && !cancelled() }}
    uses: ./.github/workflows/reusable_test.yml
    with:
-      test_name: Integration tests (asan, analyzer)
+      test_name: Integration tests (asan, old analyzer)
      runner_type: stress-tester
      data: ${{ needs.RunConfig.outputs.data }}
  IntegrationTestsTsan:
--- a/.gitignore
+++ b/.gitignore
@ -164,6 +164,9 @@ tests/queries/0_stateless/*.generated-expect
 tests/queries/0_stateless/*.expect.history
 tests/integration/**/_gen

+# pytest --pdb history
+.pdb_history
+
 # rust
 /rust/**/target*
 # It is autogenerated from *.in
--- a/.gitmessage
+++ b/.gitmessage
@ -16,7 +16,7 @@
 #ci_set_reduced
 #ci_set_arm
 #ci_set_integration
-#ci_set_analyzer
+#ci_set_old_analyzer

 ## To run specified job in CI:
 #job_<JOB NAME>
--- a/.gitmodules
+++ b/.gitmodules
@ -22,9 +22,6 @@
 [submodule "contrib/capnproto"]
 	path = contrib/capnproto
 	url = https://github.com/ClickHouse/capnproto
-[submodule "contrib/double-conversion"]
-	path = contrib/double-conversion
-	url = https://github.com/google/double-conversion
 [submodule "contrib/re2"]
 	path = contrib/re2
 	url = https://github.com/google/re2
@ -176,9 +173,6 @@
 [submodule "contrib/libpq"]
 	path = contrib/libpq
 	url = https://github.com/ClickHouse/libpq
-[submodule "contrib/boringssl"]
-	path = contrib/boringssl
-	url = https://github.com/ClickHouse/boringssl
 [submodule "contrib/NuRaft"]
 	path = contrib/NuRaft
 	url = https://github.com/ClickHouse/NuRaft
@ -278,9 +272,6 @@
 [submodule "contrib/crc32-s390x"]
 	path = contrib/crc32-s390x
 	url = https://github.com/linux-on-ibm-z/crc32-s390x
-[submodule "contrib/openssl"]
-	path = contrib/openssl
-	url = https://github.com/openssl/openssl
 [submodule "contrib/google-benchmark"]
 	path = contrib/google-benchmark
 	url = https://github.com/google/benchmark
@ -326,6 +317,9 @@
 [submodule "contrib/crc32-vpmsum"]
 	path = contrib/crc32-vpmsum
 	url = https://github.com/antonblanchard/crc32-vpmsum.git
+[submodule "contrib/expected"]
+	path = contrib/expected
+	url = https://github.com/TartanLlama/expected
 [submodule "contrib/liburing"]
 	path = contrib/liburing
 	url = https://github.com/axboe/liburing
@ -369,3 +363,12 @@
 [submodule "contrib/idna"]
 	path = contrib/idna
 	url = https://github.com/ada-url/idna.git
+[submodule "contrib/rust_vendor"]
+	path = contrib/rust_vendor
+	url = https://github.com/ClickHouse/rust_vendor.git
+[submodule "contrib/openssl"]
+	path = contrib/openssl
+	url = https://github.com/ClickHouse/openssl.git
+[submodule "contrib/double-conversion"]
+	path = contrib/double-conversion
+	url = https://github.com/ClickHouse/double-conversion.git
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,10 +1,183 @@
 ### Table of Contents
+**[ClickHouse release v24.3 LTS, 2024-03-27](#243)**<br/>
 **[ClickHouse release v24.2, 2024-02-29](#242)**<br/>
 **[ClickHouse release v24.1, 2024-01-30](#241)**<br/>
 **[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**<br/>

 # 2024 Changelog

+### <a id="243"></a> ClickHouse release 24.3 LTS, 2024-03-27
+
+#### Upgrade Notes
+* The setting `allow_experimental_analyzer` is enabled by default and it switches the query analysis to a new implementation, which has better compatibility and feature completeness. The feature "analyzer" is considered beta instead of experimental. You can restore the old behavior by setting `compatibility` to `24.2` or disabling the `allow_experimental_analyzer` setting. Watch the [video on YouTube](https://www.youtube.com/watch?v=zhrOYQpgvkk).
+* ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. This is controlled by the settings, `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`, `output_format_arrow_string_as_string`. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases. Parquet/ORC/Arrow supports many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools lack support for the faster `lz4` compression method, that's why we set `zstd` by default. This is controlled by the settings `output_format_parquet_compression_method`, `output_format_orc_compression_method`, and `output_format_arrow_compression_method`. We changed the default to `zstd` for Parquet and ORC, but not Arrow (it is emphasized for low-level usages). [#61817](https://github.com/ClickHouse/ClickHouse/pull/61817) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* In the new ClickHouse version, the functions `geoDistance`, `greatCircleDistance`, and `greatCircleAngle` will use 64-bit double precision floating point data type for internal calculations and return type if all the arguments are Float64. This closes [#58476](https://github.com/ClickHouse/ClickHouse/issues/58476). In previous versions, the function always used Float32. You can switch to the old behavior by setting `geo_distance_returns_float64_on_float64_arguments` to `false` or setting `compatibility` to `24.2` or earlier. [#61848](https://github.com/ClickHouse/ClickHouse/pull/61848) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Co-authored with [Geet Patel](https://github.com/geetptl).
+* The obsolete in-memory data parts have been deprecated since version 23.5 and have not been supported since version 23.10. Now the remaining code is removed. Continuation of [#55186](https://github.com/ClickHouse/ClickHouse/issues/55186) and [#45409](https://github.com/ClickHouse/ClickHouse/issues/45409). It is unlikely that you have used in-memory data parts because they were available only before version 23.5 and only when you enabled them manually by specifying the corresponding SETTINGS for a MergeTree table. To check if you have in-memory data parts, run the following query: `SELECT part_type, count() FROM system.parts GROUP BY part_type ORDER BY part_type`. To disable the usage of in-memory data parts, do `ALTER TABLE ... MODIFY SETTING min_bytes_for_compact_part = DEFAULT, min_rows_for_compact_part = DEFAULT`. Before upgrading from old ClickHouse releases, first check that you don't have in-memory data parts. If there are in-memory data parts, disable them first, then wait while there are no in-memory data parts and continue the upgrade. [#61127](https://github.com/ClickHouse/ClickHouse/pull/61127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Changed the column name from `duration_ms` to `duration_microseconds` in the `system.zookeeper` table to reflect the reality that the duration is in the microsecond resolution. [#60774](https://github.com/ClickHouse/ClickHouse/pull/60774) ([Duc Canh Le](https://github.com/canhld94)).
+* Reject incoming INSERT queries in case when query-level settings `async_insert` and `deduplicate_blocks_in_dependent_materialized_views` are enabled together. This behaviour is controlled by a setting `throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert` and enabled by default. This is a continuation of https://github.com/ClickHouse/ClickHouse/pull/59699 needed to unblock https://github.com/ClickHouse/ClickHouse/pull/59915. [#60888](https://github.com/ClickHouse/ClickHouse/pull/60888) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Utility `clickhouse-copier` is moved to a separate repository on GitHub: https://github.com/ClickHouse/copier. It is no longer included in the bundle but is still available as a separate download. This closes: [#60734](https://github.com/ClickHouse/ClickHouse/issues/60734) This closes: [#60540](https://github.com/ClickHouse/ClickHouse/issues/60540) This closes: [#60250](https://github.com/ClickHouse/ClickHouse/issues/60250) This closes: [#52917](https://github.com/ClickHouse/ClickHouse/issues/52917) This closes: [#51140](https://github.com/ClickHouse/ClickHouse/issues/51140) This closes: [#47517](https://github.com/ClickHouse/ClickHouse/issues/47517) This closes: [#47189](https://github.com/ClickHouse/ClickHouse/issues/47189) This closes: [#46598](https://github.com/ClickHouse/ClickHouse/issues/46598) This closes: [#40257](https://github.com/ClickHouse/ClickHouse/issues/40257) This closes: [#36504](https://github.com/ClickHouse/ClickHouse/issues/36504) This closes: [#35485](https://github.com/ClickHouse/ClickHouse/issues/35485) This closes: [#33702](https://github.com/ClickHouse/ClickHouse/issues/33702) This closes: [#26702](https://github.com/ClickHouse/ClickHouse/issues/26702).
+* To increase compatibility with MySQL, the compatibility alias `locate` now accepts arguments `(needle, haystack[, start_pos])` by default. The previous behavior `(haystack, needle[, start_pos])` can be restored by setting `function_locate_has_mysql_compatible_argument_order = 0`. [#61092](https://github.com/ClickHouse/ClickHouse/pull/61092) ([Robert Schulze](https://github.com/rschu1ze)).
+* Forbid `SimpleAggregateFunction` in `ORDER BY` of `MergeTree` tables (like `AggregateFunction` is forbidden, but they are forbidden because they are not comparable) by default (use `allow_suspicious_primary_key` to allow them). [#61399](https://github.com/ClickHouse/ClickHouse/pull/61399) ([Azat Khuzhin](https://github.com/azat)).
+* The `Ordinary` database engine is deprecated. You will receive a warning in clickhouse-client if your server is using it. This closes [#52229](https://github.com/ClickHouse/ClickHouse/issues/52229). [#56942](https://github.com/ClickHouse/ClickHouse/pull/56942) ([shabroo](https://github.com/shabroo)).
+
+#### New Feature
+* Support reading and writing backups as `tar` (in addition to `zip`). [#59535](https://github.com/ClickHouse/ClickHouse/pull/59535) ([josh-hildred](https://github.com/josh-hildred)).
+* Implemented support for S3 Express buckets. [#59965](https://github.com/ClickHouse/ClickHouse/pull/59965) ([Nikita Taranov](https://github.com/nickitat)).
+* Allow to attach parts from a different disk (using copy instead of hard link). [#60112](https://github.com/ClickHouse/ClickHouse/pull/60112) ([Unalian](https://github.com/Unalian)).
+* Size-capped `Memory` tables: controlled by their settings, `min_bytes_to_keep, max_bytes_to_keep, min_rows_to_keep` and `max_rows_to_keep`. [#60612](https://github.com/ClickHouse/ClickHouse/pull/60612) ([Jake Bamrah](https://github.com/JakeBamrah)).
+* Separate limits on number of waiting and executing queries. Added new server setting `max_waiting_queries` that limits the number of queries waiting due to `async_load_databases`. Existing limits on number of executing queries no longer count waiting queries. [#61053](https://github.com/ClickHouse/ClickHouse/pull/61053) ([Sergei Trifonov](https://github.com/serxa)).
+* Added a table `system.keywords` which contains all the keywords from parser. Mostly needed and will be used for better fuzzing and syntax highlighting. [#51808](https://github.com/ClickHouse/ClickHouse/pull/51808) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Add support for `ATTACH PARTITION ALL`. [#61107](https://github.com/ClickHouse/ClickHouse/pull/61107) ([Kirill Nikiforov](https://github.com/allmazz)).
+* Add a new function, `getClientHTTPHeader`. This closes [#54665](https://github.com/ClickHouse/ClickHouse/issues/54665). Co-authored with @lingtaolf. [#61820](https://github.com/ClickHouse/ClickHouse/pull/61820) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add `generate_series` as a table function (compatibility alias for PostgreSQL to the existing `numbers` function). This function generates table with an arithmetic progression with natural numbers. [#59390](https://github.com/ClickHouse/ClickHouse/pull/59390) ([divanik](https://github.com/divanik)).
+* Added a mode for `topK`/`topKWeighted` which returns the count of values and its error. [#54508](https://github.com/ClickHouse/ClickHouse/pull/54508) ([UnamedRus](https://github.com/UnamedRus)).
+* Added function `toMillisecond` which returns the millisecond component for values of type `DateTime` or `DateTime64`. [#60281](https://github.com/ClickHouse/ClickHouse/pull/60281) ([Shaun Struwig](https://github.com/Blargian)).
+* Allow configuring HTTP redirect handlers for clickhouse-server. For example, you can make `/` redirect to the Play UI. [#60390](https://github.com/ClickHouse/ClickHouse/pull/60390) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Performance Improvement
+* Optimized function `dotProduct` to omit unnecessary and expensive memory copies. [#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)).
+* 30x faster printing for 256-bit integers. [#61100](https://github.com/ClickHouse/ClickHouse/pull/61100) ([Raúl Marín](https://github.com/Algunenano)).
+* If the table's primary key contains mostly useless columns, don't keep them in memory. This is controlled by a new setting `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` with the value `0.9` by default, which means: for a composite primary key, if a column changes its value for at least 0.9 of all the times, the next columns after it will not be loaded. [#60255](https://github.com/ClickHouse/ClickHouse/pull/60255) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Improve the performance of serialized aggregation methods when involving multiple `Nullable` columns. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)).
+* Lazy builds JSON's output to improve performance of ALL JOIN. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)).
+* Make HTTP/HTTPs connections with external services, such as AWS S3 reusable for all use cases. Even when the response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)).
+* Improvements to aggregate functions `argMin` / `argMax` / `any` / `anyLast` / `anyHeavy`, as well as `ORDER BY (u8/u16/u32/u64/i8/i16/i32/i64) LIMIT 1` queries. [#58640](https://github.com/ClickHouse/ClickHouse/pull/58640) ([Raúl Marín](https://github.com/Algunenano)).
+* Trivial optimization for column's filter. Peak memory can be reduced to 44% of the original in some cases. [#59698](https://github.com/ClickHouse/ClickHouse/pull/59698) ([李扬](https://github.com/taiyang-li)).
+* Execute `multiIf` function in a columnar fashion when the result type's underlying type is a number. [#60384](https://github.com/ClickHouse/ClickHouse/pull/60384) ([李扬](https://github.com/taiyang-li)).
+* Faster (almost 2x) mutexes. [#60823](https://github.com/ClickHouse/ClickHouse/pull/60823) ([Azat Khuzhin](https://github.com/azat)).
+* Drain multiple connections in parallel when a distributed query is finishing. [#60845](https://github.com/ClickHouse/ClickHouse/pull/60845) ([lizhuoyu5](https://github.com/lzydmxy)).
+* Optimize data movement between columns of a Nullable number or a Nullable string, which improves some micro-benchmarks. [#60846](https://github.com/ClickHouse/ClickHouse/pull/60846) ([李扬](https://github.com/taiyang-li)).
+* Operations with the filesystem cache will suffer less from the lock contention. [#61066](https://github.com/ClickHouse/ClickHouse/pull/61066) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Optimize array join and other JOINs by preventing a wrong compiler's optimization. Close [#61074](https://github.com/ClickHouse/ClickHouse/issues/61074).  [#61075](https://github.com/ClickHouse/ClickHouse/pull/61075) ([李扬](https://github.com/taiyang-li)).
+* If a query with a syntax error contained the `COLUMNS` matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error. The compiled regexp was put to AST. But the letter A in AST means "abstract" which means it should not contain heavyweight objects. Parts of AST can be created and discarded during parsing, including a large number of backtracking. This leads to slowness on the parsing side and consequently allows DoS by a readonly user. But the main problem is that it prevents progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a new analyzer pass to optimize the IN operator for a single value. [#61564](https://github.com/ClickHouse/ClickHouse/pull/61564) ([LiuNeng](https://github.com/liuneng1994)).
+* DNSResolver shuffles set of resolved IPs which is needed to uniformly utilize multiple endpoints of AWS S3. [#60965](https://github.com/ClickHouse/ClickHouse/pull/60965) ([Sema Checherinda](https://github.com/CheSema)).
+
+#### Experimental Feature
+* Support parallel reading for Azure blob storage. This improves the performance of the experimental Azure object storage. [#61503](https://github.com/ClickHouse/ClickHouse/pull/61503) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Add asynchronous WriteBuffer for Azure blob storage similar to S3. This improves the performance of the experimental Azure object storage. [#59929](https://github.com/ClickHouse/ClickHouse/pull/59929) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Use managed identity for backups IO when using Azure Blob Storage. Add a setting to prevent ClickHouse from attempting to create a non-existent container, which requires permissions at the storage account level. [#61785](https://github.com/ClickHouse/ClickHouse/pull/61785) ([Daniel Pozo Escalona](https://github.com/danipozo)).
+* Add a setting `parallel_replicas_allow_in_with_subquery = 1` which allows subqueries for IN work with parallel replicas. [#60950](https://github.com/ClickHouse/ClickHouse/pull/60950) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* A change for the "zero-copy" replication: all zero copy locks related to a table have to be dropped when the table is dropped. The directory which contains these locks has to be removed also. [#57575](https://github.com/ClickHouse/ClickHouse/pull/57575) ([Sema Checherinda](https://github.com/CheSema)).
+
+#### Improvement
+* Use `MergeTree` as a default table engine. [#60524](https://github.com/ClickHouse/ClickHouse/pull/60524) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Enable `output_format_pretty_row_numbers` by default. It is better for usability. [#61791](https://github.com/ClickHouse/ClickHouse/pull/61791) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* In the previous version, some numbers in Pretty formats were not pretty enough. [#61794](https://github.com/ClickHouse/ClickHouse/pull/61794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* A long value in Pretty formats won't be cut if it is the single value in the resultset, such as in the result of the `SHOW CREATE TABLE` query. [#61795](https://github.com/ClickHouse/ClickHouse/pull/61795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Similarly to `clickhouse-local`, `clickhouse-client` will accept the `--output-format` option as a synonym to the `--format` option. This closes [#59848](https://github.com/ClickHouse/ClickHouse/issues/59848). [#61797](https://github.com/ClickHouse/ClickHouse/pull/61797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* If stdout is a terminal and the output format is not specified, `clickhouse-client` and similar tools will use `PrettyCompact` by default, similarly to the interactive mode. `clickhouse-client` and `clickhouse-local` will handle command line arguments for input and output formats in a unified fashion. This closes [#61272](https://github.com/ClickHouse/ClickHouse/issues/61272). [#61800](https://github.com/ClickHouse/ClickHouse/pull/61800) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Underscore digit groups in Pretty formats for better readability. This is controlled by a new setting, `output_format_pretty_highlight_digit_groups`. [#61802](https://github.com/ClickHouse/ClickHouse/pull/61802) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add ability to override initial INSERT settings via `SYSTEM FLUSH DISTRIBUTED`. [#61832](https://github.com/ClickHouse/ClickHouse/pull/61832) ([Azat Khuzhin](https://github.com/azat)).
+* Enable processors profiling (time spent/in and out bytes for sorting, aggregation, ...) by default. [#61096](https://github.com/ClickHouse/ClickHouse/pull/61096) ([Azat Khuzhin](https://github.com/azat)).
+* Support files without format extension in Filesystem database. [#60795](https://github.com/ClickHouse/ClickHouse/pull/60795) ([Kruglov Pavel](https://github.com/Avogar)).
+* Make all format names case insensitive, like Tsv, or TSV, or tsv, or even rowbinary. [#60420](https://github.com/ClickHouse/ClickHouse/pull/60420) ([豪肥肥](https://github.com/HowePa)). I appreciate if you will continue to write it correctly, e.g., `JSON` 😇, not `Json` 🤮, but we don't mind if you spell it as you prefer.
+* Added `none_only_active` mode for `distributed_ddl_output_mode` setting. [#60340](https://github.com/ClickHouse/ClickHouse/pull/60340) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* The advanced dashboard has slightly better colors for multi-line graphs. [#60391](https://github.com/ClickHouse/ClickHouse/pull/60391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The Advanced dashboard now has controls always visible on scrolling. This allows you to add a new chart without scrolling up. [#60692](https://github.com/ClickHouse/ClickHouse/pull/60692) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* While running the `MODIFY COLUMN` query for materialized views, check the inner table's structure to ensure every column exists. [#47427](https://github.com/ClickHouse/ClickHouse/pull/47427) ([sunny](https://github.com/sunny19930321)).
+* String types and Enums can be used in the same context, such as: arrays, UNION queries, conditional expressions. This closes [#60726](https://github.com/ClickHouse/ClickHouse/issues/60726). [#60727](https://github.com/ClickHouse/ClickHouse/pull/60727) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Allow declaring Enums in the structure of external data for query processing (this is an immediate temporary table that you can provide for your query). [#57857](https://github.com/ClickHouse/ClickHouse/pull/57857) ([Duc Canh Le](https://github.com/canhld94)).
+* Consider lightweight deleted rows when selecting parts to merge, so the disk size of the resulting part will be estimated better. [#58223](https://github.com/ClickHouse/ClickHouse/pull/58223) ([Zhuo Qiu](https://github.com/jewelzqiu)).
+* Added comments for columns for more system tables. Continuation of https://github.com/ClickHouse/ClickHouse/pull/58356. [#59016](https://github.com/ClickHouse/ClickHouse/pull/59016) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Now we can use virtual columns in PREWHERE. It's worthwhile for non-const virtual columns like `_part_offset`. [#59033](https://github.com/ClickHouse/ClickHouse/pull/59033) ([Amos Bird](https://github.com/amosbird)). Improved overall usability of virtual columns. Now it is allowed to use virtual columns in `PREWHERE` (it's worthwhile for non-const virtual columns like `_part_offset`). Now a builtin documentation is available for virtual columns as a comment of column in `DESCRIBE` query with enabled setting `describe_include_virtual_columns`. [#60205](https://github.com/ClickHouse/ClickHouse/pull/60205) ([Anton Popov](https://github.com/CurtizJ)).
+* Instead of using a constant key, now object storage generates key for determining remove objects capability. [#59495](https://github.com/ClickHouse/ClickHouse/pull/59495) ([Sema Checherinda](https://github.com/CheSema)).
+* Allow "local" as object storage type instead of "local_blob_storage". [#60165](https://github.com/ClickHouse/ClickHouse/pull/60165) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Parallel flush of pending INSERT blocks of Distributed engine on `DETACH`/server shutdown and `SYSTEM FLUSH DISTRIBUTED` (Parallelism will work only if you have multi-disk policy for a table (like everything in the Distributed engine right now)). [#60225](https://github.com/ClickHouse/ClickHouse/pull/60225) ([Azat Khuzhin](https://github.com/azat)).
+* Add a setting to force read-through cache for merges. [#60308](https://github.com/ClickHouse/ClickHouse/pull/60308) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* An improvement for the MySQL compatibility protocol. The issue [#57598](https://github.com/ClickHouse/ClickHouse/issues/57598) mentions a variant behaviour regarding transaction handling. An issued COMMIT/ROLLBACK when no transaction is active is reported as an error contrary to MySQL behaviour. [#60338](https://github.com/ClickHouse/ClickHouse/pull/60338) ([PapaToemmsn](https://github.com/PapaToemmsn)).
+* Function `substring` now has a new alias `byteSlice`. [#60494](https://github.com/ClickHouse/ClickHouse/pull/60494) ([Robert Schulze](https://github.com/rschu1ze)).
+* Renamed server setting `dns_cache_max_size` to `dns_cache_max_entries` to reduce ambiguity. [#60500](https://github.com/ClickHouse/ClickHouse/pull/60500) ([Kirill Nikiforov](https://github.com/allmazz)).
+* `SHOW INDEX | INDEXES | INDICES | KEYS` no longer sorts by the primary key columns (which was unintuitive). [#60514](https://github.com/ClickHouse/ClickHouse/pull/60514) ([Robert Schulze](https://github.com/rschu1ze)).
+* Keeper improvement: abort during startup if an invalid snapshot is detected to avoid data loss. [#60537](https://github.com/ClickHouse/ClickHouse/pull/60537) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
+* Keeper improvement: support `leadership_expiry_ms` in Keeper's settings. [#60806](https://github.com/ClickHouse/ClickHouse/pull/60806) ([Brokenice0415](https://github.com/Brokenice0415)).
+* Always infer exponential numbers in JSON formats regardless of the setting `input_format_try_infer_exponent_floats`. Add setting `input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects` that allows to use String type for ambiguous paths instead of an exception during named Tuples inference from JSON objects. [#60808](https://github.com/ClickHouse/ClickHouse/pull/60808) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add support for `START TRANSACTION` syntax typically used in MySQL syntax, resolving https://github.com/ClickHouse/ClickHouse/discussions/60865. [#60886](https://github.com/ClickHouse/ClickHouse/pull/60886) ([Zach Naimon](https://github.com/ArctypeZach)).
+* Add a flag for the full-sorting merge join algorithm to treat null as biggest/smallest, so the behavior can be compatible with other SQL systems, like Apache Spark. [#60896](https://github.com/ClickHouse/ClickHouse/pull/60896) ([loudongfeng](https://github.com/loudongfeng)).
+* Support detecting the output format by file extension in `clickhouse-client` and `clickhouse-local`. [#61036](https://github.com/ClickHouse/ClickHouse/pull/61036) ([豪肥肥](https://github.com/HowePa)).
+* Update memory limit in runtime when Linux's CGroups value changed. [#61049](https://github.com/ClickHouse/ClickHouse/pull/61049) ([Han Fei](https://github.com/hanfei1991)).
+* Add the function `toUInt128OrZero`, which was missed by mistake (the mistake is related to https://github.com/ClickHouse/ClickHouse/pull/945). The compatibility aliases `FROM_UNIXTIME` and `DATE_FORMAT` (they are not ClickHouse-native and only exist for MySQL compatibility) have been made case insensitive, as expected for SQL-compatibility aliases. [#61114](https://github.com/ClickHouse/ClickHouse/pull/61114) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Improvements for the access checks, allowing the revocation of unpossessed rights in case the target user doesn't have the revoking grants either. Example: `GRANT SELECT ON *.* TO user1; REVOKE SELECT ON system.* FROM user1;`. [#61115](https://github.com/ClickHouse/ClickHouse/pull/61115) ([pufit](https://github.com/pufit)).
+* Fix `has()` function with `Nullable` column (fixes [#60214](https://github.com/ClickHouse/ClickHouse/issues/60214)). [#61249](https://github.com/ClickHouse/ClickHouse/pull/61249) ([Mikhail Koviazin](https://github.com/mkmkme)).
+* Now it's possible to specify the attribute `merge="true"` in config substitutions for subtrees `<include from_zk="/path" merge="true">`. If this attribute is specified, ClickHouse will merge the subtree with the existing configuration; otherwise, the default behavior is to append the new content to the configuration. [#61299](https://github.com/ClickHouse/ClickHouse/pull/61299) ([alesapin](https://github.com/alesapin)).
+* Add async metrics for virtual memory mappings: `VMMaxMapCount` & `VMNumMaps`. Closes [#60662](https://github.com/ClickHouse/ClickHouse/issues/60662). [#61354](https://github.com/ClickHouse/ClickHouse/pull/61354) ([Tuan Pham Anh](https://github.com/tuanpavn)).
+* Use `temporary_files_codec` setting in all places where we create temporary data, for example external memory sorting and external memory GROUP BY. Before it worked only in `partial_merge` JOIN algorithm. [#61456](https://github.com/ClickHouse/ClickHouse/pull/61456) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add a new setting `max_parser_backtracks` which allows to limit the complexity of query parsing. [#61502](https://github.com/ClickHouse/ClickHouse/pull/61502) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Less contention during dynamic resize of the filesystem cache. [#61524](https://github.com/ClickHouse/ClickHouse/pull/61524) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Disallow sharded mode of StorageS3 queue, because it will be rewritten. [#61537](https://github.com/ClickHouse/ClickHouse/pull/61537) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed typo: from `use_leagcy_max_level` to `use_legacy_max_level`. [#61545](https://github.com/ClickHouse/ClickHouse/pull/61545) ([William Schoeffel](https://github.com/wiledusc)).
+* Remove some duplicate entries in `system.blob_storage_log`. [#61622](https://github.com/ClickHouse/ClickHouse/pull/61622) ([YenchangChan](https://github.com/YenchangChan)).
+* Added `current_user` function as a compatibility alias for MySQL. [#61770](https://github.com/ClickHouse/ClickHouse/pull/61770) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Fix inconsistent floating point aggregate function states in mixed x86-64 / ARM clusters [#60610](https://github.com/ClickHouse/ClickHouse/pull/60610) ([Harry Lee](https://github.com/HarryLeeIBM)).
+
+#### Build/Testing/Packaging Improvement
+* The real-time query profiler now works on AArch64. In previous versions, it worked only when a program didn't spend time inside a syscall. [#60807](https://github.com/ClickHouse/ClickHouse/pull/60807) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* ClickHouse version has been added to docker labels. Closes [#54224](https://github.com/ClickHouse/ClickHouse/issues/54224). [#60949](https://github.com/ClickHouse/ClickHouse/pull/60949) ([Nikolay Monkov](https://github.com/nikmonkov)).
+* Upgrade `prqlc` to 0.11.3. [#60616](https://github.com/ClickHouse/ClickHouse/pull/60616) ([Maximilian Roos](https://github.com/max-sixty)).
+* Add generic query text fuzzer in `clickhouse-local`. [#61508](https://github.com/ClickHouse/ClickHouse/pull/61508) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Fix finished_mutations_to_keep=0 for MergeTree (as docs says 0 is to keep everything) [#60031](https://github.com/ClickHouse/ClickHouse/pull/60031) ([Azat Khuzhin](https://github.com/azat)).
+* Something was wrong with the FINAL optimization, here is how the author describes it: "PartsSplitter invalid ranges for the same part". [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
+* Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)).
+* An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)).
+* Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
+* Something was wrong with experimental KQL (Kusto) support: fix `max_query_size_for_kql_compound_operator`: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
+* Keeper fix: add timeouts when waiting for commit logs [#60544](https://github.com/ClickHouse/ClickHouse/pull/60544) ([Antonio Andelic](https://github.com/antonio2368)).
+* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix reading from MergeTree with non-deterministic functions in filter [#60586](https://github.com/ClickHouse/ClickHouse/pull/60586) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix logical error on bad compatibility setting value type [#60596](https://github.com/ClickHouse/ClickHouse/pull/60596) ([Kruglov Pavel](https://github.com/Avogar)).
+* fix(prql): Robust panic handler [#60615](https://github.com/ClickHouse/ClickHouse/pull/60615) ([Maximilian Roos](https://github.com/max-sixty)).
+* Fix `intDiv` for decimal and date arguments [#60672](https://github.com/ClickHouse/ClickHouse/pull/60672) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Fix: expand CTE in alter modify query [#60682](https://github.com/ClickHouse/ClickHouse/pull/60682) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix system.parts for non-Atomic/Ordinary database engine (i.e. Memory) [#60689](https://github.com/ClickHouse/ClickHouse/pull/60689) ([Azat Khuzhin](https://github.com/azat)).
+* Fix "Invalid storage definition in metadata file" for parameterized views [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)).
+* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove wrong assertion in aggregate function quantileGK [#60740](https://github.com/ClickHouse/ClickHouse/pull/60740) ([李扬](https://github.com/taiyang-li)).
+* Fix insert-select + insert_deduplication_token bug by setting streams to 1 [#60745](https://github.com/ClickHouse/ClickHouse/pull/60745) ([Jordi Villar](https://github.com/jrdi)).
+* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
+* Fix toStartOfInterval [#60763](https://github.com/ClickHouse/ClickHouse/pull/60763) ([Andrey Zvonov](https://github.com/zvonand)).
+* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
+* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix a possible hang on error in HashedDictionaryParallelLoader [#60926](https://github.com/ClickHouse/ClickHouse/pull/60926) ([vdimir](https://github.com/vdimir)).
+* Fix async RESTORE with Replicated database (experimental feature) [#60934](https://github.com/ClickHouse/ClickHouse/pull/60934) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix deadlock in async inserts to `Log` tables via native protocol [#61055](https://github.com/ClickHouse/ClickHouse/pull/61055) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix lazy execution of default argument in dictGetOrDefault for RangeHashedDictionary [#61196](https://github.com/ClickHouse/ClickHouse/pull/61196) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix usage of session_token in S3 engine [#61234](https://github.com/ClickHouse/ClickHouse/pull/61234) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix bugs in show database [#61269](https://github.com/ClickHouse/ClickHouse/pull/61269) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix logical error in RabbitMQ storage with MATERIALIZED columns [#61320](https://github.com/ClickHouse/ClickHouse/pull/61320) ([vdimir](https://github.com/vdimir)).
+* Fix CREATE OR REPLACE DICTIONARY [#61356](https://github.com/ClickHouse/ClickHouse/pull/61356) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix an issue with splitting the actions DAG [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix finishing a failed RESTORE [#61466](https://github.com/ClickHouse/ClickHouse/pull/61466) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)).
+* Allow queuing in restore pool [#61475](https://github.com/ClickHouse/ClickHouse/pull/61475) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix an inconsistency when reading system.parts using UUID. [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)).
+* Fix ALTER QUERY MODIFY SQL SECURITY [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)).
+* Fix a crash in window view (experimental feature) [#61526](https://github.com/ClickHouse/ClickHouse/pull/61526) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix `repeat` with non-native integers [#61527](https://github.com/ClickHouse/ClickHouse/pull/61527) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix client's `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix crash in arrayPartialReverseSort [#61539](https://github.com/ClickHouse/ClickHouse/pull/61539) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix `addDays` causing an error when used with DateTime64 [#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)).
+* Disallow LowCardinality input type for JSONExtract [#61617](https://github.com/ClickHouse/ClickHouse/pull/61617) ([Julia Kartseva](https://github.com/jkartseva)).
+* Fix `system.part_log` for async insert with deduplication [#61620](https://github.com/ClickHouse/ClickHouse/pull/61620) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix a `Non-ready set` exception for system.parts. [#61666](https://github.com/ClickHouse/ClickHouse/pull/61666) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix actual_part_name for REPLACE_RANGE (`Entry actual part isn't empty yet`) [#61675](https://github.com/ClickHouse/ClickHouse/pull/61675) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix a sanitizer report in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).
+* Fix an observation that the RANGE frame is not supported for Nullable columns. [#61766](https://github.com/ClickHouse/ClickHouse/pull/61766) ([YuanLiu](https://github.com/ditgittube)).
+
 ### <a id="242"></a> ClickHouse release 24.2, 2024-02-29

 #### Backward Incompatible Change
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -455,8 +455,6 @@ endif ()

 enable_testing() # Enable for tests without binary

-option(ENABLE_OPENSSL "This option performs a build with OpenSSL. NOTE! This option is insecure and should never be used. By default, ClickHouse uses and only supports BoringSSL" OFF)
-
 if (ARCH_S390X)
    set(ENABLE_OPENSSL_DYNAMIC_DEFAULT ON)
 else ()
--- a/README.md
+++ b/README.md
@ -28,7 +28,6 @@ curl https://clickhouse.com/ | sh
 * [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
 * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
 * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev.
-* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
 * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.

 ## Monthly Release & Community Call
@ -40,15 +39,8 @@ Every month we get together with the community (users, contributors, customers,

 ## Upcoming Events

-Keep an eye out for upcoming meetups and eventsaround the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
+Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.

-* [ClickHouse Meetup in Bellevue](https://www.meetup.com/clickhouse-seattle-user-group/events/298650371/) - Mar 11
-* [ClickHouse Meetup at Ramp's Offices in NYC](https://www.meetup.com/clickhouse-new-york-user-group/events/298640542/) - Mar 19
-* [ClickHouse Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/299479750/) - Mar 20
-* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/298997115/) - Mar 21
-* [ClickHouse Meetup in Bengaluru](https://www.meetup.com/clickhouse-bangalore-user-group/events/299479850/) - Mar 23
-* [ClickHouse Meetup in Zurich](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/299628922/) - Apr 16
-* [ClickHouse Meetup in Copenhagen](https://www.meetup.com/clickhouse-denmark-meetup-group/events/299629133/) - Apr 23
 * [ClickHouse Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28


--- a/SECURITY.md
+++ b/SECURITY.md
@ -13,18 +13,16 @@ The following versions of ClickHouse server are currently being supported with s

 | Version | Supported |
 |:-|:-|
+| 24.3 | ✔️ |
 | 24.2 | ✔️ |
 | 24.1 | ✔️ |
-| 23.12 | ✔️ |
-| 23.11 | ❌ |
-| 23.10 | ❌ |
-| 23.9 | ❌ |
+| 23.9 – 23.12 | ❌ |
 | 23.8 | ✔️ |
 | 23.7 | ❌ |
 | 23.6 | ❌ |
 | 23.5 | ❌ |
 | 23.4 | ❌ |
-| 23.3 | ✔️ |
+| 23.3 | ❌ |
 | 23.2 | ❌ |
 | 23.1 | ❌ |
 | 22.* | ❌ |
--- a/base/base/scope_guard.h
+++ b/base/base/scope_guard.h
@ -29,11 +29,13 @@ public:
    requires std::is_convertible_v<G, F>
    constexpr BasicScopeGuard & operator=(BasicScopeGuard<G> && src) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations)
    {
-        if (this != &src)
+        if constexpr (std::is_same_v<G, F>)
        {
-            invoke();
-            function = src.release();
+            if (this == &src)
+                return *this;
        }
+        invoke();
+        function = src.release();
        return *this;
    }

--- a/base/base/wide_integer_impl.h
+++ b/base/base/wide_integer_impl.h
@ -13,8 +13,6 @@
 #include <tuple>
 #include <limits>

-#include <boost/math/special_functions/fpclassify.hpp>
-
 // NOLINTBEGIN(*)

 /// Use same extended double for all platforms
@ -22,6 +20,7 @@
 #define CONSTEXPR_FROM_DOUBLE constexpr
 using FromDoubleIntermediateType = long double;
 #else
+#include <boost/math/special_functions/fpclassify.hpp>
 #include <boost/multiprecision/cpp_bin_float.hpp>
 /// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended`
 #define CONSTEXPR_FROM_DOUBLE
@ -309,6 +308,13 @@ struct integer<Bits, Signed>::_impl
        constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max();
        static_assert(std::is_same_v<T, double> || std::is_same_v<T, FromDoubleIntermediateType>);
        /// Implementation specific behaviour on overflow (if we don't check here, stack overflow will triggered in bigint_cast).
+#if (LDBL_MANT_DIG == 64)
+        if (!std::isfinite(t))
+        {
+            self = 0;
+            return;
+        }
+#else
        if constexpr (std::is_same_v<T, double>)
        {
            if (!std::isfinite(t))
@ -325,6 +331,7 @@ struct integer<Bits, Signed>::_impl
                return;
            }
        }
+#endif

        const T alpha = t / static_cast<T>(max_int);

--- a/base/poco/Crypto/src/OpenSSLInitializer.cpp
+++ b/base/poco/Crypto/src/OpenSSLInitializer.cpp
@ -23,6 +23,9 @@
 #include <openssl/conf.h>
 #endif

+#if __has_feature(address_sanitizer)
+#include <sanitizer/lsan_interface.h>
+#endif

 using Poco::RandomInputStream;
 using Poco::Thread;
@ -67,12 +70,18 @@ void OpenSSLInitializer::initialize()
 		SSL_library_init();
 		SSL_load_error_strings();
 		OpenSSL_add_all_algorithms();
-		
+
 		char seed[SEEDSIZE];
 		RandomInputStream rnd;
 		rnd.read(seed, sizeof(seed));
-		RAND_seed(seed, SEEDSIZE);
-		
+        {
+#   if __has_feature(address_sanitizer)
+            /// Leak sanitizer (part of address sanitizer) thinks that a few bytes of memory in OpenSSL are allocated during this call but never released.
+            __lsan::ScopedDisabler lsan_disabler;
+#endif
+		    RAND_seed(seed, SEEDSIZE);
+        }
+
 		int nMutexes = CRYPTO_num_locks();
 		_mutexes = new Poco::FastMutex[nMutexes];
 		CRYPTO_set_locking_callback(&OpenSSLInitializer::lock);
@ -80,8 +89,8 @@ void OpenSSLInitializer::initialize()
 // https://sourceforge.net/p/poco/bugs/110/
 //
 // From http://www.openssl.org/docs/crypto/threads.html :
-// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(), 
-//  then a default implementation is used - on Windows and BeOS this uses the system's 
+// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(),
+//  then a default implementation is used - on Windows and BeOS this uses the system's
 //  default thread identifying APIs"
 		CRYPTO_set_id_callback(&OpenSSLInitializer::id);
 		CRYPTO_set_dynlock_create_callback(&OpenSSLInitializer::dynlockCreate);
@ -100,7 +109,7 @@ void OpenSSLInitializer::uninitialize()
 		CRYPTO_set_locking_callback(0);
 		CRYPTO_set_id_callback(0);
 		delete [] _mutexes;
-		
+
 		CONF_modules_free();
 	}
 }
--- a/base/poco/Foundation/include/Poco/Message.h
+++ b/base/poco/Foundation/include/Poco/Message.h
@ -66,9 +66,11 @@ public:
    /// The thread and process ids are set.

    Message(
-        const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {});
+        const std::string & source, const std::string & text, Priority prio, const char * file, int line,
+        std::string_view fmt_str = {}, const std::vector<std::string> & fmt_str_args = {});
    Message(
-        std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str);
+        std::string && source, std::string && text, Priority prio, const char * file, int line,
+        std::string_view fmt_str, std::vector<std::string> && fmt_str_args);
    /// Creates a Message with the given source, text, priority,
    /// source file path and line.
    ///
@ -161,6 +163,9 @@ public:
    std::string_view getFormatString() const;
    void setFormatString(std::string_view fmt_str);

+    const std::vector<std::string> & getFormatStringArgs() const;
+    void setFormatStringArgs(const std::vector<std::string> & fmt_str_args);
+
    int getSourceLine() const;
    /// Returns the source file line of the statement
    /// generating the log message. May be 0
@ -210,6 +215,7 @@ private:
    int _line;
    StringMap * _pMap;
    std::string_view _fmt_str;
+    std::vector<std::string> _fmt_str_args;
 };


--- a/base/poco/Foundation/src/Message.cpp
+++ b/base/poco/Foundation/src/Message.cpp
@ -46,7 +46,9 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
 }


-Message::Message(const std::string& source, const std::string& text, Priority prio, const char* file, int line, std::string_view fmt_str):
+Message::Message(
+        const std::string& source, const std::string& text, Priority prio, const char* file, int line,
+        std::string_view fmt_str, const std::vector<std::string>& fmt_str_args):
 	_source(source), 
 	_text(text), 
 	_prio(prio), 
@ -54,13 +56,16 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
 	_file(file),
 	_line(line),
 	_pMap(0),
-    _fmt_str(fmt_str)
+	_fmt_str(fmt_str),
+	_fmt_str_args(fmt_str_args)
 {
 	init();
 }


-Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str):
+Message::Message(
+        std::string && source, std::string && text, Priority prio, const char * file, int line,
+        std::string_view fmt_str, std::vector<std::string> && fmt_str_args):
    _source(std::move(source)),
    _text(std::move(text)),
    _prio(prio),
@ -68,7 +73,8 @@ Message::Message(std::string && source, std::string && text, Priority prio, cons
    _file(file),
    _line(line),
    _pMap(0),
-    _fmt_str(fmt_str)
+    _fmt_str(fmt_str),
+    _fmt_str_args(std::move(fmt_str_args))
 {
    init();
 }
@ -83,7 +89,8 @@ Message::Message(const Message& msg):
 	_pid(msg._pid),
 	_file(msg._file),
 	_line(msg._line),
-    _fmt_str(msg._fmt_str)
+	_fmt_str(msg._fmt_str),
+	_fmt_str_args(msg._fmt_str_args)
 {
 	if (msg._pMap)
 		_pMap = new StringMap(*msg._pMap);
@ -102,7 +109,8 @@ Message::Message(const Message& msg, const std::string& text):
 	_pid(msg._pid),
 	_file(msg._file),
 	_line(msg._line),
-    _fmt_str(msg._fmt_str)
+	_fmt_str(msg._fmt_str),
+	_fmt_str_args(msg._fmt_str_args)
 {
 	if (msg._pMap)
 		_pMap = new StringMap(*msg._pMap);
@ -154,6 +162,7 @@ void Message::swap(Message& msg)
 	swap(_line, msg._line);
 	swap(_pMap, msg._pMap);
 	swap(_fmt_str, msg._fmt_str);
+	swap(_fmt_str_args, msg._fmt_str_args);
 }


@ -227,6 +236,17 @@ void Message::setFormatString(std::string_view fmt_str)
 }


+/// Returns a const reference to the stored format string arguments (`_fmt_str_args`).
+const std::vector<std::string>& Message::getFormatStringArgs() const
+{
+    return _fmt_str_args;
+}
+
+/// Replaces the stored format string arguments (`_fmt_str_args`) with a copy of `fmt_str_args`.
+void Message::setFormatStringArgs(const std::vector<std::string>& fmt_str_args)
+{
+    _fmt_str_args = fmt_str_args;
+}
+
+
 bool Message::has(const std::string& param) const
 {
 	return _pMap && (_pMap->find(param) != _pMap->end());
--- a/base/poco/Foundation/src/pcre_compile.c
+++ b/base/poco/Foundation/src/pcre_compile.c
@ -4835,7 +4835,7 @@ for (;; ptr++)

    If the class contains characters outside the 0-255 range, a different
    opcode is compiled. It may optionally have a bit map for characters < 256,
-    but those above are are explicitly listed afterwards. A flag byte tells
+    but those above are explicitly listed afterwards. A flag byte tells
    whether the bitmap is present, and whether this is a negated class or not.

    In JavaScript compatibility mode, an isolated ']' causes an error. In
--- a/base/poco/JSON/src/pdjson.c
+++ b/base/poco/JSON/src/pdjson.c
@ -314,13 +314,13 @@ static int read_unicode(json_stream *json)

        if (l < 0xdc00 || l > 0xdfff) {
            json_error(json, "invalid surrogate pair continuation \\u%04lx out "
-                             "of range (dc00-dfff)", l);
+                             "of range (dc00-dfff)", (unsigned long)l);
            return -1;
        }

        cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000);
    } else if (cp >= 0xdc00 && cp <= 0xdfff) {
-            json_error(json, "dangling surrogate \\u%04lx", cp);
+            json_error(json, "dangling surrogate \\u%04lx", (unsigned long)cp);
            return -1;
    }

--- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
@ -213,6 +213,19 @@ namespace Net
        Poco::Timespan getKeepAliveTimeout() const;
        /// Returns the connection timeout for HTTP connections.

+        void setKeepAliveMaxRequests(int max_requests);
+
+        int getKeepAliveMaxRequests() const;
+
+        int getKeepAliveRequest() const;
+
+        bool isKeepAliveExpired(double reliability = 1.0) const;
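+        /// Returns true if the connection is considered expired: either at least `reliability` (a fraction of the keep-alive timeout) of the timeout has passed since the last request, or the keep-alive request limit has been exceeded.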
+
+        double getKeepAliveReliability() const;
+        /// Returns the fraction of the keep-alive timeout within which the connection is considered safe to use.
+        /// This helps to avoid the situation where a client uses a nearly expired connection and receives NoMessageException.
+
        virtual std::ostream & sendRequest(HTTPRequest & request);
        /// Sends the header for the given HTTP request to
        /// the server.
@ -345,6 +358,8 @@ namespace Net

        void assign(HTTPClientSession & session);

+        void setKeepAliveRequest(int request);
+
        HTTPSessionFactory _proxySessionFactory;
        /// Factory to create HTTPClientSession to proxy.
    private:
@ -353,6 +368,8 @@ namespace Net
        Poco::UInt16 _port;
        ProxyConfig _proxyConfig;
        Poco::Timespan _keepAliveTimeout;
+        int _keepAliveCurrentRequest = 0;
+        int _keepAliveMaxRequests = 1000;
        Poco::Timestamp _lastRequest;
        bool _reconnect;
        bool _mustReconnect;
@ -361,6 +378,7 @@ namespace Net
        Poco::SharedPtr<std::ostream> _pRequestStream;
        Poco::SharedPtr<std::istream> _pResponseStream;

+        static const double _defaultKeepAliveReliabilityLevel;
        static ProxyConfig _globalProxyConfig;

        HTTPClientSession(const HTTPClientSession &);
@ -450,9 +468,19 @@ namespace Net
        return _lastRequest;
    }

-    inline void HTTPClientSession::setLastRequest(Poco::Timestamp time)
+    inline double HTTPClientSession::getKeepAliveReliability() const
    {
-        _lastRequest = time;
+        return _defaultKeepAliveReliabilityLevel;
+    }
+
+    inline int HTTPClientSession::getKeepAliveMaxRequests() const
+    {
+        return _keepAliveMaxRequests;
+    }
+
+    inline int HTTPClientSession::getKeepAliveRequest() const
+    {
+        return _keepAliveCurrentRequest;
    }

 }
--- a/base/poco/Net/include/Poco/Net/HTTPMessage.h
+++ b/base/poco/Net/include/Poco/Net/HTTPMessage.h
@ -120,6 +120,10 @@ namespace Net
        /// The value is set to "Keep-Alive" if keepAlive is
        /// true, or to "Close" otherwise.

+        void setKeepAliveTimeout(int timeout, int max_requests);
+        int getKeepAliveTimeout() const;
+        int getKeepAliveMaxRequests() const;
+
        bool getKeepAlive() const;
        /// Returns true if
        ///   * the message has a Connection header field and its value is "Keep-Alive"
--- a/base/poco/Net/include/Poco/Net/HTTPServerParams.h
+++ b/base/poco/Net/include/Poco/Net/HTTPServerParams.h
@ -44,7 +44,7 @@ namespace Net
        ///   - timeout:              60 seconds
        ///   - keepAlive:            true
        ///   - maxKeepAliveRequests: 0
-        ///   - keepAliveTimeout:     10 seconds
+        ///   - keepAliveTimeout:     15 seconds

        void setServerName(const std::string & serverName);
        /// Sets the name and port (name:port) that the server uses to identify itself.
--- a/base/poco/Net/include/Poco/Net/HTTPServerSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPServerSession.h
@ -56,6 +56,8 @@ namespace Net
        SocketAddress serverAddress();
        /// Returns the server's address.

+        void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout);
+
    private:
        bool _firstRequest;
        Poco::Timespan _keepAliveTimeout;
--- a/base/poco/Net/include/Poco/Net/NameValueCollection.h
+++ b/base/poco/Net/include/Poco/Net/NameValueCollection.h
@ -79,6 +79,11 @@ namespace Net
        /// Returns the value of the first name-value pair with the given name.
        /// If no value with the given name has been found, the defaultValue is returned.

+        const std::vector<std::reference_wrapper<const std::string>> getAll(const std::string & name) const;
+        /// Returns all values of all name-value pairs with the given name.
+        ///
+        /// Returns an empty vector if there are no name-value pairs with the given name.
+
        bool has(const std::string & name) const;
        /// Returns true if there is at least one name-value pair
        /// with the given name.
--- a/base/poco/Net/src/HTTPClientSession.cpp
+++ b/base/poco/Net/src/HTTPClientSession.cpp
@ -37,6 +37,7 @@ namespace Net {


 HTTPClientSession::ProxyConfig HTTPClientSession::_globalProxyConfig;
+const double HTTPClientSession::_defaultKeepAliveReliabilityLevel = 0.9;


 HTTPClientSession::HTTPClientSession():
@ -220,7 +221,41 @@ void HTTPClientSession::setGlobalProxyConfig(const ProxyConfig& config)

 void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout)
 {
-	_keepAliveTimeout = timeout;
+    if (connected())
+    {
+        throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection, "
+                                          "That value is managed privately after connection is established.");
+    }
+    _keepAliveTimeout = timeout;
+}
+
+
+/// Sets `_keepAliveMaxRequests`, the request-count limit that isKeepAliveExpired()
+/// compares against and that sendRequest() passes to the request's keep-alive header.
+///
+/// Throws Poco::IllegalStateException if the session is already connected.
+void HTTPClientSession::setKeepAliveMaxRequests(int max_requests)
+{
+    if (connected())
+    {
+        throw Poco::IllegalStateException("cannot change keep alive max requests on initiated connection, "
+                                          "That value is managed privately after connection is established.");
+    }
+    _keepAliveMaxRequests = max_requests;
+}
+
+
+/// Overwrites `_keepAliveCurrentRequest`, the counter that sendRequest() increments
+/// for every request. Unlike the other keep-alive setters, no connection-state check is made.
+void HTTPClientSession::setKeepAliveRequest(int request)
+{
+    _keepAliveCurrentRequest = request;
+}
+
+
+
+/// Sets the timestamp of the last request (`_lastRequest`), which isKeepAliveExpired()
+/// uses to measure how long the connection has been idle.
+///
+/// Throws Poco::IllegalStateException if the session is already connected.
+void HTTPClientSession::setLastRequest(Poco::Timestamp time)
+{
+    if (connected())
+    {
+        throw Poco::IllegalStateException("cannot change last request on initiated connection, "
+                                          "That value is managed privately after connection is established.");
+    }
+    _lastRequest = time;
+}


@ -231,6 +266,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
 	clearException();
 	_responseReceived = false;

+    _keepAliveCurrentRequest += 1;
+
 	bool keepAlive = getKeepAlive();
 	if (((connected() && !keepAlive) || mustReconnect()) && !_host.empty())
 	{
@ -241,8 +278,10 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
 	{
 		if (!connected())
 			reconnect();
-		if (!keepAlive)
-			request.setKeepAlive(false);
+        if (!request.has(HTTPMessage::CONNECTION))
+            request.setKeepAlive(keepAlive);
+        if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0)
+            request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds(), _keepAliveMaxRequests);
 		if (!request.has(HTTPRequest::HOST) && !_host.empty())
 			request.setHost(_host, _port);
 		if (!_proxyConfig.host.empty() && !bypassProxy())
@ -324,6 +363,17 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response)

 	_mustReconnect = getKeepAlive() && !response.getKeepAlive();

+    if (!_mustReconnect)
+    {
+        /// when server sends its keep alive timeout, client has to follow that value
+        auto timeout = response.getKeepAliveTimeout();
+        if (timeout > 0)
+            _keepAliveTimeout = std::min(_keepAliveTimeout, Poco::Timespan(timeout, 0));
+        auto max_requests = response.getKeepAliveMaxRequests();
+        if (max_requests > 0)
+            _keepAliveMaxRequests = std::min(_keepAliveMaxRequests, max_requests);
+    }
+
 	if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED)
 		_pResponseStream = new HTTPFixedLengthInputStream(*this, 0);
 	else if (response.getChunkedTransferEncoding())
@ -430,15 +480,18 @@ std::string HTTPClientSession::proxyRequestPrefix() const
 	return result;
 }

+/// Returns true when the connection should be treated as expired, i.e. when either
+///   * the time since `_lastRequest` is at least `reliability` times the keep-alive timeout, or
+///   * the number of requests sent (`_keepAliveCurrentRequest`) exceeds `_keepAliveMaxRequests`.
+///
+/// `reliability` scales the timeout: a value below 1.0 expires the connection earlier
+/// than the full keep-alive timeout.
+bool HTTPClientSession::isKeepAliveExpired(double reliability) const
+{
+    Poco::Timestamp now;
+    return Timespan(Timestamp::TimeDiff(reliability *_keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest
+            || _keepAliveCurrentRequest > _keepAliveMaxRequests;
+}

 bool HTTPClientSession::mustReconnect() const
 {
 	if (!_mustReconnect)
-	{
-		Poco::Timestamp now;
-		return _keepAliveTimeout <= now - _lastRequest;
-	}
-	else return true;
+        return isKeepAliveExpired(_defaultKeepAliveReliabilityLevel);
+    return true;
 }


@ -511,14 +564,21 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session)
    if (buffered())
        throw Poco::LogicException("assign to a session with not empty buffered data");

-    attachSocket(session.detachSocket());
-    setLastRequest(session.getLastRequest());
+    poco_assert(!connected());
+
    setResolvedHost(session.getResolvedHost());
-    setKeepAlive(session.getKeepAlive());
+    setProxyConfig(session.getProxyConfig());

    setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout());
+    setKeepAlive(session.getKeepAlive());
+
+    setLastRequest(session.getLastRequest());
    setKeepAliveTimeout(session.getKeepAliveTimeout());
-    setProxyConfig(session.getProxyConfig());
+
+    _keepAliveMaxRequests = session._keepAliveMaxRequests;
+    _keepAliveCurrentRequest = session._keepAliveCurrentRequest;
+
+    attachSocket(session.detachSocket());

    session.reset();
 }
--- a/base/poco/Net/src/HTTPMessage.cpp
+++ b/base/poco/Net/src/HTTPMessage.cpp
@ -17,6 +17,7 @@
 #include "Poco/NumberFormatter.h"
 #include "Poco/NumberParser.h"
 #include "Poco/String.h"
+#include <format>


 using Poco::NumberFormatter;
@ -179,4 +180,51 @@ bool HTTPMessage::getKeepAlive() const
 }


+/// Adds a CONNECTION_KEEP_ALIVE header whose value is "timeout=<timeout>, max=<max_requests>".
+///
+/// NOTE: this uses add(), not set(); the caller visible in HTTPClientSession::sendRequest()
+/// checks has(CONNECTION_KEEP_ALIVE) before calling it.
+void HTTPMessage::setKeepAliveTimeout(int timeout, int max_requests)
+{
+    add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max={}", timeout, max_requests));
+}
+
+
+/// Extracts the integer value of the parameter `param_name` (including its '=',
+/// e.g. "timeout=") from a comma-separated header value such as "timeout=5, max=1000".
+///
+/// Returns the parsed value, or -1 when the parameter is absent, its value is empty,
+/// or the value does not start with a valid `int`.
+int parseFromHeaderValues(const std::string_view header_value, const std::string_view param_name)
+{
+    const auto param_pos = header_value.find(param_name);
+    if (param_pos == std::string_view::npos)
+        return -1;
+
+    /// The value spans from just after the parameter name up to the next comma (or the end of the header).
+    const auto value_begin = param_pos + param_name.size();
+    auto value_end = header_value.find(',', value_begin);
+    if (value_end == std::string_view::npos)
+        value_end = header_value.size();
+
+    const std::string_view value_str = header_value.substr(value_begin, value_end - value_begin);
+    if (value_str.empty())
+        return -1;
+
+    /// std::from_chars takes `const char *`; string_view iterators are not guaranteed
+    /// to be raw pointers, so pass data()-based pointers instead of begin()/end().
+    int value = 0;
+    const auto result = std::from_chars(value_str.data(), value_str.data() + value_str.size(), value);
+
+    return result.ec == std::errc() ? value : -1;
+}
+
+
+/// Returns the `timeout=` parameter of the CONNECTION_KEEP_ALIVE header as an int,
+/// or -1 when the header or the parameter is missing or its value is not a number.
+int HTTPMessage::getKeepAliveTimeout() const
+{
+    const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY);
+    static const std::string_view timeout_param = "timeout=";
+    return parseFromHeaderValues(ka_header, timeout_param);
+}
+
+
+/// Returns the `max=` parameter of the CONNECTION_KEEP_ALIVE header as an int,
+/// or -1 when the header or the parameter is missing or its value is not a number.
+int HTTPMessage::getKeepAliveMaxRequests() const
+{
+    const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY);
+    /// NOTE: despite its name, `timeout_param` holds the "max=" parameter prefix here.
+    static const std::string_view timeout_param = "max=";
+    return parseFromHeaderValues(ka_header, timeout_param);
+}
+
 } } // namespace Poco::Net
--- a/base/poco/Net/src/HTTPServerConnection.cpp
+++ b/base/poco/Net/src/HTTPServerConnection.cpp
@ -88,7 +88,18 @@ void HTTPServerConnection::run()
 					
 						pHandler->handleRequest(request, response);
 						session.setKeepAlive(_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive());
-					}
+
+                        /// All this fuss is about letting the session close with a shorter timeout than the 15s (set in HTTPServerParams c-tor)
+                        if (_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive())
+                        {
+                            int value = response.getKeepAliveTimeout();
+                            if (value < 0)
+                                value = request.getKeepAliveTimeout();
+                            if (value > 0)
+                                session.setKeepAliveTimeout(Poco::Timespan(value, 0));
+                        }
+
+                    }
 					else sendErrorResponse(session, HTTPResponse::HTTP_NOT_IMPLEMENTED);
 				}
 				catch (Poco::Exception&)
--- a/base/poco/Net/src/HTTPServerSession.cpp
+++ b/base/poco/Net/src/HTTPServerSession.cpp
@ -33,6 +33,12 @@ HTTPServerSession::~HTTPServerSession()
 {
 }

+/// Overwrites the keep-alive timeout stored in this session (_keepAliveTimeout) with the given value.
+void HTTPServerSession::setKeepAliveTimeout(Poco::Timespan keepAliveTimeout)
+{
+    _keepAliveTimeout = keepAliveTimeout;
+}
+
+

 bool HTTPServerSession::hasMoreRequests()
 {
--- a/base/poco/Net/src/NameValueCollection.cpp
+++ b/base/poco/Net/src/NameValueCollection.cpp
@ -15,6 +15,7 @@
 #include "Poco/Net/NameValueCollection.h"
 #include "Poco/Exception.h"
 #include <algorithm>
+#include <functional>


 using Poco::NotFoundException;
@ -55,7 +56,7 @@ void NameValueCollection::swap(NameValueCollection& nvc)
 	std::swap(_map, nvc._map);
 }

-	
+
 const std::string& NameValueCollection::operator [] (const std::string& name) const
 {
 	ConstIterator it = _map.find(name);
@ -65,8 +66,8 @@ const std::string& NameValueCollection::operator [] (const std::string& name) co
 		throw NotFoundException(name);
 }

-	
-void NameValueCollection::set(const std::string& name, const std::string& value)	
+
+void NameValueCollection::set(const std::string& name, const std::string& value)
 {
 	Iterator it = _map.find(name);
 	if (it != _map.end())
@ -75,13 +76,13 @@ void NameValueCollection::set(const std::string& name, const std::string& value)
 		_map.insert(HeaderMap::ValueType(name, value));
 }

-	
+
 void NameValueCollection::add(const std::string& name, const std::string& value)
 {
 	_map.insert(HeaderMap::ValueType(name, value));
 }

-	
+
 const std::string& NameValueCollection::get(const std::string& name) const
 {
 	ConstIterator it = _map.find(name);
@ -101,6 +102,15 @@ const std::string& NameValueCollection::get(const std::string& name, const std::
 		return defaultValue;
 }

+/// Collects references to the values of every entry whose key equals `name`.
+/// The scan starts at the first entry returned by _map.find(name) and runs to the end of the map;
+/// the result is empty when nothing matches. The returned references point into this collection.
+const std::vector<std::reference_wrapper<const std::string>> NameValueCollection::getAll(const std::string& name) const
+{
+    std::vector<std::reference_wrapper<const std::string>> matches;
+
+    ConstIterator pos = _map.find(name);
+    const ConstIterator last = _map.end();
+    while (pos != last)
+    {
+        /// NOTE(review): this is an exact std::string comparison, while _map.find() may compare
+        /// keys differently (e.g. ignoring case) -- confirm both agree for mixed-case names.
+        if (pos->first == name)
+            matches.push_back(std::cref(pos->second));
+        ++pos;
+    }
+
+    return matches;
+}
+

 bool NameValueCollection::has(const std::string& name) const
 {
@ -113,19 +123,19 @@ NameValueCollection::ConstIterator NameValueCollection::find(const std::string&
 	return _map.find(name);
 }

-	
+
 NameValueCollection::ConstIterator NameValueCollection::begin() const
 {
 	return _map.begin();
 }

-	
+
 NameValueCollection::ConstIterator NameValueCollection::end() const
 {
 	return _map.end();
 }

-	
+
 bool NameValueCollection::empty() const
 {
 	return _map.empty();
--- a/base/poco/NetSSL_OpenSSL/src/Context.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/Context.cpp
@ -592,6 +592,7 @@ void Context::createSSLContext()
 	SSL_CTX_set_default_passwd_cb(_pSSLContext, &SSLManager::privateKeyPassphraseCallback);
 	Utility::clearErrorStack();
 	SSL_CTX_set_options(_pSSLContext, SSL_OP_ALL);
+	SSL_CTX_set_options(_pSSLContext, SSL_OP_IGNORE_UNEXPECTED_EOF);
 }


--- a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp
@ -125,7 +125,7 @@ void SSLManager::initializeClient(PrivateKeyPassphraseHandlerPtr ptrPassphraseHa
 Context::Ptr SSLManager::defaultServerContext()
 {
 	Poco::FastMutex::ScopedLock lock(_mutex);
-	
+
 	if (!_ptrDefaultServerContext)
 		initDefaultContext(true);

@ -150,7 +150,7 @@ Context::Ptr SSLManager::defaultClientContext()
 			_ptrDefaultClientContext->disableProtocols(Context::PROTO_SSLV2 | Context::PROTO_SSLV3);
 		}
 	}
-		
+
 	return _ptrDefaultClientContext;
 }

@ -256,7 +256,7 @@ void SSLManager::initDefaultContext(bool server)
 	Context::Params params;
 	// mandatory options
 	params.privateKeyFile = config.getString(prefix + CFG_PRIV_KEY_FILE, "");
-	params.certificateFile = config.getString(prefix + CFG_CERTIFICATE_FILE, params.privateKeyFile);	
+	params.certificateFile = config.getString(prefix + CFG_CERTIFICATE_FILE, params.privateKeyFile);
 	params.caLocation = config.getString(prefix + CFG_CA_LOCATION, "");

 	if (server && params.certificateFile.empty() && params.privateKeyFile.empty())
@ -283,7 +283,7 @@ void SSLManager::initDefaultContext(bool server)
 	params.ecdhCurve    = config.getString(prefix + CFG_ECDH_CURVE, "");

 	Context::Usage usage;
-	
+
 	if (server)
 	{
 		if (requireTLSv1_2)
@ -308,7 +308,7 @@ void SSLManager::initDefaultContext(bool server)
 			usage = Context::CLIENT_USE;
 		_ptrDefaultClientContext = new Context(usage, params);
 	}
-	
+
 	std::string disabledProtocolsList = config.getString(prefix + CFG_DISABLE_PROTOCOLS, "");
 	Poco::StringTokenizer dpTok(disabledProtocolsList, ";,", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
 	int disabledProtocols = 0;
@ -329,27 +329,28 @@ void SSLManager::initDefaultContext(bool server)
 		_ptrDefaultServerContext->disableProtocols(disabledProtocols);
 	else
 		_ptrDefaultClientContext->disableProtocols(disabledProtocols);
-		
-	bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false);
-	if (server)
-	{
-		std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", ""));
-		_ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext);
-		if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE))
-		{
-			int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE);
-			_ptrDefaultServerContext->setSessionCacheSize(cacheSize);
-		}
-		if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT))
-		{
-			int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT);
-			_ptrDefaultServerContext->setSessionTimeout(timeout);
-		}
-	}
-	else
-	{
-		_ptrDefaultClientContext->enableSessionCache(cacheSessions);
-	}
+
+    /// Temporarily disabled during the transition from boringssl to OpenSSL due to tsan issues.
+	/// bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false);
+	/// if (server)
+	/// {
+	/// 	std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", ""));
+	/// 	_ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext);
+	/// 	if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE))
+	/// 	{
+	/// 		int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE);
+	/// 		_ptrDefaultServerContext->setSessionCacheSize(cacheSize);
+	/// 	}
+	/// 	if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT))
+	/// 	{
+	/// 		int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT);
+	/// 		_ptrDefaultServerContext->setSessionTimeout(timeout);
+	/// 	}
+	/// }
+	/// else
+	/// {
+	/// 	_ptrDefaultClientContext->enableSessionCache(cacheSessions);
+	/// }
 	bool extendedVerification = config.getBool(prefix + CFG_EXTENDED_VERIFICATION, false);
 	if (server)
 		_ptrDefaultServerContext->enableExtendedCertificateVerification(extendedVerification);
@ -378,7 +379,7 @@ void SSLManager::initPassphraseHandler(bool server)
 {
 	if (server && _ptrServerPassphraseHandler) return;
 	if (!server && _ptrClientPassphraseHandler) return;
-	
+
 	std::string prefix = server ? CFG_SERVER_PREFIX : CFG_CLIENT_PREFIX;
 	Poco::Util::AbstractConfiguration& config = appConfig();

@ -399,7 +400,7 @@ void SSLManager::initPassphraseHandler(bool server)
 	}
 	else throw Poco::Util::UnknownOptionException(std::string("No passphrase handler known with the name ") + className);
 }
-	
+

 void SSLManager::initCertificateHandler(bool server)
 {
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@ -2,11 +2,11 @@

 # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
 # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54484)
+SET(VERSION_REVISION 54485)
 SET(VERSION_MAJOR 24)
-SET(VERSION_MINOR 3)
+SET(VERSION_MINOR 4)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 891689a41506d00aa169548f5b4a8774351242c4)
-SET(VERSION_DESCRIBE v24.3.1.1-testing)
-SET(VERSION_STRING 24.3.1.1)
+SET(VERSION_GITHASH 2c5c589a882ceec35439650337b92db3e76f0081)
+SET(VERSION_DESCRIBE v24.4.1.1-testing)
+SET(VERSION_STRING 24.4.1.1)
 # end of autochange
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@ -8,9 +8,6 @@ option (SANITIZE "Enable one of the code sanitizers" "")

 set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")

-# It's possible to pass an ignore list to sanitizers (-fsanitize-ignorelist). Intentionally not doing this because
-# 1. out-of-source suppressions are awkward 2. it seems ignore lists don't work after the Clang v16 upgrade (#49829)
-
 if (SANITIZE)
    if (SANITIZE STREQUAL "address")
        set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -37,11 +37,7 @@ function(add_contrib cmake_folder)
    message(STATUS "Adding contrib module ${base_folders} (configuring with ${cmake_folder})")
    add_subdirectory (${cmake_folder})
 endfunction()
-if (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)
-    add_contrib (openssl-cmake openssl)
-else ()
-    add_contrib (boringssl-cmake boringssl)
-endif ()
+add_contrib (openssl-cmake openssl)
 add_contrib (miniselect-cmake miniselect)
 add_contrib (pdqsort-cmake pdqsort)
 add_contrib (pocketfft-cmake pocketfft)
@ -223,6 +219,8 @@ endif ()

 add_contrib (xxHash-cmake xxHash)

+add_contrib (expected-cmake expected)
+
 add_contrib (libbcrypt-cmake libbcrypt)

 add_contrib (google-benchmark-cmake google-benchmark)
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@ -1 +1 @@
-Subproject commit 4a12f99dfc9d47c687ff7700b927cc76856225d1
+Subproject commit cb5dc3c906e80f253e9ce9535807caef827cc2e0
--- a/contrib/arrow
+++ b/contrib/arrow
@ -1 +1 @@
-Subproject commit ba5c67934e8274d649befcffab56731632dc5253
+Subproject commit 8f36d71d18587f1f315ec832f424183cb6519cbb
--- a/contrib/avro-cmake/CMakeLists.txt
+++ b/contrib/avro-cmake/CMakeLists.txt
@ -59,12 +59,3 @@ target_link_libraries (_avrocpp PRIVATE boost::headers_only boost::iostreams)
 target_compile_definitions (_avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
 target_include_directories (_avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
 target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy)
-
-# create a symlink to include headers with <avro/...>
-set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
-ADD_CUSTOM_TARGET(avro_symlink_headers ALL
-    COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
-    COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
-)
-add_dependencies(_avrocpp avro_symlink_headers)
-target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")
--- a/contrib/aws
+++ b/contrib/aws
@ -1 +1 @@
-Subproject commit 32870e234cac03e0ac46370c26858b0ffdf14200
+Subproject commit 2e12d7c6dafa81311ee3d73ac6a178550ffa75be
--- a/contrib/aws-c-cal
+++ b/contrib/aws-c-cal
@ -1 +1 @@
-Subproject commit 9453687ff5493ba94eaccf8851200565c4364c77
+Subproject commit 1586846816e6d7d5ff744a2db943107a3a74a082
--- a/contrib/azure
+++ b/contrib/azure
@ -1 +1 @@
-Subproject commit e71395e44f309f97b5a486f5c2c59b82f85dd2d2
+Subproject commit b90fd3c6ef3185f5be3408056567bca0854129b6
--- a/contrib/azure-cmake/CMakeLists.txt
+++ b/contrib/azure-cmake/CMakeLists.txt
@ -10,6 +10,7 @@ set(AZURE_SDK_LIBRARY_DIR "${AZURE_DIR}/sdk")

 file(GLOB AZURE_SDK_SRC
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/credentials/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/cryptography/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.cpp"
--- a/contrib/boringssl
+++ b/contrib/boringssl
@ -1 +0,0 @@
-Subproject commit aa6d2f865a2eab01cf94f197e11e36b6de47b5b4
--- a/contrib/boringssl-cmake/CMakeLists.txt
+++ b/contrib/boringssl-cmake/CMakeLists.txt
@ -1,799 +0,0 @@
-# Needed for:
-# - securely connecting to an external server, e.g. clickhouse-client --host ... --secure
-# - lots of thirdparty libraries
-
-# Actually, so many 3rd party libraries + unit tests need SSL that we cannot disable it
-# without breaking the build ...
-option(ENABLE_SSL "Enable ssl" ON) # breaks if OFF
-# TODO: Making SSL dependent on ENABLE_LIBRARIES is desirable but needs fixing dependent libs + tests.
-# option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES})
-
-if(NOT ENABLE_SSL)
-  message(STATUS "Not using openssl")
-  return()
-endif()
-
-# Copyright (c) 2019 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# This file is created by generate_build_files.py and edited accordingly.
-
-cmake_minimum_required(VERSION 3.5)
-
-project(BoringSSL LANGUAGES C CXX)
-
-set(BORINGSSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/boringssl")
-
-if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-  set(CLANG 1)
-endif()
-
-if(CMAKE_COMPILER_IS_GNUCXX OR CLANG)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-common -fno-exceptions -fno-rtti")
-  if(APPLE AND CLANG)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
-  endif()
-
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common -std=c11")
-endif()
-
-# pthread_rwlock_t requires a feature flag.
-if(NOT WIN32)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_XOPEN_SOURCE=700")
-endif()
-
-if(WIN32)
-  add_definitions(-D_HAS_EXCEPTIONS=0)
-  add_definitions(-DWIN32_LEAN_AND_MEAN)
-  add_definitions(-DNOMINMAX)
-  # Allow use of fopen.
-  add_definitions(-D_CRT_SECURE_NO_WARNINGS)
-  # VS 2017 and higher supports STL-only warning suppressions.
-  # A bug in CMake < 3.13.0 may cause the space in this value to
-  # cause issues when building with NASM. In that case, update CMake.
-  add_definitions("-D_STL_EXTRA_DISABLED_WARNINGS=4774 4987")
-endif()
-
-add_definitions(-DBORINGSSL_IMPLEMENTATION)
-
-# CMake's iOS support uses Apple's multiple-architecture toolchain. It takes an
-# architecture list from CMAKE_OSX_ARCHITECTURES, leaves CMAKE_SYSTEM_PROCESSOR
-# alone, and expects all architecture-specific logic to be conditioned within
-# the source files rather than the build. This does not work for our assembly
-# files, so we fix CMAKE_SYSTEM_PROCESSOR and only support single-architecture
-# builds.
-if(NOT OPENSSL_NO_ASM AND CMAKE_OSX_ARCHITECTURES)
-  list(LENGTH CMAKE_OSX_ARCHITECTURES NUM_ARCHES)
-  if(NOT NUM_ARCHES EQUAL 1)
-    message(FATAL_ERROR "Universal binaries not supported.")
-  endif()
-  list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR)
-endif()
-
-if(OPENSSL_NO_ASM)
-  add_definitions(-DOPENSSL_NO_ASM)
-  set(ARCH "generic")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
-  set(ARCH "x86_64")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64")
-  set(ARCH "x86_64")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
-  # cmake reports AMD64 on Windows, but we might be building for 32-bit.
-  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
-    set(ARCH "x86_64")
-  else()
-    set(ARCH "x86")
-  endif()
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86")
-  set(ARCH "x86")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386")
-  # cmake uses `uname -p` to set the system processor, but Solaris
-  # systems support multiple architectures.
-  if((${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") AND CMAKE_SIZEOF_VOID_P EQUAL 8)
-    set(ARCH "x86_64")
-  else()
-    set(ARCH "x86")
-  endif()
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686")
-  set(ARCH "x86")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
-  set(ARCH "aarch64")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64")
-  set(ARCH "aarch64")
-# Apple A12 Bionic chipset which is added in iPhone XS/XS Max/XR uses arm64e architecture.
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64e")
-  set(ARCH "aarch64")
-elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm*")
-  set(ARCH "arm")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips")
-  # Just to avoid the “unknown processor” error.
-  set(ARCH "generic")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc64le")
-  set(ARCH "ppc64le")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
-  set(ARCH "riscv64")
-else()
-  message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
-endif()
-
-if(NOT OPENSSL_NO_ASM)
-  if(UNIX)
-    enable_language(ASM)
-
-    # Clang's integerated assembler does not support debug symbols.
-    if(NOT CMAKE_ASM_COMPILER_ID MATCHES "Clang")
-      set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,-g")
-    endif()
-
-    # CMake does not add -isysroot and -arch flags to assembly.
-    if(APPLE)
-      if(CMAKE_OSX_SYSROOT)
-        set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -isysroot \"${CMAKE_OSX_SYSROOT}\"")
-      endif()
-      foreach(arch ${CMAKE_OSX_ARCHITECTURES})
-        set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -arch ${arch}")
-      endforeach()
-    endif()
-  else()
-    set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -gcv8")
-    enable_language(ASM_NASM)
-  endif()
-endif()
-
-set(
-  CRYPTO_ios_aarch64_SOURCES
-
-  ios-aarch64/crypto/chacha/chacha-armv8.S
-  ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
-  ios-aarch64/crypto/fipsmodule/armv8-mont.S
-  ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
-  ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
-  ios-aarch64/crypto/fipsmodule/sha1-armv8.S
-  ios-aarch64/crypto/fipsmodule/sha256-armv8.S
-  ios-aarch64/crypto/fipsmodule/sha512-armv8.S
-  ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
-  ios-aarch64/crypto/test/trampoline-armv8.S
-)
-
-set(
-  CRYPTO_ios_arm_SOURCES
-
-  ios-arm/crypto/chacha/chacha-armv4.S
-  ios-arm/crypto/fipsmodule/aesv8-armx32.S
-  ios-arm/crypto/fipsmodule/armv4-mont.S
-  ios-arm/crypto/fipsmodule/bsaes-armv7.S
-  ios-arm/crypto/fipsmodule/ghash-armv4.S
-  ios-arm/crypto/fipsmodule/ghashv8-armx32.S
-  ios-arm/crypto/fipsmodule/sha1-armv4-large.S
-  ios-arm/crypto/fipsmodule/sha256-armv4.S
-  ios-arm/crypto/fipsmodule/sha512-armv4.S
-  ios-arm/crypto/fipsmodule/vpaes-armv7.S
-  ios-arm/crypto/test/trampoline-armv4.S
-)
-
-set(
-  CRYPTO_linux_aarch64_SOURCES
-
-  linux-aarch64/crypto/chacha/chacha-armv8.S
-  linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
-  linux-aarch64/crypto/fipsmodule/armv8-mont.S
-  linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
-  linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
-  linux-aarch64/crypto/fipsmodule/sha1-armv8.S
-  linux-aarch64/crypto/fipsmodule/sha256-armv8.S
-  linux-aarch64/crypto/fipsmodule/sha512-armv8.S
-  linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
-  linux-aarch64/crypto/test/trampoline-armv8.S
-)
-
-set(
-  CRYPTO_linux_arm_SOURCES
-
-  linux-arm/crypto/chacha/chacha-armv4.S
-  linux-arm/crypto/fipsmodule/aesv8-armx32.S
-  linux-arm/crypto/fipsmodule/armv4-mont.S
-  linux-arm/crypto/fipsmodule/bsaes-armv7.S
-  linux-arm/crypto/fipsmodule/ghash-armv4.S
-  linux-arm/crypto/fipsmodule/ghashv8-armx32.S
-  linux-arm/crypto/fipsmodule/sha1-armv4-large.S
-  linux-arm/crypto/fipsmodule/sha256-armv4.S
-  linux-arm/crypto/fipsmodule/sha512-armv4.S
-  linux-arm/crypto/fipsmodule/vpaes-armv7.S
-  linux-arm/crypto/test/trampoline-armv4.S
-  "${BORINGSSL_SOURCE_DIR}/crypto/curve25519/asm/x25519-asm-arm.S"
-  "${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm_asm.S"
-)
-
-set(
-  CRYPTO_linux_ppc64le_SOURCES
-
-  linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
-  linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
-  linux-ppc64le/crypto/test/trampoline-ppc.S
-)
-
-set(
-  CRYPTO_linux_x86_SOURCES
-
-  linux-x86/crypto/chacha/chacha-x86.S
-  linux-x86/crypto/fipsmodule/aesni-x86.S
-  linux-x86/crypto/fipsmodule/bn-586.S
-  linux-x86/crypto/fipsmodule/co-586.S
-  linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S
-  linux-x86/crypto/fipsmodule/ghash-x86.S
-  linux-x86/crypto/fipsmodule/md5-586.S
-  linux-x86/crypto/fipsmodule/sha1-586.S
-  linux-x86/crypto/fipsmodule/sha256-586.S
-  linux-x86/crypto/fipsmodule/sha512-586.S
-  linux-x86/crypto/fipsmodule/vpaes-x86.S
-  linux-x86/crypto/fipsmodule/x86-mont.S
-  linux-x86/crypto/test/trampoline-x86.S
-)
-
-set(
-  CRYPTO_linux_x86_64_SOURCES
-
-  linux-x86_64/crypto/chacha/chacha-x86_64.S
-  linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
-  linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
-  linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
-  linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
-  linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
-  linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
-  linux-x86_64/crypto/fipsmodule/md5-x86_64.S
-  linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
-  linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
-  linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
-  linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
-  linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
-  linux-x86_64/crypto/fipsmodule/sha256-x86_64.S
-  linux-x86_64/crypto/fipsmodule/sha512-x86_64.S
-  linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
-  linux-x86_64/crypto/fipsmodule/x86_64-mont.S
-  linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
-  linux-x86_64/crypto/test/trampoline-x86_64.S
-  "${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S"
-)
-
-set(
-  CRYPTO_mac_x86_SOURCES
-
-  mac-x86/crypto/chacha/chacha-x86.S
-  mac-x86/crypto/fipsmodule/aesni-x86.S
-  mac-x86/crypto/fipsmodule/bn-586.S
-  mac-x86/crypto/fipsmodule/co-586.S
-  mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S
-  mac-x86/crypto/fipsmodule/ghash-x86.S
-  mac-x86/crypto/fipsmodule/md5-586.S
-  mac-x86/crypto/fipsmodule/sha1-586.S
-  mac-x86/crypto/fipsmodule/sha256-586.S
-  mac-x86/crypto/fipsmodule/sha512-586.S
-  mac-x86/crypto/fipsmodule/vpaes-x86.S
-  mac-x86/crypto/fipsmodule/x86-mont.S
-  mac-x86/crypto/test/trampoline-x86.S
-)
-
-set(
-  CRYPTO_mac_x86_64_SOURCES
-
-  mac-x86_64/crypto/chacha/chacha-x86_64.S
-  mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
-  mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
-  mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
-  mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
-  mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
-  mac-x86_64/crypto/fipsmodule/ghash-x86_64.S
-  mac-x86_64/crypto/fipsmodule/md5-x86_64.S
-  mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
-  mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
-  mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S
-  mac-x86_64/crypto/fipsmodule/rsaz-avx2.S
-  mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
-  mac-x86_64/crypto/fipsmodule/sha256-x86_64.S
-  mac-x86_64/crypto/fipsmodule/sha512-x86_64.S
-  mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
-  mac-x86_64/crypto/fipsmodule/x86_64-mont.S
-  mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
-  mac-x86_64/crypto/test/trampoline-x86_64.S
-)
-
-set(
-  CRYPTO_win_aarch64_SOURCES
-
-  win-aarch64/crypto/chacha/chacha-armv8.S
-  win-aarch64/crypto/fipsmodule/aesv8-armx64.S
-  win-aarch64/crypto/fipsmodule/armv8-mont.S
-  win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
-  win-aarch64/crypto/fipsmodule/ghashv8-armx64.S
-  win-aarch64/crypto/fipsmodule/sha1-armv8.S
-  win-aarch64/crypto/fipsmodule/sha256-armv8.S
-  win-aarch64/crypto/fipsmodule/sha512-armv8.S
-  win-aarch64/crypto/fipsmodule/vpaes-armv8.S
-  win-aarch64/crypto/test/trampoline-armv8.S
-)
-
-set(
-  CRYPTO_win_x86_SOURCES
-
-  win-x86/crypto/chacha/chacha-x86.asm
-  win-x86/crypto/fipsmodule/aesni-x86.asm
-  win-x86/crypto/fipsmodule/bn-586.asm
-  win-x86/crypto/fipsmodule/co-586.asm
-  win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm
-  win-x86/crypto/fipsmodule/ghash-x86.asm
-  win-x86/crypto/fipsmodule/md5-586.asm
-  win-x86/crypto/fipsmodule/sha1-586.asm
-  win-x86/crypto/fipsmodule/sha256-586.asm
-  win-x86/crypto/fipsmodule/sha512-586.asm
-  win-x86/crypto/fipsmodule/vpaes-x86.asm
-  win-x86/crypto/fipsmodule/x86-mont.asm
-  win-x86/crypto/test/trampoline-x86.asm
-)
-
-set(
-  CRYPTO_win_x86_64_SOURCES
-
-  win-x86_64/crypto/chacha/chacha-x86_64.asm
-  win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
-  win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
-  win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
-  win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
-  win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
-  win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
-  win-x86_64/crypto/fipsmodule/md5-x86_64.asm
-  win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm
-  win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm
-  win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm
-  win-x86_64/crypto/fipsmodule/rsaz-avx2.asm
-  win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
-  win-x86_64/crypto/fipsmodule/sha256-x86_64.asm
-  win-x86_64/crypto/fipsmodule/sha512-x86_64.asm
-  win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
-  win-x86_64/crypto/fipsmodule/x86_64-mont.asm
-  win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
-  win-x86_64/crypto/test/trampoline-x86_64.asm
-)
-
-if(APPLE AND ARCH STREQUAL "aarch64")
-  set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_aarch64_SOURCES})
-elseif(APPLE AND ARCH STREQUAL "arm")
-  set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_arm_SOURCES})
-elseif(APPLE)
-  set(CRYPTO_ARCH_SOURCES ${CRYPTO_mac_${ARCH}_SOURCES})
-elseif(UNIX)
-  set(CRYPTO_ARCH_SOURCES ${CRYPTO_linux_${ARCH}_SOURCES})
-elseif(WIN32)
-  set(CRYPTO_ARCH_SOURCES ${CRYPTO_win_${ARCH}_SOURCES})
-endif()
-
-add_library(
-  _crypto
-
-  ${CRYPTO_ARCH_SOURCES}
-  err_data.c
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bitstr.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bool.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_d2i_fp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_dup.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_enum.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_gentm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_i2d_fp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_int.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_mbstr.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utctm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utf8.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_lib.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_par.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn_pack.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_enum.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_int.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_string.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_dec.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_enc.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_fre.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_new.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_typ.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_utl.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/time_support.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/base64/base64.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/bio.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/bio_mem.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/connect.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/fd.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/file.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/hexdump.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/pair.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bytestring/asn1_compat.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bytestring/ber.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbb.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbs.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bytestring/unicode.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/chacha/chacha.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/cipher_extra.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/derive_key.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesccm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesctrhmac.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesgcmsiv.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_chacha20poly1305.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_null.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc2.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc4.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_tls.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/tls_cbc.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cmac/cmac.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/crypto.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/err/err.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/digestsign.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/evp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_ctx.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_dsa_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/pbkdf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/print.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/scrypt.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/sign.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/mem.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/obj/obj.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/obj/obj_xref.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_all.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_info.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_lib.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_oth.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pk8.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pkey.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_xaux.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7_x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/p5_pbev2.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8_x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_vec.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pool/pool.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/refcount_c11.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/refcount_lock.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_print.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/siphash/siphash.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/stack/stack.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/thread.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509a.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_att.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_cmp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_d2.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_def.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_ext.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_lu.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_obj.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_req.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_set.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_trs.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_txt.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_v3.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vfy.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vpm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509cset.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509name.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509rset.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509spki.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_algor.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_all.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_attrib.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_crl.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_exten.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_info.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_name.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pkey.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pubkey.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_req.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_sig.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_spki.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_val.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509a.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_cache.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_data.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_lib.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_map.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_node.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_tree.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akey.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akeya.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_alt.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bcons.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bitst.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_conf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_cpols.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_crld.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_enum.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_extku.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_genn.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ia5.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_info.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_int.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_lib.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ncons.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ocsp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c"
-)
-
-# Library target `_ssl`: the BoringSSL sources under ssl/ listed below, plus
-# three files taken from decrepit/ (ssl_decrepit.c, cfb.c, base64_bio.c).
-# No STATIC/SHARED keyword is given, so the library type follows CMake's
-# default (controlled by BUILD_SHARED_LIBS).
-add_library(
-  _ssl
-
-  "${BORINGSSL_SOURCE_DIR}/ssl/bio_ssl.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/d1_both.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/d1_lib.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/d1_pkt.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/handshake_server.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/s3_both.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/s3_lib.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/s3_pkt.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_aead_ctx.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_asn1.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_buffer.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_cert.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_cipher.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_file.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_key_share.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_lib.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_privkey.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_session.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_stat.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_transcript.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls13_server.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls_method.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls_record.cc"
-
-  # Sources from BoringSSL's decrepit/ directory that are compiled into _ssl.
-  "${BORINGSSL_SOURCE_DIR}/decrepit/ssl/ssl_decrepit.c"
-  "${BORINGSSL_SOURCE_DIR}/decrepit/cfb/cfb.c"
-  "${BORINGSSL_SOURCE_DIR}/decrepit/bio/base64_bio.c"
-)
-
-# Executable target `bssl`, built from the sources under BoringSSL's tool/
-# directory. It is linked against `_ssl` further down in this file.
-add_executable(
-  bssl
-
-  "${BORINGSSL_SOURCE_DIR}/tool/args.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/ciphers.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/client.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/const.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/digest.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/fd.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/file.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/rand.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/server.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/sign.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/speed.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/tool.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/transport_common.cc"
-)
-
-# Dependency chain: bssl -> _ssl -> _crypto.
-target_link_libraries(_ssl _crypto)
-target_link_libraries(bssl _ssl)
-
-# _crypto is linked against pthread everywhere except Windows and Android.
-if(NOT WIN32 AND NOT ANDROID)
-  target_link_libraries(_crypto pthread)
-endif()
-
-# NOTE: ClickHouse does not support WIN32 anyway.
-if(WIN32)
-  target_link_libraries(bssl ws2_32)
-endif()
-
-# Export the BoringSSL headers to dependents. SYSTEM marks them as system
-# include directories for the compiler.
-target_include_directories(_crypto SYSTEM PUBLIC "${BORINGSSL_SOURCE_DIR}/include")
-target_include_directories(_ssl SYSTEM PUBLIC "${BORINGSSL_SOURCE_DIR}/include")
-
-# Disable the gnu-anonymous-struct warning for _crypto's own sources only (PRIVATE).
-target_compile_options(_crypto PRIVATE -Wno-gnu-anonymous-struct)
-
-# Aliases using the same target names that CMake's FindOpenSSL module defines,
-# presumably so dependents can link OpenSSL::Crypto / OpenSSL::SSL unchanged.
-add_library(OpenSSL::Crypto ALIAS _crypto)
-add_library(OpenSSL::SSL ALIAS _ssl)
-
-# Helper used by the OpenSSL version detection code below.
-#
-# from_hex(<hex> <out-var>)
-#   <hex>      Hexadecimal digits without a "0x" prefix, upper or lower case.
-#   <out-var>  Name of the variable in the caller's scope that receives the
-#              decimal value.
-#
-# An empty <hex> yields 0. Anything that is not valid hexadecimal makes
-# math(EXPR) raise an error.
-function(from_hex HEX DEC)
-  string(LENGTH "${HEX}" _strlen)
-  if (_strlen EQUAL 0)
-    # math(EXPR) cannot parse a bare "0x", so handle the empty string here.
-    set(${DEC} 0 PARENT_SCOPE)
-    return()
-  endif ()
-
-  # math(EXPR) accepts C-style hexadecimal literals (CMake >= 3.13), so no
-  # per-nibble loop is required.
-  math(EXPR _res "0x${HEX}" OUTPUT_FORMAT DECIMAL)
-
-  set(${DEC} ${_res} PARENT_SCOPE)
-endfunction()
-
-# ClickHouse uses BoringSSL, which is a fork of OpenSSL.
-# This populates the CMake variable OPENSSL_VERSION from the OPENSSL_VERSION_NUMBER defined
-# in contrib/boringssl/include/openssl/base.h. It also sets the CMake variable OPENSSL_IS_BORING_SSL
-# if the OPENSSL_IS_BORINGSSL macro is defined in that file. Both OPENSSL_VERSION and
-# OPENSSL_IS_BORING_SSL will be used to populate flags in the `system.build_options` table
-# for more context on the SSL version used.
-# This CMake script is adapted from the FindOpenSSL CMake module and slightly modified for this use case.
-if (EXISTS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h")
-  file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" openssl_version_str
-          REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
-
-  file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" openssl_is_boringssl
-          REGEX "^#[\t ]*define[\t ]+OPENSSL_IS_BORINGSSL.*")
-
-  # Set OPENSSL_IS_BORING_SSL to 1 if base.h defines the OPENSSL_IS_BORINGSSL macro
-  if (openssl_is_boringssl)
-    set(OPENSSL_IS_BORING_SSL 1)
-  endif ()
-
-  # If an OPENSSL_VERSION_NUMBER line was found, decode it and set OPENSSL_VERSION
-  if (openssl_version_str)
-    # The version number is encoded as 0xMNNFFPPS: major minor fix patch status
-    # The status gives if this is a developer or prerelease and is ignored here.
-    # Major, minor, and fix directly translate into the version numbers shown in
-    # the string. The patch field translates to the single character suffix that
-    # indicates the bug fix state, where 00 -> nothing, 01 -> a, 02 -> b and so
-    # on.
-
-    string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
-            "\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
-    # The major field is a single hex digit and is used as-is; minor and fix
-    # are converted from hex to decimal with from_hex.
-    list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
-    list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
-    from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
-    list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
-    from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
-    list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
-
-    if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
-      from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
-      # 96 is the ASCII code of 'a' minus 1
-      math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
-      unset(_tmp)
-      # Once anyone knows how OpenSSL would call the patch versions beyond 'z'
-      # this should be updated to handle that, too. This has not happened yet
-      # so it is simply ignored here for now.
-      string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
-    endif ()
-
-    # OPENSSL_VERSION_PATCH_STRING is only set inside the branch above, so it
-    # expands to nothing here when the patch field is "00".
-    set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
-  else ()
-    # Since OpenSSL 3.0.0, the new version format is MAJOR.MINOR.PATCH and
-    # a new OPENSSL_VERSION_STR macro contains exactly that
-    file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" OPENSSL_VERSION_STR
-            REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_STR[\t ]+\"([0-9])+\\.([0-9])+\\.([0-9])+\".*")
-    string(REGEX REPLACE "^.*OPENSSL_VERSION_STR[\t ]+\"([0-9]+\\.[0-9]+\\.[0-9]+)\".*$"
-            "\\1" OPENSSL_VERSION_STR "${OPENSSL_VERSION_STR}")
-
-    set(OPENSSL_VERSION "${OPENSSL_VERSION_STR}")
-
-    # Split OPENSSL_VERSION into OPENSSL_VERSION_MAJOR, OPENSSL_VERSION_MINOR and OPENSSL_VERSION_FIX
-    string(REGEX MATCHALL "([0-9])+" OPENSSL_VERSION_NUMBER "${OPENSSL_VERSION}")
-    list(POP_FRONT OPENSSL_VERSION_NUMBER
-            OPENSSL_VERSION_MAJOR
-            OPENSSL_VERSION_MINOR
-            OPENSSL_VERSION_FIX)
-
-    # Drop the temporaries so they do not linger in this scope.
-    unset(OPENSSL_VERSION_NUMBER)
-    unset(OPENSSL_VERSION_STR)
-  endif ()
-endif ()
-
-# Set CMake cache variables so that they can be referenced properly from everywhere.
-# The last argument of set(... CACHE INTERNAL <docstring>) is the cache entry's docstring.
-set(OPENSSL_VERSION "${OPENSSL_VERSION}" CACHE INTERNAL "")
-set(OPENSSL_IS_BORING_SSL "${OPENSSL_IS_BORING_SSL}" CACHE INTERNAL 0)
--- a/contrib/boringssl-cmake/crypto_test_data.cc
+++ b/contrib/boringssl-cmake/crypto_test_data.cc
--- a/contrib/boringssl-cmake/err_data.c
+++ b/contrib/boringssl-cmake/err_data.c
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/chacha/chacha-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/chacha/chacha-armv8.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
@ -1,782 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-#if __ARM_MAX_ARCH__>=7
-.text
-
-// Constant table read by the key-expansion code through x3. The first two
-// rows are loaded together into v1/v2 at the start of key setup; the third
-// row is loaded into v1 after the 128-bit loop finishes.
-.section	__TEXT,__const
-.align	5
-Lrcon:
-.long	0x01,0x01,0x01,0x01
-.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
-.long	0x1b,0x1b,0x1b,0x1b
-
-.text
-
-// aes_hw_set_encrypt_key: expand a user key into an encryption key schedule.
-// Register use, as read from the code below:
-//   x0 = user key (read), w1 = key length in bits, x2 = key schedule (written).
-// Result in x0: 0 on success, -1 if x0 or x2 is zero, -2 if w1 is below 128,
-// above 256, or has any of its low six bits set.
-// On success the round count (10, 12 or 14) is stored with `str w12,[x2]` at
-// Ldone; aes_hw_set_decrypt_key treats that location as offset 240 from the
-// start of the schedule.
-.globl	_aes_hw_set_encrypt_key
-.private_extern	_aes_hw_set_encrypt_key
-
-.align	5
-_aes_hw_set_encrypt_key:
-Lenc_key:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	// x3 carries the return code; it is copied to x0 at Lenc_key_abort.
-	mov	x3,#-1
-	cmp	x0,#0
-	b.eq	Lenc_key_abort
-	cmp	x2,#0
-	b.eq	Lenc_key_abort
-	mov	x3,#-2
-	cmp	w1,#128
-	b.lt	Lenc_key_abort
-	cmp	w1,#256
-	b.gt	Lenc_key_abort
-	tst	w1,#0x3f
-	b.ne	Lenc_key_abort
-
-	adrp	x3,Lrcon@PAGE
-	add	x3,x3,Lrcon@PAGEOFF
-	// Flags from this compare select the 128/192/256 path at the branches below.
-	cmp	w1,#192
-
-	eor	v0.16b,v0.16b,v0.16b
-	ld1	{v3.16b},[x0],#16
-	mov	w1,#8		// reuse w1
-	ld1	{v1.4s,v2.4s},[x3],#32
-
-	b.lt	Loop128
-	b.eq	L192
-	b	L256
-
-.align	4
-Loop128:
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-	b.ne	Loop128
-
-	// Switch v1 to the third Lrcon row (0x1b) for the two unrolled rounds below.
-	ld1	{v1.4s},[x3]
-
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	eor	v3.16b,v3.16b,v6.16b
-	st1	{v3.4s},[x2]
-	// Advance x2 to the location where Ldone stores the round count.
-	add	x2,x2,#0x50
-
-	mov	w12,#10
-	b	Ldone
-
-.align	4
-L192:
-	ld1	{v4.8b},[x0],#8
-	movi	v6.16b,#8			// borrow v6.16b
-	st1	{v3.4s},[x2],#16
-	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
-
-Loop192:
-	tbl	v6.16b,{v4.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v4.8b},[x2],#8
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-
-	dup	v5.4s,v3.s[3]
-	eor	v5.16b,v5.16b,v4.16b
-	eor	v6.16b,v6.16b,v1.16b
-	ext	v4.16b,v0.16b,v4.16b,#12
-	shl	v1.16b,v1.16b,#1
-	eor	v4.16b,v4.16b,v5.16b
-	eor	v3.16b,v3.16b,v6.16b
-	eor	v4.16b,v4.16b,v6.16b
-	st1	{v3.4s},[x2],#16
-	b.ne	Loop192
-
-	mov	w12,#12
-	add	x2,x2,#0x20
-	b	Ldone
-
-.align	4
-L256:
-	ld1	{v4.16b},[x0]
-	mov	w1,#7
-	mov	w12,#14
-	st1	{v3.4s},[x2],#16
-
-Loop256:
-	tbl	v6.16b,{v4.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v4.4s},[x2],#16
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-	st1	{v3.4s},[x2],#16
-	b.eq	Ldone
-
-	dup	v6.4s,v3.s[3]		// just splat
-	ext	v5.16b,v0.16b,v4.16b,#12
-	aese	v6.16b,v0.16b
-
-	eor	v4.16b,v4.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v4.16b,v4.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v4.16b,v4.16b,v5.16b
-
-	eor	v4.16b,v4.16b,v6.16b
-	b	Loop256
-
-Ldone:
-	// Store the round count held in w12 and report success.
-	str	w12,[x2]
-	mov	x3,#0
-
-Lenc_key_abort:
-	mov	x0,x3			// return value
-	// Only x29 is reloaded; x30 is left as it was (see the PAuth note above).
-	ldr	x29,[sp],#16
-	ret
-
-
-// aes_hw_set_decrypt_key: build a decryption key schedule.
-// Calls Lenc_key (the body of aes_hw_set_encrypt_key) with the caller's
-// registers untouched, and returns its non-zero result unchanged on failure.
-// On success it reverses the order of the round keys in place, applying
-// aesimc to every key except the first and last, and returns 0 in x0.
-.globl	_aes_hw_set_decrypt_key
-.private_extern	_aes_hw_set_decrypt_key
-
-.align	5
-_aes_hw_set_decrypt_key:
-	AARCH64_SIGN_LINK_REGISTER
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	bl	Lenc_key
-
-	cmp	x0,#0
-	b.ne	Ldec_key_abort
-
-	// After Lenc_key, x2 points at the stored round count and x12 holds it.
-	sub	x2,x2,#240		// restore original x2
-	mov	x4,#-16
-	add	x0,x2,x12,lsl#4	// end of key schedule
-
-	// Swap the first and last round keys without transforming them.
-	ld1	{v0.4s},[x2]
-	ld1	{v1.4s},[x0]
-	st1	{v0.4s},[x0],x4
-	st1	{v1.4s},[x2],#16
-
-	// Walk inward from both ends (x2 upward, x0 downward), swapping pairs
-	// of round keys and applying aesimc to each.
-Loop_imc:
-	ld1	{v0.4s},[x2]
-	ld1	{v1.4s},[x0]
-	aesimc	v0.16b,v0.16b
-	aesimc	v1.16b,v1.16b
-	st1	{v0.4s},[x0],x4
-	st1	{v1.4s},[x2],#16
-	cmp	x0,x2
-	b.hi	Loop_imc
-
-	// Middle round key: transformed and written back to [x0].
-	ld1	{v0.4s},[x2]
-	aesimc	v0.16b,v0.16b
-	st1	{v0.4s},[x0]
-
-	eor	x0,x0,x0		// return value
-Ldec_key_abort:
-	ldp	x29,x30,[sp],#16
-	AARCH64_VALIDATE_LINK_REGISTER
-	ret
-
-// aes_hw_encrypt: encrypt one 16-byte block.
-// Register use, as read from the code below:
-//   x0 = input block, x1 = output block, x2 = key schedule; the round count
-//   is read from offset 240 of the schedule.
-// Uses no stack and makes no calls.
-.globl	_aes_hw_encrypt
-.private_extern	_aes_hw_encrypt
-
-.align	5
-_aes_hw_encrypt:
-	AARCH64_VALID_CALL_TARGET
-	ldr	w3,[x2,#240]
-	ld1	{v0.4s},[x2],#16
-	ld1	{v2.16b},[x0]
-	sub	w3,w3,#2
-	ld1	{v1.4s},[x2],#16
-
-	// Two rounds per iteration, alternating the round keys held in v0 and v1.
-Loop_enc:
-	aese	v2.16b,v0.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2],#16
-	subs	w3,w3,#2
-	aese	v2.16b,v1.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v1.4s},[x2],#16
-	b.gt	Loop_enc
-
-	// Last two rounds: the final aese is not followed by aesmc, and the last
-	// round key is applied with eor.
-	aese	v2.16b,v0.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2]
-	aese	v2.16b,v1.16b
-	eor	v2.16b,v2.16b,v0.16b
-
-	st1	{v2.16b},[x1]
-	ret
-
-// aes_hw_decrypt: decrypt one 16-byte block.
-// Register use, as read from the code below:
-//   x0 = input block, x1 = output block, x2 = key schedule; the round count
-//   is read from offset 240 of the schedule.
-// Same structure as aes_hw_encrypt, using aesd/aesimc in place of aese/aesmc.
-.globl	_aes_hw_decrypt
-.private_extern	_aes_hw_decrypt
-
-.align	5
-_aes_hw_decrypt:
-	AARCH64_VALID_CALL_TARGET
-	ldr	w3,[x2,#240]
-	ld1	{v0.4s},[x2],#16
-	ld1	{v2.16b},[x0]
-	sub	w3,w3,#2
-	ld1	{v1.4s},[x2],#16
-
-	// Two rounds per iteration, alternating the round keys held in v0 and v1.
-Loop_dec:
-	aesd	v2.16b,v0.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2],#16
-	subs	w3,w3,#2
-	aesd	v2.16b,v1.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v1.4s},[x2],#16
-	b.gt	Loop_dec
-
-	// Last two rounds: the final aesd is not followed by aesimc, and the last
-	// round key is applied with eor.
-	aesd	v2.16b,v0.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2]
-	aesd	v2.16b,v1.16b
-	eor	v2.16b,v2.16b,v0.16b
-
-	st1	{v2.16b},[x1]
-	ret
-
-// aes_hw_cbc_encrypt: CBC-mode encryption or decryption.
-// Register use, as read from the code below:
-//   x0 = input, x1 = output, x2 = length in bytes, x3 = key schedule (round
-//   count read from offset 240), x4 = IV (loaded on entry, written back at
-//   Lcbc_done), w5 = direction (zero selects the decrypt path Lcbc_dec).
-// A length below 16 returns immediately through Lcbc_abort without touching
-// the IV; otherwise the length is rounded down to a multiple of 16.
-.globl	_aes_hw_cbc_encrypt
-.private_extern	_aes_hw_cbc_encrypt
-
-.align	5
-_aes_hw_cbc_encrypt:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	subs	x2,x2,#16
-	mov	x8,#16
-	b.lo	Lcbc_abort
-	// x8 is the post-increment step for input loads; it becomes 0 when no
-	// further input block remains.
-	csel	x8,xzr,x8,eq
-
-	// The flags set here are consumed by `b.eq Lcbc_dec` below; none of the
-	// instructions in between modify them.
-	cmp	w5,#0			// en- or decrypting?
-	ldr	w5,[x3,#240]
-	and	x2,x2,#-16
-	ld1	{v6.16b},[x4]
-	ld1	{v0.16b},[x0],x8
-
-	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
-	sub	w5,w5,#6
-	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
-	sub	w5,w5,#2
-	ld1	{v18.4s,v19.4s},[x7],#32
-	ld1	{v20.4s,v21.4s},[x7],#32
-	ld1	{v22.4s,v23.4s},[x7],#32
-	ld1	{v7.4s},[x7]
-
-	add	x7,x3,#32
-	mov	w6,w5
-	b.eq	Lcbc_dec
-
-	// Encrypt path. w5 now holds (round count - 8); a value of 2 selects the
-	// dedicated Lcbc_enc128 loop.
-	cmp	w5,#2
-	eor	v0.16b,v0.16b,v6.16b
-	eor	v5.16b,v16.16b,v7.16b
-	b.eq	Lcbc_enc128
-
-	ld1	{v2.4s,v3.4s},[x7]
-	add	x7,x3,#16
-	add	x6,x3,#16*4
-	add	x12,x3,#16*5
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	add	x14,x3,#16*6
-	add	x3,x3,#16*7
-	b	Lenter_cbc_enc
-
-.align	4
-Loop_cbc_enc:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	st1	{v6.16b},[x1],#16
-Lenter_cbc_enc:
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v2.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.4s},[x6]
-	// w5 == 4 skips the two extra rounds below and goes straight to Lcbc_enc192.
-	cmp	w5,#4
-	aese	v0.16b,v3.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x12]
-	b.eq	Lcbc_enc192
-
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.4s},[x14]
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x3]
-	nop
-
-Lcbc_enc192:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	subs	x2,x2,#16
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	csel	x8,xzr,x8,eq
-	aese	v0.16b,v18.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v19.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.16b},[x0],x8
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	eor	v16.16b,v16.16b,v5.16b
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v23.16b
-	eor	v6.16b,v0.16b,v7.16b
-	b.hs	Loop_cbc_enc
-
-	st1	{v6.16b},[x1],#16
-	b	Lcbc_done
-
-.align	5
-Lcbc_enc128:
-	ld1	{v2.4s,v3.4s},[x7]
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	b	Lenter_cbc_enc128
-Loop_cbc_enc128:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	st1	{v6.16b},[x1],#16
-Lenter_cbc_enc128:
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	subs	x2,x2,#16
-	aese	v0.16b,v2.16b
-	aesmc	v0.16b,v0.16b
-	csel	x8,xzr,x8,eq
-	aese	v0.16b,v3.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v18.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v19.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.16b},[x0],x8
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	eor	v16.16b,v16.16b,v5.16b
-	aese	v0.16b,v23.16b
-	eor	v6.16b,v0.16b,v7.16b
-	b.hs	Loop_cbc_enc128
-
-	st1	{v6.16b},[x1],#16
-	b	Lcbc_done
-.align	5
-	// Decrypt path: Loop3x_cbc_dec handles three blocks per iteration and
-	// Lcbc_dec_tail handles the final one or two.
-Lcbc_dec:
-	ld1	{v18.16b},[x0],#16
-	subs	x2,x2,#32		// bias
-	add	w6,w5,#2
-	orr	v3.16b,v0.16b,v0.16b
-	orr	v1.16b,v0.16b,v0.16b
-	orr	v19.16b,v18.16b,v18.16b
-	b.lo	Lcbc_dec_tail
-
-	orr	v1.16b,v18.16b,v18.16b
-	ld1	{v18.16b},[x0],#16
-	orr	v2.16b,v0.16b,v0.16b
-	orr	v3.16b,v1.16b,v1.16b
-	orr	v19.16b,v18.16b,v18.16b
-
-Loop3x_cbc_dec:
-	aesd	v0.16b,v16.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aesd	v0.16b,v17.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	Loop3x_cbc_dec
-
-	aesd	v0.16b,v16.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	eor	v4.16b,v6.16b,v7.16b
-	subs	x2,x2,#0x30
-	eor	v5.16b,v2.16b,v7.16b
-	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
-	aesd	v0.16b,v17.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	eor	v17.16b,v3.16b,v7.16b
-	add	x0,x0,x6		// x0 is adjusted in such way that
-					// at exit from the loop v1.16b-v18.16b
-					// are loaded with last "words"
-	orr	v6.16b,v19.16b,v19.16b
-	mov	x7,x3
-	aesd	v0.16b,v20.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v20.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v20.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v2.16b},[x0],#16
-	aesd	v0.16b,v21.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v21.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v21.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v3.16b},[x0],#16
-	aesd	v0.16b,v22.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v22.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v22.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v19.16b},[x0],#16
-	aesd	v0.16b,v23.16b
-	aesd	v1.16b,v23.16b
-	aesd	v18.16b,v23.16b
-	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
-	add	w6,w5,#2
-	eor	v4.16b,v4.16b,v0.16b
-	eor	v5.16b,v5.16b,v1.16b
-	eor	v18.16b,v18.16b,v17.16b
-	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
-	st1	{v4.16b},[x1],#16
-	orr	v0.16b,v2.16b,v2.16b
-	st1	{v5.16b},[x1],#16
-	orr	v1.16b,v3.16b,v3.16b
-	st1	{v18.16b},[x1],#16
-	orr	v18.16b,v19.16b,v19.16b
-	b.hs	Loop3x_cbc_dec
-
-	// x2 == -0x30 here means nothing is left for the tail.
-	cmn	x2,#0x30
-	b.eq	Lcbc_done
-	nop
-
-Lcbc_dec_tail:
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	Lcbc_dec_tail
-
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	aesd	v1.16b,v20.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v20.16b
-	aesimc	v18.16b,v18.16b
-	// Flags from this compare decide between one and two remaining blocks at
-	// `b.eq Lcbc_dec_one` below.
-	cmn	x2,#0x20
-	aesd	v1.16b,v21.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v21.16b
-	aesimc	v18.16b,v18.16b
-	eor	v5.16b,v6.16b,v7.16b
-	aesd	v1.16b,v22.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v22.16b
-	aesimc	v18.16b,v18.16b
-	eor	v17.16b,v3.16b,v7.16b
-	aesd	v1.16b,v23.16b
-	aesd	v18.16b,v23.16b
-	b.eq	Lcbc_dec_one
-	eor	v5.16b,v5.16b,v1.16b
-	eor	v17.16b,v17.16b,v18.16b
-	orr	v6.16b,v19.16b,v19.16b
-	st1	{v5.16b},[x1],#16
-	st1	{v17.16b},[x1],#16
-	b	Lcbc_done
-
-Lcbc_dec_one:
-	eor	v5.16b,v5.16b,v18.16b
-	orr	v6.16b,v19.16b,v19.16b
-	st1	{v5.16b},[x1],#16
-
-Lcbc_done:
-	// Write the updated IV (v6) back through x4.
-	st1	{v6.16b},[x4]
-Lcbc_abort:
-	// Only x29 is reloaded; x30 is left as it was (see the PAuth note above).
-	ldr	x29,[sp],#16
-	ret
-
-// aes_hw_ctr32_encrypt_blocks: CTR-mode processing with a 32-bit counter.
-// Register use, as read from the code below:
-//   x0 = input, x1 = output, x2 = number of 16-byte blocks, x3 = key schedule
-//   (round count read from offset 240), x4 = 16-byte counter block.
-// The counter is the 32-bit word at [x4,#12]; it is byte-reversed before being
-// incremented unless __ARMEB__ is defined. Nothing is stored back through x4.
-// Loop3x_ctr32 handles three blocks per iteration; Lctr32_tail handles the rest.
-.globl	_aes_hw_ctr32_encrypt_blocks
-.private_extern	_aes_hw_ctr32_encrypt_blocks
-
-.align	5
-_aes_hw_ctr32_encrypt_blocks:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	ldr	w5,[x3,#240]
-
-	ldr	w8, [x4, #12]
-	ld1	{v0.4s},[x4]
-
-	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
-	sub	w5,w5,#4
-	mov	x12,#16
-	// Flags from this compare feed both the csel below and `b.ls Lctr32_tail`.
-	cmp	x2,#2
-	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
-	sub	w5,w5,#2
-	ld1	{v20.4s,v21.4s},[x7],#32
-	ld1	{v22.4s,v23.4s},[x7],#32
-	ld1	{v7.4s},[x7]
-	add	x7,x3,#32
-	mov	w6,w5
-	// x12 is the input post-increment step used in the tail; 0 when x2 < 2.
-	csel	x12,xzr,x12,lo
-#ifndef __ARMEB__
-	rev	w8, w8
-#endif
-	orr	v1.16b,v0.16b,v0.16b
-	add	w10, w8, #1
-	orr	v18.16b,v0.16b,v0.16b
-	add	w8, w8, #2
-	orr	v6.16b,v0.16b,v0.16b
-	rev	w10, w10
-	mov	v1.s[3],w10
-	b.ls	Lctr32_tail
-	rev	w12, w8
-	sub	x2,x2,#3		// bias
-	mov	v18.s[3],w12
-	b	Loop3x_ctr32
-
-.align	4
-Loop3x_ctr32:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	aese	v18.16b,v16.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	aese	v18.16b,v17.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	Loop3x_ctr32
-
-	// Final rounds for the three in-flight blocks (continued in v4, v5, v17),
-	// interleaved with loading the next input and rebuilding v0, v1, v18 from
-	// the saved counter block v6 with the next three counter values.
-	aese	v0.16b,v16.16b
-	aesmc	v4.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v5.16b,v1.16b
-	ld1	{v2.16b},[x0],#16
-	orr	v0.16b,v6.16b,v6.16b
-	aese	v18.16b,v16.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v3.16b},[x0],#16
-	orr	v1.16b,v6.16b,v6.16b
-	aese	v4.16b,v17.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v17.16b
-	aesmc	v5.16b,v5.16b
-	ld1	{v19.16b},[x0],#16
-	mov	x7,x3
-	aese	v18.16b,v17.16b
-	aesmc	v17.16b,v18.16b
-	orr	v18.16b,v6.16b,v6.16b
-	add	w9,w8,#1
-	aese	v4.16b,v20.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v20.16b
-	aesmc	v5.16b,v5.16b
-	eor	v2.16b,v2.16b,v7.16b
-	add	w10,w8,#2
-	aese	v17.16b,v20.16b
-	aesmc	v17.16b,v17.16b
-	eor	v3.16b,v3.16b,v7.16b
-	add	w8,w8,#3
-	aese	v4.16b,v21.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v21.16b
-	aesmc	v5.16b,v5.16b
-	eor	v19.16b,v19.16b,v7.16b
-	rev	w9,w9
-	aese	v17.16b,v21.16b
-	aesmc	v17.16b,v17.16b
-	mov	v0.s[3], w9
-	rev	w10,w10
-	aese	v4.16b,v22.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v22.16b
-	aesmc	v5.16b,v5.16b
-	mov	v1.s[3], w10
-	rev	w12,w8
-	aese	v17.16b,v22.16b
-	aesmc	v17.16b,v17.16b
-	mov	v18.s[3], w12
-	subs	x2,x2,#3
-	aese	v4.16b,v23.16b
-	aese	v5.16b,v23.16b
-	aese	v17.16b,v23.16b
-
-	eor	v2.16b,v2.16b,v4.16b
-	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
-	st1	{v2.16b},[x1],#16
-	eor	v3.16b,v3.16b,v5.16b
-	mov	w6,w5
-	st1	{v3.16b},[x1],#16
-	eor	v19.16b,v19.16b,v17.16b
-	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
-	st1	{v19.16b},[x1],#16
-	b.hs	Loop3x_ctr32
-
-	// Undo the bias; zero means every block has been processed.
-	adds	x2,x2,#3
-	b.eq	Lctr32_done
-	cmp	x2,#1
-	mov	x12,#16
-	csel	x12,xzr,x12,eq
-
-Lctr32_tail:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	Lctr32_tail
-
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	// With x12 == 0 the second load below re-reads the same input block.
-	ld1	{v2.16b},[x0],x12
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v20.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v3.16b},[x0]
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v21.16b
-	aesmc	v1.16b,v1.16b
-	eor	v2.16b,v2.16b,v7.16b
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v22.16b
-	aesmc	v1.16b,v1.16b
-	eor	v3.16b,v3.16b,v7.16b
-	aese	v0.16b,v23.16b
-	aese	v1.16b,v23.16b
-
-	// The second output block is skipped when x2 == 1.
-	cmp	x2,#1
-	eor	v2.16b,v2.16b,v0.16b
-	eor	v3.16b,v3.16b,v1.16b
-	st1	{v2.16b},[x1],#16
-	b.eq	Lctr32_done
-	st1	{v3.16b},[x1]
-
-Lctr32_done:
-	// Only x29 is reloaded; x30 is left as it was (see the PAuth note above).
-	ldr	x29,[sp],#16
-	ret
-
-#endif
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/armv8-mont.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/armv8-mont.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
@ -1,343 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-.globl	_gcm_init_neon
-.private_extern	_gcm_init_neon
-
-.align	4
-_gcm_init_neon:
-	AARCH64_VALID_CALL_TARGET
-	// This function is adapted from gcm_init_v8. xC2 is t3.
-	ld1	{v17.2d}, [x1]			// load H
-	movi	v19.16b, #0xe1
-	shl	v19.2d, v19.2d, #57		// 0xc2.0
-	ext	v3.16b, v17.16b, v17.16b, #8
-	ushr	v18.2d, v19.2d, #63
-	dup	v17.4s, v17.s[1]
-	ext	v16.16b, v18.16b, v19.16b, #8	// t0=0xc2....01
-	ushr	v18.2d, v3.2d, #63
-	sshr	v17.4s, v17.4s, #31		// broadcast carry bit
-	and	v18.16b, v18.16b, v16.16b
-	shl	v3.2d, v3.2d, #1
-	ext	v18.16b, v18.16b, v18.16b, #8
-	and	v16.16b, v16.16b, v17.16b
-	orr	v3.16b, v3.16b, v18.16b	// H<<<=1
-	eor	v5.16b, v3.16b, v16.16b	// twisted H
-	st1	{v5.2d}, [x0]			// store Htable[0]
-	ret
-
-
-.globl	_gcm_gmult_neon
-.private_extern	_gcm_gmult_neon
-
-.align	4
-_gcm_gmult_neon:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v3.16b}, [x0]		// load Xi
-	ld1	{v5.1d}, [x1], #8		// load twisted H
-	ld1	{v6.1d}, [x1]
-	adrp	x9, Lmasks@PAGE		// load constants
-	add	x9, x9, Lmasks@PAGEOFF
-	ld1	{v24.2d, v25.2d}, [x9]
-	rev64	v3.16b, v3.16b		// byteswap Xi
-	ext	v3.16b, v3.16b, v3.16b, #8
-	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing
-
-	mov	x3, #16
-	b	Lgmult_neon
-
-
-.globl	_gcm_ghash_neon
-.private_extern	_gcm_ghash_neon
-
-.align	4
-_gcm_ghash_neon:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v0.16b}, [x0]		// load Xi
-	ld1	{v5.1d}, [x1], #8		// load twisted H
-	ld1	{v6.1d}, [x1]
-	adrp	x9, Lmasks@PAGE		// load constants
-	add	x9, x9, Lmasks@PAGEOFF
-	ld1	{v24.2d, v25.2d}, [x9]
-	rev64	v0.16b, v0.16b		// byteswap Xi
-	ext	v0.16b, v0.16b, v0.16b, #8
-	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing
-
-Loop_neon:
-	ld1	{v3.16b}, [x2], #16	// load inp
-	rev64	v3.16b, v3.16b		// byteswap inp
-	ext	v3.16b, v3.16b, v3.16b, #8
-	eor	v3.16b, v3.16b, v0.16b	// inp ^= Xi
-
-Lgmult_neon:
-	// Split the input into v3 and v4. (The upper halves are unused,
-	// so it is okay to leave them alone.)
-	ins	v4.d[0], v3.d[1]
-	ext	v16.8b, v5.8b, v5.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v3.8b		// F = A1*B
-	ext	v0.8b, v3.8b, v3.8b, #1		// B1
-	pmull	v0.8h, v5.8b, v0.8b		// E = A*B1
-	ext	v17.8b, v5.8b, v5.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v3.8b		// H = A2*B
-	ext	v19.8b, v3.8b, v3.8b, #2	// B2
-	pmull	v19.8h, v5.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v5.8b, v5.8b, #3	// A3
-	eor	v16.16b, v16.16b, v0.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v3.8b		// J = A3*B
-	ext	v0.8b, v3.8b, v3.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v0.8h, v5.8b, v0.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v3.8b, v3.8b, #4	// B4
-	eor	v18.16b, v18.16b, v0.16b	// N = I + J
-	pmull	v19.8h, v5.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v0.8h, v5.8b, v3.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v0.16b, v0.16b, v16.16b
-	eor	v0.16b, v0.16b, v18.16b
-	eor	v3.8b, v3.8b, v4.8b	// Karatsuba pre-processing
-	ext	v16.8b, v7.8b, v7.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v3.8b		// F = A1*B
-	ext	v1.8b, v3.8b, v3.8b, #1		// B1
-	pmull	v1.8h, v7.8b, v1.8b		// E = A*B1
-	ext	v17.8b, v7.8b, v7.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v3.8b		// H = A2*B
-	ext	v19.8b, v3.8b, v3.8b, #2	// B2
-	pmull	v19.8h, v7.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v7.8b, v7.8b, #3	// A3
-	eor	v16.16b, v16.16b, v1.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v3.8b		// J = A3*B
-	ext	v1.8b, v3.8b, v3.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v1.8h, v7.8b, v1.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v3.8b, v3.8b, #4	// B4
-	eor	v18.16b, v18.16b, v1.16b	// N = I + J
-	pmull	v19.8h, v7.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v1.8h, v7.8b, v3.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v1.16b, v1.16b, v16.16b
-	eor	v1.16b, v1.16b, v18.16b
-	ext	v16.8b, v6.8b, v6.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v4.8b		// F = A1*B
-	ext	v2.8b, v4.8b, v4.8b, #1		// B1
-	pmull	v2.8h, v6.8b, v2.8b		// E = A*B1
-	ext	v17.8b, v6.8b, v6.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v4.8b		// H = A2*B
-	ext	v19.8b, v4.8b, v4.8b, #2	// B2
-	pmull	v19.8h, v6.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v6.8b, v6.8b, #3	// A3
-	eor	v16.16b, v16.16b, v2.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v4.8b		// J = A3*B
-	ext	v2.8b, v4.8b, v4.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v2.8h, v6.8b, v2.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v4.8b, v4.8b, #4	// B4
-	eor	v18.16b, v18.16b, v2.16b	// N = I + J
-	pmull	v19.8h, v6.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v2.8h, v6.8b, v4.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v2.16b, v2.16b, v16.16b
-	eor	v2.16b, v2.16b, v18.16b
-	ext	v16.16b, v0.16b, v2.16b, #8
-	eor	v1.16b, v1.16b, v0.16b	// Karatsuba post-processing
-	eor	v1.16b, v1.16b, v2.16b
-	eor	v1.16b, v1.16b, v16.16b	// Xm overlaps Xh.lo and Xl.hi
-	ins	v0.d[1], v1.d[0]		// Xh|Xl - 256-bit result
-	// This is a no-op due to the ins instruction below.
-	// ins	v2.d[0], v1.d[1]
-
-	// equivalent of reduction_avx from ghash-x86_64.pl
-	shl	v17.2d, v0.2d, #57		// 1st phase
-	shl	v18.2d, v0.2d, #62
-	eor	v18.16b, v18.16b, v17.16b	//
-	shl	v17.2d, v0.2d, #63
-	eor	v18.16b, v18.16b, v17.16b	//
-	// Note Xm contains {Xl.d[1], Xh.d[0]}.
-	eor	v18.16b, v18.16b, v1.16b
-	ins	v0.d[1], v18.d[0]		// Xl.d[1] ^= t2.d[0]
-	ins	v2.d[0], v18.d[1]		// Xh.d[0] ^= t2.d[1]
-
-	ushr	v18.2d, v0.2d, #1		// 2nd phase
-	eor	v2.16b, v2.16b,v0.16b
-	eor	v0.16b, v0.16b,v18.16b	//
-	ushr	v18.2d, v18.2d, #6
-	ushr	v0.2d, v0.2d, #1		//
-	eor	v0.16b, v0.16b, v2.16b	//
-	eor	v0.16b, v0.16b, v18.16b	//
-
-	subs	x3, x3, #16
-	bne	Loop_neon
-
-	rev64	v0.16b, v0.16b		// byteswap Xi and write
-	ext	v0.16b, v0.16b, v0.16b, #8
-	st1	{v0.16b}, [x0]
-
-	ret
-
-
-.section	__TEXT,__const
-.align	4
-Lmasks:
-.quad	0x0000ffffffffffff	// k48
-.quad	0x00000000ffffffff	// k32
-.quad	0x000000000000ffff	// k16
-.quad	0x0000000000000000	// k0
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
@ -1,249 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-.globl	_gcm_init_v8
-.private_extern	_gcm_init_v8
-
-.align	4
-_gcm_init_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v17.2d},[x1]		//load input H
-	movi	v19.16b,#0xe1
-	shl	v19.2d,v19.2d,#57		//0xc2.0
-	ext	v3.16b,v17.16b,v17.16b,#8
-	ushr	v18.2d,v19.2d,#63
-	dup	v17.4s,v17.s[1]
-	ext	v16.16b,v18.16b,v19.16b,#8		//t0=0xc2....01
-	ushr	v18.2d,v3.2d,#63
-	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
-	and	v18.16b,v18.16b,v16.16b
-	shl	v3.2d,v3.2d,#1
-	ext	v18.16b,v18.16b,v18.16b,#8
-	and	v16.16b,v16.16b,v17.16b
-	orr	v3.16b,v3.16b,v18.16b		//H<<<=1
-	eor	v20.16b,v3.16b,v16.16b		//twisted H
-	st1	{v20.2d},[x0],#16		//store Htable[0]
-
-	//calculate H^2
-	ext	v16.16b,v20.16b,v20.16b,#8		//Karatsuba pre-processing
-	pmull	v0.1q,v20.1d,v20.1d
-	eor	v16.16b,v16.16b,v20.16b
-	pmull2	v2.1q,v20.2d,v20.2d
-	pmull	v1.1q,v16.1d,v16.1d
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v22.16b,v0.16b,v18.16b
-
-	ext	v17.16b,v22.16b,v22.16b,#8		//Karatsuba pre-processing
-	eor	v17.16b,v17.16b,v22.16b
-	ext	v21.16b,v16.16b,v17.16b,#8		//pack Karatsuba pre-processed
-	st1	{v21.2d,v22.2d},[x0]		//store Htable[1..2]
-
-	ret
-
-.globl	_gcm_gmult_v8
-.private_extern	_gcm_gmult_v8
-
-.align	4
-_gcm_gmult_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v17.2d},[x0]		//load Xi
-	movi	v19.16b,#0xe1
-	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
-	shl	v19.2d,v19.2d,#57
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ext	v3.16b,v17.16b,v17.16b,#8
-
-	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
-	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
-	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v0.16b,v0.16b,v18.16b
-
-#ifndef __ARMEB__
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v0.16b,v0.16b,v0.16b,#8
-	st1	{v0.2d},[x0]		//write out Xi
-
-	ret
-
-.globl	_gcm_ghash_v8
-.private_extern	_gcm_ghash_v8
-
-.align	4
-_gcm_ghash_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v0.2d},[x0]		//load [rotated] Xi
-						//"[rotated]" means that
-						//loaded value would have
-						//to be rotated in order to
-						//make it appear as in
-						//algorithm specification
-	subs	x3,x3,#32		//see if x3 is 32 or larger
-	mov	x12,#16		//x12 is used as post-
-						//increment for input pointer;
-						//as loop is modulo-scheduled
-						//x12 is zeroed just in time
-						//to preclude overstepping
-						//inp[len], which means that
-						//last block[s] are actually
-						//loaded twice, but last
-						//copy is not processed
-	ld1	{v20.2d,v21.2d},[x1],#32	//load twisted H, ..., H^2
-	movi	v19.16b,#0xe1
-	ld1	{v22.2d},[x1]
-	csel	x12,xzr,x12,eq			//is it time to zero x12?
-	ext	v0.16b,v0.16b,v0.16b,#8		//rotate Xi
-	ld1	{v16.2d},[x2],#16	//load [rotated] I[0]
-	shl	v19.2d,v19.2d,#57		//compose 0xc2.0 constant
-#ifndef __ARMEB__
-	rev64	v16.16b,v16.16b
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v3.16b,v16.16b,v16.16b,#8		//rotate I[0]
-	b.lo	Lodd_tail_v8		//x3 was less than 32
-	ld1	{v17.2d},[x2],x12	//load [rotated] I[1]
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ext	v7.16b,v17.16b,v17.16b,#8
-	eor	v3.16b,v3.16b,v0.16b		//I[i]^=Xi
-	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
-	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
-	pmull2	v6.1q,v20.2d,v7.2d
-	b	Loop_mod2x_v8
-
-.align	4
-Loop_mod2x_v8:
-	ext	v18.16b,v3.16b,v3.16b,#8
-	subs	x3,x3,#32		//is there more data?
-	pmull	v0.1q,v22.1d,v3.1d		//H^2.lo·Xi.lo
-	csel	x12,xzr,x12,lo			//is it time to zero x12?
-
-	pmull	v5.1q,v21.1d,v17.1d
-	eor	v18.16b,v18.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v22.2d,v3.2d		//H^2.hi·Xi.hi
-	eor	v0.16b,v0.16b,v4.16b		//accumulate
-	pmull2	v1.1q,v21.2d,v18.2d		//(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
-	ld1	{v16.2d},[x2],x12	//load [rotated] I[i+2]
-
-	eor	v2.16b,v2.16b,v6.16b
-	csel	x12,xzr,x12,eq			//is it time to zero x12?
-	eor	v1.16b,v1.16b,v5.16b
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	ld1	{v17.2d},[x2],x12	//load [rotated] I[i+3]
-#ifndef __ARMEB__
-	rev64	v16.16b,v16.16b
-#endif
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	ext	v7.16b,v17.16b,v17.16b,#8
-	ext	v3.16b,v16.16b,v16.16b,#8
-	eor	v0.16b,v1.16b,v18.16b
-	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
-	eor	v3.16b,v3.16b,v2.16b		//accumulate v3.16b early
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v3.16b,v3.16b,v18.16b
-	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
-	eor	v3.16b,v3.16b,v0.16b
-	pmull2	v6.1q,v20.2d,v7.2d
-	b.hs	Loop_mod2x_v8		//there was at least 32 more bytes
-
-	eor	v2.16b,v2.16b,v18.16b
-	ext	v3.16b,v16.16b,v16.16b,#8		//re-construct v3.16b
-	adds	x3,x3,#32		//re-construct x3
-	eor	v0.16b,v0.16b,v2.16b		//re-construct v0.16b
-	b.eq	Ldone_v8		//is x3 zero?
-Lodd_tail_v8:
-	ext	v18.16b,v0.16b,v0.16b,#8
-	eor	v3.16b,v3.16b,v0.16b		//inp^=Xi
-	eor	v17.16b,v16.16b,v18.16b		//v17.16b is rotated inp^Xi
-
-	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
-	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
-	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v0.16b,v0.16b,v18.16b
-
-Ldone_v8:
-#ifndef __ARMEB__
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v0.16b,v0.16b,v0.16b,#8
-	st1	{v0.2d},[x0]		//write out Xi
-
-	ret
-
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/test/trampoline-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/test/trampoline-armv8.S
@ -1,758 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
-// with |argv|, then saves the callee-saved registers into |state|. It returns
-// the result of |func|. The |unwind| argument is unused.
-// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
-//                              const uint64_t *argv, size_t argc,
-//                              uint64_t unwind);
-
-.globl	_abi_test_trampoline
-.private_extern	_abi_test_trampoline
-.align	4
-_abi_test_trampoline:
-Labi_test_trampoline_begin:
-	AARCH64_SIGN_LINK_REGISTER
-	// Stack layout (low to high addresses)
-	//   x29,x30 (16 bytes)
-	//    d8-d15 (64 bytes)
-	//   x19-x28 (80 bytes)
-	//    x1 (8 bytes)
-	//   padding (8 bytes)
-	stp	x29, x30, [sp, #-176]!
-	mov	x29, sp
-
-	// Saved callee-saved registers and |state|.
-	stp	d8, d9, [sp, #16]
-	stp	d10, d11, [sp, #32]
-	stp	d12, d13, [sp, #48]
-	stp	d14, d15, [sp, #64]
-	stp	x19, x20, [sp, #80]
-	stp	x21, x22, [sp, #96]
-	stp	x23, x24, [sp, #112]
-	stp	x25, x26, [sp, #128]
-	stp	x27, x28, [sp, #144]
-	str	x1, [sp, #160]
-
-	// Load registers from |state|, with the exception of x29. x29 is the
-	// frame pointer and also callee-saved, but AAPCS64 allows platforms to
-	// mandate that x29 always point to a frame. iOS64 does so, which means
-	// we cannot fill x29 with entropy without violating ABI rules
-	// ourselves. x29 is tested separately below.
-	ldp	d8, d9, [x1], #16
-	ldp	d10, d11, [x1], #16
-	ldp	d12, d13, [x1], #16
-	ldp	d14, d15, [x1], #16
-	ldp	x19, x20, [x1], #16
-	ldp	x21, x22, [x1], #16
-	ldp	x23, x24, [x1], #16
-	ldp	x25, x26, [x1], #16
-	ldp	x27, x28, [x1], #16
-
-	// Move parameters into temporary registers.
-	mov	x9, x0
-	mov	x10, x2
-	mov	x11, x3
-
-	// Load parameters into registers.
-	cbz	x11, Largs_done
-	ldr	x0, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x1, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x2, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x3, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x4, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x5, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x6, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x7, [x10], #8
-
-Largs_done:
-	blr	x9
-
-	// Reload |state| and store registers.
-	ldr	x1, [sp, #160]
-	stp	d8, d9, [x1], #16
-	stp	d10, d11, [x1], #16
-	stp	d12, d13, [x1], #16
-	stp	d14, d15, [x1], #16
-	stp	x19, x20, [x1], #16
-	stp	x21, x22, [x1], #16
-	stp	x23, x24, [x1], #16
-	stp	x25, x26, [x1], #16
-	stp	x27, x28, [x1], #16
-
-	// |func| is required to preserve x29, the frame pointer. We cannot load
-	// random values into x29 (see comment above), so compare it against the
-	// expected value and zero the field of |state| if corrupted.
-	mov	x9, sp
-	cmp	x29, x9
-	b.eq	Lx29_ok
-	str	xzr, [x1]
-
-Lx29_ok:
-	// Restore callee-saved registers.
-	ldp	d8, d9, [sp, #16]
-	ldp	d10, d11, [sp, #32]
-	ldp	d12, d13, [sp, #48]
-	ldp	d14, d15, [sp, #64]
-	ldp	x19, x20, [sp, #80]
-	ldp	x21, x22, [sp, #96]
-	ldp	x23, x24, [sp, #112]
-	ldp	x25, x26, [sp, #128]
-	ldp	x27, x28, [sp, #144]
-
-	ldp	x29, x30, [sp], #176
-	AARCH64_VALIDATE_LINK_REGISTER
-	ret
-
-
-.globl	_abi_test_clobber_x0
-.private_extern	_abi_test_clobber_x0
-.align	4
-_abi_test_clobber_x0:
-	AARCH64_VALID_CALL_TARGET
-	mov	x0, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x1
-.private_extern	_abi_test_clobber_x1
-.align	4
-_abi_test_clobber_x1:
-	AARCH64_VALID_CALL_TARGET
-	mov	x1, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x2
-.private_extern	_abi_test_clobber_x2
-.align	4
-_abi_test_clobber_x2:
-	AARCH64_VALID_CALL_TARGET
-	mov	x2, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x3
-.private_extern	_abi_test_clobber_x3
-.align	4
-_abi_test_clobber_x3:
-	AARCH64_VALID_CALL_TARGET
-	mov	x3, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x4
-.private_extern	_abi_test_clobber_x4
-.align	4
-_abi_test_clobber_x4:
-	AARCH64_VALID_CALL_TARGET
-	mov	x4, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x5
-.private_extern	_abi_test_clobber_x5
-.align	4
-_abi_test_clobber_x5:
-	AARCH64_VALID_CALL_TARGET
-	mov	x5, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x6
-.private_extern	_abi_test_clobber_x6
-.align	4
-_abi_test_clobber_x6:
-	AARCH64_VALID_CALL_TARGET
-	mov	x6, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x7
-.private_extern	_abi_test_clobber_x7
-.align	4
-_abi_test_clobber_x7:
-	AARCH64_VALID_CALL_TARGET
-	mov	x7, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x8
-.private_extern	_abi_test_clobber_x8
-.align	4
-_abi_test_clobber_x8:
-	AARCH64_VALID_CALL_TARGET
-	mov	x8, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x9
-.private_extern	_abi_test_clobber_x9
-.align	4
-_abi_test_clobber_x9:
-	AARCH64_VALID_CALL_TARGET
-	mov	x9, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x10
-.private_extern	_abi_test_clobber_x10
-.align	4
-_abi_test_clobber_x10:
-	AARCH64_VALID_CALL_TARGET
-	mov	x10, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x11
-.private_extern	_abi_test_clobber_x11
-.align	4
-_abi_test_clobber_x11:
-	AARCH64_VALID_CALL_TARGET
-	mov	x11, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x12
-.private_extern	_abi_test_clobber_x12
-.align	4
-_abi_test_clobber_x12:
-	AARCH64_VALID_CALL_TARGET
-	mov	x12, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x13
-.private_extern	_abi_test_clobber_x13
-.align	4
-_abi_test_clobber_x13:
-	AARCH64_VALID_CALL_TARGET
-	mov	x13, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x14
-.private_extern	_abi_test_clobber_x14
-.align	4
-_abi_test_clobber_x14:
-	AARCH64_VALID_CALL_TARGET
-	mov	x14, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x15
-.private_extern	_abi_test_clobber_x15
-.align	4
-_abi_test_clobber_x15:
-	AARCH64_VALID_CALL_TARGET
-	mov	x15, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x16
-.private_extern	_abi_test_clobber_x16
-.align	4
-_abi_test_clobber_x16:
-	AARCH64_VALID_CALL_TARGET
-	mov	x16, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x17
-.private_extern	_abi_test_clobber_x17
-.align	4
-_abi_test_clobber_x17:
-	AARCH64_VALID_CALL_TARGET
-	mov	x17, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x19
-.private_extern	_abi_test_clobber_x19
-.align	4
-_abi_test_clobber_x19:
-	AARCH64_VALID_CALL_TARGET
-	mov	x19, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x20
-.private_extern	_abi_test_clobber_x20
-.align	4
-_abi_test_clobber_x20:
-	AARCH64_VALID_CALL_TARGET
-	mov	x20, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x21
-.private_extern	_abi_test_clobber_x21
-.align	4
-_abi_test_clobber_x21:
-	AARCH64_VALID_CALL_TARGET
-	mov	x21, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x22
-.private_extern	_abi_test_clobber_x22
-.align	4
-_abi_test_clobber_x22:
-	AARCH64_VALID_CALL_TARGET
-	mov	x22, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x23
-.private_extern	_abi_test_clobber_x23
-.align	4
-_abi_test_clobber_x23:
-	AARCH64_VALID_CALL_TARGET
-	mov	x23, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x24
-.private_extern	_abi_test_clobber_x24
-.align	4
-_abi_test_clobber_x24:
-	AARCH64_VALID_CALL_TARGET
-	mov	x24, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x25
-.private_extern	_abi_test_clobber_x25
-.align	4
-_abi_test_clobber_x25:
-	AARCH64_VALID_CALL_TARGET
-	mov	x25, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x26
-.private_extern	_abi_test_clobber_x26
-.align	4
-_abi_test_clobber_x26:
-	AARCH64_VALID_CALL_TARGET
-	mov	x26, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x27
-.private_extern	_abi_test_clobber_x27
-.align	4
-_abi_test_clobber_x27:
-	AARCH64_VALID_CALL_TARGET
-	mov	x27, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x28
-.private_extern	_abi_test_clobber_x28
-.align	4
-_abi_test_clobber_x28:
-	AARCH64_VALID_CALL_TARGET
-	mov	x28, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x29
-.private_extern	_abi_test_clobber_x29
-.align	4
-_abi_test_clobber_x29:
-	AARCH64_VALID_CALL_TARGET
-	mov	x29, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d0
-.private_extern	_abi_test_clobber_d0
-.align	4
-_abi_test_clobber_d0:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d0, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d1
-.private_extern	_abi_test_clobber_d1
-.align	4
-_abi_test_clobber_d1:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d1, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d2
-.private_extern	_abi_test_clobber_d2
-.align	4
-_abi_test_clobber_d2:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d2, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d3
-.private_extern	_abi_test_clobber_d3
-.align	4
-_abi_test_clobber_d3:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d3, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d4
-.private_extern	_abi_test_clobber_d4
-.align	4
-_abi_test_clobber_d4:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d4, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d5
-.private_extern	_abi_test_clobber_d5
-.align	4
-_abi_test_clobber_d5:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d5, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d6
-.private_extern	_abi_test_clobber_d6
-.align	4
-_abi_test_clobber_d6:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d6, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d7
-.private_extern	_abi_test_clobber_d7
-.align	4
-_abi_test_clobber_d7:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d7, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d8
-.private_extern	_abi_test_clobber_d8
-.align	4
-_abi_test_clobber_d8:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d8, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d9
-.private_extern	_abi_test_clobber_d9
-.align	4
-_abi_test_clobber_d9:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d9, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d10
-.private_extern	_abi_test_clobber_d10
-.align	4
-_abi_test_clobber_d10:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d10, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d11
-.private_extern	_abi_test_clobber_d11
-.align	4
-_abi_test_clobber_d11:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d11, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d12
-.private_extern	_abi_test_clobber_d12
-.align	4
-_abi_test_clobber_d12:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d12, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d13
-.private_extern	_abi_test_clobber_d13
-.align	4
-_abi_test_clobber_d13:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d13, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d14
-.private_extern	_abi_test_clobber_d14
-.align	4
-_abi_test_clobber_d14:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d14, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d15
-.private_extern	_abi_test_clobber_d15
-.align	4
-_abi_test_clobber_d15:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d15, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d16
-.private_extern	_abi_test_clobber_d16
-.align	4
-_abi_test_clobber_d16:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d16, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d17
-.private_extern	_abi_test_clobber_d17
-.align	4
-_abi_test_clobber_d17:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d17, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d18
-.private_extern	_abi_test_clobber_d18
-.align	4
-_abi_test_clobber_d18:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d18, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d19
-.private_extern	_abi_test_clobber_d19
-.align	4
-_abi_test_clobber_d19:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d19, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d20
-.private_extern	_abi_test_clobber_d20
-.align	4
-_abi_test_clobber_d20:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d20, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d21
-.private_extern	_abi_test_clobber_d21
-.align	4
-_abi_test_clobber_d21:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d21, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d22
-.private_extern	_abi_test_clobber_d22
-.align	4
-_abi_test_clobber_d22:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d22, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d23
-.private_extern	_abi_test_clobber_d23
-.align	4
-_abi_test_clobber_d23:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d23, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d24
-.private_extern	_abi_test_clobber_d24
-.align	4
-_abi_test_clobber_d24:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d24, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d25
-.private_extern	_abi_test_clobber_d25
-.align	4
-_abi_test_clobber_d25:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d25, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d26
-.private_extern	_abi_test_clobber_d26
-.align	4
-_abi_test_clobber_d26:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d26, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d27
-.private_extern	_abi_test_clobber_d27
-.align	4
-_abi_test_clobber_d27:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d27, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d28
-.private_extern	_abi_test_clobber_d28
-.align	4
-_abi_test_clobber_d28:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d28, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d29
-.private_extern	_abi_test_clobber_d29
-.align	4
-_abi_test_clobber_d29:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d29, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d30
-.private_extern	_abi_test_clobber_d30
-.align	4
-_abi_test_clobber_d30:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d30, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d31
-.private_extern	_abi_test_clobber_d31
-.align	4
-_abi_test_clobber_d31:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d31, xzr
-	ret
-
-
-.globl	_abi_test_clobber_v8_upper
-.private_extern	_abi_test_clobber_v8_upper
-.align	4
-_abi_test_clobber_v8_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v8.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v9_upper
-.private_extern	_abi_test_clobber_v9_upper
-.align	4
-_abi_test_clobber_v9_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v9.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v10_upper
-.private_extern	_abi_test_clobber_v10_upper
-.align	4
-_abi_test_clobber_v10_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v10.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v11_upper
-.private_extern	_abi_test_clobber_v11_upper
-.align	4
-_abi_test_clobber_v11_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v11.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v12_upper
-.private_extern	_abi_test_clobber_v12_upper
-.align	4
-_abi_test_clobber_v12_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v12.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v13_upper
-.private_extern	_abi_test_clobber_v13_upper
-.align	4
-_abi_test_clobber_v13_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v13.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v14_upper
-.private_extern	_abi_test_clobber_v14_upper
-.align	4
-_abi_test_clobber_v14_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v14.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v15_upper
-.private_extern	_abi_test_clobber_v15_upper
-.align	4
-_abi_test_clobber_v15_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v15.d[1], xzr
-	ret
-
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-arm/crypto/chacha/chacha-armv4.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/chacha/chacha-armv4.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/aesv8-armx32.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/aesv8-armx32.S
@ -1,790 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-#if __ARM_MAX_ARCH__>=7
-.text
-
-
-.code	32
-#undef	__thumb2__
-.align	5
-Lrcon:
-.long	0x01,0x01,0x01,0x01
-.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
-.long	0x1b,0x1b,0x1b,0x1b
-
-.text
-
-.globl	_aes_hw_set_encrypt_key
-.private_extern	_aes_hw_set_encrypt_key
-#ifdef __thumb2__
-.thumb_func	_aes_hw_set_encrypt_key
-#endif
-.align	5
-_aes_hw_set_encrypt_key:
-Lenc_key:
-	mov	r3,#-1
-	cmp	r0,#0
-	beq	Lenc_key_abort
-	cmp	r2,#0
-	beq	Lenc_key_abort
-	mov	r3,#-2
-	cmp	r1,#128
-	blt	Lenc_key_abort
-	cmp	r1,#256
-	bgt	Lenc_key_abort
-	tst	r1,#0x3f
-	bne	Lenc_key_abort
-
-	adr	r3,Lrcon
-	cmp	r1,#192
-
-	veor	q0,q0,q0
-	vld1.8	{q3},[r0]!
-	mov	r1,#8		@ reuse r1
-	vld1.32	{q1,q2},[r3]!
-
-	blt	Loop128
-	beq	L192
-	b	L256
-
-.align	4
-Loop128:
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-	bne	Loop128
-
-	vld1.32	{q1},[r3]
-
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	veor	q3,q3,q10
-	vst1.32	{q3},[r2]
-	add	r2,r2,#0x50
-
-	mov	r12,#10
-	b	Ldone
-
-.align	4
-L192:
-	vld1.8	{d16},[r0]!
-	vmov.i8	q10,#8			@ borrow q10
-	vst1.32	{q3},[r2]!
-	vsub.i8	q2,q2,q10	@ adjust the mask
-
-Loop192:
-	vtbl.8	d20,{q8},d4
-	vtbl.8	d21,{q8},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{d16},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-
-	vdup.32	q9,d7[1]
-	veor	q9,q9,q8
-	veor	q10,q10,q1
-	vext.8	q8,q0,q8,#12
-	vshl.u8	q1,q1,#1
-	veor	q8,q8,q9
-	veor	q3,q3,q10
-	veor	q8,q8,q10
-	vst1.32	{q3},[r2]!
-	bne	Loop192
-
-	mov	r12,#12
-	add	r2,r2,#0x20
-	b	Ldone
-
-.align	4
-L256:
-	vld1.8	{q8},[r0]
-	mov	r1,#7
-	mov	r12,#14
-	vst1.32	{q3},[r2]!
-
-Loop256:
-	vtbl.8	d20,{q8},d4
-	vtbl.8	d21,{q8},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q8},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-	vst1.32	{q3},[r2]!
-	beq	Ldone
-
-	vdup.32	q10,d7[1]
-	vext.8	q9,q0,q8,#12
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q8,q8,q9
-	vext.8	q9,q0,q9,#12
-	veor	q8,q8,q9
-	vext.8	q9,q0,q9,#12
-	veor	q8,q8,q9
-
-	veor	q8,q8,q10
-	b	Loop256
-
-Ldone:
-	str	r12,[r2]
-	mov	r3,#0
-
-Lenc_key_abort:
-	mov	r0,r3			@ return value
-
-	bx	lr
-
-
-.globl	_aes_hw_set_decrypt_key
-.private_extern	_aes_hw_set_decrypt_key
-#ifdef __thumb2__
-.thumb_func	_aes_hw_set_decrypt_key
-#endif
-.align	5
-_aes_hw_set_decrypt_key:
-	stmdb	sp!,{r4,lr}
-	bl	Lenc_key
-
-	cmp	r0,#0
-	bne	Ldec_key_abort
-
-	sub	r2,r2,#240		@ restore original r2
-	mov	r4,#-16
-	add	r0,r2,r12,lsl#4	@ end of key schedule
-
-	vld1.32	{q0},[r2]
-	vld1.32	{q1},[r0]
-	vst1.32	{q0},[r0],r4
-	vst1.32	{q1},[r2]!
-
-Loop_imc:
-	vld1.32	{q0},[r2]
-	vld1.32	{q1},[r0]
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	vst1.32	{q0},[r0],r4
-	vst1.32	{q1},[r2]!
-	cmp	r0,r2
-	bhi	Loop_imc
-
-	vld1.32	{q0},[r2]
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	vst1.32	{q0},[r0]
-
-	eor	r0,r0,r0		@ return value
-Ldec_key_abort:
-	ldmia	sp!,{r4,pc}
-
-.globl	_aes_hw_encrypt
-.private_extern	_aes_hw_encrypt
-#ifdef __thumb2__
-.thumb_func	_aes_hw_encrypt
-#endif
-.align	5
-_aes_hw_encrypt:
-	ldr	r3,[r2,#240]
-	vld1.32	{q0},[r2]!
-	vld1.8	{q2},[r0]
-	sub	r3,r3,#2
-	vld1.32	{q1},[r2]!
-
-Loop_enc:
-.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q0},[r2]!
-	subs	r3,r3,#2
-.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q1},[r2]!
-	bgt	Loop_enc
-
-.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q0},[r2]
-.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
-	veor	q2,q2,q0
-
-	vst1.8	{q2},[r1]
-	bx	lr
-
-.globl	_aes_hw_decrypt
-.private_extern	_aes_hw_decrypt
-#ifdef __thumb2__
-.thumb_func	_aes_hw_decrypt
-#endif
-.align	5
-_aes_hw_decrypt:
-	ldr	r3,[r2,#240]
-	vld1.32	{q0},[r2]!
-	vld1.8	{q2},[r0]
-	sub	r3,r3,#2
-	vld1.32	{q1},[r2]!
-
-Loop_dec:
-.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q0},[r2]!
-	subs	r3,r3,#2
-.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q1},[r2]!
-	bgt	Loop_dec
-
-.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q0},[r2]
-.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
-	veor	q2,q2,q0
-
-	vst1.8	{q2},[r1]
-	bx	lr
-
-.globl	_aes_hw_cbc_encrypt
-.private_extern	_aes_hw_cbc_encrypt
-#ifdef __thumb2__
-.thumb_func	_aes_hw_cbc_encrypt
-#endif
-.align	5
-_aes_hw_cbc_encrypt:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
-	ldmia	ip,{r4,r5}		@ load remaining args
-	subs	r2,r2,#16
-	mov	r8,#16
-	blo	Lcbc_abort
-	moveq	r8,#0
-
-	cmp	r5,#0			@ en- or decrypting?
-	ldr	r5,[r3,#240]
-	and	r2,r2,#-16
-	vld1.8	{q6},[r4]
-	vld1.8	{q0},[r0],r8
-
-	vld1.32	{q8,q9},[r3]		@ load key schedule...
-	sub	r5,r5,#6
-	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
-	sub	r5,r5,#2
-	vld1.32	{q10,q11},[r7]!
-	vld1.32	{q12,q13},[r7]!
-	vld1.32	{q14,q15},[r7]!
-	vld1.32	{q7},[r7]
-
-	add	r7,r3,#32
-	mov	r6,r5
-	beq	Lcbc_dec
-
-	cmp	r5,#2
-	veor	q0,q0,q6
-	veor	q5,q8,q7
-	beq	Lcbc_enc128
-
-	vld1.32	{q2,q3},[r7]
-	add	r7,r3,#16
-	add	r6,r3,#16*4
-	add	r12,r3,#16*5
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	add	r14,r3,#16*6
-	add	r3,r3,#16*7
-	b	Lenter_cbc_enc
-
-.align	4
-Loop_cbc_enc:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vst1.8	{q6},[r1]!
-Lenter_cbc_enc:
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q8},[r6]
-	cmp	r5,#4
-.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r12]
-	beq	Lcbc_enc192
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q8},[r14]
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r3]
-	nop
-
-Lcbc_enc192:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	subs	r2,r2,#16
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	moveq	r8,#0
-.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.8	{q8},[r0],r8
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	veor	q8,q8,q5
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-	veor	q6,q0,q7
-	bhs	Loop_cbc_enc
-
-	vst1.8	{q6},[r1]!
-	b	Lcbc_done
-
-.align	5
-Lcbc_enc128:
-	vld1.32	{q2,q3},[r7]
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	b	Lenter_cbc_enc128
-Loop_cbc_enc128:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vst1.8	{q6},[r1]!
-Lenter_cbc_enc128:
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	subs	r2,r2,#16
-.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	moveq	r8,#0
-.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.8	{q8},[r0],r8
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	veor	q8,q8,q5
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-	veor	q6,q0,q7
-	bhs	Loop_cbc_enc128
-
-	vst1.8	{q6},[r1]!
-	b	Lcbc_done
-.align	5
-Lcbc_dec:
-	vld1.8	{q10},[r0]!
-	subs	r2,r2,#32		@ bias
-	add	r6,r5,#2
-	vorr	q3,q0,q0
-	vorr	q1,q0,q0
-	vorr	q11,q10,q10
-	blo	Lcbc_dec_tail
-
-	vorr	q1,q10,q10
-	vld1.8	{q10},[r0]!
-	vorr	q2,q0,q0
-	vorr	q3,q1,q1
-	vorr	q11,q10,q10
-
-Loop3x_cbc_dec:
-.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	Loop3x_cbc_dec
-
-.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q4,q6,q7
-	subs	r2,r2,#0x30
-	veor	q5,q2,q7
-	movlo	r6,r2			@ r6, r6, is zero at this point
-.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q9,q3,q7
-	add	r0,r0,r6		@ r0 is adjusted in such way that
-					@ at exit from the loop q1-q10
-					@ are loaded with last "words"
-	vorr	q6,q11,q11
-	mov	r7,r3
-.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q2},[r0]!
-.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q3},[r0]!
-.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q11},[r0]!
-.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
-.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
-.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
-	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
-	add	r6,r5,#2
-	veor	q4,q4,q0
-	veor	q5,q5,q1
-	veor	q10,q10,q9
-	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
-	vst1.8	{q4},[r1]!
-	vorr	q0,q2,q2
-	vst1.8	{q5},[r1]!
-	vorr	q1,q3,q3
-	vst1.8	{q10},[r1]!
-	vorr	q10,q11,q11
-	bhs	Loop3x_cbc_dec
-
-	cmn	r2,#0x30
-	beq	Lcbc_done
-	nop
-
-Lcbc_dec_tail:
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	Lcbc_dec_tail
-
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	cmn	r2,#0x20
-.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q5,q6,q7
-.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q9,q3,q7
-.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
-.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
-	beq	Lcbc_dec_one
-	veor	q5,q5,q1
-	veor	q9,q9,q10
-	vorr	q6,q11,q11
-	vst1.8	{q5},[r1]!
-	vst1.8	{q9},[r1]!
-	b	Lcbc_done
-
-Lcbc_dec_one:
-	veor	q5,q5,q10
-	vorr	q6,q11,q11
-	vst1.8	{q5},[r1]!
-
-Lcbc_done:
-	vst1.8	{q6},[r4]
-Lcbc_abort:
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
-
-.globl	_aes_hw_ctr32_encrypt_blocks
-.private_extern	_aes_hw_ctr32_encrypt_blocks
-#ifdef __thumb2__
-.thumb_func	_aes_hw_ctr32_encrypt_blocks
-#endif
-.align	5
-_aes_hw_ctr32_encrypt_blocks:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
-	ldr	r4, [ip]		@ load remaining arg
-	ldr	r5,[r3,#240]
-
-	ldr	r8, [r4, #12]
-	vld1.32	{q0},[r4]
-
-	vld1.32	{q8,q9},[r3]		@ load key schedule...
-	sub	r5,r5,#4
-	mov	r12,#16
-	cmp	r2,#2
-	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
-	sub	r5,r5,#2
-	vld1.32	{q12,q13},[r7]!
-	vld1.32	{q14,q15},[r7]!
-	vld1.32	{q7},[r7]
-	add	r7,r3,#32
-	mov	r6,r5
-	movlo	r12,#0
-#ifndef __ARMEB__
-	rev	r8, r8
-#endif
-	vorr	q1,q0,q0
-	add	r10, r8, #1
-	vorr	q10,q0,q0
-	add	r8, r8, #2
-	vorr	q6,q0,q0
-	rev	r10, r10
-	vmov.32	d3[1],r10
-	bls	Lctr32_tail
-	rev	r12, r8
-	sub	r2,r2,#3		@ bias
-	vmov.32	d21[1],r12
-	b	Loop3x_ctr32
-
-.align	4
-Loop3x_ctr32:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	Loop3x_ctr32
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
-	vld1.8	{q2},[r0]!
-	vorr	q0,q6,q6
-.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.8	{q3},[r0]!
-	vorr	q1,q6,q6
-.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vld1.8	{q11},[r0]!
-	mov	r7,r3
-.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
-.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
-	vorr	q10,q6,q6
-	add	r9,r8,#1
-.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	veor	q2,q2,q7
-	add	r10,r8,#2
-.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	veor	q3,q3,q7
-	add	r8,r8,#3
-.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	veor	q11,q11,q7
-	rev	r9,r9
-.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d1[1], r9
-	rev	r10,r10
-.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vmov.32	d3[1], r10
-	rev	r12,r8
-.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d21[1], r12
-	subs	r2,r2,#3
-.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
-.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
-.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
-
-	veor	q2,q2,q4
-	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
-	vst1.8	{q2},[r1]!
-	veor	q3,q3,q5
-	mov	r6,r5
-	vst1.8	{q3},[r1]!
-	veor	q11,q11,q9
-	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
-	vst1.8	{q11},[r1]!
-	bhs	Loop3x_ctr32
-
-	adds	r2,r2,#3
-	beq	Lctr32_done
-	cmp	r2,#1
-	mov	r12,#16
-	moveq	r12,#0
-
-Lctr32_tail:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.32	{q9},[r7]!
-	bgt	Lctr32_tail
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.8	{q2},[r0],r12
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.8	{q3},[r0]
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	veor	q2,q2,q7
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	veor	q3,q3,q7
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
-
-	cmp	r2,#1
-	veor	q2,q2,q0
-	veor	q3,q3,q1
-	vst1.8	{q2},[r1]!
-	beq	Lctr32_done
-	vst1.8	{q3},[r1]
-
-Lctr32_done:
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
-
-#endif
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/armv4-mont.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/armv4-mont.S
@ -1,982 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
-@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
-
-
-.text
-#if defined(__thumb2__)
-.syntax	unified
-.thumb
-#else
-.code	32
-#endif
-
-#if __ARM_MAX_ARCH__>=7
-.align	5
-LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-Lbn_mul_mont
-#endif
-
-.globl	_bn_mul_mont
-.private_extern	_bn_mul_mont
-#ifdef __thumb2__
-.thumb_func	_bn_mul_mont
-#endif
-
-.align	5
-_bn_mul_mont:
-Lbn_mul_mont:
-	ldr	ip,[sp,#4]		@ load num
-	stmdb	sp!,{r0,r2}		@ sp points at argument block
-#if __ARM_MAX_ARCH__>=7
-	tst	ip,#7
-	bne	Lialu
-	adr	r0,Lbn_mul_mont
-	ldr	r2,LOPENSSL_armcap
-	ldr	r0,[r0,r2]
-#ifdef	__APPLE__
-	ldr	r0,[r0]
-#endif
-	tst	r0,#ARMV7_NEON		@ NEON available?
-	ldmia	sp, {r0,r2}
-	beq	Lialu
-	add	sp,sp,#8
-	b	bn_mul8x_mont_neon
-.align	4
-Lialu:
-#endif
-	cmp	ip,#2
-	mov	r0,ip			@ load num
-#ifdef	__thumb2__
-	ittt	lt
-#endif
-	movlt	r0,#0
-	addlt	sp,sp,#2*4
-	blt	Labrt
-
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ save 10 registers
-
-	mov	r0,r0,lsl#2		@ rescale r0 for byte count
-	sub	sp,sp,r0		@ alloca(4*num)
-	sub	sp,sp,#4		@ +extra dword
-	sub	r0,r0,#4		@ "num=num-1"
-	add	r4,r2,r0		@ &bp[num-1]
-
-	add	r0,sp,r0		@ r0 to point at &tp[num-1]
-	ldr	r8,[r0,#14*4]		@ &n0
-	ldr	r2,[r2]		@ bp[0]
-	ldr	r5,[r1],#4		@ ap[0],ap++
-	ldr	r6,[r3],#4		@ np[0],np++
-	ldr	r8,[r8]		@ *n0
-	str	r4,[r0,#15*4]		@ save &bp[num]
-
-	umull	r10,r11,r5,r2	@ ap[0]*bp[0]
-	str	r8,[r0,#14*4]		@ save n0 value
-	mul	r8,r10,r8		@ "tp[0]"*n0
-	mov	r12,#0
-	umlal	r10,r12,r6,r8	@ np[0]*n0+"t[0]"
-	mov	r4,sp
-
-L1st:
-	ldr	r5,[r1],#4		@ ap[j],ap++
-	mov	r10,r11
-	ldr	r6,[r3],#4		@ np[j],np++
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[j]*bp[0]
-	mov	r14,#0
-	umlal	r12,r14,r6,r8	@ np[j]*n0
-	adds	r12,r12,r10
-	str	r12,[r4],#4		@ tp[j-1]=,tp++
-	adc	r12,r14,#0
-	cmp	r4,r0
-	bne	L1st
-
-	adds	r12,r12,r11
-	ldr	r4,[r0,#13*4]		@ restore bp
-	mov	r14,#0
-	ldr	r8,[r0,#14*4]		@ restore n0
-	adc	r14,r14,#0
-	str	r12,[r0]		@ tp[num-1]=
-	mov	r7,sp
-	str	r14,[r0,#4]		@ tp[num]=
-
-Louter:
-	sub	r7,r0,r7		@ "original" r0-1 value
-	sub	r1,r1,r7		@ "rewind" ap to &ap[1]
-	ldr	r2,[r4,#4]!		@ *(++bp)
-	sub	r3,r3,r7		@ "rewind" np to &np[1]
-	ldr	r5,[r1,#-4]		@ ap[0]
-	ldr	r10,[sp]		@ tp[0]
-	ldr	r6,[r3,#-4]		@ np[0]
-	ldr	r7,[sp,#4]		@ tp[1]
-
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[0]*bp[i]+tp[0]
-	str	r4,[r0,#13*4]		@ save bp
-	mul	r8,r10,r8
-	mov	r12,#0
-	umlal	r10,r12,r6,r8	@ np[0]*n0+"tp[0]"
-	mov	r4,sp
-
-Linner:
-	ldr	r5,[r1],#4		@ ap[j],ap++
-	adds	r10,r11,r7		@ +=tp[j]
-	ldr	r6,[r3],#4		@ np[j],np++
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[j]*bp[i]
-	mov	r14,#0
-	umlal	r12,r14,r6,r8	@ np[j]*n0
-	adc	r11,r11,#0
-	ldr	r7,[r4,#8]		@ tp[j+1]
-	adds	r12,r12,r10
-	str	r12,[r4],#4		@ tp[j-1]=,tp++
-	adc	r12,r14,#0
-	cmp	r4,r0
-	bne	Linner
-
-	adds	r12,r12,r11
-	mov	r14,#0
-	ldr	r4,[r0,#13*4]		@ restore bp
-	adc	r14,r14,#0
-	ldr	r8,[r0,#14*4]		@ restore n0
-	adds	r12,r12,r7
-	ldr	r7,[r0,#15*4]		@ restore &bp[num]
-	adc	r14,r14,#0
-	str	r12,[r0]		@ tp[num-1]=
-	str	r14,[r0,#4]		@ tp[num]=
-
-	cmp	r4,r7
-#ifdef	__thumb2__
-	itt	ne
-#endif
-	movne	r7,sp
-	bne	Louter
-
-	ldr	r2,[r0,#12*4]		@ pull rp
-	mov	r5,sp
-	add	r0,r0,#4		@ r0 to point at &tp[num]
-	sub	r5,r0,r5		@ "original" num value
-	mov	r4,sp			@ "rewind" r4
-	mov	r1,r4			@ "borrow" r1
-	sub	r3,r3,r5		@ "rewind" r3 to &np[0]
-
-	subs	r7,r7,r7		@ "clear" carry flag
-Lsub:	ldr	r7,[r4],#4
-	ldr	r6,[r3],#4
-	sbcs	r7,r7,r6		@ tp[j]-np[j]
-	str	r7,[r2],#4		@ rp[j]=
-	teq	r4,r0		@ preserve carry
-	bne	Lsub
-	sbcs	r14,r14,#0		@ upmost carry
-	mov	r4,sp			@ "rewind" r4
-	sub	r2,r2,r5		@ "rewind" r2
-
-Lcopy:	ldr	r7,[r4]		@ conditional copy
-	ldr	r5,[r2]
-	str	sp,[r4],#4		@ zap tp
-#ifdef	__thumb2__
-	it	cc
-#endif
-	movcc	r5,r7
-	str	r5,[r2],#4
-	teq	r4,r0		@ preserve carry
-	bne	Lcopy
-
-	mov	sp,r0
-	add	sp,sp,#4		@ skip over tp[num+1]
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ restore registers
-	add	sp,sp,#2*4		@ skip over {r0,r2}
-	mov	r0,#1
-Labrt:
-#if __ARM_ARCH__>=5
-	bx	lr				@ bx lr
-#else
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-
-#if __ARM_MAX_ARCH__>=7
-
-
-
-#ifdef __thumb2__
-.thumb_func	bn_mul8x_mont_neon
-#endif
-.align	5
-bn_mul8x_mont_neon:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ ABI specification says so
-	ldmia	ip,{r4,r5}		@ load rest of parameter block
-	mov	ip,sp
-
-	cmp	r5,#8
-	bhi	LNEON_8n
-
-	@ special case for r5==8, everything is in register bank...
-
-	vld1.32	{d28[0]}, [r2,:32]!
-	veor	d8,d8,d8
-	sub	r7,sp,r5,lsl#4
-	vld1.32	{d0,d1,d2,d3},  [r1]!		@ can't specify :32 :-(
-	and	r7,r7,#-64
-	vld1.32	{d30[0]}, [r4,:32]
-	mov	sp,r7			@ alloca
-	vzip.16	d28,d8
-
-	vmull.u32	q6,d28,d0[0]
-	vmull.u32	q7,d28,d0[1]
-	vmull.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmull.u32	q9,d28,d1[1]
-
-	vadd.u64	d29,d29,d12
-	veor	d8,d8,d8
-	vmul.u32	d29,d29,d30
-
-	vmull.u32	q10,d28,d2[0]
-	vld1.32	{d4,d5,d6,d7}, [r3]!
-	vmull.u32	q11,d28,d2[1]
-	vmull.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmull.u32	q13,d28,d3[1]
-
-	vmlal.u32	q6,d29,d4[0]
-	sub	r9,r5,#1
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-
-	vmlal.u32	q10,d29,d6[0]
-	vmov	q5,q6
-	vmlal.u32	q11,d29,d6[1]
-	vmov	q6,q7
-	vmlal.u32	q12,d29,d7[0]
-	vmov	q7,q8
-	vmlal.u32	q13,d29,d7[1]
-	vmov	q8,q9
-	vmov	q9,q10
-	vshr.u64	d10,d10,#16
-	vmov	q10,q11
-	vmov	q11,q12
-	vadd.u64	d10,d10,d11
-	vmov	q12,q13
-	veor	q13,q13
-	vshr.u64	d10,d10,#16
-
-	b	LNEON_outer8
-
-.align	4
-LNEON_outer8:
-	vld1.32	{d28[0]}, [r2,:32]!
-	veor	d8,d8,d8
-	vzip.16	d28,d8
-	vadd.u64	d12,d12,d10
-
-	vmlal.u32	q6,d28,d0[0]
-	vmlal.u32	q7,d28,d0[1]
-	vmlal.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmlal.u32	q9,d28,d1[1]
-
-	vadd.u64	d29,d29,d12
-	veor	d8,d8,d8
-	subs	r9,r9,#1
-	vmul.u32	d29,d29,d30
-
-	vmlal.u32	q10,d28,d2[0]
-	vmlal.u32	q11,d28,d2[1]
-	vmlal.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q13,d28,d3[1]
-
-	vmlal.u32	q6,d29,d4[0]
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-
-	vmlal.u32	q10,d29,d6[0]
-	vmov	q5,q6
-	vmlal.u32	q11,d29,d6[1]
-	vmov	q6,q7
-	vmlal.u32	q12,d29,d7[0]
-	vmov	q7,q8
-	vmlal.u32	q13,d29,d7[1]
-	vmov	q8,q9
-	vmov	q9,q10
-	vshr.u64	d10,d10,#16
-	vmov	q10,q11
-	vmov	q11,q12
-	vadd.u64	d10,d10,d11
-	vmov	q12,q13
-	veor	q13,q13
-	vshr.u64	d10,d10,#16
-
-	bne	LNEON_outer8
-
-	vadd.u64	d12,d12,d10
-	mov	r7,sp
-	vshr.u64	d10,d12,#16
-	mov	r8,r5
-	vadd.u64	d13,d13,d10
-	add	r6,sp,#96
-	vshr.u64	d10,d13,#16
-	vzip.16	d12,d13
-
-	b	LNEON_tail_entry
-
-.align	4
-LNEON_8n:
-	veor	q6,q6,q6
-	sub	r7,sp,#128
-	veor	q7,q7,q7
-	sub	r7,r7,r5,lsl#4
-	veor	q8,q8,q8
-	and	r7,r7,#-64
-	veor	q9,q9,q9
-	mov	sp,r7			@ alloca
-	veor	q10,q10,q10
-	add	r7,r7,#256
-	veor	q11,q11,q11
-	sub	r8,r5,#8
-	veor	q12,q12,q12
-	veor	q13,q13,q13
-
-LNEON_8n_init:
-	vst1.64	{q6,q7},[r7,:256]!
-	subs	r8,r8,#8
-	vst1.64	{q8,q9},[r7,:256]!
-	vst1.64	{q10,q11},[r7,:256]!
-	vst1.64	{q12,q13},[r7,:256]!
-	bne	LNEON_8n_init
-
-	add	r6,sp,#256
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	add	r10,sp,#8
-	vld1.32	{d30[0]},[r4,:32]
-	mov	r9,r5
-	b	LNEON_8n_outer
-
-.align	4
-LNEON_8n_outer:
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	veor	d8,d8,d8
-	vzip.16	d28,d8
-	add	r7,sp,#128
-	vld1.32	{d4,d5,d6,d7},[r3]!
-
-	vmlal.u32	q6,d28,d0[0]
-	vmlal.u32	q7,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmlal.u32	q9,d28,d1[1]
-	vadd.u64	d29,d29,d12
-	vmlal.u32	q10,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q11,d28,d2[1]
-	vst1.32	{d28},[sp,:64]		@ put aside smashed b[8*i+0]
-	vmlal.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q13,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q6,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q7,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q8,d29,d5[0]
-	vshr.u64	d12,d12,#16
-	vmlal.u32	q9,d29,d5[1]
-	vmlal.u32	q10,d29,d6[0]
-	vadd.u64	d12,d12,d13
-	vmlal.u32	q11,d29,d6[1]
-	vshr.u64	d12,d12,#16
-	vmlal.u32	q12,d29,d7[0]
-	vmlal.u32	q13,d29,d7[1]
-	vadd.u64	d14,d14,d12
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+0]
-	vmlal.u32	q7,d28,d0[0]
-	vld1.64	{q6},[r6,:128]!
-	vmlal.u32	q8,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q9,d28,d1[0]
-	vshl.i64	d29,d15,#16
-	vmlal.u32	q10,d28,d1[1]
-	vadd.u64	d29,d29,d14
-	vmlal.u32	q11,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q12,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+1]
-	vmlal.u32	q13,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q6,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q7,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q8,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q9,d29,d5[0]
-	vshr.u64	d14,d14,#16
-	vmlal.u32	q10,d29,d5[1]
-	vmlal.u32	q11,d29,d6[0]
-	vadd.u64	d14,d14,d15
-	vmlal.u32	q12,d29,d6[1]
-	vshr.u64	d14,d14,#16
-	vmlal.u32	q13,d29,d7[0]
-	vmlal.u32	q6,d29,d7[1]
-	vadd.u64	d16,d16,d14
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+1]
-	vmlal.u32	q8,d28,d0[0]
-	vld1.64	{q7},[r6,:128]!
-	vmlal.u32	q9,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q10,d28,d1[0]
-	vshl.i64	d29,d17,#16
-	vmlal.u32	q11,d28,d1[1]
-	vadd.u64	d29,d29,d16
-	vmlal.u32	q12,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q13,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+2]
-	vmlal.u32	q6,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q7,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q8,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q9,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q10,d29,d5[0]
-	vshr.u64	d16,d16,#16
-	vmlal.u32	q11,d29,d5[1]
-	vmlal.u32	q12,d29,d6[0]
-	vadd.u64	d16,d16,d17
-	vmlal.u32	q13,d29,d6[1]
-	vshr.u64	d16,d16,#16
-	vmlal.u32	q6,d29,d7[0]
-	vmlal.u32	q7,d29,d7[1]
-	vadd.u64	d18,d18,d16
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+2]
-	vmlal.u32	q9,d28,d0[0]
-	vld1.64	{q8},[r6,:128]!
-	vmlal.u32	q10,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q11,d28,d1[0]
-	vshl.i64	d29,d19,#16
-	vmlal.u32	q12,d28,d1[1]
-	vadd.u64	d29,d29,d18
-	vmlal.u32	q13,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q6,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+3]
-	vmlal.u32	q7,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q8,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q9,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q10,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q11,d29,d5[0]
-	vshr.u64	d18,d18,#16
-	vmlal.u32	q12,d29,d5[1]
-	vmlal.u32	q13,d29,d6[0]
-	vadd.u64	d18,d18,d19
-	vmlal.u32	q6,d29,d6[1]
-	vshr.u64	d18,d18,#16
-	vmlal.u32	q7,d29,d7[0]
-	vmlal.u32	q8,d29,d7[1]
-	vadd.u64	d20,d20,d18
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+3]
-	vmlal.u32	q10,d28,d0[0]
-	vld1.64	{q9},[r6,:128]!
-	vmlal.u32	q11,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q12,d28,d1[0]
-	vshl.i64	d29,d21,#16
-	vmlal.u32	q13,d28,d1[1]
-	vadd.u64	d29,d29,d20
-	vmlal.u32	q6,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q7,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+4]
-	vmlal.u32	q8,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q9,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q10,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q11,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q12,d29,d5[0]
-	vshr.u64	d20,d20,#16
-	vmlal.u32	q13,d29,d5[1]
-	vmlal.u32	q6,d29,d6[0]
-	vadd.u64	d20,d20,d21
-	vmlal.u32	q7,d29,d6[1]
-	vshr.u64	d20,d20,#16
-	vmlal.u32	q8,d29,d7[0]
-	vmlal.u32	q9,d29,d7[1]
-	vadd.u64	d22,d22,d20
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+4]
-	vmlal.u32	q11,d28,d0[0]
-	vld1.64	{q10},[r6,:128]!
-	vmlal.u32	q12,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q13,d28,d1[0]
-	vshl.i64	d29,d23,#16
-	vmlal.u32	q6,d28,d1[1]
-	vadd.u64	d29,d29,d22
-	vmlal.u32	q7,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q8,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+5]
-	vmlal.u32	q9,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q10,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q11,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q12,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q13,d29,d5[0]
-	vshr.u64	d22,d22,#16
-	vmlal.u32	q6,d29,d5[1]
-	vmlal.u32	q7,d29,d6[0]
-	vadd.u64	d22,d22,d23
-	vmlal.u32	q8,d29,d6[1]
-	vshr.u64	d22,d22,#16
-	vmlal.u32	q9,d29,d7[0]
-	vmlal.u32	q10,d29,d7[1]
-	vadd.u64	d24,d24,d22
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+5]
-	vmlal.u32	q12,d28,d0[0]
-	vld1.64	{q11},[r6,:128]!
-	vmlal.u32	q13,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q6,d28,d1[0]
-	vshl.i64	d29,d25,#16
-	vmlal.u32	q7,d28,d1[1]
-	vadd.u64	d29,d29,d24
-	vmlal.u32	q8,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q9,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+6]
-	vmlal.u32	q10,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q11,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q12,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q13,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q6,d29,d5[0]
-	vshr.u64	d24,d24,#16
-	vmlal.u32	q7,d29,d5[1]
-	vmlal.u32	q8,d29,d6[0]
-	vadd.u64	d24,d24,d25
-	vmlal.u32	q9,d29,d6[1]
-	vshr.u64	d24,d24,#16
-	vmlal.u32	q10,d29,d7[0]
-	vmlal.u32	q11,d29,d7[1]
-	vadd.u64	d26,d26,d24
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+6]
-	vmlal.u32	q13,d28,d0[0]
-	vld1.64	{q12},[r6,:128]!
-	vmlal.u32	q6,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q7,d28,d1[0]
-	vshl.i64	d29,d27,#16
-	vmlal.u32	q8,d28,d1[1]
-	vadd.u64	d29,d29,d26
-	vmlal.u32	q9,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q10,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+7]
-	vmlal.u32	q11,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q12,d28,d3[1]
-	vld1.32	{d28},[sp,:64]		@ pull smashed b[8*i+0]
-	vmlal.u32	q13,d29,d4[0]
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	vmlal.u32	q6,d29,d4[1]
-	vmlal.u32	q7,d29,d5[0]
-	vshr.u64	d26,d26,#16
-	vmlal.u32	q8,d29,d5[1]
-	vmlal.u32	q9,d29,d6[0]
-	vadd.u64	d26,d26,d27
-	vmlal.u32	q10,d29,d6[1]
-	vshr.u64	d26,d26,#16
-	vmlal.u32	q11,d29,d7[0]
-	vmlal.u32	q12,d29,d7[1]
-	vadd.u64	d12,d12,d26
-	vst1.32	{d29},[r10,:64]	@ put aside smashed m[8*i+7]
-	add	r10,sp,#8		@ rewind
-	sub	r8,r5,#8
-	b	LNEON_8n_inner
-
-.align	4
-LNEON_8n_inner:
-	subs	r8,r8,#8
-	vmlal.u32	q6,d28,d0[0]
-	vld1.64	{q13},[r6,:128]
-	vmlal.u32	q7,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+0]
-	vmlal.u32	q8,d28,d1[0]
-	vld1.32	{d4,d5,d6,d7},[r3]!
-	vmlal.u32	q9,d28,d1[1]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q10,d28,d2[0]
-	vmlal.u32	q11,d28,d2[1]
-	vmlal.u32	q12,d28,d3[0]
-	vmlal.u32	q13,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+1]
-	vmlal.u32	q6,d29,d4[0]
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-	vmlal.u32	q10,d29,d6[0]
-	vmlal.u32	q11,d29,d6[1]
-	vmlal.u32	q12,d29,d7[0]
-	vmlal.u32	q13,d29,d7[1]
-	vst1.64	{q6},[r7,:128]!
-	vmlal.u32	q7,d28,d0[0]
-	vld1.64	{q6},[r6,:128]
-	vmlal.u32	q8,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+1]
-	vmlal.u32	q9,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q10,d28,d1[1]
-	vmlal.u32	q11,d28,d2[0]
-	vmlal.u32	q12,d28,d2[1]
-	vmlal.u32	q13,d28,d3[0]
-	vmlal.u32	q6,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+2]
-	vmlal.u32	q7,d29,d4[0]
-	vmlal.u32	q8,d29,d4[1]
-	vmlal.u32	q9,d29,d5[0]
-	vmlal.u32	q10,d29,d5[1]
-	vmlal.u32	q11,d29,d6[0]
-	vmlal.u32	q12,d29,d6[1]
-	vmlal.u32	q13,d29,d7[0]
-	vmlal.u32	q6,d29,d7[1]
-	vst1.64	{q7},[r7,:128]!
-	vmlal.u32	q8,d28,d0[0]
-	vld1.64	{q7},[r6,:128]
-	vmlal.u32	q9,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+2]
-	vmlal.u32	q10,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q11,d28,d1[1]
-	vmlal.u32	q12,d28,d2[0]
-	vmlal.u32	q13,d28,d2[1]
-	vmlal.u32	q6,d28,d3[0]
-	vmlal.u32	q7,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+3]
-	vmlal.u32	q8,d29,d4[0]
-	vmlal.u32	q9,d29,d4[1]
-	vmlal.u32	q10,d29,d5[0]
-	vmlal.u32	q11,d29,d5[1]
-	vmlal.u32	q12,d29,d6[0]
-	vmlal.u32	q13,d29,d6[1]
-	vmlal.u32	q6,d29,d7[0]
-	vmlal.u32	q7,d29,d7[1]
-	vst1.64	{q8},[r7,:128]!
-	vmlal.u32	q9,d28,d0[0]
-	vld1.64	{q8},[r6,:128]
-	vmlal.u32	q10,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+3]
-	vmlal.u32	q11,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q12,d28,d1[1]
-	vmlal.u32	q13,d28,d2[0]
-	vmlal.u32	q6,d28,d2[1]
-	vmlal.u32	q7,d28,d3[0]
-	vmlal.u32	q8,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+4]
-	vmlal.u32	q9,d29,d4[0]
-	vmlal.u32	q10,d29,d4[1]
-	vmlal.u32	q11,d29,d5[0]
-	vmlal.u32	q12,d29,d5[1]
-	vmlal.u32	q13,d29,d6[0]
-	vmlal.u32	q6,d29,d6[1]
-	vmlal.u32	q7,d29,d7[0]
-	vmlal.u32	q8,d29,d7[1]
-	vst1.64	{q9},[r7,:128]!
-	vmlal.u32	q10,d28,d0[0]
-	vld1.64	{q9},[r6,:128]
-	vmlal.u32	q11,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+4]
-	vmlal.u32	q12,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q13,d28,d1[1]
-	vmlal.u32	q6,d28,d2[0]
-	vmlal.u32	q7,d28,d2[1]
-	vmlal.u32	q8,d28,d3[0]
-	vmlal.u32	q9,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+5]
-	vmlal.u32	q10,d29,d4[0]
-	vmlal.u32	q11,d29,d4[1]
-	vmlal.u32	q12,d29,d5[0]
-	vmlal.u32	q13,d29,d5[1]
-	vmlal.u32	q6,d29,d6[0]
-	vmlal.u32	q7,d29,d6[1]
-	vmlal.u32	q8,d29,d7[0]
-	vmlal.u32	q9,d29,d7[1]
-	vst1.64	{q10},[r7,:128]!
-	vmlal.u32	q11,d28,d0[0]
-	vld1.64	{q10},[r6,:128]
-	vmlal.u32	q12,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+5]
-	vmlal.u32	q13,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q6,d28,d1[1]
-	vmlal.u32	q7,d28,d2[0]
-	vmlal.u32	q8,d28,d2[1]
-	vmlal.u32	q9,d28,d3[0]
-	vmlal.u32	q10,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+6]
-	vmlal.u32	q11,d29,d4[0]
-	vmlal.u32	q12,d29,d4[1]
-	vmlal.u32	q13,d29,d5[0]
-	vmlal.u32	q6,d29,d5[1]
-	vmlal.u32	q7,d29,d6[0]
-	vmlal.u32	q8,d29,d6[1]
-	vmlal.u32	q9,d29,d7[0]
-	vmlal.u32	q10,d29,d7[1]
-	vst1.64	{q11},[r7,:128]!
-	vmlal.u32	q12,d28,d0[0]
-	vld1.64	{q11},[r6,:128]
-	vmlal.u32	q13,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+6]
-	vmlal.u32	q6,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q7,d28,d1[1]
-	vmlal.u32	q8,d28,d2[0]
-	vmlal.u32	q9,d28,d2[1]
-	vmlal.u32	q10,d28,d3[0]
-	vmlal.u32	q11,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+7]
-	vmlal.u32	q12,d29,d4[0]
-	vmlal.u32	q13,d29,d4[1]
-	vmlal.u32	q6,d29,d5[0]
-	vmlal.u32	q7,d29,d5[1]
-	vmlal.u32	q8,d29,d6[0]
-	vmlal.u32	q9,d29,d6[1]
-	vmlal.u32	q10,d29,d7[0]
-	vmlal.u32	q11,d29,d7[1]
-	vst1.64	{q12},[r7,:128]!
-	vmlal.u32	q13,d28,d0[0]
-	vld1.64	{q12},[r6,:128]
-	vmlal.u32	q6,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+7]
-	vmlal.u32	q7,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q8,d28,d1[1]
-	vmlal.u32	q9,d28,d2[0]
-	vmlal.u32	q10,d28,d2[1]
-	vmlal.u32	q11,d28,d3[0]
-	vmlal.u32	q12,d28,d3[1]
-	it	eq
-	subeq	r1,r1,r5,lsl#2	@ rewind
-	vmlal.u32	q13,d29,d4[0]
-	vld1.32	{d28},[sp,:64]		@ pull smashed b[8*i+0]
-	vmlal.u32	q6,d29,d4[1]
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	vmlal.u32	q7,d29,d5[0]
-	add	r10,sp,#8		@ rewind
-	vmlal.u32	q8,d29,d5[1]
-	vmlal.u32	q9,d29,d6[0]
-	vmlal.u32	q10,d29,d6[1]
-	vmlal.u32	q11,d29,d7[0]
-	vst1.64	{q13},[r7,:128]!
-	vmlal.u32	q12,d29,d7[1]
-
-	bne	LNEON_8n_inner
-	add	r6,sp,#128
-	vst1.64	{q6,q7},[r7,:256]!
-	veor	q2,q2,q2		@ d4-d5
-	vst1.64	{q8,q9},[r7,:256]!
-	veor	q3,q3,q3		@ d6-d7
-	vst1.64	{q10,q11},[r7,:256]!
-	vst1.64	{q12},[r7,:128]
-
-	subs	r9,r9,#8
-	vld1.64	{q6,q7},[r6,:256]!
-	vld1.64	{q8,q9},[r6,:256]!
-	vld1.64	{q10,q11},[r6,:256]!
-	vld1.64	{q12,q13},[r6,:256]!
-
-	itt	ne
-	subne	r3,r3,r5,lsl#2	@ rewind
-	bne	LNEON_8n_outer
-
-	add	r7,sp,#128
-	vst1.64	{q2,q3}, [sp,:256]!	@ start wiping stack frame
-	vshr.u64	d10,d12,#16
-	vst1.64	{q2,q3},[sp,:256]!
-	vadd.u64	d13,d13,d10
-	vst1.64	{q2,q3}, [sp,:256]!
-	vshr.u64	d10,d13,#16
-	vst1.64	{q2,q3}, [sp,:256]!
-	vzip.16	d12,d13
-
-	mov	r8,r5
-	b	LNEON_tail_entry
-
-.align	4
-LNEON_tail:
-	vadd.u64	d12,d12,d10
-	vshr.u64	d10,d12,#16
-	vld1.64	{q8,q9}, [r6, :256]!
-	vadd.u64	d13,d13,d10
-	vld1.64	{q10,q11}, [r6, :256]!
-	vshr.u64	d10,d13,#16
-	vld1.64	{q12,q13}, [r6, :256]!
-	vzip.16	d12,d13
-
-LNEON_tail_entry:
-	vadd.u64	d14,d14,d10
-	vst1.32	{d12[0]}, [r7, :32]!
-	vshr.u64	d10,d14,#16
-	vadd.u64	d15,d15,d10
-	vshr.u64	d10,d15,#16
-	vzip.16	d14,d15
-	vadd.u64	d16,d16,d10
-	vst1.32	{d14[0]}, [r7, :32]!
-	vshr.u64	d10,d16,#16
-	vadd.u64	d17,d17,d10
-	vshr.u64	d10,d17,#16
-	vzip.16	d16,d17
-	vadd.u64	d18,d18,d10
-	vst1.32	{d16[0]}, [r7, :32]!
-	vshr.u64	d10,d18,#16
-	vadd.u64	d19,d19,d10
-	vshr.u64	d10,d19,#16
-	vzip.16	d18,d19
-	vadd.u64	d20,d20,d10
-	vst1.32	{d18[0]}, [r7, :32]!
-	vshr.u64	d10,d20,#16
-	vadd.u64	d21,d21,d10
-	vshr.u64	d10,d21,#16
-	vzip.16	d20,d21
-	vadd.u64	d22,d22,d10
-	vst1.32	{d20[0]}, [r7, :32]!
-	vshr.u64	d10,d22,#16
-	vadd.u64	d23,d23,d10
-	vshr.u64	d10,d23,#16
-	vzip.16	d22,d23
-	vadd.u64	d24,d24,d10
-	vst1.32	{d22[0]}, [r7, :32]!
-	vshr.u64	d10,d24,#16
-	vadd.u64	d25,d25,d10
-	vshr.u64	d10,d25,#16
-	vzip.16	d24,d25
-	vadd.u64	d26,d26,d10
-	vst1.32	{d24[0]}, [r7, :32]!
-	vshr.u64	d10,d26,#16
-	vadd.u64	d27,d27,d10
-	vshr.u64	d10,d27,#16
-	vzip.16	d26,d27
-	vld1.64	{q6,q7}, [r6, :256]!
-	subs	r8,r8,#8
-	vst1.32	{d26[0]},   [r7, :32]!
-	bne	LNEON_tail
-
-	vst1.32	{d10[0]}, [r7, :32]		@ top-most bit
-	sub	r3,r3,r5,lsl#2			@ rewind r3
-	subs	r1,sp,#0				@ clear carry flag
-	add	r2,sp,r5,lsl#2
-
-LNEON_sub:
-	ldmia	r1!, {r4,r5,r6,r7}
-	ldmia	r3!, {r8,r9,r10,r11}
-	sbcs	r8, r4,r8
-	sbcs	r9, r5,r9
-	sbcs	r10,r6,r10
-	sbcs	r11,r7,r11
-	teq	r1,r2				@ preserves carry
-	stmia	r0!, {r8,r9,r10,r11}
-	bne	LNEON_sub
-
-	ldr	r10, [r1]				@ load top-most bit
-	mov	r11,sp
-	veor	q0,q0,q0
-	sub	r11,r2,r11				@ this is num*4
-	veor	q1,q1,q1
-	mov	r1,sp
-	sub	r0,r0,r11				@ rewind r0
-	mov	r3,r2				@ second 3/4th of frame
-	sbcs	r10,r10,#0				@ result is carry flag
-
-LNEON_copy_n_zap:
-	ldmia	r1!, {r4,r5,r6,r7}
-	ldmia	r0,  {r8,r9,r10,r11}
-	it	cc
-	movcc	r8, r4
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	itt	cc
-	movcc	r9, r5
-	movcc	r10,r6
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	it	cc
-	movcc	r11,r7
-	ldmia	r1, {r4,r5,r6,r7}
-	stmia	r0!, {r8,r9,r10,r11}
-	sub	r1,r1,#16
-	ldmia	r0, {r8,r9,r10,r11}
-	it	cc
-	movcc	r8, r4
-	vst1.64	{q0,q1}, [r1,:256]!			@ wipe
-	itt	cc
-	movcc	r9, r5
-	movcc	r10,r6
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	it	cc
-	movcc	r11,r7
-	teq	r1,r2				@ preserves carry
-	stmia	r0!, {r8,r9,r10,r11}
-	bne	LNEON_copy_n_zap
-
-	mov	sp,ip
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
-	bx	lr						@ bx lr
-
-#endif
-.byte	77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#if __ARM_MAX_ARCH__>=7
-.comm	_OPENSSL_armcap_P,4
-.non_lazy_symbol_pointer
-OPENSSL_armcap_P:
-.indirect_symbol	_OPENSSL_armcap_P
-.long	0
-.private_extern	_OPENSSL_armcap_P
-#endif
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/bsaes-armv7.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/bsaes-armv7.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghash-armv4.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghash-armv4.S
@ -1,258 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
-@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
-@ instructions are in aesv8-armx.pl.)
-
-
-.text
-#if defined(__thumb2__) || defined(__clang__)
-.syntax	unified
-#define ldrplb  ldrbpl
-#define ldrneb  ldrbne
-#endif
-#if defined(__thumb2__)
-.thumb
-#else
-.code	32
-#endif
-#if __ARM_MAX_ARCH__>=7
-
-
-
-.globl	_gcm_init_neon
-.private_extern	_gcm_init_neon
-#ifdef __thumb2__
-.thumb_func	_gcm_init_neon
-#endif
-.align	4
-_gcm_init_neon:
-	vld1.64	d7,[r1]!		@ load H
-	vmov.i8	q8,#0xe1
-	vld1.64	d6,[r1]
-	vshl.i64	d17,#57
-	vshr.u64	d16,#63		@ t0=0xc2....01
-	vdup.8	q9,d7[7]
-	vshr.u64	d26,d6,#63
-	vshr.s8	q9,#7			@ broadcast carry bit
-	vshl.i64	q3,q3,#1
-	vand	q8,q8,q9
-	vorr	d7,d26		@ H<<<=1
-	veor	q3,q3,q8		@ twisted H
-	vstmia	r0,{q3}
-
-	bx	lr					@ bx lr
-
-
-.globl	_gcm_gmult_neon
-.private_extern	_gcm_gmult_neon
-#ifdef __thumb2__
-.thumb_func	_gcm_gmult_neon
-#endif
-.align	4
-_gcm_gmult_neon:
-	vld1.64	d7,[r0]!		@ load Xi
-	vld1.64	d6,[r0]!
-	vmov.i64	d29,#0x0000ffffffffffff
-	vldmia	r1,{d26,d27}	@ load twisted H
-	vmov.i64	d30,#0x00000000ffffffff
-#ifdef __ARMEL__
-	vrev64.8	q3,q3
-#endif
-	vmov.i64	d31,#0x000000000000ffff
-	veor	d28,d26,d27		@ Karatsuba pre-processing
-	mov	r3,#16
-	b	Lgmult_neon
-
-
-.globl	_gcm_ghash_neon
-.private_extern	_gcm_ghash_neon
-#ifdef __thumb2__
-.thumb_func	_gcm_ghash_neon
-#endif
-.align	4
-_gcm_ghash_neon:
-	vld1.64	d1,[r0]!		@ load Xi
-	vld1.64	d0,[r0]!
-	vmov.i64	d29,#0x0000ffffffffffff
-	vldmia	r1,{d26,d27}	@ load twisted H
-	vmov.i64	d30,#0x00000000ffffffff
-#ifdef __ARMEL__
-	vrev64.8	q0,q0
-#endif
-	vmov.i64	d31,#0x000000000000ffff
-	veor	d28,d26,d27		@ Karatsuba pre-processing
-
-Loop_neon:
-	vld1.64	d7,[r2]!		@ load inp
-	vld1.64	d6,[r2]!
-#ifdef __ARMEL__
-	vrev64.8	q3,q3
-#endif
-	veor	q3,q0			@ inp^=Xi
-Lgmult_neon:
-	vext.8	d16, d26, d26, #1	@ A1
-	vmull.p8	q8, d16, d6		@ F = A1*B
-	vext.8	d0, d6, d6, #1	@ B1
-	vmull.p8	q0, d26, d0		@ E = A*B1
-	vext.8	d18, d26, d26, #2	@ A2
-	vmull.p8	q9, d18, d6		@ H = A2*B
-	vext.8	d22, d6, d6, #2	@ B2
-	vmull.p8	q11, d26, d22		@ G = A*B2
-	vext.8	d20, d26, d26, #3	@ A3
-	veor	q8, q8, q0		@ L = E + F
-	vmull.p8	q10, d20, d6		@ J = A3*B
-	vext.8	d0, d6, d6, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q0, d26, d0		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d6, d6, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d26, d22		@ K = A*B4
-	veor	q10, q10, q0		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q0, d26, d6		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q0, q0, q8
-	veor	q0, q0, q10
-	veor	d6,d6,d7	@ Karatsuba pre-processing
-	vext.8	d16, d28, d28, #1	@ A1
-	vmull.p8	q8, d16, d6		@ F = A1*B
-	vext.8	d2, d6, d6, #1	@ B1
-	vmull.p8	q1, d28, d2		@ E = A*B1
-	vext.8	d18, d28, d28, #2	@ A2
-	vmull.p8	q9, d18, d6		@ H = A2*B
-	vext.8	d22, d6, d6, #2	@ B2
-	vmull.p8	q11, d28, d22		@ G = A*B2
-	vext.8	d20, d28, d28, #3	@ A3
-	veor	q8, q8, q1		@ L = E + F
-	vmull.p8	q10, d20, d6		@ J = A3*B
-	vext.8	d2, d6, d6, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q1, d28, d2		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d6, d6, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d28, d22		@ K = A*B4
-	veor	q10, q10, q1		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q1, d28, d6		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q1, q1, q8
-	veor	q1, q1, q10
-	vext.8	d16, d27, d27, #1	@ A1
-	vmull.p8	q8, d16, d7		@ F = A1*B
-	vext.8	d4, d7, d7, #1	@ B1
-	vmull.p8	q2, d27, d4		@ E = A*B1
-	vext.8	d18, d27, d27, #2	@ A2
-	vmull.p8	q9, d18, d7		@ H = A2*B
-	vext.8	d22, d7, d7, #2	@ B2
-	vmull.p8	q11, d27, d22		@ G = A*B2
-	vext.8	d20, d27, d27, #3	@ A3
-	veor	q8, q8, q2		@ L = E + F
-	vmull.p8	q10, d20, d7		@ J = A3*B
-	vext.8	d4, d7, d7, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q2, d27, d4		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d7, d7, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d27, d22		@ K = A*B4
-	veor	q10, q10, q2		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q2, d27, d7		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q2, q2, q8
-	veor	q2, q2, q10
-	veor	q1,q1,q0		@ Karatsuba post-processing
-	veor	q1,q1,q2
-	veor	d1,d1,d2
-	veor	d4,d4,d3	@ Xh|Xl - 256-bit result
-
-	@ equivalent of reduction_avx from ghash-x86_64.pl
-	vshl.i64	q9,q0,#57		@ 1st phase
-	vshl.i64	q10,q0,#62
-	veor	q10,q10,q9		@
-	vshl.i64	q9,q0,#63
-	veor	q10, q10, q9		@
-	veor	d1,d1,d20	@
-	veor	d4,d4,d21
-
-	vshr.u64	q10,q0,#1		@ 2nd phase
-	veor	q2,q2,q0
-	veor	q0,q0,q10		@
-	vshr.u64	q10,q10,#6
-	vshr.u64	q0,q0,#1		@
-	veor	q0,q0,q2		@
-	veor	q0,q0,q10		@
-
-	subs	r3,#16
-	bne	Loop_neon
-
-#ifdef __ARMEL__
-	vrev64.8	q0,q0
-#endif
-	sub	r0,#16
-	vst1.64	d1,[r0]!		@ write out Xi
-	vst1.64	d0,[r0]
-
-	bx	lr					@ bx lr
-
-#endif
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghashv8-armx32.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghashv8-armx32.S
@ -1,256 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-.code	32
-#undef	__thumb2__
-.globl	_gcm_init_v8
-.private_extern	_gcm_init_v8
-#ifdef __thumb2__
-.thumb_func	_gcm_init_v8
-#endif
-.align	4
-_gcm_init_v8:
-	vld1.64	{q9},[r1]		@ load input H
-	vmov.i8	q11,#0xe1
-	vshl.i64	q11,q11,#57		@ 0xc2.0
-	vext.8	q3,q9,q9,#8
-	vshr.u64	q10,q11,#63
-	vdup.32	q9,d18[1]
-	vext.8	q8,q10,q11,#8		@ t0=0xc2....01
-	vshr.u64	q10,q3,#63
-	vshr.s32	q9,q9,#31		@ broadcast carry bit
-	vand	q10,q10,q8
-	vshl.i64	q3,q3,#1
-	vext.8	q10,q10,q10,#8
-	vand	q8,q8,q9
-	vorr	q3,q3,q10		@ H<<<=1
-	veor	q12,q3,q8		@ twisted H
-	vst1.64	{q12},[r0]!		@ store Htable[0]
-
-	@ calculate H^2
-	vext.8	q8,q12,q12,#8		@ Karatsuba pre-processing
-.byte	0xa8,0x0e,0xa8,0xf2	@ pmull q0,q12,q12
-	veor	q8,q8,q12
-.byte	0xa9,0x4e,0xa9,0xf2	@ pmull2 q2,q12,q12
-.byte	0xa0,0x2e,0xa0,0xf2	@ pmull q1,q8,q8
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase
-
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	veor	q0,q1,q10
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q10,q10,q2
-	veor	q14,q0,q10
-
-	vext.8	q9,q14,q14,#8		@ Karatsuba pre-processing
-	veor	q9,q9,q14
-	vext.8	q13,q8,q9,#8		@ pack Karatsuba pre-processed
-	vst1.64	{q13,q14},[r0]		@ store Htable[1..2]
-
-	bx	lr
-
-.globl	_gcm_gmult_v8
-.private_extern	_gcm_gmult_v8
-#ifdef __thumb2__
-.thumb_func	_gcm_gmult_v8
-#endif
-.align	4
-_gcm_gmult_v8:
-	vld1.64	{q9},[r0]		@ load Xi
-	vmov.i8	q11,#0xe1
-	vld1.64	{q12,q13},[r1]	@ load twisted H, ...
-	vshl.u64	q11,q11,#57
-#ifndef __ARMEB__
-	vrev64.8	q9,q9
-#endif
-	vext.8	q3,q9,q9,#8
-
-.byte	0x86,0x0e,0xa8,0xf2	@ pmull q0,q12,q3		@ H.lo·Xi.lo
-	veor	q9,q9,q3		@ Karatsuba pre-processing
-.byte	0x87,0x4e,0xa9,0xf2	@ pmull2 q2,q12,q3		@ H.hi·Xi.hi
-.byte	0xa2,0x2e,0xaa,0xf2	@ pmull q1,q13,q9		@ (H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase of reduction
-
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	veor	q0,q1,q10
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase of reduction
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q10,q10,q2
-	veor	q0,q0,q10
-
-#ifndef __ARMEB__
-	vrev64.8	q0,q0
-#endif
-	vext.8	q0,q0,q0,#8
-	vst1.64	{q0},[r0]		@ write out Xi
-
-	bx	lr
-
-.globl	_gcm_ghash_v8
-.private_extern	_gcm_ghash_v8
-#ifdef __thumb2__
-.thumb_func	_gcm_ghash_v8
-#endif
-.align	4
-_gcm_ghash_v8:
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ 32-bit ABI says so
-	vld1.64	{q0},[r0]		@ load [rotated] Xi
-						@ "[rotated]" means that
-						@ loaded value would have
-						@ to be rotated in order to
-						@ make it appear as in
-						@ algorithm specification
-	subs	r3,r3,#32		@ see if r3 is 32 or larger
-	mov	r12,#16		@ r12 is used as post-
-						@ increment for input pointer;
-						@ as loop is modulo-scheduled
-						@ r12 is zeroed just in time
-						@ to preclude overstepping
-						@ inp[len], which means that
-						@ last block[s] are actually
-						@ loaded twice, but last
-						@ copy is not processed
-	vld1.64	{q12,q13},[r1]!	@ load twisted H, ..., H^2
-	vmov.i8	q11,#0xe1
-	vld1.64	{q14},[r1]
-	moveq	r12,#0			@ is it time to zero r12?
-	vext.8	q0,q0,q0,#8		@ rotate Xi
-	vld1.64	{q8},[r2]!	@ load [rotated] I[0]
-	vshl.u64	q11,q11,#57		@ compose 0xc2.0 constant
-#ifndef __ARMEB__
-	vrev64.8	q8,q8
-	vrev64.8	q0,q0
-#endif
-	vext.8	q3,q8,q8,#8		@ rotate I[0]
-	blo	Lodd_tail_v8		@ r3 was less than 32
-	vld1.64	{q9},[r2],r12	@ load [rotated] I[1]
-#ifndef __ARMEB__
-	vrev64.8	q9,q9
-#endif
-	vext.8	q7,q9,q9,#8
-	veor	q3,q3,q0		@ I[i]^=Xi
-.byte	0x8e,0x8e,0xa8,0xf2	@ pmull q4,q12,q7		@ H·Ii+1
-	veor	q9,q9,q7		@ Karatsuba pre-processing
-.byte	0x8f,0xce,0xa9,0xf2	@ pmull2 q6,q12,q7
-	b	Loop_mod2x_v8
-
-.align	4
-Loop_mod2x_v8:
-	vext.8	q10,q3,q3,#8
-	subs	r3,r3,#32		@ is there more data?
-.byte	0x86,0x0e,0xac,0xf2	@ pmull q0,q14,q3		@ H^2.lo·Xi.lo
-	movlo	r12,#0			@ is it time to zero r12?
-
-.byte	0xa2,0xae,0xaa,0xf2	@ pmull q5,q13,q9
-	veor	q10,q10,q3		@ Karatsuba pre-processing
-.byte	0x87,0x4e,0xad,0xf2	@ pmull2 q2,q14,q3		@ H^2.hi·Xi.hi
-	veor	q0,q0,q4		@ accumulate
-.byte	0xa5,0x2e,0xab,0xf2	@ pmull2 q1,q13,q10		@ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
-	vld1.64	{q8},[r2],r12	@ load [rotated] I[i+2]
-
-	veor	q2,q2,q6
-	moveq	r12,#0			@ is it time to zero r12?
-	veor	q1,q1,q5
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	vld1.64	{q9},[r2],r12	@ load [rotated] I[i+3]
-#ifndef __ARMEB__
-	vrev64.8	q8,q8
-#endif
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase of reduction
-
-#ifndef __ARMEB__
-	vrev64.8	q9,q9
-#endif
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	vext.8	q7,q9,q9,#8
-	vext.8	q3,q8,q8,#8
-	veor	q0,q1,q10
-.byte	0x8e,0x8e,0xa8,0xf2	@ pmull q4,q12,q7		@ H·Ii+1
-	veor	q3,q3,q2		@ accumulate q3 early
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase of reduction
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q3,q3,q10
-	veor	q9,q9,q7		@ Karatsuba pre-processing
-	veor	q3,q3,q0
-.byte	0x8f,0xce,0xa9,0xf2	@ pmull2 q6,q12,q7
-	bhs	Loop_mod2x_v8		@ there was at least 32 more bytes
-
-	veor	q2,q2,q10
-	vext.8	q3,q8,q8,#8		@ re-construct q3
-	adds	r3,r3,#32		@ re-construct r3
-	veor	q0,q0,q2		@ re-construct q0
-	beq	Ldone_v8		@ is r3 zero?
-Lodd_tail_v8:
-	vext.8	q10,q0,q0,#8
-	veor	q3,q3,q0		@ inp^=Xi
-	veor	q9,q8,q10		@ q9 is rotated inp^Xi
-
-.byte	0x86,0x0e,0xa8,0xf2	@ pmull q0,q12,q3		@ H.lo·Xi.lo
-	veor	q9,q9,q3		@ Karatsuba pre-processing
-.byte	0x87,0x4e,0xa9,0xf2	@ pmull2 q2,q12,q3		@ H.hi·Xi.hi
-.byte	0xa2,0x2e,0xaa,0xf2	@ pmull q1,q13,q9		@ (H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase of reduction
-
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	veor	q0,q1,q10
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase of reduction
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q10,q10,q2
-	veor	q0,q0,q10
-
-Ldone_v8:
-#ifndef __ARMEB__
-	vrev64.8	q0,q0
-#endif
-	vext.8	q0,q0,q0,#8
-	vst1.64	{q0},[r0]		@ write out Xi
-
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ 32-bit ABI says so
-	bx	lr
-
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha1-armv4-large.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha1-armv4-large.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha256-armv4.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha256-armv4.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha512-armv4.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha512-armv4.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/test/trampoline-armv4.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/test/trampoline-armv4.S
@ -1,376 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.syntax	unified
-
-
-
-
-.text
-
-@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
-@ with |argv|, then saves the callee-saved registers into |state|. It returns
-@ the result of |func|. The |unwind| argument is unused.
-@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
-@                              const uint32_t *argv, size_t argc,
-@                              int unwind);
-
-.globl	_abi_test_trampoline
-.private_extern	_abi_test_trampoline
-.align	4
-_abi_test_trampoline:
-	@ Save parameters and all callee-saved registers. For convenience, we
-	@ save r9 on iOS even though it's volatile.
-	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
-	stmdb	sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
-
-	@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
-	@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
-	sub	sp, sp, #28
-
-	@ Every register in AAPCS is either non-volatile or a parameter (except
-	@ r9 on iOS), so this code, by the actual call, loses all its scratch
-	@ registers. First fill in stack parameters while there are registers
-	@ to spare.
-	cmp	r3, #4
-	bls	Lstack_args_done
-	mov	r4, sp				@ r4 is the output pointer.
-	add	r5, r2, r3, lsl #2	@ Set r5 to the end of argv.
-	add	r2, r2, #16		@ Skip four arguments.
-Lstack_args_loop:
-	ldr	r6, [r2], #4
-	cmp	r2, r5
-	str	r6, [r4], #4
-	bne	Lstack_args_loop
-
-Lstack_args_done:
-	@ Load registers from |r1|.
-	vldmia	r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
-#if defined(__APPLE__)
-	@ r9 is not volatile on iOS.
-	ldmia	r1!, {r4,r5,r6,r7,r8,r10-r11}
-#else
-	ldmia	r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
-#endif
-
-	@ Load register parameters. This uses up our remaining registers, so we
-	@ repurpose lr as scratch space.
-	ldr	r3, [sp, #40]	@ Reload argc.
-	ldr	lr, [sp, #36]		@ Load argv into lr.
-	cmp	r3, #3
-	bhi	Larg_r3
-	beq	Larg_r2
-	cmp	r3, #1
-	bhi	Larg_r1
-	beq	Larg_r0
-	b	Largs_done
-
-Larg_r3:
-	ldr	r3, [lr, #12]	@ argv[3]
-Larg_r2:
-	ldr	r2, [lr, #8]	@ argv[2]
-Larg_r1:
-	ldr	r1, [lr, #4]	@ argv[1]
-Larg_r0:
-	ldr	r0, [lr]	@ argv[0]
-Largs_done:
-
-	@ With every other register in use, load the function pointer into lr
-	@ and call the function.
-	ldr	lr, [sp, #28]
-	blx	lr
-
-	@ r1-r3 are free for use again. The trampoline only supports
-	@ single-return functions. Pass r4-r11 to the caller.
-	ldr	r1, [sp, #32]
-	vstmia	r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
-#if defined(__APPLE__)
-	@ r9 is not volatile on iOS.
-	stmia	r1!, {r4,r5,r6,r7,r8,r10-r11}
-#else
-	stmia	r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
-#endif
-
-	@ Unwind the stack and restore registers.
-	add	sp, sp, #44		@ 44 = 28+16
-	ldmia	sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}	@ Skip r0-r3 (see +16 above).
-	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
-
-	bx	lr
-
-
-.globl	_abi_test_clobber_r0
-.private_extern	_abi_test_clobber_r0
-.align	4
-_abi_test_clobber_r0:
-	mov	r0, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r1
-.private_extern	_abi_test_clobber_r1
-.align	4
-_abi_test_clobber_r1:
-	mov	r1, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r2
-.private_extern	_abi_test_clobber_r2
-.align	4
-_abi_test_clobber_r2:
-	mov	r2, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r3
-.private_extern	_abi_test_clobber_r3
-.align	4
-_abi_test_clobber_r3:
-	mov	r3, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r4
-.private_extern	_abi_test_clobber_r4
-.align	4
-_abi_test_clobber_r4:
-	mov	r4, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r5
-.private_extern	_abi_test_clobber_r5
-.align	4
-_abi_test_clobber_r5:
-	mov	r5, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r6
-.private_extern	_abi_test_clobber_r6
-.align	4
-_abi_test_clobber_r6:
-	mov	r6, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r7
-.private_extern	_abi_test_clobber_r7
-.align	4
-_abi_test_clobber_r7:
-	mov	r7, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r8
-.private_extern	_abi_test_clobber_r8
-.align	4
-_abi_test_clobber_r8:
-	mov	r8, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r9
-.private_extern	_abi_test_clobber_r9
-.align	4
-_abi_test_clobber_r9:
-	mov	r9, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r10
-.private_extern	_abi_test_clobber_r10
-.align	4
-_abi_test_clobber_r10:
-	mov	r10, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r11
-.private_extern	_abi_test_clobber_r11
-.align	4
-_abi_test_clobber_r11:
-	mov	r11, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r12
-.private_extern	_abi_test_clobber_r12
-.align	4
-_abi_test_clobber_r12:
-	mov	r12, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d0
-.private_extern	_abi_test_clobber_d0
-.align	4
-_abi_test_clobber_d0:
-	mov	r0, #0
-	vmov	s0, r0
-	vmov	s1, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d1
-.private_extern	_abi_test_clobber_d1
-.align	4
-_abi_test_clobber_d1:
-	mov	r0, #0
-	vmov	s2, r0
-	vmov	s3, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d2
-.private_extern	_abi_test_clobber_d2
-.align	4
-_abi_test_clobber_d2:
-	mov	r0, #0
-	vmov	s4, r0
-	vmov	s5, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d3
-.private_extern	_abi_test_clobber_d3
-.align	4
-_abi_test_clobber_d3:
-	mov	r0, #0
-	vmov	s6, r0
-	vmov	s7, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d4
-.private_extern	_abi_test_clobber_d4
-.align	4
-_abi_test_clobber_d4:
-	mov	r0, #0
-	vmov	s8, r0
-	vmov	s9, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d5
-.private_extern	_abi_test_clobber_d5
-.align	4
-_abi_test_clobber_d5:
-	mov	r0, #0
-	vmov	s10, r0
-	vmov	s11, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d6
-.private_extern	_abi_test_clobber_d6
-.align	4
-_abi_test_clobber_d6:
-	mov	r0, #0
-	vmov	s12, r0
-	vmov	s13, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d7
-.private_extern	_abi_test_clobber_d7
-.align	4
-_abi_test_clobber_d7:
-	mov	r0, #0
-	vmov	s14, r0
-	vmov	s15, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d8
-.private_extern	_abi_test_clobber_d8
-.align	4
-_abi_test_clobber_d8:
-	mov	r0, #0
-	vmov	s16, r0
-	vmov	s17, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d9
-.private_extern	_abi_test_clobber_d9
-.align	4
-_abi_test_clobber_d9:
-	mov	r0, #0
-	vmov	s18, r0
-	vmov	s19, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d10
-.private_extern	_abi_test_clobber_d10
-.align	4
-_abi_test_clobber_d10:
-	mov	r0, #0
-	vmov	s20, r0
-	vmov	s21, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d11
-.private_extern	_abi_test_clobber_d11
-.align	4
-_abi_test_clobber_d11:
-	mov	r0, #0
-	vmov	s22, r0
-	vmov	s23, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d12
-.private_extern	_abi_test_clobber_d12
-.align	4
-_abi_test_clobber_d12:
-	mov	r0, #0
-	vmov	s24, r0
-	vmov	s25, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d13
-.private_extern	_abi_test_clobber_d13
-.align	4
-_abi_test_clobber_d13:
-	mov	r0, #0
-	vmov	s26, r0
-	vmov	s27, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d14
-.private_extern	_abi_test_clobber_d14
-.align	4
-_abi_test_clobber_d14:
-	mov	r0, #0
-	vmov	s28, r0
-	vmov	s29, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d15
-.private_extern	_abi_test_clobber_d15
-.align	4
-_abi_test_clobber_d15:
-	mov	r0, #0
-	vmov	s30, r0
-	vmov	s31, r0
-	bx	lr
-
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/chacha/chacha-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/chacha/chacha-armv8.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
@ -1,785 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__aarch64__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-#if __ARM_MAX_ARCH__>=7
-.text
-.arch	armv8-a+crypto
-.section	.rodata
-.align	5
-.Lrcon:
-.long	0x01,0x01,0x01,0x01
-.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
-.long	0x1b,0x1b,0x1b,0x1b
-
-.text
-
-.globl	aes_hw_set_encrypt_key
-.hidden	aes_hw_set_encrypt_key
-.type	aes_hw_set_encrypt_key,%function
-.align	5
-aes_hw_set_encrypt_key:
-.Lenc_key:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	mov	x3,#-1
-	cmp	x0,#0
-	b.eq	.Lenc_key_abort
-	cmp	x2,#0
-	b.eq	.Lenc_key_abort
-	mov	x3,#-2
-	cmp	w1,#128
-	b.lt	.Lenc_key_abort
-	cmp	w1,#256
-	b.gt	.Lenc_key_abort
-	tst	w1,#0x3f
-	b.ne	.Lenc_key_abort
-
-	adrp	x3,.Lrcon
-	add	x3,x3,:lo12:.Lrcon
-	cmp	w1,#192
-
-	eor	v0.16b,v0.16b,v0.16b
-	ld1	{v3.16b},[x0],#16
-	mov	w1,#8		// reuse w1
-	ld1	{v1.4s,v2.4s},[x3],#32
-
-	b.lt	.Loop128
-	b.eq	.L192
-	b	.L256
-
-.align	4
-.Loop128:
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-	b.ne	.Loop128
-
-	ld1	{v1.4s},[x3]
-
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	eor	v3.16b,v3.16b,v6.16b
-	st1	{v3.4s},[x2]
-	add	x2,x2,#0x50
-
-	mov	w12,#10
-	b	.Ldone
-
-.align	4
-.L192:
-	ld1	{v4.8b},[x0],#8
-	movi	v6.16b,#8			// borrow v6.16b
-	st1	{v3.4s},[x2],#16
-	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
-
-.Loop192:
-	tbl	v6.16b,{v4.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v4.8b},[x2],#8
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-
-	dup	v5.4s,v3.s[3]
-	eor	v5.16b,v5.16b,v4.16b
-	eor	v6.16b,v6.16b,v1.16b
-	ext	v4.16b,v0.16b,v4.16b,#12
-	shl	v1.16b,v1.16b,#1
-	eor	v4.16b,v4.16b,v5.16b
-	eor	v3.16b,v3.16b,v6.16b
-	eor	v4.16b,v4.16b,v6.16b
-	st1	{v3.4s},[x2],#16
-	b.ne	.Loop192
-
-	mov	w12,#12
-	add	x2,x2,#0x20
-	b	.Ldone
-
-.align	4
-.L256:
-	ld1	{v4.16b},[x0]
-	mov	w1,#7
-	mov	w12,#14
-	st1	{v3.4s},[x2],#16
-
-.Loop256:
-	tbl	v6.16b,{v4.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v4.4s},[x2],#16
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-	st1	{v3.4s},[x2],#16
-	b.eq	.Ldone
-
-	dup	v6.4s,v3.s[3]		// just splat
-	ext	v5.16b,v0.16b,v4.16b,#12
-	aese	v6.16b,v0.16b
-
-	eor	v4.16b,v4.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v4.16b,v4.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v4.16b,v4.16b,v5.16b
-
-	eor	v4.16b,v4.16b,v6.16b
-	b	.Loop256
-
-.Ldone:
-	str	w12,[x2]
-	mov	x3,#0
-
-.Lenc_key_abort:
-	mov	x0,x3			// return value
-	ldr	x29,[sp],#16
-	ret
-.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
-
-.globl	aes_hw_set_decrypt_key
-.hidden	aes_hw_set_decrypt_key
-.type	aes_hw_set_decrypt_key,%function
-.align	5
-aes_hw_set_decrypt_key:
-	AARCH64_SIGN_LINK_REGISTER
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	bl	.Lenc_key
-
-	cmp	x0,#0
-	b.ne	.Ldec_key_abort
-
-	sub	x2,x2,#240		// restore original x2
-	mov	x4,#-16
-	add	x0,x2,x12,lsl#4	// end of key schedule
-
-	ld1	{v0.4s},[x2]
-	ld1	{v1.4s},[x0]
-	st1	{v0.4s},[x0],x4
-	st1	{v1.4s},[x2],#16
-
-.Loop_imc:
-	ld1	{v0.4s},[x2]
-	ld1	{v1.4s},[x0]
-	aesimc	v0.16b,v0.16b
-	aesimc	v1.16b,v1.16b
-	st1	{v0.4s},[x0],x4
-	st1	{v1.4s},[x2],#16
-	cmp	x0,x2
-	b.hi	.Loop_imc
-
-	ld1	{v0.4s},[x2]
-	aesimc	v0.16b,v0.16b
-	st1	{v0.4s},[x0]
-
-	eor	x0,x0,x0		// return value
-.Ldec_key_abort:
-	ldp	x29,x30,[sp],#16
-	AARCH64_VALIDATE_LINK_REGISTER
-	ret
-.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
-.globl	aes_hw_encrypt
-.hidden	aes_hw_encrypt
-.type	aes_hw_encrypt,%function
-.align	5
-aes_hw_encrypt:
-	AARCH64_VALID_CALL_TARGET
-	ldr	w3,[x2,#240]
-	ld1	{v0.4s},[x2],#16
-	ld1	{v2.16b},[x0]
-	sub	w3,w3,#2
-	ld1	{v1.4s},[x2],#16
-
-.Loop_enc:
-	aese	v2.16b,v0.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2],#16
-	subs	w3,w3,#2
-	aese	v2.16b,v1.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v1.4s},[x2],#16
-	b.gt	.Loop_enc
-
-	aese	v2.16b,v0.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2]
-	aese	v2.16b,v1.16b
-	eor	v2.16b,v2.16b,v0.16b
-
-	st1	{v2.16b},[x1]
-	ret
-.size	aes_hw_encrypt,.-aes_hw_encrypt
-.globl	aes_hw_decrypt
-.hidden	aes_hw_decrypt
-.type	aes_hw_decrypt,%function
-.align	5
-aes_hw_decrypt:
-	AARCH64_VALID_CALL_TARGET
-	ldr	w3,[x2,#240]
-	ld1	{v0.4s},[x2],#16
-	ld1	{v2.16b},[x0]
-	sub	w3,w3,#2
-	ld1	{v1.4s},[x2],#16
-
-.Loop_dec:
-	aesd	v2.16b,v0.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2],#16
-	subs	w3,w3,#2
-	aesd	v2.16b,v1.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v1.4s},[x2],#16
-	b.gt	.Loop_dec
-
-	aesd	v2.16b,v0.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2]
-	aesd	v2.16b,v1.16b
-	eor	v2.16b,v2.16b,v0.16b
-
-	st1	{v2.16b},[x1]
-	ret
-.size	aes_hw_decrypt,.-aes_hw_decrypt
-.globl	aes_hw_cbc_encrypt
-.hidden	aes_hw_cbc_encrypt
-.type	aes_hw_cbc_encrypt,%function
-.align	5
-aes_hw_cbc_encrypt:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	subs	x2,x2,#16
-	mov	x8,#16
-	b.lo	.Lcbc_abort
-	csel	x8,xzr,x8,eq
-
-	cmp	w5,#0			// en- or decrypting?
-	ldr	w5,[x3,#240]
-	and	x2,x2,#-16
-	ld1	{v6.16b},[x4]
-	ld1	{v0.16b},[x0],x8
-
-	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
-	sub	w5,w5,#6
-	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
-	sub	w5,w5,#2
-	ld1	{v18.4s,v19.4s},[x7],#32
-	ld1	{v20.4s,v21.4s},[x7],#32
-	ld1	{v22.4s,v23.4s},[x7],#32
-	ld1	{v7.4s},[x7]
-
-	add	x7,x3,#32
-	mov	w6,w5
-	b.eq	.Lcbc_dec
-
-	cmp	w5,#2
-	eor	v0.16b,v0.16b,v6.16b
-	eor	v5.16b,v16.16b,v7.16b
-	b.eq	.Lcbc_enc128
-
-	ld1	{v2.4s,v3.4s},[x7]
-	add	x7,x3,#16
-	add	x6,x3,#16*4
-	add	x12,x3,#16*5
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	add	x14,x3,#16*6
-	add	x3,x3,#16*7
-	b	.Lenter_cbc_enc
-
-.align	4
-.Loop_cbc_enc:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	st1	{v6.16b},[x1],#16
-.Lenter_cbc_enc:
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v2.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.4s},[x6]
-	cmp	w5,#4
-	aese	v0.16b,v3.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x12]
-	b.eq	.Lcbc_enc192
-
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.4s},[x14]
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x3]
-	nop
-
-.Lcbc_enc192:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	subs	x2,x2,#16
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	csel	x8,xzr,x8,eq
-	aese	v0.16b,v18.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v19.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.16b},[x0],x8
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	eor	v16.16b,v16.16b,v5.16b
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v23.16b
-	eor	v6.16b,v0.16b,v7.16b
-	b.hs	.Loop_cbc_enc
-
-	st1	{v6.16b},[x1],#16
-	b	.Lcbc_done
-
-.align	5
-.Lcbc_enc128:
-	ld1	{v2.4s,v3.4s},[x7]
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	b	.Lenter_cbc_enc128
-.Loop_cbc_enc128:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	st1	{v6.16b},[x1],#16
-.Lenter_cbc_enc128:
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	subs	x2,x2,#16
-	aese	v0.16b,v2.16b
-	aesmc	v0.16b,v0.16b
-	csel	x8,xzr,x8,eq
-	aese	v0.16b,v3.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v18.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v19.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.16b},[x0],x8
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	eor	v16.16b,v16.16b,v5.16b
-	aese	v0.16b,v23.16b
-	eor	v6.16b,v0.16b,v7.16b
-	b.hs	.Loop_cbc_enc128
-
-	st1	{v6.16b},[x1],#16
-	b	.Lcbc_done
-.align	5
-.Lcbc_dec:
-	ld1	{v18.16b},[x0],#16
-	subs	x2,x2,#32		// bias
-	add	w6,w5,#2
-	orr	v3.16b,v0.16b,v0.16b
-	orr	v1.16b,v0.16b,v0.16b
-	orr	v19.16b,v18.16b,v18.16b
-	b.lo	.Lcbc_dec_tail
-
-	orr	v1.16b,v18.16b,v18.16b
-	ld1	{v18.16b},[x0],#16
-	orr	v2.16b,v0.16b,v0.16b
-	orr	v3.16b,v1.16b,v1.16b
-	orr	v19.16b,v18.16b,v18.16b
-
-.Loop3x_cbc_dec:
-	aesd	v0.16b,v16.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aesd	v0.16b,v17.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	.Loop3x_cbc_dec
-
-	aesd	v0.16b,v16.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	eor	v4.16b,v6.16b,v7.16b
-	subs	x2,x2,#0x30
-	eor	v5.16b,v2.16b,v7.16b
-	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
-	aesd	v0.16b,v17.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	eor	v17.16b,v3.16b,v7.16b
-	add	x0,x0,x6		// x0 is adjusted in such way that
-					// at exit from the loop v1.16b-v18.16b
-					// are loaded with last "words"
-	orr	v6.16b,v19.16b,v19.16b
-	mov	x7,x3
-	aesd	v0.16b,v20.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v20.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v20.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v2.16b},[x0],#16
-	aesd	v0.16b,v21.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v21.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v21.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v3.16b},[x0],#16
-	aesd	v0.16b,v22.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v22.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v22.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v19.16b},[x0],#16
-	aesd	v0.16b,v23.16b
-	aesd	v1.16b,v23.16b
-	aesd	v18.16b,v23.16b
-	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
-	add	w6,w5,#2
-	eor	v4.16b,v4.16b,v0.16b
-	eor	v5.16b,v5.16b,v1.16b
-	eor	v18.16b,v18.16b,v17.16b
-	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
-	st1	{v4.16b},[x1],#16
-	orr	v0.16b,v2.16b,v2.16b
-	st1	{v5.16b},[x1],#16
-	orr	v1.16b,v3.16b,v3.16b
-	st1	{v18.16b},[x1],#16
-	orr	v18.16b,v19.16b,v19.16b
-	b.hs	.Loop3x_cbc_dec
-
-	cmn	x2,#0x30
-	b.eq	.Lcbc_done
-	nop
-
-.Lcbc_dec_tail:
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	.Lcbc_dec_tail
-
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	aesd	v1.16b,v20.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v20.16b
-	aesimc	v18.16b,v18.16b
-	cmn	x2,#0x20
-	aesd	v1.16b,v21.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v21.16b
-	aesimc	v18.16b,v18.16b
-	eor	v5.16b,v6.16b,v7.16b
-	aesd	v1.16b,v22.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v22.16b
-	aesimc	v18.16b,v18.16b
-	eor	v17.16b,v3.16b,v7.16b
-	aesd	v1.16b,v23.16b
-	aesd	v18.16b,v23.16b
-	b.eq	.Lcbc_dec_one
-	eor	v5.16b,v5.16b,v1.16b
-	eor	v17.16b,v17.16b,v18.16b
-	orr	v6.16b,v19.16b,v19.16b
-	st1	{v5.16b},[x1],#16
-	st1	{v17.16b},[x1],#16
-	b	.Lcbc_done
-
-.Lcbc_dec_one:
-	eor	v5.16b,v5.16b,v18.16b
-	orr	v6.16b,v19.16b,v19.16b
-	st1	{v5.16b},[x1],#16
-
-.Lcbc_done:
-	st1	{v6.16b},[x4]
-.Lcbc_abort:
-	ldr	x29,[sp],#16
-	ret
-.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
-.globl	aes_hw_ctr32_encrypt_blocks
-.hidden	aes_hw_ctr32_encrypt_blocks
-.type	aes_hw_ctr32_encrypt_blocks,%function
-.align	5
-aes_hw_ctr32_encrypt_blocks:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	ldr	w5,[x3,#240]
-
-	ldr	w8, [x4, #12]
-	ld1	{v0.4s},[x4]
-
-	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
-	sub	w5,w5,#4
-	mov	x12,#16
-	cmp	x2,#2
-	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
-	sub	w5,w5,#2
-	ld1	{v20.4s,v21.4s},[x7],#32
-	ld1	{v22.4s,v23.4s},[x7],#32
-	ld1	{v7.4s},[x7]
-	add	x7,x3,#32
-	mov	w6,w5
-	csel	x12,xzr,x12,lo
-#ifndef __ARMEB__
-	rev	w8, w8
-#endif
-	orr	v1.16b,v0.16b,v0.16b
-	add	w10, w8, #1
-	orr	v18.16b,v0.16b,v0.16b
-	add	w8, w8, #2
-	orr	v6.16b,v0.16b,v0.16b
-	rev	w10, w10
-	mov	v1.s[3],w10
-	b.ls	.Lctr32_tail
-	rev	w12, w8
-	sub	x2,x2,#3		// bias
-	mov	v18.s[3],w12
-	b	.Loop3x_ctr32
-
-.align	4
-.Loop3x_ctr32:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	aese	v18.16b,v16.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	aese	v18.16b,v17.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	.Loop3x_ctr32
-
-	aese	v0.16b,v16.16b
-	aesmc	v4.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v5.16b,v1.16b
-	ld1	{v2.16b},[x0],#16
-	orr	v0.16b,v6.16b,v6.16b
-	aese	v18.16b,v16.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v3.16b},[x0],#16
-	orr	v1.16b,v6.16b,v6.16b
-	aese	v4.16b,v17.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v17.16b
-	aesmc	v5.16b,v5.16b
-	ld1	{v19.16b},[x0],#16
-	mov	x7,x3
-	aese	v18.16b,v17.16b
-	aesmc	v17.16b,v18.16b
-	orr	v18.16b,v6.16b,v6.16b
-	add	w9,w8,#1
-	aese	v4.16b,v20.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v20.16b
-	aesmc	v5.16b,v5.16b
-	eor	v2.16b,v2.16b,v7.16b
-	add	w10,w8,#2
-	aese	v17.16b,v20.16b
-	aesmc	v17.16b,v17.16b
-	eor	v3.16b,v3.16b,v7.16b
-	add	w8,w8,#3
-	aese	v4.16b,v21.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v21.16b
-	aesmc	v5.16b,v5.16b
-	eor	v19.16b,v19.16b,v7.16b
-	rev	w9,w9
-	aese	v17.16b,v21.16b
-	aesmc	v17.16b,v17.16b
-	mov	v0.s[3], w9
-	rev	w10,w10
-	aese	v4.16b,v22.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v22.16b
-	aesmc	v5.16b,v5.16b
-	mov	v1.s[3], w10
-	rev	w12,w8
-	aese	v17.16b,v22.16b
-	aesmc	v17.16b,v17.16b
-	mov	v18.s[3], w12
-	subs	x2,x2,#3
-	aese	v4.16b,v23.16b
-	aese	v5.16b,v23.16b
-	aese	v17.16b,v23.16b
-
-	eor	v2.16b,v2.16b,v4.16b
-	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
-	st1	{v2.16b},[x1],#16
-	eor	v3.16b,v3.16b,v5.16b
-	mov	w6,w5
-	st1	{v3.16b},[x1],#16
-	eor	v19.16b,v19.16b,v17.16b
-	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
-	st1	{v19.16b},[x1],#16
-	b.hs	.Loop3x_ctr32
-
-	adds	x2,x2,#3
-	b.eq	.Lctr32_done
-	cmp	x2,#1
-	mov	x12,#16
-	csel	x12,xzr,x12,eq
-
-.Lctr32_tail:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	.Lctr32_tail
-
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v2.16b},[x0],x12
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v20.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v3.16b},[x0]
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v21.16b
-	aesmc	v1.16b,v1.16b
-	eor	v2.16b,v2.16b,v7.16b
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v22.16b
-	aesmc	v1.16b,v1.16b
-	eor	v3.16b,v3.16b,v7.16b
-	aese	v0.16b,v23.16b
-	aese	v1.16b,v23.16b
-
-	cmp	x2,#1
-	eor	v2.16b,v2.16b,v0.16b
-	eor	v3.16b,v3.16b,v1.16b
-	st1	{v2.16b},[x1],#16
-	b.eq	.Lctr32_done
-	st1	{v3.16b},[x1]
-
-.Lctr32_done:
-	ldr	x29,[sp],#16
-	ret
-.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
-#endif
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/armv8-mont.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/armv8-mont.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
@ -1,346 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__aarch64__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-.globl	gcm_init_neon
-.hidden	gcm_init_neon
-.type	gcm_init_neon,%function
-.align	4
-gcm_init_neon:
-	AARCH64_VALID_CALL_TARGET
-	// This function is adapted from gcm_init_v8. xC2 is t3.
-	ld1	{v17.2d}, [x1]			// load H
-	movi	v19.16b, #0xe1
-	shl	v19.2d, v19.2d, #57		// 0xc2.0
-	ext	v3.16b, v17.16b, v17.16b, #8
-	ushr	v18.2d, v19.2d, #63
-	dup	v17.4s, v17.s[1]
-	ext	v16.16b, v18.16b, v19.16b, #8	// t0=0xc2....01
-	ushr	v18.2d, v3.2d, #63
-	sshr	v17.4s, v17.4s, #31		// broadcast carry bit
-	and	v18.16b, v18.16b, v16.16b
-	shl	v3.2d, v3.2d, #1
-	ext	v18.16b, v18.16b, v18.16b, #8
-	and	v16.16b, v16.16b, v17.16b
-	orr	v3.16b, v3.16b, v18.16b	// H<<<=1
-	eor	v5.16b, v3.16b, v16.16b	// twisted H
-	st1	{v5.2d}, [x0]			// store Htable[0]
-	ret
-.size	gcm_init_neon,.-gcm_init_neon
-
-.globl	gcm_gmult_neon
-.hidden	gcm_gmult_neon
-.type	gcm_gmult_neon,%function
-.align	4
-gcm_gmult_neon:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v3.16b}, [x0]		// load Xi
-	ld1	{v5.1d}, [x1], #8		// load twisted H
-	ld1	{v6.1d}, [x1]
-	adrp	x9, .Lmasks		// load constants
-	add	x9, x9, :lo12:.Lmasks
-	ld1	{v24.2d, v25.2d}, [x9]
-	rev64	v3.16b, v3.16b		// byteswap Xi
-	ext	v3.16b, v3.16b, v3.16b, #8
-	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing
-
-	mov	x3, #16
-	b	.Lgmult_neon
-.size	gcm_gmult_neon,.-gcm_gmult_neon
-
-.globl	gcm_ghash_neon
-.hidden	gcm_ghash_neon
-.type	gcm_ghash_neon,%function
-.align	4
-gcm_ghash_neon:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v0.16b}, [x0]		// load Xi
-	ld1	{v5.1d}, [x1], #8		// load twisted H
-	ld1	{v6.1d}, [x1]
-	adrp	x9, .Lmasks		// load constants
-	add	x9, x9, :lo12:.Lmasks
-	ld1	{v24.2d, v25.2d}, [x9]
-	rev64	v0.16b, v0.16b		// byteswap Xi
-	ext	v0.16b, v0.16b, v0.16b, #8
-	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing
-
-.Loop_neon:
-	ld1	{v3.16b}, [x2], #16	// load inp
-	rev64	v3.16b, v3.16b		// byteswap inp
-	ext	v3.16b, v3.16b, v3.16b, #8
-	eor	v3.16b, v3.16b, v0.16b	// inp ^= Xi
-
-.Lgmult_neon:
-	// Split the input into v3 and v4. (The upper halves are unused,
-	// so it is okay to leave them alone.)
-	ins	v4.d[0], v3.d[1]
-	ext	v16.8b, v5.8b, v5.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v3.8b		// F = A1*B
-	ext	v0.8b, v3.8b, v3.8b, #1		// B1
-	pmull	v0.8h, v5.8b, v0.8b		// E = A*B1
-	ext	v17.8b, v5.8b, v5.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v3.8b		// H = A2*B
-	ext	v19.8b, v3.8b, v3.8b, #2	// B2
-	pmull	v19.8h, v5.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v5.8b, v5.8b, #3	// A3
-	eor	v16.16b, v16.16b, v0.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v3.8b		// J = A3*B
-	ext	v0.8b, v3.8b, v3.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v0.8h, v5.8b, v0.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v3.8b, v3.8b, #4	// B4
-	eor	v18.16b, v18.16b, v0.16b	// N = I + J
-	pmull	v19.8h, v5.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v0.8h, v5.8b, v3.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v0.16b, v0.16b, v16.16b
-	eor	v0.16b, v0.16b, v18.16b
-	eor	v3.8b, v3.8b, v4.8b	// Karatsuba pre-processing
-	ext	v16.8b, v7.8b, v7.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v3.8b		// F = A1*B
-	ext	v1.8b, v3.8b, v3.8b, #1		// B1
-	pmull	v1.8h, v7.8b, v1.8b		// E = A*B1
-	ext	v17.8b, v7.8b, v7.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v3.8b		// H = A2*B
-	ext	v19.8b, v3.8b, v3.8b, #2	// B2
-	pmull	v19.8h, v7.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v7.8b, v7.8b, #3	// A3
-	eor	v16.16b, v16.16b, v1.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v3.8b		// J = A3*B
-	ext	v1.8b, v3.8b, v3.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v1.8h, v7.8b, v1.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v3.8b, v3.8b, #4	// B4
-	eor	v18.16b, v18.16b, v1.16b	// N = I + J
-	pmull	v19.8h, v7.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v1.8h, v7.8b, v3.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v1.16b, v1.16b, v16.16b
-	eor	v1.16b, v1.16b, v18.16b
-	ext	v16.8b, v6.8b, v6.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v4.8b		// F = A1*B
-	ext	v2.8b, v4.8b, v4.8b, #1		// B1
-	pmull	v2.8h, v6.8b, v2.8b		// E = A*B1
-	ext	v17.8b, v6.8b, v6.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v4.8b		// H = A2*B
-	ext	v19.8b, v4.8b, v4.8b, #2	// B2
-	pmull	v19.8h, v6.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v6.8b, v6.8b, #3	// A3
-	eor	v16.16b, v16.16b, v2.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v4.8b		// J = A3*B
-	ext	v2.8b, v4.8b, v4.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v2.8h, v6.8b, v2.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v4.8b, v4.8b, #4	// B4
-	eor	v18.16b, v18.16b, v2.16b	// N = I + J
-	pmull	v19.8h, v6.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v2.8h, v6.8b, v4.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v2.16b, v2.16b, v16.16b
-	eor	v2.16b, v2.16b, v18.16b
-	ext	v16.16b, v0.16b, v2.16b, #8
-	eor	v1.16b, v1.16b, v0.16b	// Karatsuba post-processing
-	eor	v1.16b, v1.16b, v2.16b
-	eor	v1.16b, v1.16b, v16.16b	// Xm overlaps Xh.lo and Xl.hi
-	ins	v0.d[1], v1.d[0]		// Xh|Xl - 256-bit result
-	// This is a no-op due to the ins instruction below.
-	// ins	v2.d[0], v1.d[1]
-
-	// equivalent of reduction_avx from ghash-x86_64.pl
-	shl	v17.2d, v0.2d, #57		// 1st phase
-	shl	v18.2d, v0.2d, #62
-	eor	v18.16b, v18.16b, v17.16b	//
-	shl	v17.2d, v0.2d, #63
-	eor	v18.16b, v18.16b, v17.16b	//
-	// Note Xm contains {Xl.d[1], Xh.d[0]}.
-	eor	v18.16b, v18.16b, v1.16b
-	ins	v0.d[1], v18.d[0]		// Xl.d[1] ^= t2.d[0]
-	ins	v2.d[0], v18.d[1]		// Xh.d[0] ^= t2.d[1]
-
-	ushr	v18.2d, v0.2d, #1		// 2nd phase
-	eor	v2.16b, v2.16b,v0.16b
-	eor	v0.16b, v0.16b,v18.16b	//
-	ushr	v18.2d, v18.2d, #6
-	ushr	v0.2d, v0.2d, #1		//
-	eor	v0.16b, v0.16b, v2.16b	//
-	eor	v0.16b, v0.16b, v18.16b	//
-
-	subs	x3, x3, #16
-	bne	.Loop_neon
-
-	rev64	v0.16b, v0.16b		// byteswap Xi and write
-	ext	v0.16b, v0.16b, v0.16b, #8
-	st1	{v0.16b}, [x0]
-
-	ret
-.size	gcm_ghash_neon,.-gcm_ghash_neon
-
-.section	.rodata
-.align	4
-.Lmasks:
-.quad	0x0000ffffffffffff	// k48
-.quad	0x00000000ffffffff	// k32
-.quad	0x000000000000ffff	// k16
-.quad	0x0000000000000000	// k0
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
@ -1,252 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__aarch64__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-.arch	armv8-a+crypto
-.globl	gcm_init_v8
-.hidden	gcm_init_v8
-.type	gcm_init_v8,%function
-.align	4
-gcm_init_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v17.2d},[x1]		//load input H
-	movi	v19.16b,#0xe1
-	shl	v19.2d,v19.2d,#57		//0xc2.0
-	ext	v3.16b,v17.16b,v17.16b,#8
-	ushr	v18.2d,v19.2d,#63
-	dup	v17.4s,v17.s[1]
-	ext	v16.16b,v18.16b,v19.16b,#8		//t0=0xc2....01
-	ushr	v18.2d,v3.2d,#63
-	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
-	and	v18.16b,v18.16b,v16.16b
-	shl	v3.2d,v3.2d,#1
-	ext	v18.16b,v18.16b,v18.16b,#8
-	and	v16.16b,v16.16b,v17.16b
-	orr	v3.16b,v3.16b,v18.16b		//H<<<=1
-	eor	v20.16b,v3.16b,v16.16b		//twisted H
-	st1	{v20.2d},[x0],#16		//store Htable[0]
-
-	//calculate H^2
-	ext	v16.16b,v20.16b,v20.16b,#8		//Karatsuba pre-processing
-	pmull	v0.1q,v20.1d,v20.1d
-	eor	v16.16b,v16.16b,v20.16b
-	pmull2	v2.1q,v20.2d,v20.2d
-	pmull	v1.1q,v16.1d,v16.1d
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v22.16b,v0.16b,v18.16b
-
-	ext	v17.16b,v22.16b,v22.16b,#8		//Karatsuba pre-processing
-	eor	v17.16b,v17.16b,v22.16b
-	ext	v21.16b,v16.16b,v17.16b,#8		//pack Karatsuba pre-processed
-	st1	{v21.2d,v22.2d},[x0]		//store Htable[1..2]
-
-	ret
-.size	gcm_init_v8,.-gcm_init_v8
-.globl	gcm_gmult_v8
-.hidden	gcm_gmult_v8
-.type	gcm_gmult_v8,%function
-.align	4
-gcm_gmult_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v17.2d},[x0]		//load Xi
-	movi	v19.16b,#0xe1
-	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
-	shl	v19.2d,v19.2d,#57
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ext	v3.16b,v17.16b,v17.16b,#8
-
-	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
-	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
-	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v0.16b,v0.16b,v18.16b
-
-#ifndef __ARMEB__
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v0.16b,v0.16b,v0.16b,#8
-	st1	{v0.2d},[x0]		//write out Xi
-
-	ret
-.size	gcm_gmult_v8,.-gcm_gmult_v8
-.globl	gcm_ghash_v8
-.hidden	gcm_ghash_v8
-.type	gcm_ghash_v8,%function
-.align	4
-gcm_ghash_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v0.2d},[x0]		//load [rotated] Xi
-						//"[rotated]" means that
-						//loaded value would have
-						//to be rotated in order to
-						//make it appear as in
-						//algorithm specification
-	subs	x3,x3,#32		//see if x3 is 32 or larger
-	mov	x12,#16		//x12 is used as post-
-						//increment for input pointer;
-						//as loop is modulo-scheduled
-						//x12 is zeroed just in time
-						//to preclude overstepping
-						//inp[len], which means that
-						//last block[s] are actually
-						//loaded twice, but last
-						//copy is not processed
-	ld1	{v20.2d,v21.2d},[x1],#32	//load twisted H, ..., H^2
-	movi	v19.16b,#0xe1
-	ld1	{v22.2d},[x1]
-	csel	x12,xzr,x12,eq			//is it time to zero x12?
-	ext	v0.16b,v0.16b,v0.16b,#8		//rotate Xi
-	ld1	{v16.2d},[x2],#16	//load [rotated] I[0]
-	shl	v19.2d,v19.2d,#57		//compose 0xc2.0 constant
-#ifndef __ARMEB__
-	rev64	v16.16b,v16.16b
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v3.16b,v16.16b,v16.16b,#8		//rotate I[0]
-	b.lo	.Lodd_tail_v8		//x3 was less than 32
-	ld1	{v17.2d},[x2],x12	//load [rotated] I[1]
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ext	v7.16b,v17.16b,v17.16b,#8
-	eor	v3.16b,v3.16b,v0.16b		//I[i]^=Xi
-	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
-	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
-	pmull2	v6.1q,v20.2d,v7.2d
-	b	.Loop_mod2x_v8
-
-.align	4
-.Loop_mod2x_v8:
-	ext	v18.16b,v3.16b,v3.16b,#8
-	subs	x3,x3,#32		//is there more data?
-	pmull	v0.1q,v22.1d,v3.1d		//H^2.lo·Xi.lo
-	csel	x12,xzr,x12,lo			//is it time to zero x12?
-
-	pmull	v5.1q,v21.1d,v17.1d
-	eor	v18.16b,v18.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v22.2d,v3.2d		//H^2.hi·Xi.hi
-	eor	v0.16b,v0.16b,v4.16b		//accumulate
-	pmull2	v1.1q,v21.2d,v18.2d		//(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
-	ld1	{v16.2d},[x2],x12	//load [rotated] I[i+2]
-
-	eor	v2.16b,v2.16b,v6.16b
-	csel	x12,xzr,x12,eq			//is it time to zero x12?
-	eor	v1.16b,v1.16b,v5.16b
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	ld1	{v17.2d},[x2],x12	//load [rotated] I[i+3]
-#ifndef __ARMEB__
-	rev64	v16.16b,v16.16b
-#endif
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	ext	v7.16b,v17.16b,v17.16b,#8
-	ext	v3.16b,v16.16b,v16.16b,#8
-	eor	v0.16b,v1.16b,v18.16b
-	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
-	eor	v3.16b,v3.16b,v2.16b		//accumulate v3.16b early
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v3.16b,v3.16b,v18.16b
-	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
-	eor	v3.16b,v3.16b,v0.16b
-	pmull2	v6.1q,v20.2d,v7.2d
-	b.hs	.Loop_mod2x_v8		//there was at least 32 more bytes
-
-	eor	v2.16b,v2.16b,v18.16b
-	ext	v3.16b,v16.16b,v16.16b,#8		//re-construct v3.16b
-	adds	x3,x3,#32		//re-construct x3
-	eor	v0.16b,v0.16b,v2.16b		//re-construct v0.16b
-	b.eq	.Ldone_v8		//is x3 zero?
-.Lodd_tail_v8:
-	ext	v18.16b,v0.16b,v0.16b,#8
-	eor	v3.16b,v3.16b,v0.16b		//inp^=Xi
-	eor	v17.16b,v16.16b,v18.16b		//v17.16b is rotated inp^Xi
-
-	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
-	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
-	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v0.16b,v0.16b,v18.16b
-
-.Ldone_v8:
-#ifndef __ARMEB__
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v0.16b,v0.16b,v0.16b,#8
-	st1	{v0.2d},[x0]		//write out Xi
-
-	ret
-.size	gcm_ghash_v8,.-gcm_ghash_v8
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha256-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha256-armv8.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha512-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha512-armv8.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/test/trampoline-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/test/trampoline-armv8.S
@ -1,761 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__aarch64__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
-// with |argv|, then saves the callee-saved registers into |state|. It returns
-// the result of |func|. The |unwind| argument is unused.
-// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
-//                              const uint64_t *argv, size_t argc,
-//                              uint64_t unwind);
-.type	abi_test_trampoline, %function
-.globl	abi_test_trampoline
-.hidden	abi_test_trampoline
-.align	4
-abi_test_trampoline:
-.Labi_test_trampoline_begin:
-	AARCH64_SIGN_LINK_REGISTER
-	// Stack layout (low to high addresses)
-	//   x29,x30 (16 bytes)
-	//    d8-d15 (64 bytes)
-	//   x19-x28 (80 bytes)
-	//    x1 (8 bytes)
-	//   padding (8 bytes)
-	stp	x29, x30, [sp, #-176]!
-	mov	x29, sp
-
-	// Saved callee-saved registers and |state|.
-	stp	d8, d9, [sp, #16]
-	stp	d10, d11, [sp, #32]
-	stp	d12, d13, [sp, #48]
-	stp	d14, d15, [sp, #64]
-	stp	x19, x20, [sp, #80]
-	stp	x21, x22, [sp, #96]
-	stp	x23, x24, [sp, #112]
-	stp	x25, x26, [sp, #128]
-	stp	x27, x28, [sp, #144]
-	str	x1, [sp, #160]
-
-	// Load registers from |state|, with the exception of x29. x29 is the
-	// frame pointer and also callee-saved, but AAPCS64 allows platforms to
-	// mandate that x29 always point to a frame. iOS64 does so, which means
-	// we cannot fill x29 with entropy without violating ABI rules
-	// ourselves. x29 is tested separately below.
-	ldp	d8, d9, [x1], #16
-	ldp	d10, d11, [x1], #16
-	ldp	d12, d13, [x1], #16
-	ldp	d14, d15, [x1], #16
-	ldp	x19, x20, [x1], #16
-	ldp	x21, x22, [x1], #16
-	ldp	x23, x24, [x1], #16
-	ldp	x25, x26, [x1], #16
-	ldp	x27, x28, [x1], #16
-
-	// Move parameters into temporary registers.
-	mov	x9, x0
-	mov	x10, x2
-	mov	x11, x3
-
-	// Load parameters into registers.
-	cbz	x11, .Largs_done
-	ldr	x0, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x1, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x2, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x3, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x4, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x5, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x6, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x7, [x10], #8
-
-.Largs_done:
-	blr	x9
-
-	// Reload |state| and store registers.
-	ldr	x1, [sp, #160]
-	stp	d8, d9, [x1], #16
-	stp	d10, d11, [x1], #16
-	stp	d12, d13, [x1], #16
-	stp	d14, d15, [x1], #16
-	stp	x19, x20, [x1], #16
-	stp	x21, x22, [x1], #16
-	stp	x23, x24, [x1], #16
-	stp	x25, x26, [x1], #16
-	stp	x27, x28, [x1], #16
-
-	// |func| is required to preserve x29, the frame pointer. We cannot load
-	// random values into x29 (see comment above), so compare it against the
-	// expected value and zero the field of |state| if corrupted.
-	mov	x9, sp
-	cmp	x29, x9
-	b.eq	.Lx29_ok
-	str	xzr, [x1]
-
-.Lx29_ok:
-	// Restore callee-saved registers.
-	ldp	d8, d9, [sp, #16]
-	ldp	d10, d11, [sp, #32]
-	ldp	d12, d13, [sp, #48]
-	ldp	d14, d15, [sp, #64]
-	ldp	x19, x20, [sp, #80]
-	ldp	x21, x22, [sp, #96]
-	ldp	x23, x24, [sp, #112]
-	ldp	x25, x26, [sp, #128]
-	ldp	x27, x28, [sp, #144]
-
-	ldp	x29, x30, [sp], #176
-	AARCH64_VALIDATE_LINK_REGISTER
-	ret
-.size	abi_test_trampoline,.-abi_test_trampoline
-.type	abi_test_clobber_x0, %function
-.globl	abi_test_clobber_x0
-.hidden	abi_test_clobber_x0
-.align	4
-abi_test_clobber_x0:
-	AARCH64_VALID_CALL_TARGET
-	mov	x0, xzr
-	ret
-.size	abi_test_clobber_x0,.-abi_test_clobber_x0
-.type	abi_test_clobber_x1, %function
-.globl	abi_test_clobber_x1
-.hidden	abi_test_clobber_x1
-.align	4
-abi_test_clobber_x1:
-	AARCH64_VALID_CALL_TARGET
-	mov	x1, xzr
-	ret
-.size	abi_test_clobber_x1,.-abi_test_clobber_x1
-.type	abi_test_clobber_x2, %function
-.globl	abi_test_clobber_x2
-.hidden	abi_test_clobber_x2
-.align	4
-abi_test_clobber_x2:
-	AARCH64_VALID_CALL_TARGET
-	mov	x2, xzr
-	ret
-.size	abi_test_clobber_x2,.-abi_test_clobber_x2
-.type	abi_test_clobber_x3, %function
-.globl	abi_test_clobber_x3
-.hidden	abi_test_clobber_x3
-.align	4
-abi_test_clobber_x3:
-	AARCH64_VALID_CALL_TARGET
-	mov	x3, xzr
-	ret
-.size	abi_test_clobber_x3,.-abi_test_clobber_x3
-.type	abi_test_clobber_x4, %function
-.globl	abi_test_clobber_x4
-.hidden	abi_test_clobber_x4
-.align	4
-abi_test_clobber_x4:
-	AARCH64_VALID_CALL_TARGET
-	mov	x4, xzr
-	ret
-.size	abi_test_clobber_x4,.-abi_test_clobber_x4
-.type	abi_test_clobber_x5, %function
-.globl	abi_test_clobber_x5
-.hidden	abi_test_clobber_x5
-.align	4
-abi_test_clobber_x5:
-	AARCH64_VALID_CALL_TARGET
-	mov	x5, xzr
-	ret
-.size	abi_test_clobber_x5,.-abi_test_clobber_x5
-.type	abi_test_clobber_x6, %function
-.globl	abi_test_clobber_x6
-.hidden	abi_test_clobber_x6
-.align	4
-abi_test_clobber_x6:
-	AARCH64_VALID_CALL_TARGET
-	mov	x6, xzr
-	ret
-.size	abi_test_clobber_x6,.-abi_test_clobber_x6
-.type	abi_test_clobber_x7, %function
-.globl	abi_test_clobber_x7
-.hidden	abi_test_clobber_x7
-.align	4
-abi_test_clobber_x7:
-	AARCH64_VALID_CALL_TARGET
-	mov	x7, xzr
-	ret
-.size	abi_test_clobber_x7,.-abi_test_clobber_x7
-.type	abi_test_clobber_x8, %function
-.globl	abi_test_clobber_x8
-.hidden	abi_test_clobber_x8
-.align	4
-abi_test_clobber_x8:
-	AARCH64_VALID_CALL_TARGET
-	mov	x8, xzr
-	ret
-.size	abi_test_clobber_x8,.-abi_test_clobber_x8
-.type	abi_test_clobber_x9, %function
-.globl	abi_test_clobber_x9
-.hidden	abi_test_clobber_x9
-.align	4
-abi_test_clobber_x9:
-	AARCH64_VALID_CALL_TARGET
-	mov	x9, xzr
-	ret
-.size	abi_test_clobber_x9,.-abi_test_clobber_x9
-.type	abi_test_clobber_x10, %function
-.globl	abi_test_clobber_x10
-.hidden	abi_test_clobber_x10
-.align	4
-abi_test_clobber_x10:
-	AARCH64_VALID_CALL_TARGET
-	mov	x10, xzr
-	ret
-.size	abi_test_clobber_x10,.-abi_test_clobber_x10
-.type	abi_test_clobber_x11, %function
-.globl	abi_test_clobber_x11
-.hidden	abi_test_clobber_x11
-.align	4
-abi_test_clobber_x11:
-	AARCH64_VALID_CALL_TARGET
-	mov	x11, xzr
-	ret
-.size	abi_test_clobber_x11,.-abi_test_clobber_x11
-.type	abi_test_clobber_x12, %function
-.globl	abi_test_clobber_x12
-.hidden	abi_test_clobber_x12
-.align	4
-abi_test_clobber_x12:
-	AARCH64_VALID_CALL_TARGET
-	mov	x12, xzr
-	ret
-.size	abi_test_clobber_x12,.-abi_test_clobber_x12
-.type	abi_test_clobber_x13, %function
-.globl	abi_test_clobber_x13
-.hidden	abi_test_clobber_x13
-.align	4
-abi_test_clobber_x13:
-	AARCH64_VALID_CALL_TARGET
-	mov	x13, xzr
-	ret
-.size	abi_test_clobber_x13,.-abi_test_clobber_x13
-.type	abi_test_clobber_x14, %function
-.globl	abi_test_clobber_x14
-.hidden	abi_test_clobber_x14
-.align	4
-abi_test_clobber_x14:
-	AARCH64_VALID_CALL_TARGET
-	mov	x14, xzr
-	ret
-.size	abi_test_clobber_x14,.-abi_test_clobber_x14
-.type	abi_test_clobber_x15, %function
-.globl	abi_test_clobber_x15
-.hidden	abi_test_clobber_x15
-.align	4
-abi_test_clobber_x15:
-	AARCH64_VALID_CALL_TARGET
-	mov	x15, xzr
-	ret
-.size	abi_test_clobber_x15,.-abi_test_clobber_x15
-.type	abi_test_clobber_x16, %function
-.globl	abi_test_clobber_x16
-.hidden	abi_test_clobber_x16
-.align	4
-abi_test_clobber_x16:
-	AARCH64_VALID_CALL_TARGET
-	mov	x16, xzr
-	ret
-.size	abi_test_clobber_x16,.-abi_test_clobber_x16
-.type	abi_test_clobber_x17, %function
-.globl	abi_test_clobber_x17
-.hidden	abi_test_clobber_x17
-.align	4
-abi_test_clobber_x17:
-	AARCH64_VALID_CALL_TARGET
-	mov	x17, xzr
-	ret
-.size	abi_test_clobber_x17,.-abi_test_clobber_x17
-.type	abi_test_clobber_x19, %function
-.globl	abi_test_clobber_x19
-.hidden	abi_test_clobber_x19
-.align	4
-abi_test_clobber_x19:
-	AARCH64_VALID_CALL_TARGET
-	mov	x19, xzr
-	ret
-.size	abi_test_clobber_x19,.-abi_test_clobber_x19
-.type	abi_test_clobber_x20, %function
-.globl	abi_test_clobber_x20
-.hidden	abi_test_clobber_x20
-.align	4
-abi_test_clobber_x20:
-	AARCH64_VALID_CALL_TARGET
-	mov	x20, xzr
-	ret
-.size	abi_test_clobber_x20,.-abi_test_clobber_x20
-.type	abi_test_clobber_x21, %function
-.globl	abi_test_clobber_x21
-.hidden	abi_test_clobber_x21
-.align	4
-abi_test_clobber_x21:
-	AARCH64_VALID_CALL_TARGET
-	mov	x21, xzr
-	ret
-.size	abi_test_clobber_x21,.-abi_test_clobber_x21
-.type	abi_test_clobber_x22, %function
-.globl	abi_test_clobber_x22
-.hidden	abi_test_clobber_x22
-.align	4
-abi_test_clobber_x22:
-	AARCH64_VALID_CALL_TARGET
-	mov	x22, xzr
-	ret
-.size	abi_test_clobber_x22,.-abi_test_clobber_x22
-.type	abi_test_clobber_x23, %function
-.globl	abi_test_clobber_x23
-.hidden	abi_test_clobber_x23
-.align	4
-abi_test_clobber_x23:
-	AARCH64_VALID_CALL_TARGET
-	mov	x23, xzr
-	ret
-.size	abi_test_clobber_x23,.-abi_test_clobber_x23
-.type	abi_test_clobber_x24, %function
-.globl	abi_test_clobber_x24
-.hidden	abi_test_clobber_x24
-.align	4
-abi_test_clobber_x24:
-	AARCH64_VALID_CALL_TARGET
-	mov	x24, xzr
-	ret
-.size	abi_test_clobber_x24,.-abi_test_clobber_x24
-.type	abi_test_clobber_x25, %function
-.globl	abi_test_clobber_x25
-.hidden	abi_test_clobber_x25
-.align	4
-abi_test_clobber_x25:
-	AARCH64_VALID_CALL_TARGET
-	mov	x25, xzr
-	ret
-.size	abi_test_clobber_x25,.-abi_test_clobber_x25
-.type	abi_test_clobber_x26, %function
-.globl	abi_test_clobber_x26
-.hidden	abi_test_clobber_x26
-.align	4
-abi_test_clobber_x26:
-	AARCH64_VALID_CALL_TARGET
-	mov	x26, xzr
-	ret
-.size	abi_test_clobber_x26,.-abi_test_clobber_x26
-.type	abi_test_clobber_x27, %function
-.globl	abi_test_clobber_x27
-.hidden	abi_test_clobber_x27
-.align	4
-abi_test_clobber_x27:
-	AARCH64_VALID_CALL_TARGET
-	mov	x27, xzr
-	ret
-.size	abi_test_clobber_x27,.-abi_test_clobber_x27
-.type	abi_test_clobber_x28, %function
-.globl	abi_test_clobber_x28
-.hidden	abi_test_clobber_x28
-.align	4
-abi_test_clobber_x28:
-	AARCH64_VALID_CALL_TARGET
-	mov	x28, xzr
-	ret
-.size	abi_test_clobber_x28,.-abi_test_clobber_x28
-.type	abi_test_clobber_x29, %function
-.globl	abi_test_clobber_x29
-.hidden	abi_test_clobber_x29
-.align	4
-abi_test_clobber_x29:
-	AARCH64_VALID_CALL_TARGET
-	mov	x29, xzr
-	ret
-.size	abi_test_clobber_x29,.-abi_test_clobber_x29
-.type	abi_test_clobber_d0, %function
-.globl	abi_test_clobber_d0
-.hidden	abi_test_clobber_d0
-.align	4
-abi_test_clobber_d0:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d0, xzr
-	ret
-.size	abi_test_clobber_d0,.-abi_test_clobber_d0
-.type	abi_test_clobber_d1, %function
-.globl	abi_test_clobber_d1
-.hidden	abi_test_clobber_d1
-.align	4
-abi_test_clobber_d1:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d1, xzr
-	ret
-.size	abi_test_clobber_d1,.-abi_test_clobber_d1
-.type	abi_test_clobber_d2, %function
-.globl	abi_test_clobber_d2
-.hidden	abi_test_clobber_d2
-.align	4
-abi_test_clobber_d2:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d2, xzr
-	ret
-.size	abi_test_clobber_d2,.-abi_test_clobber_d2
-.type	abi_test_clobber_d3, %function
-.globl	abi_test_clobber_d3
-.hidden	abi_test_clobber_d3
-.align	4
-abi_test_clobber_d3:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d3, xzr
-	ret
-.size	abi_test_clobber_d3,.-abi_test_clobber_d3
-.type	abi_test_clobber_d4, %function
-.globl	abi_test_clobber_d4
-.hidden	abi_test_clobber_d4
-.align	4
-abi_test_clobber_d4:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d4, xzr
-	ret
-.size	abi_test_clobber_d4,.-abi_test_clobber_d4
-.type	abi_test_clobber_d5, %function
-.globl	abi_test_clobber_d5
-.hidden	abi_test_clobber_d5
-.align	4
-abi_test_clobber_d5:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d5, xzr
-	ret
-.size	abi_test_clobber_d5,.-abi_test_clobber_d5
-.type	abi_test_clobber_d6, %function
-.globl	abi_test_clobber_d6
-.hidden	abi_test_clobber_d6
-.align	4
-abi_test_clobber_d6:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d6, xzr
-	ret
-.size	abi_test_clobber_d6,.-abi_test_clobber_d6
-.type	abi_test_clobber_d7, %function
-.globl	abi_test_clobber_d7
-.hidden	abi_test_clobber_d7
-.align	4
-abi_test_clobber_d7:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d7, xzr
-	ret
-.size	abi_test_clobber_d7,.-abi_test_clobber_d7
-.type	abi_test_clobber_d8, %function
-.globl	abi_test_clobber_d8
-.hidden	abi_test_clobber_d8
-.align	4
-abi_test_clobber_d8:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d8, xzr
-	ret
-.size	abi_test_clobber_d8,.-abi_test_clobber_d8
-.type	abi_test_clobber_d9, %function
-.globl	abi_test_clobber_d9
-.hidden	abi_test_clobber_d9
-.align	4
-abi_test_clobber_d9:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d9, xzr
-	ret
-.size	abi_test_clobber_d9,.-abi_test_clobber_d9
-.type	abi_test_clobber_d10, %function
-.globl	abi_test_clobber_d10
-.hidden	abi_test_clobber_d10
-.align	4
-abi_test_clobber_d10:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d10, xzr
-	ret
-.size	abi_test_clobber_d10,.-abi_test_clobber_d10
-.type	abi_test_clobber_d11, %function
-.globl	abi_test_clobber_d11
-.hidden	abi_test_clobber_d11
-.align	4
-abi_test_clobber_d11:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d11, xzr
-	ret
-.size	abi_test_clobber_d11,.-abi_test_clobber_d11
-.type	abi_test_clobber_d12, %function
-.globl	abi_test_clobber_d12
-.hidden	abi_test_clobber_d12
-.align	4
-abi_test_clobber_d12:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d12, xzr
-	ret
-.size	abi_test_clobber_d12,.-abi_test_clobber_d12
-.type	abi_test_clobber_d13, %function
-.globl	abi_test_clobber_d13
-.hidden	abi_test_clobber_d13
-.align	4
-abi_test_clobber_d13:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d13, xzr
-	ret
-.size	abi_test_clobber_d13,.-abi_test_clobber_d13
-.type	abi_test_clobber_d14, %function
-.globl	abi_test_clobber_d14
-.hidden	abi_test_clobber_d14
-.align	4
-abi_test_clobber_d14:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d14, xzr
-	ret
-.size	abi_test_clobber_d14,.-abi_test_clobber_d14
-.type	abi_test_clobber_d15, %function
-.globl	abi_test_clobber_d15
-.hidden	abi_test_clobber_d15
-.align	4
-abi_test_clobber_d15:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d15, xzr
-	ret
-.size	abi_test_clobber_d15,.-abi_test_clobber_d15
-.type	abi_test_clobber_d16, %function
-.globl	abi_test_clobber_d16
-.hidden	abi_test_clobber_d16
-.align	4
-abi_test_clobber_d16:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d16, xzr
-	ret
-.size	abi_test_clobber_d16,.-abi_test_clobber_d16
-.type	abi_test_clobber_d17, %function
-.globl	abi_test_clobber_d17
-.hidden	abi_test_clobber_d17
-.align	4
-abi_test_clobber_d17:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d17, xzr
-	ret
-.size	abi_test_clobber_d17,.-abi_test_clobber_d17
-.type	abi_test_clobber_d18, %function
-.globl	abi_test_clobber_d18
-.hidden	abi_test_clobber_d18
-.align	4
-abi_test_clobber_d18:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d18, xzr
-	ret
-.size	abi_test_clobber_d18,.-abi_test_clobber_d18
-.type	abi_test_clobber_d19, %function
-.globl	abi_test_clobber_d19
-.hidden	abi_test_clobber_d19
-.align	4
-abi_test_clobber_d19:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d19, xzr
-	ret
-.size	abi_test_clobber_d19,.-abi_test_clobber_d19
-.type	abi_test_clobber_d20, %function
-.globl	abi_test_clobber_d20
-.hidden	abi_test_clobber_d20
-.align	4
-abi_test_clobber_d20:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d20, xzr
-	ret
-.size	abi_test_clobber_d20,.-abi_test_clobber_d20
-.type	abi_test_clobber_d21, %function
-.globl	abi_test_clobber_d21
-.hidden	abi_test_clobber_d21
-.align	4
-abi_test_clobber_d21:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d21, xzr
-	ret
-.size	abi_test_clobber_d21,.-abi_test_clobber_d21
-.type	abi_test_clobber_d22, %function
-.globl	abi_test_clobber_d22
-.hidden	abi_test_clobber_d22
-.align	4
-abi_test_clobber_d22:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d22, xzr
-	ret
-.size	abi_test_clobber_d22,.-abi_test_clobber_d22
-.type	abi_test_clobber_d23, %function
-.globl	abi_test_clobber_d23
-.hidden	abi_test_clobber_d23
-.align	4
-abi_test_clobber_d23:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d23, xzr
-	ret
-.size	abi_test_clobber_d23,.-abi_test_clobber_d23
-.type	abi_test_clobber_d24, %function
-.globl	abi_test_clobber_d24
-.hidden	abi_test_clobber_d24
-.align	4
-abi_test_clobber_d24:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d24, xzr
-	ret
-.size	abi_test_clobber_d24,.-abi_test_clobber_d24
-.type	abi_test_clobber_d25, %function
-.globl	abi_test_clobber_d25
-.hidden	abi_test_clobber_d25
-.align	4
-abi_test_clobber_d25:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d25, xzr
-	ret
-.size	abi_test_clobber_d25,.-abi_test_clobber_d25
-.type	abi_test_clobber_d26, %function
-.globl	abi_test_clobber_d26
-.hidden	abi_test_clobber_d26
-.align	4
-abi_test_clobber_d26:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d26, xzr
-	ret
-.size	abi_test_clobber_d26,.-abi_test_clobber_d26
-.type	abi_test_clobber_d27, %function
-.globl	abi_test_clobber_d27
-.hidden	abi_test_clobber_d27
-.align	4
-abi_test_clobber_d27:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d27, xzr
-	ret
-.size	abi_test_clobber_d27,.-abi_test_clobber_d27
-.type	abi_test_clobber_d28, %function
-.globl	abi_test_clobber_d28
-.hidden	abi_test_clobber_d28
-.align	4
-abi_test_clobber_d28:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d28, xzr
-	ret
-.size	abi_test_clobber_d28,.-abi_test_clobber_d28
-.type	abi_test_clobber_d29, %function
-.globl	abi_test_clobber_d29
-.hidden	abi_test_clobber_d29
-.align	4
-abi_test_clobber_d29:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d29, xzr
-	ret
-.size	abi_test_clobber_d29,.-abi_test_clobber_d29
-.type	abi_test_clobber_d30, %function
-.globl	abi_test_clobber_d30
-.hidden	abi_test_clobber_d30
-.align	4
-abi_test_clobber_d30:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d30, xzr
-	ret
-.size	abi_test_clobber_d30,.-abi_test_clobber_d30
-.type	abi_test_clobber_d31, %function
-.globl	abi_test_clobber_d31
-.hidden	abi_test_clobber_d31
-.align	4
-abi_test_clobber_d31:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d31, xzr
-	ret
-.size	abi_test_clobber_d31,.-abi_test_clobber_d31
-.type	abi_test_clobber_v8_upper, %function
-.globl	abi_test_clobber_v8_upper
-.hidden	abi_test_clobber_v8_upper
-.align	4
-abi_test_clobber_v8_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v8.d[1], xzr
-	ret
-.size	abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
-.type	abi_test_clobber_v9_upper, %function
-.globl	abi_test_clobber_v9_upper
-.hidden	abi_test_clobber_v9_upper
-.align	4
-abi_test_clobber_v9_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v9.d[1], xzr
-	ret
-.size	abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
-.type	abi_test_clobber_v10_upper, %function
-.globl	abi_test_clobber_v10_upper
-.hidden	abi_test_clobber_v10_upper
-.align	4
-abi_test_clobber_v10_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v10.d[1], xzr
-	ret
-.size	abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
-.type	abi_test_clobber_v11_upper, %function
-.globl	abi_test_clobber_v11_upper
-.hidden	abi_test_clobber_v11_upper
-.align	4
-abi_test_clobber_v11_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v11.d[1], xzr
-	ret
-.size	abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
-.type	abi_test_clobber_v12_upper, %function
-.globl	abi_test_clobber_v12_upper
-.hidden	abi_test_clobber_v12_upper
-.align	4
-abi_test_clobber_v12_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v12.d[1], xzr
-	ret
-.size	abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
-.type	abi_test_clobber_v13_upper, %function
-.globl	abi_test_clobber_v13_upper
-.hidden	abi_test_clobber_v13_upper
-.align	4
-abi_test_clobber_v13_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v13.d[1], xzr
-	ret
-.size	abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
-.type	abi_test_clobber_v14_upper, %function
-.globl	abi_test_clobber_v14_upper
-.hidden	abi_test_clobber_v14_upper
-.align	4
-abi_test_clobber_v14_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v14.d[1], xzr
-	ret
-.size	abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
-.type	abi_test_clobber_v15_upper, %function
-.globl	abi_test_clobber_v15_upper
-.hidden	abi_test_clobber_v15_upper
-.align	4
-abi_test_clobber_v15_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v15.d[1], xzr
-	ret
-.size	abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-arm/crypto/chacha/chacha-armv4.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/chacha/chacha-armv4.S
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/aesv8-armx32.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/aesv8-armx32.S
@ -1,781 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__arm__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-#if __ARM_MAX_ARCH__>=7
-.text
-.arch	armv7-a	@ don't confuse not-so-latest binutils with argv8 :-)
-.fpu	neon
-.code	32
-#undef	__thumb2__
-.align	5
-.Lrcon:
-.long	0x01,0x01,0x01,0x01
-.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
-.long	0x1b,0x1b,0x1b,0x1b
-
-.text
-
-.globl	aes_hw_set_encrypt_key
-.hidden	aes_hw_set_encrypt_key
-.type	aes_hw_set_encrypt_key,%function
-.align	5
-aes_hw_set_encrypt_key:
-.Lenc_key:
-	mov	r3,#-1
-	cmp	r0,#0
-	beq	.Lenc_key_abort
-	cmp	r2,#0
-	beq	.Lenc_key_abort
-	mov	r3,#-2
-	cmp	r1,#128
-	blt	.Lenc_key_abort
-	cmp	r1,#256
-	bgt	.Lenc_key_abort
-	tst	r1,#0x3f
-	bne	.Lenc_key_abort
-
-	adr	r3,.Lrcon
-	cmp	r1,#192
-
-	veor	q0,q0,q0
-	vld1.8	{q3},[r0]!
-	mov	r1,#8		@ reuse r1
-	vld1.32	{q1,q2},[r3]!
-
-	blt	.Loop128
-	beq	.L192
-	b	.L256
-
-.align	4
-.Loop128:
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-	bne	.Loop128
-
-	vld1.32	{q1},[r3]
-
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	veor	q3,q3,q10
-	vst1.32	{q3},[r2]
-	add	r2,r2,#0x50
-
-	mov	r12,#10
-	b	.Ldone
-
-.align	4
-.L192:
-	vld1.8	{d16},[r0]!
-	vmov.i8	q10,#8			@ borrow q10
-	vst1.32	{q3},[r2]!
-	vsub.i8	q2,q2,q10	@ adjust the mask
-
-.Loop192:
-	vtbl.8	d20,{q8},d4
-	vtbl.8	d21,{q8},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{d16},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-
-	vdup.32	q9,d7[1]
-	veor	q9,q9,q8
-	veor	q10,q10,q1
-	vext.8	q8,q0,q8,#12
-	vshl.u8	q1,q1,#1
-	veor	q8,q8,q9
-	veor	q3,q3,q10
-	veor	q8,q8,q10
-	vst1.32	{q3},[r2]!
-	bne	.Loop192
-
-	mov	r12,#12
-	add	r2,r2,#0x20
-	b	.Ldone
-
-.align	4
-.L256:
-	vld1.8	{q8},[r0]
-	mov	r1,#7
-	mov	r12,#14
-	vst1.32	{q3},[r2]!
-
-.Loop256:
-	vtbl.8	d20,{q8},d4
-	vtbl.8	d21,{q8},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q8},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-	vst1.32	{q3},[r2]!
-	beq	.Ldone
-
-	vdup.32	q10,d7[1]
-	vext.8	q9,q0,q8,#12
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q8,q8,q9
-	vext.8	q9,q0,q9,#12
-	veor	q8,q8,q9
-	vext.8	q9,q0,q9,#12
-	veor	q8,q8,q9
-
-	veor	q8,q8,q10
-	b	.Loop256
-
-.Ldone:
-	str	r12,[r2]
-	mov	r3,#0
-
-.Lenc_key_abort:
-	mov	r0,r3			@ return value
-
-	bx	lr
-.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
-
-.globl	aes_hw_set_decrypt_key
-.hidden	aes_hw_set_decrypt_key
-.type	aes_hw_set_decrypt_key,%function
-.align	5
-aes_hw_set_decrypt_key:
-	stmdb	sp!,{r4,lr}
-	bl	.Lenc_key
-
-	cmp	r0,#0
-	bne	.Ldec_key_abort
-
-	sub	r2,r2,#240		@ restore original r2
-	mov	r4,#-16
-	add	r0,r2,r12,lsl#4	@ end of key schedule
-
-	vld1.32	{q0},[r2]
-	vld1.32	{q1},[r0]
-	vst1.32	{q0},[r0],r4
-	vst1.32	{q1},[r2]!
-
-.Loop_imc:
-	vld1.32	{q0},[r2]
-	vld1.32	{q1},[r0]
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	vst1.32	{q0},[r0],r4
-	vst1.32	{q1},[r2]!
-	cmp	r0,r2
-	bhi	.Loop_imc
-
-	vld1.32	{q0},[r2]
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	vst1.32	{q0},[r0]
-
-	eor	r0,r0,r0		@ return value
-.Ldec_key_abort:
-	ldmia	sp!,{r4,pc}
-.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
-.globl	aes_hw_encrypt
-.hidden	aes_hw_encrypt
-.type	aes_hw_encrypt,%function
-.align	5
-aes_hw_encrypt:
-	ldr	r3,[r2,#240]
-	vld1.32	{q0},[r2]!
-	vld1.8	{q2},[r0]
-	sub	r3,r3,#2
-	vld1.32	{q1},[r2]!
-
-.Loop_enc:
-.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q0},[r2]!
-	subs	r3,r3,#2
-.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q1},[r2]!
-	bgt	.Loop_enc
-
-.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q0},[r2]
-.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
-	veor	q2,q2,q0
-
-	vst1.8	{q2},[r1]
-	bx	lr
-.size	aes_hw_encrypt,.-aes_hw_encrypt
-.globl	aes_hw_decrypt
-.hidden	aes_hw_decrypt
-.type	aes_hw_decrypt,%function
-.align	5
-aes_hw_decrypt:
-	ldr	r3,[r2,#240]
-	vld1.32	{q0},[r2]!
-	vld1.8	{q2},[r0]
-	sub	r3,r3,#2
-	vld1.32	{q1},[r2]!
-
-.Loop_dec:
-.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q0},[r2]!
-	subs	r3,r3,#2
-.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q1},[r2]!
-	bgt	.Loop_dec
-
-.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q0},[r2]
-.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
-	veor	q2,q2,q0
-
-	vst1.8	{q2},[r1]
-	bx	lr
-.size	aes_hw_decrypt,.-aes_hw_decrypt
-.globl	aes_hw_cbc_encrypt
-.hidden	aes_hw_cbc_encrypt
-.type	aes_hw_cbc_encrypt,%function
-.align	5
-aes_hw_cbc_encrypt:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
-	ldmia	ip,{r4,r5}		@ load remaining args
-	subs	r2,r2,#16
-	mov	r8,#16
-	blo	.Lcbc_abort
-	moveq	r8,#0
-
-	cmp	r5,#0			@ en- or decrypting?
-	ldr	r5,[r3,#240]
-	and	r2,r2,#-16
-	vld1.8	{q6},[r4]
-	vld1.8	{q0},[r0],r8
-
-	vld1.32	{q8,q9},[r3]		@ load key schedule...
-	sub	r5,r5,#6
-	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
-	sub	r5,r5,#2
-	vld1.32	{q10,q11},[r7]!
-	vld1.32	{q12,q13},[r7]!
-	vld1.32	{q14,q15},[r7]!
-	vld1.32	{q7},[r7]
-
-	add	r7,r3,#32
-	mov	r6,r5
-	beq	.Lcbc_dec
-
-	cmp	r5,#2
-	veor	q0,q0,q6
-	veor	q5,q8,q7
-	beq	.Lcbc_enc128
-
-	vld1.32	{q2,q3},[r7]
-	add	r7,r3,#16
-	add	r6,r3,#16*4
-	add	r12,r3,#16*5
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	add	r14,r3,#16*6
-	add	r3,r3,#16*7
-	b	.Lenter_cbc_enc
-
-.align	4
-.Loop_cbc_enc:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vst1.8	{q6},[r1]!
-.Lenter_cbc_enc:
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q8},[r6]
-	cmp	r5,#4
-.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r12]
-	beq	.Lcbc_enc192
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q8},[r14]
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r3]
-	nop
-
-.Lcbc_enc192:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	subs	r2,r2,#16
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	moveq	r8,#0
-.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.8	{q8},[r0],r8
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	veor	q8,q8,q5
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-	veor	q6,q0,q7
-	bhs	.Loop_cbc_enc
-
-	vst1.8	{q6},[r1]!
-	b	.Lcbc_done
-
-.align	5
-.Lcbc_enc128:
-	vld1.32	{q2,q3},[r7]
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	b	.Lenter_cbc_enc128
-.Loop_cbc_enc128:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vst1.8	{q6},[r1]!
-.Lenter_cbc_enc128:
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	subs	r2,r2,#16
-.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	moveq	r8,#0
-.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.8	{q8},[r0],r8
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	veor	q8,q8,q5
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-	veor	q6,q0,q7
-	bhs	.Loop_cbc_enc128
-
-	vst1.8	{q6},[r1]!
-	b	.Lcbc_done
-.align	5
-.Lcbc_dec:
-	vld1.8	{q10},[r0]!
-	subs	r2,r2,#32		@ bias
-	add	r6,r5,#2
-	vorr	q3,q0,q0
-	vorr	q1,q0,q0
-	vorr	q11,q10,q10
-	blo	.Lcbc_dec_tail
-
-	vorr	q1,q10,q10
-	vld1.8	{q10},[r0]!
-	vorr	q2,q0,q0
-	vorr	q3,q1,q1
-	vorr	q11,q10,q10
-
-.Loop3x_cbc_dec:
-.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	.Loop3x_cbc_dec
-
-.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q4,q6,q7
-	subs	r2,r2,#0x30
-	veor	q5,q2,q7
-	movlo	r6,r2			@ r6, r6, is zero at this point
-.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q9,q3,q7
-	add	r0,r0,r6		@ r0 is adjusted in such way that
-					@ at exit from the loop q1-q10
-					@ are loaded with last "words"
-	vorr	q6,q11,q11
-	mov	r7,r3
-.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q2},[r0]!
-.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q3},[r0]!
-.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q11},[r0]!
-.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
-.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
-.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
-	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
-	add	r6,r5,#2
-	veor	q4,q4,q0
-	veor	q5,q5,q1
-	veor	q10,q10,q9
-	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
-	vst1.8	{q4},[r1]!
-	vorr	q0,q2,q2
-	vst1.8	{q5},[r1]!
-	vorr	q1,q3,q3
-	vst1.8	{q10},[r1]!
-	vorr	q10,q11,q11
-	bhs	.Loop3x_cbc_dec
-
-	cmn	r2,#0x30
-	beq	.Lcbc_done
-	nop
-
-.Lcbc_dec_tail:
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	.Lcbc_dec_tail
-
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	cmn	r2,#0x20
-.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q5,q6,q7
-.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q9,q3,q7
-.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
-.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
-	beq	.Lcbc_dec_one
-	veor	q5,q5,q1
-	veor	q9,q9,q10
-	vorr	q6,q11,q11
-	vst1.8	{q5},[r1]!
-	vst1.8	{q9},[r1]!
-	b	.Lcbc_done
-
-.Lcbc_dec_one:
-	veor	q5,q5,q10
-	vorr	q6,q11,q11
-	vst1.8	{q5},[r1]!
-
-.Lcbc_done:
-	vst1.8	{q6},[r4]
-.Lcbc_abort:
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
-.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
-.globl	aes_hw_ctr32_encrypt_blocks
-.hidden	aes_hw_ctr32_encrypt_blocks
-.type	aes_hw_ctr32_encrypt_blocks,%function
-.align	5
-aes_hw_ctr32_encrypt_blocks:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
-	ldr	r4, [ip]		@ load remaining arg
-	ldr	r5,[r3,#240]
-
-	ldr	r8, [r4, #12]
-	vld1.32	{q0},[r4]
-
-	vld1.32	{q8,q9},[r3]		@ load key schedule...
-	sub	r5,r5,#4
-	mov	r12,#16
-	cmp	r2,#2
-	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
-	sub	r5,r5,#2
-	vld1.32	{q12,q13},[r7]!
-	vld1.32	{q14,q15},[r7]!
-	vld1.32	{q7},[r7]
-	add	r7,r3,#32
-	mov	r6,r5
-	movlo	r12,#0
-#ifndef __ARMEB__
-	rev	r8, r8
-#endif
-	vorr	q1,q0,q0
-	add	r10, r8, #1
-	vorr	q10,q0,q0
-	add	r8, r8, #2
-	vorr	q6,q0,q0
-	rev	r10, r10
-	vmov.32	d3[1],r10
-	bls	.Lctr32_tail
-	rev	r12, r8
-	sub	r2,r2,#3		@ bias
-	vmov.32	d21[1],r12
-	b	.Loop3x_ctr32
-
-.align	4
-.Loop3x_ctr32:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	.Loop3x_ctr32
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
-	vld1.8	{q2},[r0]!
-	vorr	q0,q6,q6
-.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.8	{q3},[r0]!
-	vorr	q1,q6,q6
-.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vld1.8	{q11},[r0]!
-	mov	r7,r3
-.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
-.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
-	vorr	q10,q6,q6
-	add	r9,r8,#1
-.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	veor	q2,q2,q7
-	add	r10,r8,#2
-.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	veor	q3,q3,q7
-	add	r8,r8,#3
-.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	veor	q11,q11,q7
-	rev	r9,r9
-.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d1[1], r9
-	rev	r10,r10
-.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vmov.32	d3[1], r10
-	rev	r12,r8
-.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d21[1], r12
-	subs	r2,r2,#3
-.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
-.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
-.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
-
-	veor	q2,q2,q4
-	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
-	vst1.8	{q2},[r1]!
-	veor	q3,q3,q5
-	mov	r6,r5
-	vst1.8	{q3},[r1]!
-	veor	q11,q11,q9
-	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
-	vst1.8	{q11},[r1]!
-	bhs	.Loop3x_ctr32
-
-	adds	r2,r2,#3
-	beq	.Lctr32_done
-	cmp	r2,#1
-	mov	r12,#16
-	moveq	r12,#0
-
-.Lctr32_tail:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.32	{q9},[r7]!
-	bgt	.Lctr32_tail
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.8	{q2},[r0],r12
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.8	{q3},[r0]
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	veor	q2,q2,q7
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	veor	q3,q3,q7
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
-
-	cmp	r2,#1
-	veor	q2,q2,q0
-	veor	q3,q3,q1
-	vst1.8	{q2},[r1]!
-	beq	.Lctr32_done
-	vst1.8	{q3},[r1]
-
-.Lctr32_done:
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
-.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
-#endif
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/armv4-mont.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/armv4-mont.S
@ -1,977 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__arm__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
-@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
-.arch	armv7-a
-
-.text
-#if defined(__thumb2__)
-.syntax	unified
-.thumb
-#else
-.code	32
-#endif
-
-#if __ARM_MAX_ARCH__>=7
-.align	5
-.LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-.Lbn_mul_mont
-#endif
-
-.globl	bn_mul_mont
-.hidden	bn_mul_mont
-.type	bn_mul_mont,%function
-
-.align	5
-bn_mul_mont:
-.Lbn_mul_mont:
-	ldr	ip,[sp,#4]		@ load num
-	stmdb	sp!,{r0,r2}		@ sp points at argument block
-#if __ARM_MAX_ARCH__>=7
-	tst	ip,#7
-	bne	.Lialu
-	adr	r0,.Lbn_mul_mont
-	ldr	r2,.LOPENSSL_armcap
-	ldr	r0,[r0,r2]
-#ifdef	__APPLE__
-	ldr	r0,[r0]
-#endif
-	tst	r0,#ARMV7_NEON		@ NEON available?
-	ldmia	sp, {r0,r2}
-	beq	.Lialu
-	add	sp,sp,#8
-	b	bn_mul8x_mont_neon
-.align	4
-.Lialu:
-#endif
-	cmp	ip,#2
-	mov	r0,ip			@ load num
-#ifdef	__thumb2__
-	ittt	lt
-#endif
-	movlt	r0,#0
-	addlt	sp,sp,#2*4
-	blt	.Labrt
-
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ save 10 registers
-
-	mov	r0,r0,lsl#2		@ rescale r0 for byte count
-	sub	sp,sp,r0		@ alloca(4*num)
-	sub	sp,sp,#4		@ +extra dword
-	sub	r0,r0,#4		@ "num=num-1"
-	add	r4,r2,r0		@ &bp[num-1]
-
-	add	r0,sp,r0		@ r0 to point at &tp[num-1]
-	ldr	r8,[r0,#14*4]		@ &n0
-	ldr	r2,[r2]		@ bp[0]
-	ldr	r5,[r1],#4		@ ap[0],ap++
-	ldr	r6,[r3],#4		@ np[0],np++
-	ldr	r8,[r8]		@ *n0
-	str	r4,[r0,#15*4]		@ save &bp[num]
-
-	umull	r10,r11,r5,r2	@ ap[0]*bp[0]
-	str	r8,[r0,#14*4]		@ save n0 value
-	mul	r8,r10,r8		@ "tp[0]"*n0
-	mov	r12,#0
-	umlal	r10,r12,r6,r8	@ np[0]*n0+"t[0]"
-	mov	r4,sp
-
-.L1st:
-	ldr	r5,[r1],#4		@ ap[j],ap++
-	mov	r10,r11
-	ldr	r6,[r3],#4		@ np[j],np++
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[j]*bp[0]
-	mov	r14,#0
-	umlal	r12,r14,r6,r8	@ np[j]*n0
-	adds	r12,r12,r10
-	str	r12,[r4],#4		@ tp[j-1]=,tp++
-	adc	r12,r14,#0
-	cmp	r4,r0
-	bne	.L1st
-
-	adds	r12,r12,r11
-	ldr	r4,[r0,#13*4]		@ restore bp
-	mov	r14,#0
-	ldr	r8,[r0,#14*4]		@ restore n0
-	adc	r14,r14,#0
-	str	r12,[r0]		@ tp[num-1]=
-	mov	r7,sp
-	str	r14,[r0,#4]		@ tp[num]=
-
-.Louter:
-	sub	r7,r0,r7		@ "original" r0-1 value
-	sub	r1,r1,r7		@ "rewind" ap to &ap[1]
-	ldr	r2,[r4,#4]!		@ *(++bp)
-	sub	r3,r3,r7		@ "rewind" np to &np[1]
-	ldr	r5,[r1,#-4]		@ ap[0]
-	ldr	r10,[sp]		@ tp[0]
-	ldr	r6,[r3,#-4]		@ np[0]
-	ldr	r7,[sp,#4]		@ tp[1]
-
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[0]*bp[i]+tp[0]
-	str	r4,[r0,#13*4]		@ save bp
-	mul	r8,r10,r8
-	mov	r12,#0
-	umlal	r10,r12,r6,r8	@ np[0]*n0+"tp[0]"
-	mov	r4,sp
-
-.Linner:
-	ldr	r5,[r1],#4		@ ap[j],ap++
-	adds	r10,r11,r7		@ +=tp[j]
-	ldr	r6,[r3],#4		@ np[j],np++
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[j]*bp[i]
-	mov	r14,#0
-	umlal	r12,r14,r6,r8	@ np[j]*n0
-	adc	r11,r11,#0
-	ldr	r7,[r4,#8]		@ tp[j+1]
-	adds	r12,r12,r10
-	str	r12,[r4],#4		@ tp[j-1]=,tp++
-	adc	r12,r14,#0
-	cmp	r4,r0
-	bne	.Linner
-
-	adds	r12,r12,r11
-	mov	r14,#0
-	ldr	r4,[r0,#13*4]		@ restore bp
-	adc	r14,r14,#0
-	ldr	r8,[r0,#14*4]		@ restore n0
-	adds	r12,r12,r7
-	ldr	r7,[r0,#15*4]		@ restore &bp[num]
-	adc	r14,r14,#0
-	str	r12,[r0]		@ tp[num-1]=
-	str	r14,[r0,#4]		@ tp[num]=
-
-	cmp	r4,r7
-#ifdef	__thumb2__
-	itt	ne
-#endif
-	movne	r7,sp
-	bne	.Louter
-
-	ldr	r2,[r0,#12*4]		@ pull rp
-	mov	r5,sp
-	add	r0,r0,#4		@ r0 to point at &tp[num]
-	sub	r5,r0,r5		@ "original" num value
-	mov	r4,sp			@ "rewind" r4
-	mov	r1,r4			@ "borrow" r1
-	sub	r3,r3,r5		@ "rewind" r3 to &np[0]
-
-	subs	r7,r7,r7		@ "clear" carry flag
-.Lsub:	ldr	r7,[r4],#4
-	ldr	r6,[r3],#4
-	sbcs	r7,r7,r6		@ tp[j]-np[j]
-	str	r7,[r2],#4		@ rp[j]=
-	teq	r4,r0		@ preserve carry
-	bne	.Lsub
-	sbcs	r14,r14,#0		@ upmost carry
-	mov	r4,sp			@ "rewind" r4
-	sub	r2,r2,r5		@ "rewind" r2
-
-.Lcopy:	ldr	r7,[r4]		@ conditional copy
-	ldr	r5,[r2]
-	str	sp,[r4],#4		@ zap tp
-#ifdef	__thumb2__
-	it	cc
-#endif
-	movcc	r5,r7
-	str	r5,[r2],#4
-	teq	r4,r0		@ preserve carry
-	bne	.Lcopy
-
-	mov	sp,r0
-	add	sp,sp,#4		@ skip over tp[num+1]
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ restore registers
-	add	sp,sp,#2*4		@ skip over {r0,r2}
-	mov	r0,#1
-.Labrt:
-#if __ARM_ARCH__>=5
-	bx	lr				@ bx lr
-#else
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	bn_mul_mont,.-bn_mul_mont
-#if __ARM_MAX_ARCH__>=7
-.arch	armv7-a
-.fpu	neon
-
-.type	bn_mul8x_mont_neon,%function
-.align	5
-bn_mul8x_mont_neon:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ ABI specification says so
-	ldmia	ip,{r4,r5}		@ load rest of parameter block
-	mov	ip,sp
-
-	cmp	r5,#8
-	bhi	.LNEON_8n
-
-	@ special case for r5==8, everything is in register bank...
-
-	vld1.32	{d28[0]}, [r2,:32]!
-	veor	d8,d8,d8
-	sub	r7,sp,r5,lsl#4
-	vld1.32	{d0,d1,d2,d3},  [r1]!		@ can't specify :32 :-(
-	and	r7,r7,#-64
-	vld1.32	{d30[0]}, [r4,:32]
-	mov	sp,r7			@ alloca
-	vzip.16	d28,d8
-
-	vmull.u32	q6,d28,d0[0]
-	vmull.u32	q7,d28,d0[1]
-	vmull.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmull.u32	q9,d28,d1[1]
-
-	vadd.u64	d29,d29,d12
-	veor	d8,d8,d8
-	vmul.u32	d29,d29,d30
-
-	vmull.u32	q10,d28,d2[0]
-	vld1.32	{d4,d5,d6,d7}, [r3]!
-	vmull.u32	q11,d28,d2[1]
-	vmull.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmull.u32	q13,d28,d3[1]
-
-	vmlal.u32	q6,d29,d4[0]
-	sub	r9,r5,#1
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-
-	vmlal.u32	q10,d29,d6[0]
-	vmov	q5,q6
-	vmlal.u32	q11,d29,d6[1]
-	vmov	q6,q7
-	vmlal.u32	q12,d29,d7[0]
-	vmov	q7,q8
-	vmlal.u32	q13,d29,d7[1]
-	vmov	q8,q9
-	vmov	q9,q10
-	vshr.u64	d10,d10,#16
-	vmov	q10,q11
-	vmov	q11,q12
-	vadd.u64	d10,d10,d11
-	vmov	q12,q13
-	veor	q13,q13
-	vshr.u64	d10,d10,#16
-
-	b	.LNEON_outer8
-
-.align	4
-.LNEON_outer8:
-	vld1.32	{d28[0]}, [r2,:32]!
-	veor	d8,d8,d8
-	vzip.16	d28,d8
-	vadd.u64	d12,d12,d10
-
-	vmlal.u32	q6,d28,d0[0]
-	vmlal.u32	q7,d28,d0[1]
-	vmlal.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmlal.u32	q9,d28,d1[1]
-
-	vadd.u64	d29,d29,d12
-	veor	d8,d8,d8
-	subs	r9,r9,#1
-	vmul.u32	d29,d29,d30
-
-	vmlal.u32	q10,d28,d2[0]
-	vmlal.u32	q11,d28,d2[1]
-	vmlal.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q13,d28,d3[1]
-
-	vmlal.u32	q6,d29,d4[0]
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-
-	vmlal.u32	q10,d29,d6[0]
-	vmov	q5,q6
-	vmlal.u32	q11,d29,d6[1]
-	vmov	q6,q7
-	vmlal.u32	q12,d29,d7[0]
-	vmov	q7,q8
-	vmlal.u32	q13,d29,d7[1]
-	vmov	q8,q9
-	vmov	q9,q10
-	vshr.u64	d10,d10,#16
-	vmov	q10,q11
-	vmov	q11,q12
-	vadd.u64	d10,d10,d11
-	vmov	q12,q13
-	veor	q13,q13
-	vshr.u64	d10,d10,#16
-
-	bne	.LNEON_outer8
-
-	vadd.u64	d12,d12,d10
-	mov	r7,sp
-	vshr.u64	d10,d12,#16
-	mov	r8,r5
-	vadd.u64	d13,d13,d10
-	add	r6,sp,#96
-	vshr.u64	d10,d13,#16
-	vzip.16	d12,d13
-
-	b	.LNEON_tail_entry
-
-.align	4
-.LNEON_8n:
-	veor	q6,q6,q6
-	sub	r7,sp,#128
-	veor	q7,q7,q7
-	sub	r7,r7,r5,lsl#4
-	veor	q8,q8,q8
-	and	r7,r7,#-64
-	veor	q9,q9,q9
-	mov	sp,r7			@ alloca
-	veor	q10,q10,q10
-	add	r7,r7,#256
-	veor	q11,q11,q11
-	sub	r8,r5,#8
-	veor	q12,q12,q12
-	veor	q13,q13,q13
-
-.LNEON_8n_init:
-	vst1.64	{q6,q7},[r7,:256]!
-	subs	r8,r8,#8
-	vst1.64	{q8,q9},[r7,:256]!
-	vst1.64	{q10,q11},[r7,:256]!
-	vst1.64	{q12,q13},[r7,:256]!
-	bne	.LNEON_8n_init
-
-	add	r6,sp,#256
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	add	r10,sp,#8
-	vld1.32	{d30[0]},[r4,:32]
-	mov	r9,r5
-	b	.LNEON_8n_outer
-
-.align	4
-.LNEON_8n_outer:
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	veor	d8,d8,d8
-	vzip.16	d28,d8
-	add	r7,sp,#128
-	vld1.32	{d4,d5,d6,d7},[r3]!
-
-	vmlal.u32	q6,d28,d0[0]
-	vmlal.u32	q7,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmlal.u32	q9,d28,d1[1]
-	vadd.u64	d29,d29,d12
-	vmlal.u32	q10,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q11,d28,d2[1]
-	vst1.32	{d28},[sp,:64]		@ put aside smashed b[8*i+0]
-	vmlal.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q13,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q6,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q7,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q8,d29,d5[0]
-	vshr.u64	d12,d12,#16
-	vmlal.u32	q9,d29,d5[1]
-	vmlal.u32	q10,d29,d6[0]
-	vadd.u64	d12,d12,d13
-	vmlal.u32	q11,d29,d6[1]
-	vshr.u64	d12,d12,#16
-	vmlal.u32	q12,d29,d7[0]
-	vmlal.u32	q13,d29,d7[1]
-	vadd.u64	d14,d14,d12
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+0]
-	vmlal.u32	q7,d28,d0[0]
-	vld1.64	{q6},[r6,:128]!
-	vmlal.u32	q8,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q9,d28,d1[0]
-	vshl.i64	d29,d15,#16
-	vmlal.u32	q10,d28,d1[1]
-	vadd.u64	d29,d29,d14
-	vmlal.u32	q11,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q12,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+1]
-	vmlal.u32	q13,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q6,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q7,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q8,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q9,d29,d5[0]
-	vshr.u64	d14,d14,#16
-	vmlal.u32	q10,d29,d5[1]
-	vmlal.u32	q11,d29,d6[0]
-	vadd.u64	d14,d14,d15
-	vmlal.u32	q12,d29,d6[1]
-	vshr.u64	d14,d14,#16
-	vmlal.u32	q13,d29,d7[0]
-	vmlal.u32	q6,d29,d7[1]
-	vadd.u64	d16,d16,d14
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+1]
-	vmlal.u32	q8,d28,d0[0]
-	vld1.64	{q7},[r6,:128]!
-	vmlal.u32	q9,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q10,d28,d1[0]
-	vshl.i64	d29,d17,#16
-	vmlal.u32	q11,d28,d1[1]
-	vadd.u64	d29,d29,d16
-	vmlal.u32	q12,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q13,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+2]
-	vmlal.u32	q6,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q7,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q8,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q9,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q10,d29,d5[0]
-	vshr.u64	d16,d16,#16
-	vmlal.u32	q11,d29,d5[1]
-	vmlal.u32	q12,d29,d6[0]
-	vadd.u64	d16,d16,d17
-	vmlal.u32	q13,d29,d6[1]
-	vshr.u64	d16,d16,#16
-	vmlal.u32	q6,d29,d7[0]
-	vmlal.u32	q7,d29,d7[1]
-	vadd.u64	d18,d18,d16
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+2]
-	vmlal.u32	q9,d28,d0[0]
-	vld1.64	{q8},[r6,:128]!
-	vmlal.u32	q10,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q11,d28,d1[0]
-	vshl.i64	d29,d19,#16
-	vmlal.u32	q12,d28,d1[1]
-	vadd.u64	d29,d29,d18
-	vmlal.u32	q13,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q6,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+3]
-	vmlal.u32	q7,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q8,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q9,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q10,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q11,d29,d5[0]
-	vshr.u64	d18,d18,#16
-	vmlal.u32	q12,d29,d5[1]
-	vmlal.u32	q13,d29,d6[0]
-	vadd.u64	d18,d18,d19
-	vmlal.u32	q6,d29,d6[1]
-	vshr.u64	d18,d18,#16
-	vmlal.u32	q7,d29,d7[0]
-	vmlal.u32	q8,d29,d7[1]
-	vadd.u64	d20,d20,d18
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+3]
-	vmlal.u32	q10,d28,d0[0]
-	vld1.64	{q9},[r6,:128]!
-	vmlal.u32	q11,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q12,d28,d1[0]
-	vshl.i64	d29,d21,#16
-	vmlal.u32	q13,d28,d1[1]
-	vadd.u64	d29,d29,d20
-	vmlal.u32	q6,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q7,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+4]
-	vmlal.u32	q8,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q9,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q10,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q11,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q12,d29,d5[0]
-	vshr.u64	d20,d20,#16
-	vmlal.u32	q13,d29,d5[1]
-	vmlal.u32	q6,d29,d6[0]
-	vadd.u64	d20,d20,d21
-	vmlal.u32	q7,d29,d6[1]
-	vshr.u64	d20,d20,#16
-	vmlal.u32	q8,d29,d7[0]
-	vmlal.u32	q9,d29,d7[1]
-	vadd.u64	d22,d22,d20
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+4]
-	vmlal.u32	q11,d28,d0[0]
-	vld1.64	{q10},[r6,:128]!
-	vmlal.u32	q12,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q13,d28,d1[0]
-	vshl.i64	d29,d23,#16
-	vmlal.u32	q6,d28,d1[1]
-	vadd.u64	d29,d29,d22
-	vmlal.u32	q7,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q8,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+5]
-	vmlal.u32	q9,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q10,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q11,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q12,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q13,d29,d5[0]
-	vshr.u64	d22,d22,#16
-	vmlal.u32	q6,d29,d5[1]
-	vmlal.u32	q7,d29,d6[0]
-	vadd.u64	d22,d22,d23
-	vmlal.u32	q8,d29,d6[1]
-	vshr.u64	d22,d22,#16
-	vmlal.u32	q9,d29,d7[0]
-	vmlal.u32	q10,d29,d7[1]
-	vadd.u64	d24,d24,d22
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+5]
-	vmlal.u32	q12,d28,d0[0]
-	vld1.64	{q11},[r6,:128]!
-	vmlal.u32	q13,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q6,d28,d1[0]
-	vshl.i64	d29,d25,#16
-	vmlal.u32	q7,d28,d1[1]
-	vadd.u64	d29,d29,d24
-	vmlal.u32	q8,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q9,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+6]
-	vmlal.u32	q10,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q11,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q12,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q13,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q6,d29,d5[0]
-	vshr.u64	d24,d24,#16
-	vmlal.u32	q7,d29,d5[1]
-	vmlal.u32	q8,d29,d6[0]
-	vadd.u64	d24,d24,d25
-	vmlal.u32	q9,d29,d6[1]
-	vshr.u64	d24,d24,#16
-	vmlal.u32	q10,d29,d7[0]
-	vmlal.u32	q11,d29,d7[1]
-	vadd.u64	d26,d26,d24
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+6]
-	vmlal.u32	q13,d28,d0[0]
-	vld1.64	{q12},[r6,:128]!
-	vmlal.u32	q6,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q7,d28,d1[0]
-	vshl.i64	d29,d27,#16
-	vmlal.u32	q8,d28,d1[1]
-	vadd.u64	d29,d29,d26
-	vmlal.u32	q9,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q10,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+7]
-	vmlal.u32	q11,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q12,d28,d3[1]
-	vld1.32	{d28},[sp,:64]		@ pull smashed b[8*i+0]
-	vmlal.u32	q13,d29,d4[0]
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	vmlal.u32	q6,d29,d4[1]
-	vmlal.u32	q7,d29,d5[0]
-	vshr.u64	d26,d26,#16
-	vmlal.u32	q8,d29,d5[1]
-	vmlal.u32	q9,d29,d6[0]
-	vadd.u64	d26,d26,d27
-	vmlal.u32	q10,d29,d6[1]
-	vshr.u64	d26,d26,#16
-	vmlal.u32	q11,d29,d7[0]
-	vmlal.u32	q12,d29,d7[1]
-	vadd.u64	d12,d12,d26
-	vst1.32	{d29},[r10,:64]	@ put aside smashed m[8*i+7]
-	add	r10,sp,#8		@ rewind
-	sub	r8,r5,#8
-	b	.LNEON_8n_inner
-
-.align	4
-.LNEON_8n_inner:
-	subs	r8,r8,#8
-	vmlal.u32	q6,d28,d0[0]
-	vld1.64	{q13},[r6,:128]
-	vmlal.u32	q7,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+0]
-	vmlal.u32	q8,d28,d1[0]
-	vld1.32	{d4,d5,d6,d7},[r3]!
-	vmlal.u32	q9,d28,d1[1]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q10,d28,d2[0]
-	vmlal.u32	q11,d28,d2[1]
-	vmlal.u32	q12,d28,d3[0]
-	vmlal.u32	q13,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+1]
-	vmlal.u32	q6,d29,d4[0]
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-	vmlal.u32	q10,d29,d6[0]
-	vmlal.u32	q11,d29,d6[1]
-	vmlal.u32	q12,d29,d7[0]
-	vmlal.u32	q13,d29,d7[1]
-	vst1.64	{q6},[r7,:128]!
-	vmlal.u32	q7,d28,d0[0]
-	vld1.64	{q6},[r6,:128]
-	vmlal.u32	q8,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+1]
-	vmlal.u32	q9,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q10,d28,d1[1]
-	vmlal.u32	q11,d28,d2[0]
-	vmlal.u32	q12,d28,d2[1]
-	vmlal.u32	q13,d28,d3[0]
-	vmlal.u32	q6,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+2]
-	vmlal.u32	q7,d29,d4[0]
-	vmlal.u32	q8,d29,d4[1]
-	vmlal.u32	q9,d29,d5[0]
-	vmlal.u32	q10,d29,d5[1]
-	vmlal.u32	q11,d29,d6[0]
-	vmlal.u32	q12,d29,d6[1]
-	vmlal.u32	q13,d29,d7[0]
-	vmlal.u32	q6,d29,d7[1]
-	vst1.64	{q7},[r7,:128]!
-	vmlal.u32	q8,d28,d0[0]
-	vld1.64	{q7},[r6,:128]
-	vmlal.u32	q9,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+2]
-	vmlal.u32	q10,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q11,d28,d1[1]
-	vmlal.u32	q12,d28,d2[0]
-	vmlal.u32	q13,d28,d2[1]
-	vmlal.u32	q6,d28,d3[0]
-	vmlal.u32	q7,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+3]
-	vmlal.u32	q8,d29,d4[0]
-	vmlal.u32	q9,d29,d4[1]
-	vmlal.u32	q10,d29,d5[0]
-	vmlal.u32	q11,d29,d5[1]
-	vmlal.u32	q12,d29,d6[0]
-	vmlal.u32	q13,d29,d6[1]
-	vmlal.u32	q6,d29,d7[0]
-	vmlal.u32	q7,d29,d7[1]
-	vst1.64	{q8},[r7,:128]!
-	vmlal.u32	q9,d28,d0[0]
-	vld1.64	{q8},[r6,:128]
-	vmlal.u32	q10,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+3]
-	vmlal.u32	q11,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q12,d28,d1[1]
-	vmlal.u32	q13,d28,d2[0]
-	vmlal.u32	q6,d28,d2[1]
-	vmlal.u32	q7,d28,d3[0]
-	vmlal.u32	q8,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+4]
-	vmlal.u32	q9,d29,d4[0]
-	vmlal.u32	q10,d29,d4[1]
-	vmlal.u32	q11,d29,d5[0]
-	vmlal.u32	q12,d29,d5[1]
-	vmlal.u32	q13,d29,d6[0]
-	vmlal.u32	q6,d29,d6[1]
-	vmlal.u32	q7,d29,d7[0]
-	vmlal.u32	q8,d29,d7[1]
-	vst1.64	{q9},[r7,:128]!
-	vmlal.u32	q10,d28,d0[0]
-	vld1.64	{q9},[r6,:128]
-	vmlal.u32	q11,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+4]
-	vmlal.u32	q12,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q13,d28,d1[1]
-	vmlal.u32	q6,d28,d2[0]
-	vmlal.u32	q7,d28,d2[1]
-	vmlal.u32	q8,d28,d3[0]
-	vmlal.u32	q9,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+5]
-	vmlal.u32	q10,d29,d4[0]
-	vmlal.u32	q11,d29,d4[1]
-	vmlal.u32	q12,d29,d5[0]
-	vmlal.u32	q13,d29,d5[1]
-	vmlal.u32	q6,d29,d6[0]
-	vmlal.u32	q7,d29,d6[1]
-	vmlal.u32	q8,d29,d7[0]
-	vmlal.u32	q9,d29,d7[1]
-	vst1.64	{q10},[r7,:128]!
-	vmlal.u32	q11,d28,d0[0]
-	vld1.64	{q10},[r6,:128]
-	vmlal.u32	q12,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+5]
-	vmlal.u32	q13,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q6,d28,d1[1]
-	vmlal.u32	q7,d28,d2[0]
-	vmlal.u32	q8,d28,d2[1]
-	vmlal.u32	q9,d28,d3[0]
-	vmlal.u32	q10,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+6]
-	vmlal.u32	q11,d29,d4[0]
-	vmlal.u32	q12,d29,d4[1]
-	vmlal.u32	q13,d29,d5[0]
-	vmlal.u32	q6,d29,d5[1]
-	vmlal.u32	q7,d29,d6[0]
-	vmlal.u32	q8,d29,d6[1]
-	vmlal.u32	q9,d29,d7[0]
-	vmlal.u32	q10,d29,d7[1]
-	vst1.64	{q11},[r7,:128]!
-	vmlal.u32	q12,d28,d0[0]
-	vld1.64	{q11},[r6,:128]
-	vmlal.u32	q13,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+6]
-	vmlal.u32	q6,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q7,d28,d1[1]
-	vmlal.u32	q8,d28,d2[0]
-	vmlal.u32	q9,d28,d2[1]
-	vmlal.u32	q10,d28,d3[0]
-	vmlal.u32	q11,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+7]
-	vmlal.u32	q12,d29,d4[0]
-	vmlal.u32	q13,d29,d4[1]
-	vmlal.u32	q6,d29,d5[0]
-	vmlal.u32	q7,d29,d5[1]
-	vmlal.u32	q8,d29,d6[0]
-	vmlal.u32	q9,d29,d6[1]
-	vmlal.u32	q10,d29,d7[0]
-	vmlal.u32	q11,d29,d7[1]
-	vst1.64	{q12},[r7,:128]!
-	vmlal.u32	q13,d28,d0[0]
-	vld1.64	{q12},[r6,:128]
-	vmlal.u32	q6,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+7]
-	vmlal.u32	q7,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q8,d28,d1[1]
-	vmlal.u32	q9,d28,d2[0]
-	vmlal.u32	q10,d28,d2[1]
-	vmlal.u32	q11,d28,d3[0]
-	vmlal.u32	q12,d28,d3[1]
-	it	eq
-	subeq	r1,r1,r5,lsl#2	@ rewind
-	vmlal.u32	q13,d29,d4[0]
-	vld1.32	{d28},[sp,:64]		@ pull smashed b[8*i+0]
-	vmlal.u32	q6,d29,d4[1]
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	vmlal.u32	q7,d29,d5[0]
-	add	r10,sp,#8		@ rewind
-	vmlal.u32	q8,d29,d5[1]
-	vmlal.u32	q9,d29,d6[0]
-	vmlal.u32	q10,d29,d6[1]
-	vmlal.u32	q11,d29,d7[0]
-	vst1.64	{q13},[r7,:128]!
-	vmlal.u32	q12,d29,d7[1]
-
-	bne	.LNEON_8n_inner
-	add	r6,sp,#128
-	vst1.64	{q6,q7},[r7,:256]!
-	veor	q2,q2,q2		@ d4-d5
-	vst1.64	{q8,q9},[r7,:256]!
-	veor	q3,q3,q3		@ d6-d7
-	vst1.64	{q10,q11},[r7,:256]!
-	vst1.64	{q12},[r7,:128]
-
-	subs	r9,r9,#8
-	vld1.64	{q6,q7},[r6,:256]!
-	vld1.64	{q8,q9},[r6,:256]!
-	vld1.64	{q10,q11},[r6,:256]!
-	vld1.64	{q12,q13},[r6,:256]!
-
-	itt	ne
-	subne	r3,r3,r5,lsl#2	@ rewind
-	bne	.LNEON_8n_outer
-
-	add	r7,sp,#128
-	vst1.64	{q2,q3}, [sp,:256]!	@ start wiping stack frame
-	vshr.u64	d10,d12,#16
-	vst1.64	{q2,q3},[sp,:256]!
-	vadd.u64	d13,d13,d10
-	vst1.64	{q2,q3}, [sp,:256]!
-	vshr.u64	d10,d13,#16
-	vst1.64	{q2,q3}, [sp,:256]!
-	vzip.16	d12,d13
-
-	mov	r8,r5
-	b	.LNEON_tail_entry
-
-.align	4
-.LNEON_tail:
-	vadd.u64	d12,d12,d10
-	vshr.u64	d10,d12,#16
-	vld1.64	{q8,q9}, [r6, :256]!
-	vadd.u64	d13,d13,d10
-	vld1.64	{q10,q11}, [r6, :256]!
-	vshr.u64	d10,d13,#16
-	vld1.64	{q12,q13}, [r6, :256]!
-	vzip.16	d12,d13
-
-.LNEON_tail_entry:
-	vadd.u64	d14,d14,d10
-	vst1.32	{d12[0]}, [r7, :32]!
-	vshr.u64	d10,d14,#16
-	vadd.u64	d15,d15,d10
-	vshr.u64	d10,d15,#16
-	vzip.16	d14,d15
-	vadd.u64	d16,d16,d10
-	vst1.32	{d14[0]}, [r7, :32]!
-	vshr.u64	d10,d16,#16
-	vadd.u64	d17,d17,d10
-	vshr.u64	d10,d17,#16
-	vzip.16	d16,d17
-	vadd.u64	d18,d18,d10
-	vst1.32	{d16[0]}, [r7, :32]!
-	vshr.u64	d10,d18,#16
-	vadd.u64	d19,d19,d10
-	vshr.u64	d10,d19,#16
-	vzip.16	d18,d19
-	vadd.u64	d20,d20,d10
-	vst1.32	{d18[0]}, [r7, :32]!
-	vshr.u64	d10,d20,#16
-	vadd.u64	d21,d21,d10
-	vshr.u64	d10,d21,#16
-	vzip.16	d20,d21
-	vadd.u64	d22,d22,d10
-	vst1.32	{d20[0]}, [r7, :32]!
-	vshr.u64	d10,d22,#16
-	vadd.u64	d23,d23,d10
-	vshr.u64	d10,d23,#16
-	vzip.16	d22,d23
-	vadd.u64	d24,d24,d10
-	vst1.32	{d22[0]}, [r7, :32]!
-	vshr.u64	d10,d24,#16
-	vadd.u64	d25,d25,d10
-	vshr.u64	d10,d25,#16
-	vzip.16	d24,d25
-	vadd.u64	d26,d26,d10
-	vst1.32	{d24[0]}, [r7, :32]!
-	vshr.u64	d10,d26,#16
-	vadd.u64	d27,d27,d10
-	vshr.u64	d10,d27,#16
-	vzip.16	d26,d27
-	vld1.64	{q6,q7}, [r6, :256]!
-	subs	r8,r8,#8
-	vst1.32	{d26[0]},   [r7, :32]!
-	bne	.LNEON_tail
-
-	vst1.32	{d10[0]}, [r7, :32]		@ top-most bit
-	sub	r3,r3,r5,lsl#2			@ rewind r3
-	subs	r1,sp,#0				@ clear carry flag
-	add	r2,sp,r5,lsl#2
-
-.LNEON_sub:
-	ldmia	r1!, {r4,r5,r6,r7}
-	ldmia	r3!, {r8,r9,r10,r11}
-	sbcs	r8, r4,r8
-	sbcs	r9, r5,r9
-	sbcs	r10,r6,r10
-	sbcs	r11,r7,r11
-	teq	r1,r2				@ preserves carry
-	stmia	r0!, {r8,r9,r10,r11}
-	bne	.LNEON_sub
-
-	ldr	r10, [r1]				@ load top-most bit
-	mov	r11,sp
-	veor	q0,q0,q0
-	sub	r11,r2,r11				@ this is num*4
-	veor	q1,q1,q1
-	mov	r1,sp
-	sub	r0,r0,r11				@ rewind r0
-	mov	r3,r2				@ second 3/4th of frame
-	sbcs	r10,r10,#0				@ result is carry flag
-
-.LNEON_copy_n_zap:
-	ldmia	r1!, {r4,r5,r6,r7}
-	ldmia	r0,  {r8,r9,r10,r11}
-	it	cc
-	movcc	r8, r4
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	itt	cc
-	movcc	r9, r5
-	movcc	r10,r6
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	it	cc
-	movcc	r11,r7
-	ldmia	r1, {r4,r5,r6,r7}
-	stmia	r0!, {r8,r9,r10,r11}
-	sub	r1,r1,#16
-	ldmia	r0, {r8,r9,r10,r11}
-	it	cc
-	movcc	r8, r4
-	vst1.64	{q0,q1}, [r1,:256]!			@ wipe
-	itt	cc
-	movcc	r9, r5
-	movcc	r10,r6
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	it	cc
-	movcc	r11,r7
-	teq	r1,r2				@ preserves carry
-	stmia	r0!, {r8,r9,r10,r11}
-	bne	.LNEON_copy_n_zap
-
-	mov	sp,ip
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
-	bx	lr						@ bx lr
-.size	bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
-#endif
-.byte	77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#if __ARM_MAX_ARCH__>=7
-.comm	OPENSSL_armcap_P,4,4
-.hidden	OPENSSL_armcap_P
-#endif
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/bsaes-armv7.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/bsaes-armv7.S
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/ghash-armv4.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/ghash-armv4.S
@ -1,255 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__arm__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
-@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
-@ instructions are in aesv8-armx.pl.)
-.arch	armv7-a
-
-.text
-#if defined(__thumb2__) || defined(__clang__)
-.syntax	unified
-#define ldrplb  ldrbpl
-#define ldrneb  ldrbne
-#endif
-#if defined(__thumb2__)
-.thumb
-#else
-.code	32
-#endif
-#if __ARM_MAX_ARCH__>=7
-.arch	armv7-a
-.fpu	neon
-
-.globl	gcm_init_neon
-.hidden	gcm_init_neon
-.type	gcm_init_neon,%function
-.align	4
-gcm_init_neon:
-	vld1.64	d7,[r1]!		@ load H
-	vmov.i8	q8,#0xe1
-	vld1.64	d6,[r1]
-	vshl.i64	d17,#57
-	vshr.u64	d16,#63		@ t0=0xc2....01
-	vdup.8	q9,d7[7]
-	vshr.u64	d26,d6,#63
-	vshr.s8	q9,#7			@ broadcast carry bit
-	vshl.i64	q3,q3,#1
-	vand	q8,q8,q9
-	vorr	d7,d26		@ H<<<=1
-	veor	q3,q3,q8		@ twisted H
-	vstmia	r0,{q3}
-
-	bx	lr					@ bx lr
-.size	gcm_init_neon,.-gcm_init_neon
-
-.globl	gcm_gmult_neon
-.hidden	gcm_gmult_neon
-.type	gcm_gmult_neon,%function
-.align	4
-gcm_gmult_neon:
-	vld1.64	d7,[r0]!		@ load Xi
-	vld1.64	d6,[r0]!
-	vmov.i64	d29,#0x0000ffffffffffff
-	vldmia	r1,{d26,d27}	@ load twisted H
-	vmov.i64	d30,#0x00000000ffffffff
-#ifdef __ARMEL__
-	vrev64.8	q3,q3
-#endif
-	vmov.i64	d31,#0x000000000000ffff
-	veor	d28,d26,d27		@ Karatsuba pre-processing
-	mov	r3,#16
-	b	.Lgmult_neon
-.size	gcm_gmult_neon,.-gcm_gmult_neon
-
-.globl	gcm_ghash_neon
-.hidden	gcm_ghash_neon
-.type	gcm_ghash_neon,%function
-.align	4
-gcm_ghash_neon:
-	vld1.64	d1,[r0]!		@ load Xi
-	vld1.64	d0,[r0]!
-	vmov.i64	d29,#0x0000ffffffffffff
-	vldmia	r1,{d26,d27}	@ load twisted H
-	vmov.i64	d30,#0x00000000ffffffff
-#ifdef __ARMEL__
-	vrev64.8	q0,q0
-#endif
-	vmov.i64	d31,#0x000000000000ffff
-	veor	d28,d26,d27		@ Karatsuba pre-processing
-
-.Loop_neon:
-	vld1.64	d7,[r2]!		@ load inp
-	vld1.64	d6,[r2]!
-#ifdef __ARMEL__
-	vrev64.8	q3,q3
-#endif
-	veor	q3,q0			@ inp^=Xi
-.Lgmult_neon:
-	vext.8	d16, d26, d26, #1	@ A1
-	vmull.p8	q8, d16, d6		@ F = A1*B
-	vext.8	d0, d6, d6, #1	@ B1
-	vmull.p8	q0, d26, d0		@ E = A*B1
-	vext.8	d18, d26, d26, #2	@ A2
-	vmull.p8	q9, d18, d6		@ H = A2*B
-	vext.8	d22, d6, d6, #2	@ B2
-	vmull.p8	q11, d26, d22		@ G = A*B2
-	vext.8	d20, d26, d26, #3	@ A3
-	veor	q8, q8, q0		@ L = E + F
-	vmull.p8	q10, d20, d6		@ J = A3*B
-	vext.8	d0, d6, d6, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q0, d26, d0		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d6, d6, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d26, d22		@ K = A*B4
-	veor	q10, q10, q0		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q0, d26, d6		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q0, q0, q8
-	veor	q0, q0, q10
-	veor	d6,d6,d7	@ Karatsuba pre-processing
-	vext.8	d16, d28, d28, #1	@ A1
-	vmull.p8	q8, d16, d6		@ F = A1*B
-	vext.8	d2, d6, d6, #1	@ B1
-	vmull.p8	q1, d28, d2		@ E = A*B1
-	vext.8	d18, d28, d28, #2	@ A2
-	vmull.p8	q9, d18, d6		@ H = A2*B
-	vext.8	d22, d6, d6, #2	@ B2
-	vmull.p8	q11, d28, d22		@ G = A*B2
-	vext.8	d20, d28, d28, #3	@ A3
-	veor	q8, q8, q1		@ L = E + F
-	vmull.p8	q10, d20, d6		@ J = A3*B
-	vext.8	d2, d6, d6, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q1, d28, d2		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d6, d6, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d28, d22		@ K = A*B4
-	veor	q10, q10, q1		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q1, d28, d6		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q1, q1, q8
-	veor	q1, q1, q10
-	vext.8	d16, d27, d27, #1	@ A1
-	vmull.p8	q8, d16, d7		@ F = A1*B
-	vext.8	d4, d7, d7, #1	@ B1
-	vmull.p8	q2, d27, d4		@ E = A*B1
-	vext.8	d18, d27, d27, #2	@ A2
-	vmull.p8	q9, d18, d7		@ H = A2*B
-	vext.8	d22, d7, d7, #2	@ B2
-	vmull.p8	q11, d27, d22		@ G = A*B2
-	vext.8	d20, d27, d27, #3	@ A3
-	veor	q8, q8, q2		@ L = E + F
-	vmull.p8	q10, d20, d7		@ J = A3*B
-	vext.8	d4, d7, d7, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q2, d27, d4		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d7, d7, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d27, d22		@ K = A*B4
-	veor	q10, q10, q2		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q2, d27, d7		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q2, q2, q8
-	veor	q2, q2, q10
-	veor	q1,q1,q0		@ Karatsuba post-processing
-	veor	q1,q1,q2
-	veor	d1,d1,d2
-	veor	d4,d4,d3	@ Xh|Xl - 256-bit result
-
-	@ equivalent of reduction_avx from ghash-x86_64.pl
-	vshl.i64	q9,q0,#57		@ 1st phase
-	vshl.i64	q10,q0,#62
-	veor	q10,q10,q9		@
-	vshl.i64	q9,q0,#63
-	veor	q10, q10, q9		@
-	veor	d1,d1,d20	@
-	veor	d4,d4,d21
-
-	vshr.u64	q10,q0,#1		@ 2nd phase
-	veor	q2,q2,q0
-	veor	q0,q0,q10		@
-	vshr.u64	q10,q10,#6
-	vshr.u64	q0,q0,#1		@
-	veor	q0,q0,q2		@
-	veor	q0,q0,q10		@
-
-	subs	r3,#16
-	bne	.Loop_neon
-
-#ifdef __ARMEL__
-	vrev64.8	q0,q0
-#endif
-	sub	r0,#16
-	vst1.64	d1,[r0]!		@ write out Xi
-	vst1.64	d0,[r0]
-
-	bx	lr					@ bx lr
-.size	gcm_ghash_neon,.-gcm_ghash_neon
-#endif
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/ghashv8-armx32.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/ghashv8-armx32.S
@ -1,253 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__arm__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-.fpu	neon
-.code	32
-#undef	__thumb2__
-.globl	gcm_init_v8
-.hidden	gcm_init_v8
-.type	gcm_init_v8,%function
-.align	4
-gcm_init_v8:
-	vld1.64	{q9},[r1]		@ load input H
-	vmov.i8	q11,#0xe1
-	vshl.i64	q11,q11,#57		@ 0xc2.0
-	vext.8	q3,q9,q9,#8
-	vshr.u64	q10,q11,#63
-	vdup.32	q9,d18[1]
-	vext.8	q8,q10,q11,#8		@ t0=0xc2....01
-	vshr.u64	q10,q3,#63
-	vshr.s32	q9,q9,#31		@ broadcast carry bit
-	vand	q10,q10,q8
-	vshl.i64	q3,q3,#1
-	vext.8	q10,q10,q10,#8
-	vand	q8,q8,q9
-	vorr	q3,q3,q10		@ H<<<=1
-	veor	q12,q3,q8		@ twisted H
-	vst1.64	{q12},[r0]!		@ store Htable[0]
-
-	@ calculate H^2
-	vext.8	q8,q12,q12,#8		@ Karatsuba pre-processing
-.byte	0xa8,0x0e,0xa8,0xf2	@ pmull q0,q12,q12
-	veor	q8,q8,q12
-.byte	0xa9,0x4e,0xa9,0xf2	@ pmull2 q2,q12,q12
-.byte	0xa0,0x2e,0xa0,0xf2	@ pmull q1,q8,q8
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase
-
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	veor	q0,q1,q10
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q10,q10,q2
-	veor	q14,q0,q10
-
-	vext.8	q9,q14,q14,#8		@ Karatsuba pre-processing
-	veor	q9,q9,q14
-	vext.8	q13,q8,q9,#8		@ pack Karatsuba pre-processed
-	vst1.64	{q13,q14},[r0]		@ store Htable[1..2]
-
-	bx	lr
-.size	gcm_init_v8,.-gcm_init_v8
-.globl	gcm_gmult_v8
-.hidden	gcm_gmult_v8
-.type	gcm_gmult_v8,%function
-.align	4
-gcm_gmult_v8:
-	vld1.64	{q9},[r0]		@ load Xi
-	vmov.i8	q11,#0xe1
-	vld1.64	{q12,q13},[r1]	@ load twisted H, ...
-	vshl.u64	q11,q11,#57
-#ifndef __ARMEB__
-	vrev64.8	q9,q9
-#endif
-	vext.8	q3,q9,q9,#8
-
-.byte	0x86,0x0e,0xa8,0xf2	@ pmull q0,q12,q3		@ H.lo·Xi.lo
-	veor	q9,q9,q3		@ Karatsuba pre-processing
-.byte	0x87,0x4e,0xa9,0xf2	@ pmull2 q2,q12,q3		@ H.hi·Xi.hi
-.byte	0xa2,0x2e,0xaa,0xf2	@ pmull q1,q13,q9		@ (H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase of reduction
-
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	veor	q0,q1,q10
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase of reduction
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q10,q10,q2
-	veor	q0,q0,q10
-
-#ifndef __ARMEB__
-	vrev64.8	q0,q0
-#endif
-	vext.8	q0,q0,q0,#8
-	vst1.64	{q0},[r0]		@ write out Xi
-
-	bx	lr
-.size	gcm_gmult_v8,.-gcm_gmult_v8
-.globl	gcm_ghash_v8
-.hidden	gcm_ghash_v8
-.type	gcm_ghash_v8,%function
-.align	4
-gcm_ghash_v8:
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ 32-bit ABI says so
-	vld1.64	{q0},[r0]		@ load [rotated] Xi
-						@ "[rotated]" means that
-						@ loaded value would have
-						@ to be rotated in order to
-						@ make it appear as in
-						@ algorithm specification
-	subs	r3,r3,#32		@ see if r3 is 32 or larger
-	mov	r12,#16		@ r12 is used as post-
-						@ increment for input pointer;
-						@ as loop is modulo-scheduled
-						@ r12 is zeroed just in time
-						@ to preclude overstepping
-						@ inp[len], which means that
-						@ last block[s] are actually
-						@ loaded twice, but last
-						@ copy is not processed
-	vld1.64	{q12,q13},[r1]!	@ load twisted H, ..., H^2
-	vmov.i8	q11,#0xe1
-	vld1.64	{q14},[r1]
-	moveq	r12,#0			@ is it time to zero r12?
-	vext.8	q0,q0,q0,#8		@ rotate Xi
-	vld1.64	{q8},[r2]!	@ load [rotated] I[0]
-	vshl.u64	q11,q11,#57		@ compose 0xc2.0 constant
-#ifndef __ARMEB__
-	vrev64.8	q8,q8
-	vrev64.8	q0,q0
-#endif
-	vext.8	q3,q8,q8,#8		@ rotate I[0]
-	blo	.Lodd_tail_v8		@ r3 was less than 32
-	vld1.64	{q9},[r2],r12	@ load [rotated] I[1]
-#ifndef __ARMEB__
-	vrev64.8	q9,q9
-#endif
-	vext.8	q7,q9,q9,#8
-	veor	q3,q3,q0		@ I[i]^=Xi
-.byte	0x8e,0x8e,0xa8,0xf2	@ pmull q4,q12,q7		@ H·Ii+1
-	veor	q9,q9,q7		@ Karatsuba pre-processing
-.byte	0x8f,0xce,0xa9,0xf2	@ pmull2 q6,q12,q7
-	b	.Loop_mod2x_v8
-
-.align	4
-.Loop_mod2x_v8:
-	vext.8	q10,q3,q3,#8
-	subs	r3,r3,#32		@ is there more data?
-.byte	0x86,0x0e,0xac,0xf2	@ pmull q0,q14,q3		@ H^2.lo·Xi.lo
-	movlo	r12,#0			@ is it time to zero r12?
-
-.byte	0xa2,0xae,0xaa,0xf2	@ pmull q5,q13,q9
-	veor	q10,q10,q3		@ Karatsuba pre-processing
-.byte	0x87,0x4e,0xad,0xf2	@ pmull2 q2,q14,q3		@ H^2.hi·Xi.hi
-	veor	q0,q0,q4		@ accumulate
-.byte	0xa5,0x2e,0xab,0xf2	@ pmull2 q1,q13,q10		@ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
-	vld1.64	{q8},[r2],r12	@ load [rotated] I[i+2]
-
-	veor	q2,q2,q6
-	moveq	r12,#0			@ is it time to zero r12?
-	veor	q1,q1,q5
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	vld1.64	{q9},[r2],r12	@ load [rotated] I[i+3]
-#ifndef __ARMEB__
-	vrev64.8	q8,q8
-#endif
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase of reduction
-
-#ifndef __ARMEB__
-	vrev64.8	q9,q9
-#endif
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	vext.8	q7,q9,q9,#8
-	vext.8	q3,q8,q8,#8
-	veor	q0,q1,q10
-.byte	0x8e,0x8e,0xa8,0xf2	@ pmull q4,q12,q7		@ H·Ii+1
-	veor	q3,q3,q2		@ accumulate q3 early
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase of reduction
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q3,q3,q10
-	veor	q9,q9,q7		@ Karatsuba pre-processing
-	veor	q3,q3,q0
-.byte	0x8f,0xce,0xa9,0xf2	@ pmull2 q6,q12,q7
-	bhs	.Loop_mod2x_v8		@ there was at least 32 more bytes
-
-	veor	q2,q2,q10
-	vext.8	q3,q8,q8,#8		@ re-construct q3
-	adds	r3,r3,#32		@ re-construct r3
-	veor	q0,q0,q2		@ re-construct q0
-	beq	.Ldone_v8		@ is r3 zero?
-.Lodd_tail_v8:
-	vext.8	q10,q0,q0,#8
-	veor	q3,q3,q0		@ inp^=Xi
-	veor	q9,q8,q10		@ q9 is rotated inp^Xi
-
-.byte	0x86,0x0e,0xa8,0xf2	@ pmull q0,q12,q3		@ H.lo·Xi.lo
-	veor	q9,q9,q3		@ Karatsuba pre-processing
-.byte	0x87,0x4e,0xa9,0xf2	@ pmull2 q2,q12,q3		@ H.hi·Xi.hi
-.byte	0xa2,0x2e,0xaa,0xf2	@ pmull q1,q13,q9		@ (H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase of reduction
-
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	veor	q0,q1,q10
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase of reduction
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q10,q10,q2
-	veor	q0,q0,q10
-
-.Ldone_v8:
-#ifndef __ARMEB__
-	vrev64.8	q0,q0
-#endif
-	vext.8	q0,q0,q0,#8
-	vst1.64	{q0},[r0]		@ write out Xi
-
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ 32-bit ABI says so
-	bx	lr
-.size	gcm_ghash_v8,.-gcm_ghash_v8
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha1-armv4-large.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha1-armv4-large.S
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha256-armv4.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha256-armv4.S
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha512-armv4.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/sha512-armv4.S
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/vpaes-armv7.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/vpaes-armv7.S
--- a/contrib/boringssl-cmake/linux-arm/crypto/test/trampoline-armv4.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/test/trampoline-armv4.S
@ -1,379 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__arm__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.syntax	unified
-
-.arch	armv7-a
-.fpu	vfp
-
-.text
-
-@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
-@ with |argv|, then saves the callee-saved registers into |state|. It returns
-@ the result of |func|. The |unwind| argument is unused.
-@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
-@                              const uint32_t *argv, size_t argc,
-@                              int unwind);
-.type	abi_test_trampoline, %function
-.globl	abi_test_trampoline
-.hidden	abi_test_trampoline
-.align	4
-abi_test_trampoline:
-	@ Save parameters and all callee-saved registers. For convenience, we
-	@ save r9 on iOS even though it's volatile.
-	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
-	stmdb	sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
-
-	@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
-	@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
-	sub	sp, sp, #28
-
-	@ Every register in AAPCS is either non-volatile or a parameter (except
-	@ r9 on iOS), so this code, by the actual call, loses all its scratch
-	@ registers. First fill in stack parameters while there are registers
-	@ to spare.
-	cmp	r3, #4
-	bls	.Lstack_args_done
-	mov	r4, sp				@ r4 is the output pointer.
-	add	r5, r2, r3, lsl #2	@ Set r5 to the end of argv.
-	add	r2, r2, #16		@ Skip four arguments.
-.Lstack_args_loop:
-	ldr	r6, [r2], #4
-	cmp	r2, r5
-	str	r6, [r4], #4
-	bne	.Lstack_args_loop
-
-.Lstack_args_done:
-	@ Load registers from |r1|.
-	vldmia	r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
-#if defined(__APPLE__)
-	@ r9 is not volatile on iOS.
-	ldmia	r1!, {r4,r5,r6,r7,r8,r10-r11}
-#else
-	ldmia	r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
-#endif
-
-	@ Load register parameters. This uses up our remaining registers, so we
-	@ repurpose lr as scratch space.
-	ldr	r3, [sp, #40]	@ Reload argc.
-	ldr	lr, [sp, #36]		@ .Load argv into lr.
-	cmp	r3, #3
-	bhi	.Larg_r3
-	beq	.Larg_r2
-	cmp	r3, #1
-	bhi	.Larg_r1
-	beq	.Larg_r0
-	b	.Largs_done
-
-.Larg_r3:
-	ldr	r3, [lr, #12]	@ argv[3]
-.Larg_r2:
-	ldr	r2, [lr, #8]	@ argv[2]
-.Larg_r1:
-	ldr	r1, [lr, #4]	@ argv[1]
-.Larg_r0:
-	ldr	r0, [lr]	@ argv[0]
-.Largs_done:
-
-	@ With every other register in use, load the function pointer into lr
-	@ and call the function.
-	ldr	lr, [sp, #28]
-	blx	lr
-
-	@ r1-r3 are free for use again. The trampoline only supports
-	@ single-return functions. Pass r4-r11 to the caller.
-	ldr	r1, [sp, #32]
-	vstmia	r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
-#if defined(__APPLE__)
-	@ r9 is not volatile on iOS.
-	stmia	r1!, {r4,r5,r6,r7,r8,r10-r11}
-#else
-	stmia	r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
-#endif
-
-	@ Unwind the stack and restore registers.
-	add	sp, sp, #44		@ 44 = 28+16
-	ldmia	sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}	@ Skip r0-r3 (see +16 above).
-	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
-
-	bx	lr
-.size	abi_test_trampoline,.-abi_test_trampoline
-.type	abi_test_clobber_r0, %function
-.globl	abi_test_clobber_r0
-.hidden	abi_test_clobber_r0
-.align	4
-abi_test_clobber_r0:
-	mov	r0, #0
-	bx	lr
-.size	abi_test_clobber_r0,.-abi_test_clobber_r0
-.type	abi_test_clobber_r1, %function
-.globl	abi_test_clobber_r1
-.hidden	abi_test_clobber_r1
-.align	4
-abi_test_clobber_r1:
-	mov	r1, #0
-	bx	lr
-.size	abi_test_clobber_r1,.-abi_test_clobber_r1
-.type	abi_test_clobber_r2, %function
-.globl	abi_test_clobber_r2
-.hidden	abi_test_clobber_r2
-.align	4
-abi_test_clobber_r2:
-	mov	r2, #0
-	bx	lr
-.size	abi_test_clobber_r2,.-abi_test_clobber_r2
-.type	abi_test_clobber_r3, %function
-.globl	abi_test_clobber_r3
-.hidden	abi_test_clobber_r3
-.align	4
-abi_test_clobber_r3:
-	mov	r3, #0
-	bx	lr
-.size	abi_test_clobber_r3,.-abi_test_clobber_r3
-.type	abi_test_clobber_r4, %function
-.globl	abi_test_clobber_r4
-.hidden	abi_test_clobber_r4
-.align	4
-abi_test_clobber_r4:
-	mov	r4, #0
-	bx	lr
-.size	abi_test_clobber_r4,.-abi_test_clobber_r4
-.type	abi_test_clobber_r5, %function
-.globl	abi_test_clobber_r5
-.hidden	abi_test_clobber_r5
-.align	4
-abi_test_clobber_r5:
-	mov	r5, #0
-	bx	lr
-.size	abi_test_clobber_r5,.-abi_test_clobber_r5
-.type	abi_test_clobber_r6, %function
-.globl	abi_test_clobber_r6
-.hidden	abi_test_clobber_r6
-.align	4
-abi_test_clobber_r6:
-	mov	r6, #0
-	bx	lr
-.size	abi_test_clobber_r6,.-abi_test_clobber_r6
-.type	abi_test_clobber_r7, %function
-.globl	abi_test_clobber_r7
-.hidden	abi_test_clobber_r7
-.align	4
-abi_test_clobber_r7:
-	mov	r7, #0
-	bx	lr
-.size	abi_test_clobber_r7,.-abi_test_clobber_r7
-.type	abi_test_clobber_r8, %function
-.globl	abi_test_clobber_r8
-.hidden	abi_test_clobber_r8
-.align	4
-abi_test_clobber_r8:
-	mov	r8, #0
-	bx	lr
-.size	abi_test_clobber_r8,.-abi_test_clobber_r8
-.type	abi_test_clobber_r9, %function
-.globl	abi_test_clobber_r9
-.hidden	abi_test_clobber_r9
-.align	4
-abi_test_clobber_r9:
-	mov	r9, #0
-	bx	lr
-.size	abi_test_clobber_r9,.-abi_test_clobber_r9
-.type	abi_test_clobber_r10, %function
-.globl	abi_test_clobber_r10
-.hidden	abi_test_clobber_r10
-.align	4
-abi_test_clobber_r10:
-	mov	r10, #0
-	bx	lr
-.size	abi_test_clobber_r10,.-abi_test_clobber_r10
-.type	abi_test_clobber_r11, %function
-.globl	abi_test_clobber_r11
-.hidden	abi_test_clobber_r11
-.align	4
-abi_test_clobber_r11:
-	mov	r11, #0
-	bx	lr
-.size	abi_test_clobber_r11,.-abi_test_clobber_r11
-.type	abi_test_clobber_r12, %function
-.globl	abi_test_clobber_r12
-.hidden	abi_test_clobber_r12
-.align	4
-abi_test_clobber_r12:
-	mov	r12, #0
-	bx	lr
-.size	abi_test_clobber_r12,.-abi_test_clobber_r12
-.type	abi_test_clobber_d0, %function
-.globl	abi_test_clobber_d0
-.hidden	abi_test_clobber_d0
-.align	4
-abi_test_clobber_d0:
-	mov	r0, #0
-	vmov	s0, r0
-	vmov	s1, r0
-	bx	lr
-.size	abi_test_clobber_d0,.-abi_test_clobber_d0
-.type	abi_test_clobber_d1, %function
-.globl	abi_test_clobber_d1
-.hidden	abi_test_clobber_d1
-.align	4
-abi_test_clobber_d1:
-	mov	r0, #0
-	vmov	s2, r0
-	vmov	s3, r0
-	bx	lr
-.size	abi_test_clobber_d1,.-abi_test_clobber_d1
-.type	abi_test_clobber_d2, %function
-.globl	abi_test_clobber_d2
-.hidden	abi_test_clobber_d2
-.align	4
-abi_test_clobber_d2:
-	mov	r0, #0
-	vmov	s4, r0
-	vmov	s5, r0
-	bx	lr
-.size	abi_test_clobber_d2,.-abi_test_clobber_d2
-.type	abi_test_clobber_d3, %function
-.globl	abi_test_clobber_d3
-.hidden	abi_test_clobber_d3
-.align	4
-abi_test_clobber_d3:
-	mov	r0, #0
-	vmov	s6, r0
-	vmov	s7, r0
-	bx	lr
-.size	abi_test_clobber_d3,.-abi_test_clobber_d3
-.type	abi_test_clobber_d4, %function
-.globl	abi_test_clobber_d4
-.hidden	abi_test_clobber_d4
-.align	4
-abi_test_clobber_d4:
-	mov	r0, #0
-	vmov	s8, r0
-	vmov	s9, r0
-	bx	lr
-.size	abi_test_clobber_d4,.-abi_test_clobber_d4
-.type	abi_test_clobber_d5, %function
-.globl	abi_test_clobber_d5
-.hidden	abi_test_clobber_d5
-.align	4
-abi_test_clobber_d5:
-	mov	r0, #0
-	vmov	s10, r0
-	vmov	s11, r0
-	bx	lr
-.size	abi_test_clobber_d5,.-abi_test_clobber_d5
-.type	abi_test_clobber_d6, %function
-.globl	abi_test_clobber_d6
-.hidden	abi_test_clobber_d6
-.align	4
-abi_test_clobber_d6:
-	mov	r0, #0
-	vmov	s12, r0
-	vmov	s13, r0
-	bx	lr
-.size	abi_test_clobber_d6,.-abi_test_clobber_d6
-.type	abi_test_clobber_d7, %function
-.globl	abi_test_clobber_d7
-.hidden	abi_test_clobber_d7
-.align	4
-abi_test_clobber_d7:
-	mov	r0, #0
-	vmov	s14, r0
-	vmov	s15, r0
-	bx	lr
-.size	abi_test_clobber_d7,.-abi_test_clobber_d7
-.type	abi_test_clobber_d8, %function
-.globl	abi_test_clobber_d8
-.hidden	abi_test_clobber_d8
-.align	4
-abi_test_clobber_d8:
-	mov	r0, #0
-	vmov	s16, r0
-	vmov	s17, r0
-	bx	lr
-.size	abi_test_clobber_d8,.-abi_test_clobber_d8
-.type	abi_test_clobber_d9, %function
-.globl	abi_test_clobber_d9
-.hidden	abi_test_clobber_d9
-.align	4
-abi_test_clobber_d9:
-	mov	r0, #0
-	vmov	s18, r0
-	vmov	s19, r0
-	bx	lr
-.size	abi_test_clobber_d9,.-abi_test_clobber_d9
-.type	abi_test_clobber_d10, %function
-.globl	abi_test_clobber_d10
-.hidden	abi_test_clobber_d10
-.align	4
-abi_test_clobber_d10:
-	mov	r0, #0
-	vmov	s20, r0
-	vmov	s21, r0
-	bx	lr
-.size	abi_test_clobber_d10,.-abi_test_clobber_d10
-.type	abi_test_clobber_d11, %function
-.globl	abi_test_clobber_d11
-.hidden	abi_test_clobber_d11
-.align	4
-abi_test_clobber_d11:
-	mov	r0, #0
-	vmov	s22, r0
-	vmov	s23, r0
-	bx	lr
-.size	abi_test_clobber_d11,.-abi_test_clobber_d11
-.type	abi_test_clobber_d12, %function
-.globl	abi_test_clobber_d12
-.hidden	abi_test_clobber_d12
-.align	4
-abi_test_clobber_d12:
-	mov	r0, #0
-	vmov	s24, r0
-	vmov	s25, r0
-	bx	lr
-.size	abi_test_clobber_d12,.-abi_test_clobber_d12
-.type	abi_test_clobber_d13, %function
-.globl	abi_test_clobber_d13
-.hidden	abi_test_clobber_d13
-.align	4
-abi_test_clobber_d13:
-	mov	r0, #0
-	vmov	s26, r0
-	vmov	s27, r0
-	bx	lr
-.size	abi_test_clobber_d13,.-abi_test_clobber_d13
-.type	abi_test_clobber_d14, %function
-.globl	abi_test_clobber_d14
-.hidden	abi_test_clobber_d14
-.align	4
-abi_test_clobber_d14:
-	mov	r0, #0
-	vmov	s28, r0
-	vmov	s29, r0
-	bx	lr
-.size	abi_test_clobber_d14,.-abi_test_clobber_d14
-.type	abi_test_clobber_d15, %function
-.globl	abi_test_clobber_d15
-.hidden	abi_test_clobber_d15
-.align	4
-abi_test_clobber_d15:
-	mov	r0, #0
-	vmov	s30, r0
-	vmov	s31, r0
-	bx	lr
-.size	abi_test_clobber_d15,.-abi_test_clobber_d15
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
+++ b/contrib/boringssl-cmake/linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
--- a/contrib/boringssl-cmake/linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
+++ b/contrib/boringssl-cmake/linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
@ -1,587 +0,0 @@
-# This file is generated from a similarly-named Perl script in the BoringSSL
-# source tree. Do not edit by hand.
-
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-#endif
-
-#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__)
-.machine	"any"
-
-.abiversion	2
-.text
-
-.globl	gcm_init_p8
-.type	gcm_init_p8,@function
-.align	5
-gcm_init_p8:
-.localentry	gcm_init_p8,0
-
-	li	0,-4096
-	li	8,0x10
-	li	12,-1
-	li	9,0x20
-	or	0,0,0
-	li	10,0x30
-	.long	0x7D202699
-
-	vspltisb	8,-16
-	vspltisb	5,1
-	vaddubm	8,8,8
-	vxor	4,4,4
-	vor	8,8,5
-	vsldoi	8,8,4,15
-	vsldoi	6,4,5,1
-	vaddubm	8,8,8
-	vspltisb	7,7
-	vor	8,8,6
-	vspltb	6,9,0
-	vsl	9,9,5
-	vsrab	6,6,7
-	vand	6,6,8
-	vxor	3,9,6
-
-	vsldoi	9,3,3,8
-	vsldoi	8,4,8,8
-	vsldoi	11,4,9,8
-	vsldoi	10,9,4,8
-
-	.long	0x7D001F99
-	.long	0x7D681F99
-	li	8,0x40
-	.long	0x7D291F99
-	li	9,0x50
-	.long	0x7D4A1F99
-	li	10,0x60
-
-	.long	0x10035CC8
-	.long	0x10234CC8
-	.long	0x104354C8
-
-	.long	0x10E044C8
-
-	vsldoi	5,1,4,8
-	vsldoi	6,4,1,8
-	vxor	0,0,5
-	vxor	2,2,6
-
-	vsldoi	0,0,0,8
-	vxor	0,0,7
-
-	vsldoi	6,0,0,8
-	.long	0x100044C8
-	vxor	6,6,2
-	vxor	16,0,6
-
-	vsldoi	17,16,16,8
-	vsldoi	19,4,17,8
-	vsldoi	18,17,4,8
-
-	.long	0x7E681F99
-	li	8,0x70
-	.long	0x7E291F99
-	li	9,0x80
-	.long	0x7E4A1F99
-	li	10,0x90
-	.long	0x10039CC8
-	.long	0x11B09CC8
-	.long	0x10238CC8
-	.long	0x11D08CC8
-	.long	0x104394C8
-	.long	0x11F094C8
-
-	.long	0x10E044C8
-	.long	0x114D44C8
-
-	vsldoi	5,1,4,8
-	vsldoi	6,4,1,8
-	vsldoi	11,14,4,8
-	vsldoi	9,4,14,8
-	vxor	0,0,5
-	vxor	2,2,6
-	vxor	13,13,11
-	vxor	15,15,9
-
-	vsldoi	0,0,0,8
-	vsldoi	13,13,13,8
-	vxor	0,0,7
-	vxor	13,13,10
-
-	vsldoi	6,0,0,8
-	vsldoi	9,13,13,8
-	.long	0x100044C8
-	.long	0x11AD44C8
-	vxor	6,6,2
-	vxor	9,9,15
-	vxor	0,0,6
-	vxor	13,13,9
-
-	vsldoi	9,0,0,8
-	vsldoi	17,13,13,8
-	vsldoi	11,4,9,8
-	vsldoi	10,9,4,8
-	vsldoi	19,4,17,8
-	vsldoi	18,17,4,8
-
-	.long	0x7D681F99
-	li	8,0xa0
-	.long	0x7D291F99
-	li	9,0xb0
-	.long	0x7D4A1F99
-	li	10,0xc0
-	.long	0x7E681F99
-	.long	0x7E291F99
-	.long	0x7E4A1F99
-
-	or	12,12,12
-	blr	
-.long	0
-.byte	0,12,0x14,0,0,0,2,0
-.long	0
-.size	gcm_init_p8,.-gcm_init_p8
-.globl	gcm_gmult_p8
-.type	gcm_gmult_p8,@function
-.align	5
-gcm_gmult_p8:
-.localentry	gcm_gmult_p8,0
-
-	lis	0,0xfff8
-	li	8,0x10
-	li	12,-1
-	li	9,0x20
-	or	0,0,0
-	li	10,0x30
-	.long	0x7C601E99
-
-	.long	0x7D682699
-	lvsl	12,0,0
-	.long	0x7D292699
-	vspltisb	5,0x07
-	.long	0x7D4A2699
-	vxor	12,12,5
-	.long	0x7D002699
-	vperm	3,3,3,12
-	vxor	4,4,4
-
-	.long	0x10035CC8
-	.long	0x10234CC8
-	.long	0x104354C8
-
-	.long	0x10E044C8
-
-	vsldoi	5,1,4,8
-	vsldoi	6,4,1,8
-	vxor	0,0,5
-	vxor	2,2,6
-
-	vsldoi	0,0,0,8
-	vxor	0,0,7
-
-	vsldoi	6,0,0,8
-	.long	0x100044C8
-	vxor	6,6,2
-	vxor	0,0,6
-
-	vperm	0,0,0,12
-	.long	0x7C001F99
-
-	or	12,12,12
-	blr	
-.long	0
-.byte	0,12,0x14,0,0,0,2,0
-.long	0
-.size	gcm_gmult_p8,.-gcm_gmult_p8
-
-.globl	gcm_ghash_p8
-.type	gcm_ghash_p8,@function
-.align	5
-gcm_ghash_p8:
-.localentry	gcm_ghash_p8,0
-
-	li	0,-4096
-	li	8,0x10
-	li	12,-1
-	li	9,0x20
-	or	0,0,0
-	li	10,0x30
-	.long	0x7C001E99
-
-	.long	0x7D682699
-	li	8,0x40
-	lvsl	12,0,0
-	.long	0x7D292699
-	li	9,0x50
-	vspltisb	5,0x07
-	.long	0x7D4A2699
-	li	10,0x60
-	vxor	12,12,5
-	.long	0x7D002699
-	vperm	0,0,0,12
-	vxor	4,4,4
-
-	cmpldi	6,64
-	bge	.Lgcm_ghash_p8_4x
-
-	.long	0x7C602E99
-	addi	5,5,16
-	subic.	6,6,16
-	vperm	3,3,3,12
-	vxor	3,3,0
-	beq	.Lshort
-
-	.long	0x7E682699
-	li	8,16
-	.long	0x7E292699
-	add	9,5,6
-	.long	0x7E4A2699
-
-
-.align	5
-.Loop_2x:
-	.long	0x7E002E99
-	vperm	16,16,16,12
-
-	subic	6,6,32
-	.long	0x10039CC8
-	.long	0x11B05CC8
-	subfe	0,0,0
-	.long	0x10238CC8
-	.long	0x11D04CC8
-	and	0,0,6
-	.long	0x104394C8
-	.long	0x11F054C8
-	add	5,5,0
-
-	vxor	0,0,13
-	vxor	1,1,14
-
-	.long	0x10E044C8
-
-	vsldoi	5,1,4,8
-	vsldoi	6,4,1,8
-	vxor	2,2,15
-	vxor	0,0,5
-	vxor	2,2,6
-
-	vsldoi	0,0,0,8
-	vxor	0,0,7
-	.long	0x7C682E99
-	addi	5,5,32
-
-	vsldoi	6,0,0,8
-	.long	0x100044C8
-	vperm	3,3,3,12
-	vxor	6,6,2
-	vxor	3,3,6
-	vxor	3,3,0
-	cmpld	9,5
-	bgt	.Loop_2x
-
-	cmplwi	6,0
-	bne	.Leven
-
-.Lshort:
-	.long	0x10035CC8
-	.long	0x10234CC8
-	.long	0x104354C8
-
-	.long	0x10E044C8
-
-	vsldoi	5,1,4,8
-	vsldoi	6,4,1,8
-	vxor	0,0,5
-	vxor	2,2,6
-
-	vsldoi	0,0,0,8
-	vxor	0,0,7
-
-	vsldoi	6,0,0,8
-	.long	0x100044C8
-	vxor	6,6,2
-
-.Leven:
-	vxor	0,0,6
-	vperm	0,0,0,12
-	.long	0x7C001F99
-
-	or	12,12,12
-	blr	
-.long	0
-.byte	0,12,0x14,0,0,0,4,0
-.long	0
-.align	5
-.gcm_ghash_p8_4x:
-.Lgcm_ghash_p8_4x:
-	stdu	1,-256(1)
-	li	10,63
-	li	11,79
-	stvx	20,10,1
-	addi	10,10,32
-	stvx	21,11,1
-	addi	11,11,32
-	stvx	22,10,1
-	addi	10,10,32
-	stvx	23,11,1
-	addi	11,11,32
-	stvx	24,10,1
-	addi	10,10,32
-	stvx	25,11,1
-	addi	11,11,32
-	stvx	26,10,1
-	addi	10,10,32
-	stvx	27,11,1
-	addi	11,11,32
-	stvx	28,10,1
-	addi	10,10,32
-	stvx	29,11,1
-	addi	11,11,32
-	stvx	30,10,1
-	li	10,0x60
-	stvx	31,11,1
-	li	0,-1
-	stw	12,252(1)
-	or	0,0,0
-
-	lvsl	5,0,8
-
-	li	8,0x70
-	.long	0x7E292699
-	li	9,0x80
-	vspltisb	6,8
-
-	li	10,0x90
-	.long	0x7EE82699
-	li	8,0xa0
-	.long	0x7F092699
-	li	9,0xb0
-	.long	0x7F2A2699
-	li	10,0xc0
-	.long	0x7FA82699
-	li	8,0x10
-	.long	0x7FC92699
-	li	9,0x20
-	.long	0x7FEA2699
-	li	10,0x30
-
-	vsldoi	7,4,6,8
-	vaddubm	18,5,7
-	vaddubm	19,6,18
-
-	srdi	6,6,4
-
-	.long	0x7C602E99
-	.long	0x7E082E99
-	subic.	6,6,8
-	.long	0x7EC92E99
-	.long	0x7F8A2E99
-	addi	5,5,0x40
-	vperm	3,3,3,12
-	vperm	16,16,16,12
-	vperm	22,22,22,12
-	vperm	28,28,28,12
-
-	vxor	2,3,0
-
-	.long	0x11B0BCC8
-	.long	0x11D0C4C8
-	.long	0x11F0CCC8
-
-	vperm	11,17,9,18
-	vperm	5,22,28,19
-	vperm	10,17,9,19
-	vperm	6,22,28,18
-	.long	0x12B68CC8
-	.long	0x12855CC8
-	.long	0x137C4CC8
-	.long	0x134654C8
-
-	vxor	21,21,14
-	vxor	20,20,13
-	vxor	27,27,21
-	vxor	26,26,15
-
-	blt	.Ltail_4x
-
-.Loop_4x:
-	.long	0x7C602E99
-	.long	0x7E082E99
-	subic.	6,6,4
-	.long	0x7EC92E99
-	.long	0x7F8A2E99
-	addi	5,5,0x40
-	vperm	16,16,16,12
-	vperm	22,22,22,12
-	vperm	28,28,28,12
-	vperm	3,3,3,12
-
-	.long	0x1002ECC8
-	.long	0x1022F4C8
-	.long	0x1042FCC8
-	.long	0x11B0BCC8
-	.long	0x11D0C4C8
-	.long	0x11F0CCC8
-
-	vxor	0,0,20
-	vxor	1,1,27
-	vxor	2,2,26
-	vperm	5,22,28,19
-	vperm	6,22,28,18
-
-	.long	0x10E044C8
-	.long	0x12855CC8
-	.long	0x134654C8
-
-	vsldoi	5,1,4,8
-	vsldoi	6,4,1,8
-	vxor	0,0,5
-	vxor	2,2,6
-
-	vsldoi	0,0,0,8
-	vxor	0,0,7
-
-	vsldoi	6,0,0,8
-	.long	0x12B68CC8
-	.long	0x137C4CC8
-	.long	0x100044C8
-
-	vxor	20,20,13
-	vxor	26,26,15
-	vxor	2,2,3
-	vxor	21,21,14
-	vxor	2,2,6
-	vxor	27,27,21
-	vxor	2,2,0
-	bge	.Loop_4x
-
-.Ltail_4x:
-	.long	0x1002ECC8
-	.long	0x1022F4C8
-	.long	0x1042FCC8
-
-	vxor	0,0,20
-	vxor	1,1,27
-
-	.long	0x10E044C8
-
-	vsldoi	5,1,4,8
-	vsldoi	6,4,1,8
-	vxor	2,2,26
-	vxor	0,0,5
-	vxor	2,2,6
-
-	vsldoi	0,0,0,8
-	vxor	0,0,7
-
-	vsldoi	6,0,0,8
-	.long	0x100044C8
-	vxor	6,6,2
-	vxor	0,0,6
-
-	addic.	6,6,4
-	beq	.Ldone_4x
-
-	.long	0x7C602E99
-	cmpldi	6,2
-	li	6,-4
-	blt	.Lone
-	.long	0x7E082E99
-	beq	.Ltwo
-
-.Lthree:
-	.long	0x7EC92E99
-	vperm	3,3,3,12
-	vperm	16,16,16,12
-	vperm	22,22,22,12
-
-	vxor	2,3,0
-	vor	29,23,23
-	vor	30, 24, 24
-	vor	31,25,25
-
-	vperm	5,16,22,19
-	vperm	6,16,22,18
-	.long	0x12B08CC8
-	.long	0x13764CC8
-	.long	0x12855CC8
-	.long	0x134654C8
-
-	vxor	27,27,21
-	b	.Ltail_4x
-
-.align	4
-.Ltwo:
-	vperm	3,3,3,12
-	vperm	16,16,16,12
-
-	vxor	2,3,0
-	vperm	5,4,16,19
-	vperm	6,4,16,18
-
-	vsldoi	29,4,17,8
-	vor	30, 17, 17
-	vsldoi	31,17,4,8
-
-	.long	0x12855CC8
-	.long	0x13704CC8
-	.long	0x134654C8
-
-	b	.Ltail_4x
-
-.align	4
-.Lone:
-	vperm	3,3,3,12
-
-	vsldoi	29,4,9,8
-	vor	30, 9, 9
-	vsldoi	31,9,4,8
-
-	vxor	2,3,0
-	vxor	20,20,20
-	vxor	27,27,27
-	vxor	26,26,26
-
-	b	.Ltail_4x
-
-.Ldone_4x:
-	vperm	0,0,0,12
-	.long	0x7C001F99
-
-	li	10,63
-	li	11,79
-	or	12,12,12
-	lvx	20,10,1
-	addi	10,10,32
-	lvx	21,11,1
-	addi	11,11,32
-	lvx	22,10,1
-	addi	10,10,32
-	lvx	23,11,1
-	addi	11,11,32
-	lvx	24,10,1
-	addi	10,10,32
-	lvx	25,11,1
-	addi	11,11,32
-	lvx	26,10,1
-	addi	10,10,32
-	lvx	27,11,1
-	addi	11,11,32
-	lvx	28,10,1
-	addi	10,10,32
-	lvx	29,11,1
-	addi	11,11,32
-	lvx	30,10,1
-	lvx	31,11,1
-	addi	1,1,256
-	blr	
-.long	0
-.byte	0,12,0x04,0,0x80,0,4,0
-.long	0
-.size	gcm_ghash_p8,.-gcm_ghash_p8
-
-.byte	71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif  // !OPENSSL_NO_ASM && __powerpc64__
-.section	.note.GNU-stack,"",@progbits
--- a/contrib/boringssl-cmake/linux-ppc64le/crypto/test/trampoline-ppc.S
+++ b/contrib/boringssl-cmake/linux-ppc64le/crypto/test/trampoline-ppc.S
--- a/contrib/boringssl-cmake/linux-x86/crypto/chacha/chacha-x86.S
+++ b/contrib/boringssl-cmake/linux-x86/crypto/chacha/chacha-x86.S
@ -1,975 +0,0 @@
-# This file is generated from a similarly-named Perl script in the BoringSSL
-# source tree. Do not edit by hand.
-
-#if defined(__i386__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.text
-.globl	ChaCha20_ctr32
-.hidden	ChaCha20_ctr32
-.type	ChaCha20_ctr32,@function
-.align	16
-ChaCha20_ctr32:
-.L_ChaCha20_ctr32_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-	xorl	%eax,%eax
-	cmpl	28(%esp),%eax
-	je	.L000no_data
-	call	.Lpic_point
-.Lpic_point:
-	popl	%eax
-	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
-	testl	$16777216,(%ebp)
-	jz	.L001x86
-	testl	$512,4(%ebp)
-	jz	.L001x86
-	jmp	.Lssse3_shortcut
-.L001x86:
-	movl	32(%esp),%esi
-	movl	36(%esp),%edi
-	subl	$132,%esp
-	movl	(%esi),%eax
-	movl	4(%esi),%ebx
-	movl	8(%esi),%ecx
-	movl	12(%esi),%edx
-	movl	%eax,80(%esp)
-	movl	%ebx,84(%esp)
-	movl	%ecx,88(%esp)
-	movl	%edx,92(%esp)
-	movl	16(%esi),%eax
-	movl	20(%esi),%ebx
-	movl	24(%esi),%ecx
-	movl	28(%esi),%edx
-	movl	%eax,96(%esp)
-	movl	%ebx,100(%esp)
-	movl	%ecx,104(%esp)
-	movl	%edx,108(%esp)
-	movl	(%edi),%eax
-	movl	4(%edi),%ebx
-	movl	8(%edi),%ecx
-	movl	12(%edi),%edx
-	subl	$1,%eax
-	movl	%eax,112(%esp)
-	movl	%ebx,116(%esp)
-	movl	%ecx,120(%esp)
-	movl	%edx,124(%esp)
-	jmp	.L002entry
-.align	16
-.L003outer_loop:
-	movl	%ebx,156(%esp)
-	movl	%eax,152(%esp)
-	movl	%ecx,160(%esp)
-.L002entry:
-	movl	$1634760805,%eax
-	movl	$857760878,4(%esp)
-	movl	$2036477234,8(%esp)
-	movl	$1797285236,12(%esp)
-	movl	84(%esp),%ebx
-	movl	88(%esp),%ebp
-	movl	104(%esp),%ecx
-	movl	108(%esp),%esi
-	movl	116(%esp),%edx
-	movl	120(%esp),%edi
-	movl	%ebx,20(%esp)
-	movl	%ebp,24(%esp)
-	movl	%ecx,40(%esp)
-	movl	%esi,44(%esp)
-	movl	%edx,52(%esp)
-	movl	%edi,56(%esp)
-	movl	92(%esp),%ebx
-	movl	124(%esp),%edi
-	movl	112(%esp),%edx
-	movl	80(%esp),%ebp
-	movl	96(%esp),%ecx
-	movl	100(%esp),%esi
-	addl	$1,%edx
-	movl	%ebx,28(%esp)
-	movl	%edi,60(%esp)
-	movl	%edx,112(%esp)
-	movl	$10,%ebx
-	jmp	.L004loop
-.align	16
-.L004loop:
-	addl	%ebp,%eax
-	movl	%ebx,128(%esp)
-	movl	%ebp,%ebx
-	xorl	%eax,%edx
-	roll	$16,%edx
-	addl	%edx,%ecx
-	xorl	%ecx,%ebx
-	movl	52(%esp),%edi
-	roll	$12,%ebx
-	movl	20(%esp),%ebp
-	addl	%ebx,%eax
-	xorl	%eax,%edx
-	movl	%eax,(%esp)
-	roll	$8,%edx
-	movl	4(%esp),%eax
-	addl	%edx,%ecx
-	movl	%edx,48(%esp)
-	xorl	%ecx,%ebx
-	addl	%ebp,%eax
-	roll	$7,%ebx
-	xorl	%eax,%edi
-	movl	%ecx,32(%esp)
-	roll	$16,%edi
-	movl	%ebx,16(%esp)
-	addl	%edi,%esi
-	movl	40(%esp),%ecx
-	xorl	%esi,%ebp
-	movl	56(%esp),%edx
-	roll	$12,%ebp
-	movl	24(%esp),%ebx
-	addl	%ebp,%eax
-	xorl	%eax,%edi
-	movl	%eax,4(%esp)
-	roll	$8,%edi
-	movl	8(%esp),%eax
-	addl	%edi,%esi
-	movl	%edi,52(%esp)
-	xorl	%esi,%ebp
-	addl	%ebx,%eax
-	roll	$7,%ebp
-	xorl	%eax,%edx
-	movl	%esi,36(%esp)
-	roll	$16,%edx
-	movl	%ebp,20(%esp)
-	addl	%edx,%ecx
-	movl	44(%esp),%esi
-	xorl	%ecx,%ebx
-	movl	60(%esp),%edi
-	roll	$12,%ebx
-	movl	28(%esp),%ebp
-	addl	%ebx,%eax
-	xorl	%eax,%edx
-	movl	%eax,8(%esp)
-	roll	$8,%edx
-	movl	12(%esp),%eax
-	addl	%edx,%ecx
-	movl	%edx,56(%esp)
-	xorl	%ecx,%ebx
-	addl	%ebp,%eax
-	roll	$7,%ebx
-	xorl	%eax,%edi
-	roll	$16,%edi
-	movl	%ebx,24(%esp)
-	addl	%edi,%esi
-	xorl	%esi,%ebp
-	roll	$12,%ebp
-	movl	20(%esp),%ebx
-	addl	%ebp,%eax
-	xorl	%eax,%edi
-	movl	%eax,12(%esp)
-	roll	$8,%edi
-	movl	(%esp),%eax
-	addl	%edi,%esi
-	movl	%edi,%edx
-	xorl	%esi,%ebp
-	addl	%ebx,%eax
-	roll	$7,%ebp
-	xorl	%eax,%edx
-	roll	$16,%edx
-	movl	%ebp,28(%esp)
-	addl	%edx,%ecx
-	xorl	%ecx,%ebx
-	movl	48(%esp),%edi
-	roll	$12,%ebx
-	movl	24(%esp),%ebp
-	addl	%ebx,%eax
-	xorl	%eax,%edx
-	movl	%eax,(%esp)
-	roll	$8,%edx
-	movl	4(%esp),%eax
-	addl	%edx,%ecx
-	movl	%edx,60(%esp)
-	xorl	%ecx,%ebx
-	addl	%ebp,%eax
-	roll	$7,%ebx
-	xorl	%eax,%edi
-	movl	%ecx,40(%esp)
-	roll	$16,%edi
-	movl	%ebx,20(%esp)
-	addl	%edi,%esi
-	movl	32(%esp),%ecx
-	xorl	%esi,%ebp
-	movl	52(%esp),%edx
-	roll	$12,%ebp
-	movl	28(%esp),%ebx
-	addl	%ebp,%eax
-	xorl	%eax,%edi
-	movl	%eax,4(%esp)
-	roll	$8,%edi
-	movl	8(%esp),%eax
-	addl	%edi,%esi
-	movl	%edi,48(%esp)
-	xorl	%esi,%ebp
-	addl	%ebx,%eax
-	roll	$7,%ebp
-	xorl	%eax,%edx
-	movl	%esi,44(%esp)
-	roll	$16,%edx
-	movl	%ebp,24(%esp)
-	addl	%edx,%ecx
-	movl	36(%esp),%esi
-	xorl	%ecx,%ebx
-	movl	56(%esp),%edi
-	roll	$12,%ebx
-	movl	16(%esp),%ebp
-	addl	%ebx,%eax
-	xorl	%eax,%edx
-	movl	%eax,8(%esp)
-	roll	$8,%edx
-	movl	12(%esp),%eax
-	addl	%edx,%ecx
-	movl	%edx,52(%esp)
-	xorl	%ecx,%ebx
-	addl	%ebp,%eax
-	roll	$7,%ebx
-	xorl	%eax,%edi
-	roll	$16,%edi
-	movl	%ebx,28(%esp)
-	addl	%edi,%esi
-	xorl	%esi,%ebp
-	movl	48(%esp),%edx
-	roll	$12,%ebp
-	movl	128(%esp),%ebx
-	addl	%ebp,%eax
-	xorl	%eax,%edi
-	movl	%eax,12(%esp)
-	roll	$8,%edi
-	movl	(%esp),%eax
-	addl	%edi,%esi
-	movl	%edi,56(%esp)
-	xorl	%esi,%ebp
-	roll	$7,%ebp
-	decl	%ebx
-	jnz	.L004loop
-	movl	160(%esp),%ebx
-	addl	$1634760805,%eax
-	addl	80(%esp),%ebp
-	addl	96(%esp),%ecx
-	addl	100(%esp),%esi
-	cmpl	$64,%ebx
-	jb	.L005tail
-	movl	156(%esp),%ebx
-	addl	112(%esp),%edx
-	addl	120(%esp),%edi
-	xorl	(%ebx),%eax
-	xorl	16(%ebx),%ebp
-	movl	%eax,(%esp)
-	movl	152(%esp),%eax
-	xorl	32(%ebx),%ecx
-	xorl	36(%ebx),%esi
-	xorl	48(%ebx),%edx
-	xorl	56(%ebx),%edi
-	movl	%ebp,16(%eax)
-	movl	%ecx,32(%eax)
-	movl	%esi,36(%eax)
-	movl	%edx,48(%eax)
-	movl	%edi,56(%eax)
-	movl	4(%esp),%ebp
-	movl	8(%esp),%ecx
-	movl	12(%esp),%esi
-	movl	20(%esp),%edx
-	movl	24(%esp),%edi
-	addl	$857760878,%ebp
-	addl	$2036477234,%ecx
-	addl	$1797285236,%esi
-	addl	84(%esp),%edx
-	addl	88(%esp),%edi
-	xorl	4(%ebx),%ebp
-	xorl	8(%ebx),%ecx
-	xorl	12(%ebx),%esi
-	xorl	20(%ebx),%edx
-	xorl	24(%ebx),%edi
-	movl	%ebp,4(%eax)
-	movl	%ecx,8(%eax)
-	movl	%esi,12(%eax)
-	movl	%edx,20(%eax)
-	movl	%edi,24(%eax)
-	movl	28(%esp),%ebp
-	movl	40(%esp),%ecx
-	movl	44(%esp),%esi
-	movl	52(%esp),%edx
-	movl	60(%esp),%edi
-	addl	92(%esp),%ebp
-	addl	104(%esp),%ecx
-	addl	108(%esp),%esi
-	addl	116(%esp),%edx
-	addl	124(%esp),%edi
-	xorl	28(%ebx),%ebp
-	xorl	40(%ebx),%ecx
-	xorl	44(%ebx),%esi
-	xorl	52(%ebx),%edx
-	xorl	60(%ebx),%edi
-	leal	64(%ebx),%ebx
-	movl	%ebp,28(%eax)
-	movl	(%esp),%ebp
-	movl	%ecx,40(%eax)
-	movl	160(%esp),%ecx
-	movl	%esi,44(%eax)
-	movl	%edx,52(%eax)
-	movl	%edi,60(%eax)
-	movl	%ebp,(%eax)
-	leal	64(%eax),%eax
-	subl	$64,%ecx
-	jnz	.L003outer_loop
-	jmp	.L006done
-.L005tail:
-	addl	112(%esp),%edx
-	addl	120(%esp),%edi
-	movl	%eax,(%esp)
-	movl	%ebp,16(%esp)
-	movl	%ecx,32(%esp)
-	movl	%esi,36(%esp)
-	movl	%edx,48(%esp)
-	movl	%edi,56(%esp)
-	movl	4(%esp),%ebp
-	movl	8(%esp),%ecx
-	movl	12(%esp),%esi
-	movl	20(%esp),%edx
-	movl	24(%esp),%edi
-	addl	$857760878,%ebp
-	addl	$2036477234,%ecx
-	addl	$1797285236,%esi
-	addl	84(%esp),%edx
-	addl	88(%esp),%edi
-	movl	%ebp,4(%esp)
-	movl	%ecx,8(%esp)
-	movl	%esi,12(%esp)
-	movl	%edx,20(%esp)
-	movl	%edi,24(%esp)
-	movl	28(%esp),%ebp
-	movl	40(%esp),%ecx
-	movl	44(%esp),%esi
-	movl	52(%esp),%edx
-	movl	60(%esp),%edi
-	addl	92(%esp),%ebp
-	addl	104(%esp),%ecx
-	addl	108(%esp),%esi
-	addl	116(%esp),%edx
-	addl	124(%esp),%edi
-	movl	%ebp,28(%esp)
-	movl	156(%esp),%ebp
-	movl	%ecx,40(%esp)
-	movl	152(%esp),%ecx
-	movl	%esi,44(%esp)
-	xorl	%esi,%esi
-	movl	%edx,52(%esp)
-	movl	%edi,60(%esp)
-	xorl	%eax,%eax
-	xorl	%edx,%edx
-.L007tail_loop:
-	movb	(%esi,%ebp,1),%al
-	movb	(%esp,%esi,1),%dl
-	leal	1(%esi),%esi
-	xorb	%dl,%al
-	movb	%al,-1(%ecx,%esi,1)
-	decl	%ebx
-	jnz	.L007tail_loop
-.L006done:
-	addl	$132,%esp
-.L000no_data:
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
-.globl	ChaCha20_ssse3
-.hidden	ChaCha20_ssse3
-.type	ChaCha20_ssse3,@function
-.align	16
-ChaCha20_ssse3:
-.L_ChaCha20_ssse3_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-.Lssse3_shortcut:
-	movl	20(%esp),%edi
-	movl	24(%esp),%esi
-	movl	28(%esp),%ecx
-	movl	32(%esp),%edx
-	movl	36(%esp),%ebx
-	movl	%esp,%ebp
-	subl	$524,%esp
-	andl	$-64,%esp
-	movl	%ebp,512(%esp)
-	leal	.Lssse3_data-.Lpic_point(%eax),%eax
-	movdqu	(%ebx),%xmm3
-	cmpl	$256,%ecx
-	jb	.L0081x
-	movl	%edx,516(%esp)
-	movl	%ebx,520(%esp)
-	subl	$256,%ecx
-	leal	384(%esp),%ebp
-	movdqu	(%edx),%xmm7
-	pshufd	$0,%xmm3,%xmm0
-	pshufd	$85,%xmm3,%xmm1
-	pshufd	$170,%xmm3,%xmm2
-	pshufd	$255,%xmm3,%xmm3
-	paddd	48(%eax),%xmm0
-	pshufd	$0,%xmm7,%xmm4
-	pshufd	$85,%xmm7,%xmm5
-	psubd	64(%eax),%xmm0
-	pshufd	$170,%xmm7,%xmm6
-	pshufd	$255,%xmm7,%xmm7
-	movdqa	%xmm0,64(%ebp)
-	movdqa	%xmm1,80(%ebp)
-	movdqa	%xmm2,96(%ebp)
-	movdqa	%xmm3,112(%ebp)
-	movdqu	16(%edx),%xmm3
-	movdqa	%xmm4,-64(%ebp)
-	movdqa	%xmm5,-48(%ebp)
-	movdqa	%xmm6,-32(%ebp)
-	movdqa	%xmm7,-16(%ebp)
-	movdqa	32(%eax),%xmm7
-	leal	128(%esp),%ebx
-	pshufd	$0,%xmm3,%xmm0
-	pshufd	$85,%xmm3,%xmm1
-	pshufd	$170,%xmm3,%xmm2
-	pshufd	$255,%xmm3,%xmm3
-	pshufd	$0,%xmm7,%xmm4
-	pshufd	$85,%xmm7,%xmm5
-	pshufd	$170,%xmm7,%xmm6
-	pshufd	$255,%xmm7,%xmm7
-	movdqa	%xmm0,(%ebp)
-	movdqa	%xmm1,16(%ebp)
-	movdqa	%xmm2,32(%ebp)
-	movdqa	%xmm3,48(%ebp)
-	movdqa	%xmm4,-128(%ebp)
-	movdqa	%xmm5,-112(%ebp)
-	movdqa	%xmm6,-96(%ebp)
-	movdqa	%xmm7,-80(%ebp)
-	leal	128(%esi),%esi
-	leal	128(%edi),%edi
-	jmp	.L009outer_loop
-.align	16
-.L009outer_loop:
-	movdqa	-112(%ebp),%xmm1
-	movdqa	-96(%ebp),%xmm2
-	movdqa	-80(%ebp),%xmm3
-	movdqa	-48(%ebp),%xmm5
-	movdqa	-32(%ebp),%xmm6
-	movdqa	-16(%ebp),%xmm7
-	movdqa	%xmm1,-112(%ebx)
-	movdqa	%xmm2,-96(%ebx)
-	movdqa	%xmm3,-80(%ebx)
-	movdqa	%xmm5,-48(%ebx)
-	movdqa	%xmm6,-32(%ebx)
-	movdqa	%xmm7,-16(%ebx)
-	movdqa	32(%ebp),%xmm2
-	movdqa	48(%ebp),%xmm3
-	movdqa	64(%ebp),%xmm4
-	movdqa	80(%ebp),%xmm5
-	movdqa	96(%ebp),%xmm6
-	movdqa	112(%ebp),%xmm7
-	paddd	64(%eax),%xmm4
-	movdqa	%xmm2,32(%ebx)
-	movdqa	%xmm3,48(%ebx)
-	movdqa	%xmm4,64(%ebx)
-	movdqa	%xmm5,80(%ebx)
-	movdqa	%xmm6,96(%ebx)
-	movdqa	%xmm7,112(%ebx)
-	movdqa	%xmm4,64(%ebp)
-	movdqa	-128(%ebp),%xmm0
-	movdqa	%xmm4,%xmm6
-	movdqa	-64(%ebp),%xmm3
-	movdqa	(%ebp),%xmm4
-	movdqa	16(%ebp),%xmm5
-	movl	$10,%edx
-	nop
-.align	16
-.L010loop:
-	paddd	%xmm3,%xmm0
-	movdqa	%xmm3,%xmm2
-	pxor	%xmm0,%xmm6
-	pshufb	(%eax),%xmm6
-	paddd	%xmm6,%xmm4
-	pxor	%xmm4,%xmm2
-	movdqa	-48(%ebx),%xmm3
-	movdqa	%xmm2,%xmm1
-	pslld	$12,%xmm2
-	psrld	$20,%xmm1
-	por	%xmm1,%xmm2
-	movdqa	-112(%ebx),%xmm1
-	paddd	%xmm2,%xmm0
-	movdqa	80(%ebx),%xmm7
-	pxor	%xmm0,%xmm6
-	movdqa	%xmm0,-128(%ebx)
-	pshufb	16(%eax),%xmm6
-	paddd	%xmm6,%xmm4
-	movdqa	%xmm6,64(%ebx)
-	pxor	%xmm4,%xmm2
-	paddd	%xmm3,%xmm1
-	movdqa	%xmm2,%xmm0
-	pslld	$7,%xmm2
-	psrld	$25,%xmm0
-	pxor	%xmm1,%xmm7
-	por	%xmm0,%xmm2
-	movdqa	%xmm4,(%ebx)
-	pshufb	(%eax),%xmm7
-	movdqa	%xmm2,-64(%ebx)
-	paddd	%xmm7,%xmm5
-	movdqa	32(%ebx),%xmm4
-	pxor	%xmm5,%xmm3
-	movdqa	-32(%ebx),%xmm2
-	movdqa	%xmm3,%xmm0
-	pslld	$12,%xmm3
-	psrld	$20,%xmm0
-	por	%xmm0,%xmm3
-	movdqa	-96(%ebx),%xmm0
-	paddd	%xmm3,%xmm1
-	movdqa	96(%ebx),%xmm6
-	pxor	%xmm1,%xmm7
-	movdqa	%xmm1,-112(%ebx)
-	pshufb	16(%eax),%xmm7
-	paddd	%xmm7,%xmm5
-	movdqa	%xmm7,80(%ebx)
-	pxor	%xmm5,%xmm3
-	paddd	%xmm2,%xmm0
-	movdqa	%xmm3,%xmm1
-	pslld	$7,%xmm3
-	psrld	$25,%xmm1
-	pxor	%xmm0,%xmm6
-	por	%xmm1,%xmm3
-	movdqa	%xmm5,16(%ebx)
-	pshufb	(%eax),%xmm6
-	movdqa	%xmm3,-48(%ebx)
-	paddd	%xmm6,%xmm4
-	movdqa	48(%ebx),%xmm5
-	pxor	%xmm4,%xmm2
-	movdqa	-16(%ebx),%xmm3
-	movdqa	%xmm2,%xmm1
-	pslld	$12,%xmm2
-	psrld	$20,%xmm1
-	por	%xmm1,%xmm2
-	movdqa	-80(%ebx),%xmm1
-	paddd	%xmm2,%xmm0
-	movdqa	112(%ebx),%xmm7
-	pxor	%xmm0,%xmm6
-	movdqa	%xmm0,-96(%ebx)
-	pshufb	16(%eax),%xmm6
-	paddd	%xmm6,%xmm4
-	movdqa	%xmm6,96(%ebx)
-	pxor	%xmm4,%xmm2
-	paddd	%xmm3,%xmm1
-	movdqa	%xmm2,%xmm0
-	pslld	$7,%xmm2
-	psrld	$25,%xmm0
-	pxor	%xmm1,%xmm7
-	por	%xmm0,%xmm2
-	pshufb	(%eax),%xmm7
-	movdqa	%xmm2,-32(%ebx)
-	paddd	%xmm7,%xmm5
-	pxor	%xmm5,%xmm3
-	movdqa	-48(%ebx),%xmm2
-	movdqa	%xmm3,%xmm0
-	pslld	$12,%xmm3
-	psrld	$20,%xmm0
-	por	%xmm0,%xmm3
-	movdqa	-128(%ebx),%xmm0
-	paddd	%xmm3,%xmm1
-	pxor	%xmm1,%xmm7
-	movdqa	%xmm1,-80(%ebx)
-	pshufb	16(%eax),%xmm7
-	paddd	%xmm7,%xmm5
-	movdqa	%xmm7,%xmm6
-	pxor	%xmm5,%xmm3
-	paddd	%xmm2,%xmm0
-	movdqa	%xmm3,%xmm1
-	pslld	$7,%xmm3
-	psrld	$25,%xmm1
-	pxor	%xmm0,%xmm6
-	por	%xmm1,%xmm3
-	pshufb	(%eax),%xmm6
-	movdqa	%xmm3,-16(%ebx)
-	paddd	%xmm6,%xmm4
-	pxor	%xmm4,%xmm2
-	movdqa	-32(%ebx),%xmm3
-	movdqa	%xmm2,%xmm1
-	pslld	$12,%xmm2
-	psrld	$20,%xmm1
-	por	%xmm1,%xmm2
-	movdqa	-112(%ebx),%xmm1
-	paddd	%xmm2,%xmm0
-	movdqa	64(%ebx),%xmm7
-	pxor	%xmm0,%xmm6
-	movdqa	%xmm0,-128(%ebx)
-	pshufb	16(%eax),%xmm6
-	paddd	%xmm6,%xmm4
-	movdqa	%xmm6,112(%ebx)
-	pxor	%xmm4,%xmm2
-	paddd	%xmm3,%xmm1
-	movdqa	%xmm2,%xmm0
-	pslld	$7,%xmm2
-	psrld	$25,%xmm0
-	pxor	%xmm1,%xmm7
-	por	%xmm0,%xmm2
-	movdqa	%xmm4,32(%ebx)
-	pshufb	(%eax),%xmm7
-	movdqa	%xmm2,-48(%ebx)
-	paddd	%xmm7,%xmm5
-	movdqa	(%ebx),%xmm4
-	pxor	%xmm5,%xmm3
-	movdqa	-16(%ebx),%xmm2
-	movdqa	%xmm3,%xmm0
-	pslld	$12,%xmm3
-	psrld	$20,%xmm0
-	por	%xmm0,%xmm3
-	movdqa	-96(%ebx),%xmm0
-	paddd	%xmm3,%xmm1
-	movdqa	80(%ebx),%xmm6
-	pxor	%xmm1,%xmm7
-	movdqa	%xmm1,-112(%ebx)
-	pshufb	16(%eax),%xmm7
-	paddd	%xmm7,%xmm5
-	movdqa	%xmm7,64(%ebx)
-	pxor	%xmm5,%xmm3
-	paddd	%xmm2,%xmm0
-	movdqa	%xmm3,%xmm1
-	pslld	$7,%xmm3
-	psrld	$25,%xmm1
-	pxor	%xmm0,%xmm6
-	por	%xmm1,%xmm3
-	movdqa	%xmm5,48(%ebx)
-	pshufb	(%eax),%xmm6
-	movdqa	%xmm3,-32(%ebx)
-	paddd	%xmm6,%xmm4
-	movdqa	16(%ebx),%xmm5
-	pxor	%xmm4,%xmm2
-	movdqa	-64(%ebx),%xmm3
-	movdqa	%xmm2,%xmm1
-	pslld	$12,%xmm2
-	psrld	$20,%xmm1
-	por	%xmm1,%xmm2
-	movdqa	-80(%ebx),%xmm1
-	paddd	%xmm2,%xmm0
-	movdqa	96(%ebx),%xmm7
-	pxor	%xmm0,%xmm6
-	movdqa	%xmm0,-96(%ebx)
-	pshufb	16(%eax),%xmm6
-	paddd	%xmm6,%xmm4
-	movdqa	%xmm6,80(%ebx)
-	pxor	%xmm4,%xmm2
-	paddd	%xmm3,%xmm1
-	movdqa	%xmm2,%xmm0
-	pslld	$7,%xmm2
-	psrld	$25,%xmm0
-	pxor	%xmm1,%xmm7
-	por	%xmm0,%xmm2
-	pshufb	(%eax),%xmm7
-	movdqa	%xmm2,-16(%ebx)
-	paddd	%xmm7,%xmm5
-	pxor	%xmm5,%xmm3
-	movdqa	%xmm3,%xmm0
-	pslld	$12,%xmm3
-	psrld	$20,%xmm0
-	por	%xmm0,%xmm3
-	movdqa	-128(%ebx),%xmm0
-	paddd	%xmm3,%xmm1
-	movdqa	64(%ebx),%xmm6
-	pxor	%xmm1,%xmm7
-	movdqa	%xmm1,-80(%ebx)
-	pshufb	16(%eax),%xmm7
-	paddd	%xmm7,%xmm5
-	movdqa	%xmm7,96(%ebx)
-	pxor	%xmm5,%xmm3
-	movdqa	%xmm3,%xmm1
-	pslld	$7,%xmm3
-	psrld	$25,%xmm1
-	por	%xmm1,%xmm3
-	decl	%edx
-	jnz	.L010loop
-	movdqa	%xmm3,-64(%ebx)
-	movdqa	%xmm4,(%ebx)
-	movdqa	%xmm5,16(%ebx)
-	movdqa	%xmm6,64(%ebx)
-	movdqa	%xmm7,96(%ebx)
-	movdqa	-112(%ebx),%xmm1
-	movdqa	-96(%ebx),%xmm2
-	movdqa	-80(%ebx),%xmm3
-	paddd	-128(%ebp),%xmm0
-	paddd	-112(%ebp),%xmm1
-	paddd	-96(%ebp),%xmm2
-	paddd	-80(%ebp),%xmm3
-	movdqa	%xmm0,%xmm6
-	punpckldq	%xmm1,%xmm0
-	movdqa	%xmm2,%xmm7
-	punpckldq	%xmm3,%xmm2
-	punpckhdq	%xmm1,%xmm6
-	punpckhdq	%xmm3,%xmm7
-	movdqa	%xmm0,%xmm1
-	punpcklqdq	%xmm2,%xmm0
-	movdqa	%xmm6,%xmm3
-	punpcklqdq	%xmm7,%xmm6
-	punpckhqdq	%xmm2,%xmm1
-	punpckhqdq	%xmm7,%xmm3
-	movdqu	-128(%esi),%xmm4
-	movdqu	-64(%esi),%xmm5
-	movdqu	(%esi),%xmm2
-	movdqu	64(%esi),%xmm7
-	leal	16(%esi),%esi
-	pxor	%xmm0,%xmm4
-	movdqa	-64(%ebx),%xmm0
-	pxor	%xmm1,%xmm5
-	movdqa	-48(%ebx),%xmm1
-	pxor	%xmm2,%xmm6
-	movdqa	-32(%ebx),%xmm2
-	pxor	%xmm3,%xmm7
-	movdqa	-16(%ebx),%xmm3
-	movdqu	%xmm4,-128(%edi)
-	movdqu	%xmm5,-64(%edi)
-	movdqu	%xmm6,(%edi)
-	movdqu	%xmm7,64(%edi)
-	leal	16(%edi),%edi
-	paddd	-64(%ebp),%xmm0
-	paddd	-48(%ebp),%xmm1
-	paddd	-32(%ebp),%xmm2
-	paddd	-16(%ebp),%xmm3
-	movdqa	%xmm0,%xmm6
-	punpckldq	%xmm1,%xmm0
-	movdqa	%xmm2,%xmm7
-	punpckldq	%xmm3,%xmm2
-	punpckhdq	%xmm1,%xmm6
-	punpckhdq	%xmm3,%xmm7
-	movdqa	%xmm0,%xmm1
-	punpcklqdq	%xmm2,%xmm0
-	movdqa	%xmm6,%xmm3
-	punpcklqdq	%xmm7,%xmm6
-	punpckhqdq	%xmm2,%xmm1
-	punpckhqdq	%xmm7,%xmm3
-	movdqu	-128(%esi),%xmm4
-	movdqu	-64(%esi),%xmm5
-	movdqu	(%esi),%xmm2
-	movdqu	64(%esi),%xmm7
-	leal	16(%esi),%esi
-	pxor	%xmm0,%xmm4
-	movdqa	(%ebx),%xmm0
-	pxor	%xmm1,%xmm5
-	movdqa	16(%ebx),%xmm1
-	pxor	%xmm2,%xmm6
-	movdqa	32(%ebx),%xmm2
-	pxor	%xmm3,%xmm7
-	movdqa	48(%ebx),%xmm3
-	movdqu	%xmm4,-128(%edi)
-	movdqu	%xmm5,-64(%edi)
-	movdqu	%xmm6,(%edi)
-	movdqu	%xmm7,64(%edi)
-	leal	16(%edi),%edi
-	paddd	(%ebp),%xmm0
-	paddd	16(%ebp),%xmm1
-	paddd	32(%ebp),%xmm2
-	paddd	48(%ebp),%xmm3
-	movdqa	%xmm0,%xmm6
-	punpckldq	%xmm1,%xmm0
-	movdqa	%xmm2,%xmm7
-	punpckldq	%xmm3,%xmm2
-	punpckhdq	%xmm1,%xmm6
-	punpckhdq	%xmm3,%xmm7
-	movdqa	%xmm0,%xmm1
-	punpcklqdq	%xmm2,%xmm0
-	movdqa	%xmm6,%xmm3
-	punpcklqdq	%xmm7,%xmm6
-	punpckhqdq	%xmm2,%xmm1
-	punpckhqdq	%xmm7,%xmm3
-	movdqu	-128(%esi),%xmm4
-	movdqu	-64(%esi),%xmm5
-	movdqu	(%esi),%xmm2
-	movdqu	64(%esi),%xmm7
-	leal	16(%esi),%esi
-	pxor	%xmm0,%xmm4
-	movdqa	64(%ebx),%xmm0
-	pxor	%xmm1,%xmm5
-	movdqa	80(%ebx),%xmm1
-	pxor	%xmm2,%xmm6
-	movdqa	96(%ebx),%xmm2
-	pxor	%xmm3,%xmm7
-	movdqa	112(%ebx),%xmm3
-	movdqu	%xmm4,-128(%edi)
-	movdqu	%xmm5,-64(%edi)
-	movdqu	%xmm6,(%edi)
-	movdqu	%xmm7,64(%edi)
-	leal	16(%edi),%edi
-	paddd	64(%ebp),%xmm0
-	paddd	80(%ebp),%xmm1
-	paddd	96(%ebp),%xmm2
-	paddd	112(%ebp),%xmm3
-	movdqa	%xmm0,%xmm6
-	punpckldq	%xmm1,%xmm0
-	movdqa	%xmm2,%xmm7
-	punpckldq	%xmm3,%xmm2
-	punpckhdq	%xmm1,%xmm6
-	punpckhdq	%xmm3,%xmm7
-	movdqa	%xmm0,%xmm1
-	punpcklqdq	%xmm2,%xmm0
-	movdqa	%xmm6,%xmm3
-	punpcklqdq	%xmm7,%xmm6
-	punpckhqdq	%xmm2,%xmm1
-	punpckhqdq	%xmm7,%xmm3
-	movdqu	-128(%esi),%xmm4
-	movdqu	-64(%esi),%xmm5
-	movdqu	(%esi),%xmm2
-	movdqu	64(%esi),%xmm7
-	leal	208(%esi),%esi
-	pxor	%xmm0,%xmm4
-	pxor	%xmm1,%xmm5
-	pxor	%xmm2,%xmm6
-	pxor	%xmm3,%xmm7
-	movdqu	%xmm4,-128(%edi)
-	movdqu	%xmm5,-64(%edi)
-	movdqu	%xmm6,(%edi)
-	movdqu	%xmm7,64(%edi)
-	leal	208(%edi),%edi
-	subl	$256,%ecx
-	jnc	.L009outer_loop
-	addl	$256,%ecx
-	jz	.L011done
-	movl	520(%esp),%ebx
-	leal	-128(%esi),%esi
-	movl	516(%esp),%edx
-	leal	-128(%edi),%edi
-	movd	64(%ebp),%xmm2
-	movdqu	(%ebx),%xmm3
-	paddd	96(%eax),%xmm2
-	pand	112(%eax),%xmm3
-	por	%xmm2,%xmm3
-.L0081x:
-	movdqa	32(%eax),%xmm0
-	movdqu	(%edx),%xmm1
-	movdqu	16(%edx),%xmm2
-	movdqa	(%eax),%xmm6
-	movdqa	16(%eax),%xmm7
-	movl	%ebp,48(%esp)
-	movdqa	%xmm0,(%esp)
-	movdqa	%xmm1,16(%esp)
-	movdqa	%xmm2,32(%esp)
-	movdqa	%xmm3,48(%esp)
-	movl	$10,%edx
-	jmp	.L012loop1x
-.align	16
-.L013outer1x:
-	movdqa	80(%eax),%xmm3
-	movdqa	(%esp),%xmm0
-	movdqa	16(%esp),%xmm1
-	movdqa	32(%esp),%xmm2
-	paddd	48(%esp),%xmm3
-	movl	$10,%edx
-	movdqa	%xmm3,48(%esp)
-	jmp	.L012loop1x
-.align	16
-.L012loop1x:
-	paddd	%xmm1,%xmm0
-	pxor	%xmm0,%xmm3
-.byte	102,15,56,0,222
-	paddd	%xmm3,%xmm2
-	pxor	%xmm2,%xmm1
-	movdqa	%xmm1,%xmm4
-	psrld	$20,%xmm1
-	pslld	$12,%xmm4
-	por	%xmm4,%xmm1
-	paddd	%xmm1,%xmm0
-	pxor	%xmm0,%xmm3
-.byte	102,15,56,0,223
-	paddd	%xmm3,%xmm2
-	pxor	%xmm2,%xmm1
-	movdqa	%xmm1,%xmm4
-	psrld	$25,%xmm1
-	pslld	$7,%xmm4
-	por	%xmm4,%xmm1
-	pshufd	$78,%xmm2,%xmm2
-	pshufd	$57,%xmm1,%xmm1
-	pshufd	$147,%xmm3,%xmm3
-	nop
-	paddd	%xmm1,%xmm0
-	pxor	%xmm0,%xmm3
-.byte	102,15,56,0,222
-	paddd	%xmm3,%xmm2
-	pxor	%xmm2,%xmm1
-	movdqa	%xmm1,%xmm4
-	psrld	$20,%xmm1
-	pslld	$12,%xmm4
-	por	%xmm4,%xmm1
-	paddd	%xmm1,%xmm0
-	pxor	%xmm0,%xmm3
-.byte	102,15,56,0,223
-	paddd	%xmm3,%xmm2
-	pxor	%xmm2,%xmm1
-	movdqa	%xmm1,%xmm4
-	psrld	$25,%xmm1
-	pslld	$7,%xmm4
-	por	%xmm4,%xmm1
-	pshufd	$78,%xmm2,%xmm2
-	pshufd	$147,%xmm1,%xmm1
-	pshufd	$57,%xmm3,%xmm3
-	decl	%edx
-	jnz	.L012loop1x
-	paddd	(%esp),%xmm0
-	paddd	16(%esp),%xmm1
-	paddd	32(%esp),%xmm2
-	paddd	48(%esp),%xmm3
-	cmpl	$64,%ecx
-	jb	.L014tail
-	movdqu	(%esi),%xmm4
-	movdqu	16(%esi),%xmm5
-	pxor	%xmm4,%xmm0
-	movdqu	32(%esi),%xmm4
-	pxor	%xmm5,%xmm1
-	movdqu	48(%esi),%xmm5
-	pxor	%xmm4,%xmm2
-	pxor	%xmm5,%xmm3
-	leal	64(%esi),%esi
-	movdqu	%xmm0,(%edi)
-	movdqu	%xmm1,16(%edi)
-	movdqu	%xmm2,32(%edi)
-	movdqu	%xmm3,48(%edi)
-	leal	64(%edi),%edi
-	subl	$64,%ecx
-	jnz	.L013outer1x
-	jmp	.L011done
-.L014tail:
-	movdqa	%xmm0,(%esp)
-	movdqa	%xmm1,16(%esp)
-	movdqa	%xmm2,32(%esp)
-	movdqa	%xmm3,48(%esp)
-	xorl	%eax,%eax
-	xorl	%edx,%edx
-	xorl	%ebp,%ebp
-.L015tail_loop:
-	movb	(%esp,%ebp,1),%al
-	movb	(%esi,%ebp,1),%dl
-	leal	1(%ebp),%ebp
-	xorb	%dl,%al
-	movb	%al,-1(%edi,%ebp,1)
-	decl	%ecx
-	jnz	.L015tail_loop
-.L011done:
-	movl	512(%esp),%esp
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
-.align	64
-.Lssse3_data:
-.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
-.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
-.long	1634760805,857760878,2036477234,1797285236
-.long	0,1,2,3
-.long	4,4,4,4
-.long	1,0,0,0
-.long	4,0,0,0
-.long	0,-1,-1,-1
-.align	64
-.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
-.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
-.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
-.byte	114,103,62,0
-#endif
-.section	.note.GNU-stack,"",@progbits
--- a/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/aesni-x86.S
+++ b/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/aesni-x86.S
--- a/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/bn-586.S
+++ b/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/bn-586.S
@ -1,997 +0,0 @@
-# This file is generated from a similarly-named Perl script in the BoringSSL
-# source tree. Do not edit by hand.
-
-#if defined(__i386__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.text
-.globl	bn_mul_add_words
-.hidden	bn_mul_add_words
-.type	bn_mul_add_words,@function
-.align	16
-bn_mul_add_words:
-.L_bn_mul_add_words_begin:
-	call	.L000PIC_me_up
-.L000PIC_me_up:
-	popl	%eax
-	leal	OPENSSL_ia32cap_P-.L000PIC_me_up(%eax),%eax
-	btl	$26,(%eax)
-	jnc	.L001maw_non_sse2
-	movl	4(%esp),%eax
-	movl	8(%esp),%edx
-	movl	12(%esp),%ecx
-	movd	16(%esp),%mm0
-	pxor	%mm1,%mm1
-	jmp	.L002maw_sse2_entry
-.align	16
-.L003maw_sse2_unrolled:
-	movd	(%eax),%mm3
-	paddq	%mm3,%mm1
-	movd	(%edx),%mm2
-	pmuludq	%mm0,%mm2
-	movd	4(%edx),%mm4
-	pmuludq	%mm0,%mm4
-	movd	8(%edx),%mm6
-	pmuludq	%mm0,%mm6
-	movd	12(%edx),%mm7
-	pmuludq	%mm0,%mm7
-	paddq	%mm2,%mm1
-	movd	4(%eax),%mm3
-	paddq	%mm4,%mm3
-	movd	8(%eax),%mm5
-	paddq	%mm6,%mm5
-	movd	12(%eax),%mm4
-	paddq	%mm4,%mm7
-	movd	%mm1,(%eax)
-	movd	16(%edx),%mm2
-	pmuludq	%mm0,%mm2
-	psrlq	$32,%mm1
-	movd	20(%edx),%mm4
-	pmuludq	%mm0,%mm4
-	paddq	%mm3,%mm1
-	movd	24(%edx),%mm6
-	pmuludq	%mm0,%mm6
-	movd	%mm1,4(%eax)
-	psrlq	$32,%mm1
-	movd	28(%edx),%mm3
-	addl	$32,%edx
-	pmuludq	%mm0,%mm3
-	paddq	%mm5,%mm1
-	movd	16(%eax),%mm5
-	paddq	%mm5,%mm2
-	movd	%mm1,8(%eax)
-	psrlq	$32,%mm1
-	paddq	%mm7,%mm1
-	movd	20(%eax),%mm5
-	paddq	%mm5,%mm4
-	movd	%mm1,12(%eax)
-	psrlq	$32,%mm1
-	paddq	%mm2,%mm1
-	movd	24(%eax),%mm5
-	paddq	%mm5,%mm6
-	movd	%mm1,16(%eax)
-	psrlq	$32,%mm1
-	paddq	%mm4,%mm1
-	movd	28(%eax),%mm5
-	paddq	%mm5,%mm3
-	movd	%mm1,20(%eax)
-	psrlq	$32,%mm1
-	paddq	%mm6,%mm1
-	movd	%mm1,24(%eax)
-	psrlq	$32,%mm1
-	paddq	%mm3,%mm1
-	movd	%mm1,28(%eax)
-	leal	32(%eax),%eax
-	psrlq	$32,%mm1
-	subl	$8,%ecx
-	jz	.L004maw_sse2_exit
-.L002maw_sse2_entry:
-	testl	$4294967288,%ecx
-	jnz	.L003maw_sse2_unrolled
-.align	4
-.L005maw_sse2_loop:
-	movd	(%edx),%mm2
-	movd	(%eax),%mm3
-	pmuludq	%mm0,%mm2
-	leal	4(%edx),%edx
-	paddq	%mm3,%mm1
-	paddq	%mm2,%mm1
-	movd	%mm1,(%eax)
-	subl	$1,%ecx
-	psrlq	$32,%mm1
-	leal	4(%eax),%eax
-	jnz	.L005maw_sse2_loop
-.L004maw_sse2_exit:
-	movd	%mm1,%eax
-	emms
-	ret
-.align	16
-.L001maw_non_sse2:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-
-	xorl	%esi,%esi
-	movl	20(%esp),%edi
-	movl	28(%esp),%ecx
-	movl	24(%esp),%ebx
-	andl	$4294967288,%ecx
-	movl	32(%esp),%ebp
-	pushl	%ecx
-	jz	.L006maw_finish
-.align	16
-.L007maw_loop:
-
-	movl	(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	(%edi),%eax
-	adcl	$0,%edx
-	movl	%eax,(%edi)
-	movl	%edx,%esi
-
-	movl	4(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	4(%edi),%eax
-	adcl	$0,%edx
-	movl	%eax,4(%edi)
-	movl	%edx,%esi
-
-	movl	8(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	8(%edi),%eax
-	adcl	$0,%edx
-	movl	%eax,8(%edi)
-	movl	%edx,%esi
-
-	movl	12(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	12(%edi),%eax
-	adcl	$0,%edx
-	movl	%eax,12(%edi)
-	movl	%edx,%esi
-
-	movl	16(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	16(%edi),%eax
-	adcl	$0,%edx
-	movl	%eax,16(%edi)
-	movl	%edx,%esi
-
-	movl	20(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	20(%edi),%eax
-	adcl	$0,%edx
-	movl	%eax,20(%edi)
-	movl	%edx,%esi
-
-	movl	24(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	24(%edi),%eax
-	adcl	$0,%edx
-	movl	%eax,24(%edi)
-	movl	%edx,%esi
-
-	movl	28(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	28(%edi),%eax
-	adcl	$0,%edx
-	movl	%eax,28(%edi)
-	movl	%edx,%esi
-
-	subl	$8,%ecx
-	leal	32(%ebx),%ebx
-	leal	32(%edi),%edi
-	jnz	.L007maw_loop
-.L006maw_finish:
-	movl	32(%esp),%ecx
-	andl	$7,%ecx
-	jnz	.L008maw_finish2
-	jmp	.L009maw_end
-.L008maw_finish2:
-
-	movl	(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	(%edi),%eax
-	adcl	$0,%edx
-	decl	%ecx
-	movl	%eax,(%edi)
-	movl	%edx,%esi
-	jz	.L009maw_end
-
-	movl	4(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	4(%edi),%eax
-	adcl	$0,%edx
-	decl	%ecx
-	movl	%eax,4(%edi)
-	movl	%edx,%esi
-	jz	.L009maw_end
-
-	movl	8(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	8(%edi),%eax
-	adcl	$0,%edx
-	decl	%ecx
-	movl	%eax,8(%edi)
-	movl	%edx,%esi
-	jz	.L009maw_end
-
-	movl	12(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	12(%edi),%eax
-	adcl	$0,%edx
-	decl	%ecx
-	movl	%eax,12(%edi)
-	movl	%edx,%esi
-	jz	.L009maw_end
-
-	movl	16(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	16(%edi),%eax
-	adcl	$0,%edx
-	decl	%ecx
-	movl	%eax,16(%edi)
-	movl	%edx,%esi
-	jz	.L009maw_end
-
-	movl	20(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	20(%edi),%eax
-	adcl	$0,%edx
-	decl	%ecx
-	movl	%eax,20(%edi)
-	movl	%edx,%esi
-	jz	.L009maw_end
-
-	movl	24(%ebx),%eax
-	mull	%ebp
-	addl	%esi,%eax
-	adcl	$0,%edx
-	addl	24(%edi),%eax
-	adcl	$0,%edx
-	movl	%eax,24(%edi)
-	movl	%edx,%esi
-.L009maw_end:
-	movl	%esi,%eax
-	popl	%ecx
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	bn_mul_add_words,.-.L_bn_mul_add_words_begin
-.globl	bn_mul_words
-.hidden	bn_mul_words
-.type	bn_mul_words,@function
-.align	16
-bn_mul_words:
-.L_bn_mul_words_begin:
-	call	.L010PIC_me_up
-.L010PIC_me_up:
-	popl	%eax
-	leal	OPENSSL_ia32cap_P-.L010PIC_me_up(%eax),%eax
-	btl	$26,(%eax)
-	jnc	.L011mw_non_sse2
-	movl	4(%esp),%eax
-	movl	8(%esp),%edx
-	movl	12(%esp),%ecx
-	movd	16(%esp),%mm0
-	pxor	%mm1,%mm1
-.align	16
-.L012mw_sse2_loop:
-	movd	(%edx),%mm2
-	pmuludq	%mm0,%mm2
-	leal	4(%edx),%edx
-	paddq	%mm2,%mm1
-	movd	%mm1,(%eax)
-	subl	$1,%ecx
-	psrlq	$32,%mm1
-	leal	4(%eax),%eax
-	jnz	.L012mw_sse2_loop
-	movd	%mm1,%eax
-	emms
-	ret
-.align	16
-.L011mw_non_sse2:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-
-	xorl	%esi,%esi
-	movl	20(%esp),%edi
-	movl	24(%esp),%ebx
-	movl	28(%esp),%ebp
-	movl	32(%esp),%ecx
-	andl	$4294967288,%ebp
-	jz	.L013mw_finish
-.L014mw_loop:
-
-	movl	(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,(%edi)
-	movl	%edx,%esi
-
-	movl	4(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,4(%edi)
-	movl	%edx,%esi
-
-	movl	8(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,8(%edi)
-	movl	%edx,%esi
-
-	movl	12(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,12(%edi)
-	movl	%edx,%esi
-
-	movl	16(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,16(%edi)
-	movl	%edx,%esi
-
-	movl	20(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,20(%edi)
-	movl	%edx,%esi
-
-	movl	24(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,24(%edi)
-	movl	%edx,%esi
-
-	movl	28(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,28(%edi)
-	movl	%edx,%esi
-
-	addl	$32,%ebx
-	addl	$32,%edi
-	subl	$8,%ebp
-	jz	.L013mw_finish
-	jmp	.L014mw_loop
-.L013mw_finish:
-	movl	28(%esp),%ebp
-	andl	$7,%ebp
-	jnz	.L015mw_finish2
-	jmp	.L016mw_end
-.L015mw_finish2:
-
-	movl	(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,(%edi)
-	movl	%edx,%esi
-	decl	%ebp
-	jz	.L016mw_end
-
-	movl	4(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,4(%edi)
-	movl	%edx,%esi
-	decl	%ebp
-	jz	.L016mw_end
-
-	movl	8(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,8(%edi)
-	movl	%edx,%esi
-	decl	%ebp
-	jz	.L016mw_end
-
-	movl	12(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,12(%edi)
-	movl	%edx,%esi
-	decl	%ebp
-	jz	.L016mw_end
-
-	movl	16(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,16(%edi)
-	movl	%edx,%esi
-	decl	%ebp
-	jz	.L016mw_end
-
-	movl	20(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,20(%edi)
-	movl	%edx,%esi
-	decl	%ebp
-	jz	.L016mw_end
-
-	movl	24(%ebx),%eax
-	mull	%ecx
-	addl	%esi,%eax
-	adcl	$0,%edx
-	movl	%eax,24(%edi)
-	movl	%edx,%esi
-.L016mw_end:
-	movl	%esi,%eax
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	bn_mul_words,.-.L_bn_mul_words_begin
-.globl	bn_sqr_words
-.hidden	bn_sqr_words
-.type	bn_sqr_words,@function
-.align	16
-bn_sqr_words:
-.L_bn_sqr_words_begin:
-	call	.L017PIC_me_up
-.L017PIC_me_up:
-	popl	%eax
-	leal	OPENSSL_ia32cap_P-.L017PIC_me_up(%eax),%eax
-	btl	$26,(%eax)
-	jnc	.L018sqr_non_sse2
-	movl	4(%esp),%eax
-	movl	8(%esp),%edx
-	movl	12(%esp),%ecx
-.align	16
-.L019sqr_sse2_loop:
-	movd	(%edx),%mm0
-	pmuludq	%mm0,%mm0
-	leal	4(%edx),%edx
-	movq	%mm0,(%eax)
-	subl	$1,%ecx
-	leal	8(%eax),%eax
-	jnz	.L019sqr_sse2_loop
-	emms
-	ret
-.align	16
-.L018sqr_non_sse2:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-
-	movl	20(%esp),%esi
-	movl	24(%esp),%edi
-	movl	28(%esp),%ebx
-	andl	$4294967288,%ebx
-	jz	.L020sw_finish
-.L021sw_loop:
-
-	movl	(%edi),%eax
-	mull	%eax
-	movl	%eax,(%esi)
-	movl	%edx,4(%esi)
-
-	movl	4(%edi),%eax
-	mull	%eax
-	movl	%eax,8(%esi)
-	movl	%edx,12(%esi)
-
-	movl	8(%edi),%eax
-	mull	%eax
-	movl	%eax,16(%esi)
-	movl	%edx,20(%esi)
-
-	movl	12(%edi),%eax
-	mull	%eax
-	movl	%eax,24(%esi)
-	movl	%edx,28(%esi)
-
-	movl	16(%edi),%eax
-	mull	%eax
-	movl	%eax,32(%esi)
-	movl	%edx,36(%esi)
-
-	movl	20(%edi),%eax
-	mull	%eax
-	movl	%eax,40(%esi)
-	movl	%edx,44(%esi)
-
-	movl	24(%edi),%eax
-	mull	%eax
-	movl	%eax,48(%esi)
-	movl	%edx,52(%esi)
-
-	movl	28(%edi),%eax
-	mull	%eax
-	movl	%eax,56(%esi)
-	movl	%edx,60(%esi)
-
-	addl	$32,%edi
-	addl	$64,%esi
-	subl	$8,%ebx
-	jnz	.L021sw_loop
-.L020sw_finish:
-	movl	28(%esp),%ebx
-	andl	$7,%ebx
-	jz	.L022sw_end
-
-	movl	(%edi),%eax
-	mull	%eax
-	movl	%eax,(%esi)
-	decl	%ebx
-	movl	%edx,4(%esi)
-	jz	.L022sw_end
-
-	movl	4(%edi),%eax
-	mull	%eax
-	movl	%eax,8(%esi)
-	decl	%ebx
-	movl	%edx,12(%esi)
-	jz	.L022sw_end
-
-	movl	8(%edi),%eax
-	mull	%eax
-	movl	%eax,16(%esi)
-	decl	%ebx
-	movl	%edx,20(%esi)
-	jz	.L022sw_end
-
-	movl	12(%edi),%eax
-	mull	%eax
-	movl	%eax,24(%esi)
-	decl	%ebx
-	movl	%edx,28(%esi)
-	jz	.L022sw_end
-
-	movl	16(%edi),%eax
-	mull	%eax
-	movl	%eax,32(%esi)
-	decl	%ebx
-	movl	%edx,36(%esi)
-	jz	.L022sw_end
-
-	movl	20(%edi),%eax
-	mull	%eax
-	movl	%eax,40(%esi)
-	decl	%ebx
-	movl	%edx,44(%esi)
-	jz	.L022sw_end
-
-	movl	24(%edi),%eax
-	mull	%eax
-	movl	%eax,48(%esi)
-	movl	%edx,52(%esi)
-.L022sw_end:
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	bn_sqr_words,.-.L_bn_sqr_words_begin
-.globl	bn_div_words
-.hidden	bn_div_words
-.type	bn_div_words,@function
-.align	16
-bn_div_words:
-.L_bn_div_words_begin:
-	movl	4(%esp),%edx
-	movl	8(%esp),%eax
-	movl	12(%esp),%ecx
-	divl	%ecx
-	ret
-.size	bn_div_words,.-.L_bn_div_words_begin
-.globl	bn_add_words
-.hidden	bn_add_words
-.type	bn_add_words,@function
-.align	16
-bn_add_words:
-.L_bn_add_words_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-
-	movl	20(%esp),%ebx
-	movl	24(%esp),%esi
-	movl	28(%esp),%edi
-	movl	32(%esp),%ebp
-	xorl	%eax,%eax
-	andl	$4294967288,%ebp
-	jz	.L023aw_finish
-.L024aw_loop:
-
-	movl	(%esi),%ecx
-	movl	(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,(%ebx)
-
-	movl	4(%esi),%ecx
-	movl	4(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,4(%ebx)
-
-	movl	8(%esi),%ecx
-	movl	8(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,8(%ebx)
-
-	movl	12(%esi),%ecx
-	movl	12(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,12(%ebx)
-
-	movl	16(%esi),%ecx
-	movl	16(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,16(%ebx)
-
-	movl	20(%esi),%ecx
-	movl	20(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,20(%ebx)
-
-	movl	24(%esi),%ecx
-	movl	24(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,24(%ebx)
-
-	movl	28(%esi),%ecx
-	movl	28(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,28(%ebx)
-
-	addl	$32,%esi
-	addl	$32,%edi
-	addl	$32,%ebx
-	subl	$8,%ebp
-	jnz	.L024aw_loop
-.L023aw_finish:
-	movl	32(%esp),%ebp
-	andl	$7,%ebp
-	jz	.L025aw_end
-
-	movl	(%esi),%ecx
-	movl	(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,(%ebx)
-	jz	.L025aw_end
-
-	movl	4(%esi),%ecx
-	movl	4(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,4(%ebx)
-	jz	.L025aw_end
-
-	movl	8(%esi),%ecx
-	movl	8(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,8(%ebx)
-	jz	.L025aw_end
-
-	movl	12(%esi),%ecx
-	movl	12(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,12(%ebx)
-	jz	.L025aw_end
-
-	movl	16(%esi),%ecx
-	movl	16(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,16(%ebx)
-	jz	.L025aw_end
-
-	movl	20(%esi),%ecx
-	movl	20(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,20(%ebx)
-	jz	.L025aw_end
-
-	movl	24(%esi),%ecx
-	movl	24(%edi),%edx
-	addl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	addl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,24(%ebx)
-.L025aw_end:
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	bn_add_words,.-.L_bn_add_words_begin
-.globl	bn_sub_words
-.hidden	bn_sub_words
-.type	bn_sub_words,@function
-.align	16
-bn_sub_words:
-.L_bn_sub_words_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-
-	movl	20(%esp),%ebx
-	movl	24(%esp),%esi
-	movl	28(%esp),%edi
-	movl	32(%esp),%ebp
-	xorl	%eax,%eax
-	andl	$4294967288,%ebp
-	jz	.L026aw_finish
-.L027aw_loop:
-
-	movl	(%esi),%ecx
-	movl	(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,(%ebx)
-
-	movl	4(%esi),%ecx
-	movl	4(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,4(%ebx)
-
-	movl	8(%esi),%ecx
-	movl	8(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,8(%ebx)
-
-	movl	12(%esi),%ecx
-	movl	12(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,12(%ebx)
-
-	movl	16(%esi),%ecx
-	movl	16(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,16(%ebx)
-
-	movl	20(%esi),%ecx
-	movl	20(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,20(%ebx)
-
-	movl	24(%esi),%ecx
-	movl	24(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,24(%ebx)
-
-	movl	28(%esi),%ecx
-	movl	28(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,28(%ebx)
-
-	addl	$32,%esi
-	addl	$32,%edi
-	addl	$32,%ebx
-	subl	$8,%ebp
-	jnz	.L027aw_loop
-.L026aw_finish:
-	movl	32(%esp),%ebp
-	andl	$7,%ebp
-	jz	.L028aw_end
-
-	movl	(%esi),%ecx
-	movl	(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,(%ebx)
-	jz	.L028aw_end
-
-	movl	4(%esi),%ecx
-	movl	4(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,4(%ebx)
-	jz	.L028aw_end
-
-	movl	8(%esi),%ecx
-	movl	8(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,8(%ebx)
-	jz	.L028aw_end
-
-	movl	12(%esi),%ecx
-	movl	12(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,12(%ebx)
-	jz	.L028aw_end
-
-	movl	16(%esi),%ecx
-	movl	16(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,16(%ebx)
-	jz	.L028aw_end
-
-	movl	20(%esi),%ecx
-	movl	20(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	decl	%ebp
-	movl	%ecx,20(%ebx)
-	jz	.L028aw_end
-
-	movl	24(%esi),%ecx
-	movl	24(%edi),%edx
-	subl	%eax,%ecx
-	movl	$0,%eax
-	adcl	%eax,%eax
-	subl	%edx,%ecx
-	adcl	$0,%eax
-	movl	%ecx,24(%ebx)
-.L028aw_end:
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	bn_sub_words,.-.L_bn_sub_words_begin
-#endif
-.section	.note.GNU-stack,"",@progbits
--- a/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/co-586.S
+++ b/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/co-586.S
--- a/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S
+++ b/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S
@ -1,294 +0,0 @@
-# This file is generated from a similarly-named Perl script in the BoringSSL
-# source tree. Do not edit by hand.
-
-#if defined(__i386__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.text
-.globl	gcm_gmult_ssse3
-.hidden	gcm_gmult_ssse3
-.type	gcm_gmult_ssse3,@function
-.align	16
-gcm_gmult_ssse3:
-.L_gcm_gmult_ssse3_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-	movl	20(%esp),%edi
-	movl	24(%esp),%esi
-	movdqu	(%edi),%xmm0
-	call	.L000pic_point
-.L000pic_point:
-	popl	%eax
-	movdqa	.Lreverse_bytes-.L000pic_point(%eax),%xmm7
-	movdqa	.Llow4_mask-.L000pic_point(%eax),%xmm2
-.byte	102,15,56,0,199
-	movdqa	%xmm2,%xmm1
-	pandn	%xmm0,%xmm1
-	psrld	$4,%xmm1
-	pand	%xmm2,%xmm0
-	pxor	%xmm2,%xmm2
-	pxor	%xmm3,%xmm3
-	movl	$5,%eax
-.L001loop_row_1:
-	movdqa	(%esi),%xmm4
-	leal	16(%esi),%esi
-	movdqa	%xmm2,%xmm6
-.byte	102,15,58,15,243,1
-	movdqa	%xmm6,%xmm3
-	psrldq	$1,%xmm2
-	movdqa	%xmm4,%xmm5
-.byte	102,15,56,0,224
-.byte	102,15,56,0,233
-	pxor	%xmm5,%xmm2
-	movdqa	%xmm4,%xmm5
-	psllq	$60,%xmm5
-	movdqa	%xmm5,%xmm6
-	pslldq	$8,%xmm6
-	pxor	%xmm6,%xmm3
-	psrldq	$8,%xmm5
-	pxor	%xmm5,%xmm2
-	psrlq	$4,%xmm4
-	pxor	%xmm4,%xmm2
-	subl	$1,%eax
-	jnz	.L001loop_row_1
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$5,%xmm3
-	pxor	%xmm3,%xmm2
-	pxor	%xmm3,%xmm3
-	movl	$5,%eax
-.L002loop_row_2:
-	movdqa	(%esi),%xmm4
-	leal	16(%esi),%esi
-	movdqa	%xmm2,%xmm6
-.byte	102,15,58,15,243,1
-	movdqa	%xmm6,%xmm3
-	psrldq	$1,%xmm2
-	movdqa	%xmm4,%xmm5
-.byte	102,15,56,0,224
-.byte	102,15,56,0,233
-	pxor	%xmm5,%xmm2
-	movdqa	%xmm4,%xmm5
-	psllq	$60,%xmm5
-	movdqa	%xmm5,%xmm6
-	pslldq	$8,%xmm6
-	pxor	%xmm6,%xmm3
-	psrldq	$8,%xmm5
-	pxor	%xmm5,%xmm2
-	psrlq	$4,%xmm4
-	pxor	%xmm4,%xmm2
-	subl	$1,%eax
-	jnz	.L002loop_row_2
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$5,%xmm3
-	pxor	%xmm3,%xmm2
-	pxor	%xmm3,%xmm3
-	movl	$6,%eax
-.L003loop_row_3:
-	movdqa	(%esi),%xmm4
-	leal	16(%esi),%esi
-	movdqa	%xmm2,%xmm6
-.byte	102,15,58,15,243,1
-	movdqa	%xmm6,%xmm3
-	psrldq	$1,%xmm2
-	movdqa	%xmm4,%xmm5
-.byte	102,15,56,0,224
-.byte	102,15,56,0,233
-	pxor	%xmm5,%xmm2
-	movdqa	%xmm4,%xmm5
-	psllq	$60,%xmm5
-	movdqa	%xmm5,%xmm6
-	pslldq	$8,%xmm6
-	pxor	%xmm6,%xmm3
-	psrldq	$8,%xmm5
-	pxor	%xmm5,%xmm2
-	psrlq	$4,%xmm4
-	pxor	%xmm4,%xmm2
-	subl	$1,%eax
-	jnz	.L003loop_row_3
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$5,%xmm3
-	pxor	%xmm3,%xmm2
-	pxor	%xmm3,%xmm3
-.byte	102,15,56,0,215
-	movdqu	%xmm2,(%edi)
-	pxor	%xmm0,%xmm0
-	pxor	%xmm1,%xmm1
-	pxor	%xmm2,%xmm2
-	pxor	%xmm3,%xmm3
-	pxor	%xmm4,%xmm4
-	pxor	%xmm5,%xmm5
-	pxor	%xmm6,%xmm6
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin
-.globl	gcm_ghash_ssse3
-.hidden	gcm_ghash_ssse3
-.type	gcm_ghash_ssse3,@function
-.align	16
-gcm_ghash_ssse3:
-.L_gcm_ghash_ssse3_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-	movl	20(%esp),%edi
-	movl	24(%esp),%esi
-	movl	28(%esp),%edx
-	movl	32(%esp),%ecx
-	movdqu	(%edi),%xmm0
-	call	.L004pic_point
-.L004pic_point:
-	popl	%ebx
-	movdqa	.Lreverse_bytes-.L004pic_point(%ebx),%xmm7
-	andl	$-16,%ecx
-.byte	102,15,56,0,199
-	pxor	%xmm3,%xmm3
-.L005loop_ghash:
-	movdqa	.Llow4_mask-.L004pic_point(%ebx),%xmm2
-	movdqu	(%edx),%xmm1
-.byte	102,15,56,0,207
-	pxor	%xmm1,%xmm0
-	movdqa	%xmm2,%xmm1
-	pandn	%xmm0,%xmm1
-	psrld	$4,%xmm1
-	pand	%xmm2,%xmm0
-	pxor	%xmm2,%xmm2
-	movl	$5,%eax
-.L006loop_row_4:
-	movdqa	(%esi),%xmm4
-	leal	16(%esi),%esi
-	movdqa	%xmm2,%xmm6
-.byte	102,15,58,15,243,1
-	movdqa	%xmm6,%xmm3
-	psrldq	$1,%xmm2
-	movdqa	%xmm4,%xmm5
-.byte	102,15,56,0,224
-.byte	102,15,56,0,233
-	pxor	%xmm5,%xmm2
-	movdqa	%xmm4,%xmm5
-	psllq	$60,%xmm5
-	movdqa	%xmm5,%xmm6
-	pslldq	$8,%xmm6
-	pxor	%xmm6,%xmm3
-	psrldq	$8,%xmm5
-	pxor	%xmm5,%xmm2
-	psrlq	$4,%xmm4
-	pxor	%xmm4,%xmm2
-	subl	$1,%eax
-	jnz	.L006loop_row_4
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$5,%xmm3
-	pxor	%xmm3,%xmm2
-	pxor	%xmm3,%xmm3
-	movl	$5,%eax
-.L007loop_row_5:
-	movdqa	(%esi),%xmm4
-	leal	16(%esi),%esi
-	movdqa	%xmm2,%xmm6
-.byte	102,15,58,15,243,1
-	movdqa	%xmm6,%xmm3
-	psrldq	$1,%xmm2
-	movdqa	%xmm4,%xmm5
-.byte	102,15,56,0,224
-.byte	102,15,56,0,233
-	pxor	%xmm5,%xmm2
-	movdqa	%xmm4,%xmm5
-	psllq	$60,%xmm5
-	movdqa	%xmm5,%xmm6
-	pslldq	$8,%xmm6
-	pxor	%xmm6,%xmm3
-	psrldq	$8,%xmm5
-	pxor	%xmm5,%xmm2
-	psrlq	$4,%xmm4
-	pxor	%xmm4,%xmm2
-	subl	$1,%eax
-	jnz	.L007loop_row_5
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$5,%xmm3
-	pxor	%xmm3,%xmm2
-	pxor	%xmm3,%xmm3
-	movl	$6,%eax
-.L008loop_row_6:
-	movdqa	(%esi),%xmm4
-	leal	16(%esi),%esi
-	movdqa	%xmm2,%xmm6
-.byte	102,15,58,15,243,1
-	movdqa	%xmm6,%xmm3
-	psrldq	$1,%xmm2
-	movdqa	%xmm4,%xmm5
-.byte	102,15,56,0,224
-.byte	102,15,56,0,233
-	pxor	%xmm5,%xmm2
-	movdqa	%xmm4,%xmm5
-	psllq	$60,%xmm5
-	movdqa	%xmm5,%xmm6
-	pslldq	$8,%xmm6
-	pxor	%xmm6,%xmm3
-	psrldq	$8,%xmm5
-	pxor	%xmm5,%xmm2
-	psrlq	$4,%xmm4
-	pxor	%xmm4,%xmm2
-	subl	$1,%eax
-	jnz	.L008loop_row_6
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$1,%xmm3
-	pxor	%xmm3,%xmm2
-	psrlq	$5,%xmm3
-	pxor	%xmm3,%xmm2
-	pxor	%xmm3,%xmm3
-	movdqa	%xmm2,%xmm0
-	leal	-256(%esi),%esi
-	leal	16(%edx),%edx
-	subl	$16,%ecx
-	jnz	.L005loop_ghash
-.byte	102,15,56,0,199
-	movdqu	%xmm0,(%edi)
-	pxor	%xmm0,%xmm0
-	pxor	%xmm1,%xmm1
-	pxor	%xmm2,%xmm2
-	pxor	%xmm3,%xmm3
-	pxor	%xmm4,%xmm4
-	pxor	%xmm5,%xmm5
-	pxor	%xmm6,%xmm6
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin
-.align	16
-.Lreverse_bytes:
-.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
-.align	16
-.Llow4_mask:
-.long	252645135,252645135,252645135,252645135
-#endif
-.section	.note.GNU-stack,"",@progbits
--- a/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/ghash-x86.S
+++ b/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/ghash-x86.S
@ -1,330 +0,0 @@
-# This file is generated from a similarly-named Perl script in the BoringSSL
-# source tree. Do not edit by hand.
-
-#if defined(__i386__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.text
-.globl	gcm_init_clmul
-.hidden	gcm_init_clmul
-.type	gcm_init_clmul,@function
-.align	16
-gcm_init_clmul:
-.L_gcm_init_clmul_begin:
-	movl	4(%esp),%edx
-	movl	8(%esp),%eax
-	call	.L000pic
-.L000pic:
-	popl	%ecx
-	leal	.Lbswap-.L000pic(%ecx),%ecx
-	movdqu	(%eax),%xmm2
-	pshufd	$78,%xmm2,%xmm2
-	pshufd	$255,%xmm2,%xmm4
-	movdqa	%xmm2,%xmm3
-	psllq	$1,%xmm2
-	pxor	%xmm5,%xmm5
-	psrlq	$63,%xmm3
-	pcmpgtd	%xmm4,%xmm5
-	pslldq	$8,%xmm3
-	por	%xmm3,%xmm2
-	pand	16(%ecx),%xmm5
-	pxor	%xmm5,%xmm2
-	movdqa	%xmm2,%xmm0
-	movdqa	%xmm0,%xmm1
-	pshufd	$78,%xmm0,%xmm3
-	pshufd	$78,%xmm2,%xmm4
-	pxor	%xmm0,%xmm3
-	pxor	%xmm2,%xmm4
-.byte	102,15,58,68,194,0
-.byte	102,15,58,68,202,17
-.byte	102,15,58,68,220,0
-	xorps	%xmm0,%xmm3
-	xorps	%xmm1,%xmm3
-	movdqa	%xmm3,%xmm4
-	psrldq	$8,%xmm3
-	pslldq	$8,%xmm4
-	pxor	%xmm3,%xmm1
-	pxor	%xmm4,%xmm0
-	movdqa	%xmm0,%xmm4
-	movdqa	%xmm0,%xmm3
-	psllq	$5,%xmm0
-	pxor	%xmm0,%xmm3
-	psllq	$1,%xmm0
-	pxor	%xmm3,%xmm0
-	psllq	$57,%xmm0
-	movdqa	%xmm0,%xmm3
-	pslldq	$8,%xmm0
-	psrldq	$8,%xmm3
-	pxor	%xmm4,%xmm0
-	pxor	%xmm3,%xmm1
-	movdqa	%xmm0,%xmm4
-	psrlq	$1,%xmm0
-	pxor	%xmm4,%xmm1
-	pxor	%xmm0,%xmm4
-	psrlq	$5,%xmm0
-	pxor	%xmm4,%xmm0
-	psrlq	$1,%xmm0
-	pxor	%xmm1,%xmm0
-	pshufd	$78,%xmm2,%xmm3
-	pshufd	$78,%xmm0,%xmm4
-	pxor	%xmm2,%xmm3
-	movdqu	%xmm2,(%edx)
-	pxor	%xmm0,%xmm4
-	movdqu	%xmm0,16(%edx)
-.byte	102,15,58,15,227,8
-	movdqu	%xmm4,32(%edx)
-	ret
-.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
-.globl	gcm_gmult_clmul
-.hidden	gcm_gmult_clmul
-.type	gcm_gmult_clmul,@function
-.align	16
-gcm_gmult_clmul:
-.L_gcm_gmult_clmul_begin:
-	movl	4(%esp),%eax
-	movl	8(%esp),%edx
-	call	.L001pic
-.L001pic:
-	popl	%ecx
-	leal	.Lbswap-.L001pic(%ecx),%ecx
-	movdqu	(%eax),%xmm0
-	movdqa	(%ecx),%xmm5
-	movups	(%edx),%xmm2
-.byte	102,15,56,0,197
-	movups	32(%edx),%xmm4
-	movdqa	%xmm0,%xmm1
-	pshufd	$78,%xmm0,%xmm3
-	pxor	%xmm0,%xmm3
-.byte	102,15,58,68,194,0
-.byte	102,15,58,68,202,17
-.byte	102,15,58,68,220,0
-	xorps	%xmm0,%xmm3
-	xorps	%xmm1,%xmm3
-	movdqa	%xmm3,%xmm4
-	psrldq	$8,%xmm3
-	pslldq	$8,%xmm4
-	pxor	%xmm3,%xmm1
-	pxor	%xmm4,%xmm0
-	movdqa	%xmm0,%xmm4
-	movdqa	%xmm0,%xmm3
-	psllq	$5,%xmm0
-	pxor	%xmm0,%xmm3
-	psllq	$1,%xmm0
-	pxor	%xmm3,%xmm0
-	psllq	$57,%xmm0
-	movdqa	%xmm0,%xmm3
-	pslldq	$8,%xmm0
-	psrldq	$8,%xmm3
-	pxor	%xmm4,%xmm0
-	pxor	%xmm3,%xmm1
-	movdqa	%xmm0,%xmm4
-	psrlq	$1,%xmm0
-	pxor	%xmm4,%xmm1
-	pxor	%xmm0,%xmm4
-	psrlq	$5,%xmm0
-	pxor	%xmm4,%xmm0
-	psrlq	$1,%xmm0
-	pxor	%xmm1,%xmm0
-.byte	102,15,56,0,197
-	movdqu	%xmm0,(%eax)
-	ret
-.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
-.globl	gcm_ghash_clmul
-.hidden	gcm_ghash_clmul
-.type	gcm_ghash_clmul,@function
-.align	16
-gcm_ghash_clmul:
-.L_gcm_ghash_clmul_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-	movl	20(%esp),%eax
-	movl	24(%esp),%edx
-	movl	28(%esp),%esi
-	movl	32(%esp),%ebx
-	call	.L002pic
-.L002pic:
-	popl	%ecx
-	leal	.Lbswap-.L002pic(%ecx),%ecx
-	movdqu	(%eax),%xmm0
-	movdqa	(%ecx),%xmm5
-	movdqu	(%edx),%xmm2
-.byte	102,15,56,0,197
-	subl	$16,%ebx
-	jz	.L003odd_tail
-	movdqu	(%esi),%xmm3
-	movdqu	16(%esi),%xmm6
-.byte	102,15,56,0,221
-.byte	102,15,56,0,245
-	movdqu	32(%edx),%xmm5
-	pxor	%xmm3,%xmm0
-	pshufd	$78,%xmm6,%xmm3
-	movdqa	%xmm6,%xmm7
-	pxor	%xmm6,%xmm3
-	leal	32(%esi),%esi
-.byte	102,15,58,68,242,0
-.byte	102,15,58,68,250,17
-.byte	102,15,58,68,221,0
-	movups	16(%edx),%xmm2
-	nop
-	subl	$32,%ebx
-	jbe	.L004even_tail
-	jmp	.L005mod_loop
-.align	32
-.L005mod_loop:
-	pshufd	$78,%xmm0,%xmm4
-	movdqa	%xmm0,%xmm1
-	pxor	%xmm0,%xmm4
-	nop
-.byte	102,15,58,68,194,0
-.byte	102,15,58,68,202,17
-.byte	102,15,58,68,229,16
-	movups	(%edx),%xmm2
-	xorps	%xmm6,%xmm0
-	movdqa	(%ecx),%xmm5
-	xorps	%xmm7,%xmm1
-	movdqu	(%esi),%xmm7
-	pxor	%xmm0,%xmm3
-	movdqu	16(%esi),%xmm6
-	pxor	%xmm1,%xmm3
-.byte	102,15,56,0,253
-	pxor	%xmm3,%xmm4
-	movdqa	%xmm4,%xmm3
-	psrldq	$8,%xmm4
-	pslldq	$8,%xmm3
-	pxor	%xmm4,%xmm1
-	pxor	%xmm3,%xmm0
-.byte	102,15,56,0,245
-	pxor	%xmm7,%xmm1
-	movdqa	%xmm6,%xmm7
-	movdqa	%xmm0,%xmm4
-	movdqa	%xmm0,%xmm3
-	psllq	$5,%xmm0
-	pxor	%xmm0,%xmm3
-	psllq	$1,%xmm0
-	pxor	%xmm3,%xmm0
-.byte	102,15,58,68,242,0
-	movups	32(%edx),%xmm5
-	psllq	$57,%xmm0
-	movdqa	%xmm0,%xmm3
-	pslldq	$8,%xmm0
-	psrldq	$8,%xmm3
-	pxor	%xmm4,%xmm0
-	pxor	%xmm3,%xmm1
-	pshufd	$78,%xmm7,%xmm3
-	movdqa	%xmm0,%xmm4
-	psrlq	$1,%xmm0
-	pxor	%xmm7,%xmm3
-	pxor	%xmm4,%xmm1
-.byte	102,15,58,68,250,17
-	movups	16(%edx),%xmm2
-	pxor	%xmm0,%xmm4
-	psrlq	$5,%xmm0
-	pxor	%xmm4,%xmm0
-	psrlq	$1,%xmm0
-	pxor	%xmm1,%xmm0
-.byte	102,15,58,68,221,0
-	leal	32(%esi),%esi
-	subl	$32,%ebx
-	ja	.L005mod_loop
-.L004even_tail:
-	pshufd	$78,%xmm0,%xmm4
-	movdqa	%xmm0,%xmm1
-	pxor	%xmm0,%xmm4
-.byte	102,15,58,68,194,0
-.byte	102,15,58,68,202,17
-.byte	102,15,58,68,229,16
-	movdqa	(%ecx),%xmm5
-	xorps	%xmm6,%xmm0
-	xorps	%xmm7,%xmm1
-	pxor	%xmm0,%xmm3
-	pxor	%xmm1,%xmm3
-	pxor	%xmm3,%xmm4
-	movdqa	%xmm4,%xmm3
-	psrldq	$8,%xmm4
-	pslldq	$8,%xmm3
-	pxor	%xmm4,%xmm1
-	pxor	%xmm3,%xmm0
-	movdqa	%xmm0,%xmm4
-	movdqa	%xmm0,%xmm3
-	psllq	$5,%xmm0
-	pxor	%xmm0,%xmm3
-	psllq	$1,%xmm0
-	pxor	%xmm3,%xmm0
-	psllq	$57,%xmm0
-	movdqa	%xmm0,%xmm3
-	pslldq	$8,%xmm0
-	psrldq	$8,%xmm3
-	pxor	%xmm4,%xmm0
-	pxor	%xmm3,%xmm1
-	movdqa	%xmm0,%xmm4
-	psrlq	$1,%xmm0
-	pxor	%xmm4,%xmm1
-	pxor	%xmm0,%xmm4
-	psrlq	$5,%xmm0
-	pxor	%xmm4,%xmm0
-	psrlq	$1,%xmm0
-	pxor	%xmm1,%xmm0
-	testl	%ebx,%ebx
-	jnz	.L006done
-	movups	(%edx),%xmm2
-.L003odd_tail:
-	movdqu	(%esi),%xmm3
-.byte	102,15,56,0,221
-	pxor	%xmm3,%xmm0
-	movdqa	%xmm0,%xmm1
-	pshufd	$78,%xmm0,%xmm3
-	pshufd	$78,%xmm2,%xmm4
-	pxor	%xmm0,%xmm3
-	pxor	%xmm2,%xmm4
-.byte	102,15,58,68,194,0
-.byte	102,15,58,68,202,17
-.byte	102,15,58,68,220,0
-	xorps	%xmm0,%xmm3
-	xorps	%xmm1,%xmm3
-	movdqa	%xmm3,%xmm4
-	psrldq	$8,%xmm3
-	pslldq	$8,%xmm4
-	pxor	%xmm3,%xmm1
-	pxor	%xmm4,%xmm0
-	movdqa	%xmm0,%xmm4
-	movdqa	%xmm0,%xmm3
-	psllq	$5,%xmm0
-	pxor	%xmm0,%xmm3
-	psllq	$1,%xmm0
-	pxor	%xmm3,%xmm0
-	psllq	$57,%xmm0
-	movdqa	%xmm0,%xmm3
-	pslldq	$8,%xmm0
-	psrldq	$8,%xmm3
-	pxor	%xmm4,%xmm0
-	pxor	%xmm3,%xmm1
-	movdqa	%xmm0,%xmm4
-	psrlq	$1,%xmm0
-	pxor	%xmm4,%xmm1
-	pxor	%xmm0,%xmm4
-	psrlq	$5,%xmm0
-	pxor	%xmm4,%xmm0
-	psrlq	$1,%xmm0
-	pxor	%xmm1,%xmm0
-.L006done:
-.byte	102,15,56,0,197
-	movdqu	%xmm0,(%eax)
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
-.align	64
-.Lbswap:
-.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
-.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
-.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
-.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
-.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
-.byte	0
-#endif
-.section	.note.GNU-stack,"",@progbits
--- a/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/md5-586.S
+++ b/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/md5-586.S
@ -1,688 +0,0 @@
-# This file is generated from a similarly-named Perl script in the BoringSSL
-# source tree. Do not edit by hand.
-
-#if defined(__i386__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.text
-.globl	md5_block_asm_data_order
-.hidden	md5_block_asm_data_order
-.type	md5_block_asm_data_order,@function
-.align	16
-md5_block_asm_data_order:
-.L_md5_block_asm_data_order_begin:
-	pushl	%esi
-	pushl	%edi
-	movl	12(%esp),%edi
-	movl	16(%esp),%esi
-	movl	20(%esp),%ecx
-	pushl	%ebp
-	shll	$6,%ecx
-	pushl	%ebx
-	addl	%esi,%ecx
-	subl	$64,%ecx
-	movl	(%edi),%eax
-	pushl	%ecx
-	movl	4(%edi),%ebx
-	movl	8(%edi),%ecx
-	movl	12(%edi),%edx
-.L000start:
-
-
-	movl	%ecx,%edi
-	movl	(%esi),%ebp
-
-	xorl	%edx,%edi
-	andl	%ebx,%edi
-	leal	3614090360(%eax,%ebp,1),%eax
-	xorl	%edx,%edi
-	addl	%edi,%eax
-	movl	%ebx,%edi
-	roll	$7,%eax
-	movl	4(%esi),%ebp
-	addl	%ebx,%eax
-
-	xorl	%ecx,%edi
-	andl	%eax,%edi
-	leal	3905402710(%edx,%ebp,1),%edx
-	xorl	%ecx,%edi
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$12,%edx
-	movl	8(%esi),%ebp
-	addl	%eax,%edx
-
-	xorl	%ebx,%edi
-	andl	%edx,%edi
-	leal	606105819(%ecx,%ebp,1),%ecx
-	xorl	%ebx,%edi
-	addl	%edi,%ecx
-	movl	%edx,%edi
-	roll	$17,%ecx
-	movl	12(%esi),%ebp
-	addl	%edx,%ecx
-
-	xorl	%eax,%edi
-	andl	%ecx,%edi
-	leal	3250441966(%ebx,%ebp,1),%ebx
-	xorl	%eax,%edi
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$22,%ebx
-	movl	16(%esi),%ebp
-	addl	%ecx,%ebx
-
-	xorl	%edx,%edi
-	andl	%ebx,%edi
-	leal	4118548399(%eax,%ebp,1),%eax
-	xorl	%edx,%edi
-	addl	%edi,%eax
-	movl	%ebx,%edi
-	roll	$7,%eax
-	movl	20(%esi),%ebp
-	addl	%ebx,%eax
-
-	xorl	%ecx,%edi
-	andl	%eax,%edi
-	leal	1200080426(%edx,%ebp,1),%edx
-	xorl	%ecx,%edi
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$12,%edx
-	movl	24(%esi),%ebp
-	addl	%eax,%edx
-
-	xorl	%ebx,%edi
-	andl	%edx,%edi
-	leal	2821735955(%ecx,%ebp,1),%ecx
-	xorl	%ebx,%edi
-	addl	%edi,%ecx
-	movl	%edx,%edi
-	roll	$17,%ecx
-	movl	28(%esi),%ebp
-	addl	%edx,%ecx
-
-	xorl	%eax,%edi
-	andl	%ecx,%edi
-	leal	4249261313(%ebx,%ebp,1),%ebx
-	xorl	%eax,%edi
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$22,%ebx
-	movl	32(%esi),%ebp
-	addl	%ecx,%ebx
-
-	xorl	%edx,%edi
-	andl	%ebx,%edi
-	leal	1770035416(%eax,%ebp,1),%eax
-	xorl	%edx,%edi
-	addl	%edi,%eax
-	movl	%ebx,%edi
-	roll	$7,%eax
-	movl	36(%esi),%ebp
-	addl	%ebx,%eax
-
-	xorl	%ecx,%edi
-	andl	%eax,%edi
-	leal	2336552879(%edx,%ebp,1),%edx
-	xorl	%ecx,%edi
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$12,%edx
-	movl	40(%esi),%ebp
-	addl	%eax,%edx
-
-	xorl	%ebx,%edi
-	andl	%edx,%edi
-	leal	4294925233(%ecx,%ebp,1),%ecx
-	xorl	%ebx,%edi
-	addl	%edi,%ecx
-	movl	%edx,%edi
-	roll	$17,%ecx
-	movl	44(%esi),%ebp
-	addl	%edx,%ecx
-
-	xorl	%eax,%edi
-	andl	%ecx,%edi
-	leal	2304563134(%ebx,%ebp,1),%ebx
-	xorl	%eax,%edi
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$22,%ebx
-	movl	48(%esi),%ebp
-	addl	%ecx,%ebx
-
-	xorl	%edx,%edi
-	andl	%ebx,%edi
-	leal	1804603682(%eax,%ebp,1),%eax
-	xorl	%edx,%edi
-	addl	%edi,%eax
-	movl	%ebx,%edi
-	roll	$7,%eax
-	movl	52(%esi),%ebp
-	addl	%ebx,%eax
-
-	xorl	%ecx,%edi
-	andl	%eax,%edi
-	leal	4254626195(%edx,%ebp,1),%edx
-	xorl	%ecx,%edi
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$12,%edx
-	movl	56(%esi),%ebp
-	addl	%eax,%edx
-
-	xorl	%ebx,%edi
-	andl	%edx,%edi
-	leal	2792965006(%ecx,%ebp,1),%ecx
-	xorl	%ebx,%edi
-	addl	%edi,%ecx
-	movl	%edx,%edi
-	roll	$17,%ecx
-	movl	60(%esi),%ebp
-	addl	%edx,%ecx
-
-	xorl	%eax,%edi
-	andl	%ecx,%edi
-	leal	1236535329(%ebx,%ebp,1),%ebx
-	xorl	%eax,%edi
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$22,%ebx
-	movl	4(%esi),%ebp
-	addl	%ecx,%ebx
-
-
-
-	leal	4129170786(%eax,%ebp,1),%eax
-	xorl	%ebx,%edi
-	andl	%edx,%edi
-	movl	24(%esi),%ebp
-	xorl	%ecx,%edi
-	addl	%edi,%eax
-	movl	%ebx,%edi
-	roll	$5,%eax
-	addl	%ebx,%eax
-
-	leal	3225465664(%edx,%ebp,1),%edx
-	xorl	%eax,%edi
-	andl	%ecx,%edi
-	movl	44(%esi),%ebp
-	xorl	%ebx,%edi
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$9,%edx
-	addl	%eax,%edx
-
-	leal	643717713(%ecx,%ebp,1),%ecx
-	xorl	%edx,%edi
-	andl	%ebx,%edi
-	movl	(%esi),%ebp
-	xorl	%eax,%edi
-	addl	%edi,%ecx
-	movl	%edx,%edi
-	roll	$14,%ecx
-	addl	%edx,%ecx
-
-	leal	3921069994(%ebx,%ebp,1),%ebx
-	xorl	%ecx,%edi
-	andl	%eax,%edi
-	movl	20(%esi),%ebp
-	xorl	%edx,%edi
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$20,%ebx
-	addl	%ecx,%ebx
-
-	leal	3593408605(%eax,%ebp,1),%eax
-	xorl	%ebx,%edi
-	andl	%edx,%edi
-	movl	40(%esi),%ebp
-	xorl	%ecx,%edi
-	addl	%edi,%eax
-	movl	%ebx,%edi
-	roll	$5,%eax
-	addl	%ebx,%eax
-
-	leal	38016083(%edx,%ebp,1),%edx
-	xorl	%eax,%edi
-	andl	%ecx,%edi
-	movl	60(%esi),%ebp
-	xorl	%ebx,%edi
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$9,%edx
-	addl	%eax,%edx
-
-	leal	3634488961(%ecx,%ebp,1),%ecx
-	xorl	%edx,%edi
-	andl	%ebx,%edi
-	movl	16(%esi),%ebp
-	xorl	%eax,%edi
-	addl	%edi,%ecx
-	movl	%edx,%edi
-	roll	$14,%ecx
-	addl	%edx,%ecx
-
-	leal	3889429448(%ebx,%ebp,1),%ebx
-	xorl	%ecx,%edi
-	andl	%eax,%edi
-	movl	36(%esi),%ebp
-	xorl	%edx,%edi
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$20,%ebx
-	addl	%ecx,%ebx
-
-	leal	568446438(%eax,%ebp,1),%eax
-	xorl	%ebx,%edi
-	andl	%edx,%edi
-	movl	56(%esi),%ebp
-	xorl	%ecx,%edi
-	addl	%edi,%eax
-	movl	%ebx,%edi
-	roll	$5,%eax
-	addl	%ebx,%eax
-
-	leal	3275163606(%edx,%ebp,1),%edx
-	xorl	%eax,%edi
-	andl	%ecx,%edi
-	movl	12(%esi),%ebp
-	xorl	%ebx,%edi
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$9,%edx
-	addl	%eax,%edx
-
-	leal	4107603335(%ecx,%ebp,1),%ecx
-	xorl	%edx,%edi
-	andl	%ebx,%edi
-	movl	32(%esi),%ebp
-	xorl	%eax,%edi
-	addl	%edi,%ecx
-	movl	%edx,%edi
-	roll	$14,%ecx
-	addl	%edx,%ecx
-
-	leal	1163531501(%ebx,%ebp,1),%ebx
-	xorl	%ecx,%edi
-	andl	%eax,%edi
-	movl	52(%esi),%ebp
-	xorl	%edx,%edi
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$20,%ebx
-	addl	%ecx,%ebx
-
-	leal	2850285829(%eax,%ebp,1),%eax
-	xorl	%ebx,%edi
-	andl	%edx,%edi
-	movl	8(%esi),%ebp
-	xorl	%ecx,%edi
-	addl	%edi,%eax
-	movl	%ebx,%edi
-	roll	$5,%eax
-	addl	%ebx,%eax
-
-	leal	4243563512(%edx,%ebp,1),%edx
-	xorl	%eax,%edi
-	andl	%ecx,%edi
-	movl	28(%esi),%ebp
-	xorl	%ebx,%edi
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$9,%edx
-	addl	%eax,%edx
-
-	leal	1735328473(%ecx,%ebp,1),%ecx
-	xorl	%edx,%edi
-	andl	%ebx,%edi
-	movl	48(%esi),%ebp
-	xorl	%eax,%edi
-	addl	%edi,%ecx
-	movl	%edx,%edi
-	roll	$14,%ecx
-	addl	%edx,%ecx
-
-	leal	2368359562(%ebx,%ebp,1),%ebx
-	xorl	%ecx,%edi
-	andl	%eax,%edi
-	movl	20(%esi),%ebp
-	xorl	%edx,%edi
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$20,%ebx
-	addl	%ecx,%ebx
-
-
-
-	xorl	%edx,%edi
-	xorl	%ebx,%edi
-	leal	4294588738(%eax,%ebp,1),%eax
-	addl	%edi,%eax
-	roll	$4,%eax
-	movl	32(%esi),%ebp
-	movl	%ebx,%edi
-
-	leal	2272392833(%edx,%ebp,1),%edx
-	addl	%ebx,%eax
-	xorl	%ecx,%edi
-	xorl	%eax,%edi
-	movl	44(%esi),%ebp
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$11,%edx
-	addl	%eax,%edx
-
-	xorl	%ebx,%edi
-	xorl	%edx,%edi
-	leal	1839030562(%ecx,%ebp,1),%ecx
-	addl	%edi,%ecx
-	roll	$16,%ecx
-	movl	56(%esi),%ebp
-	movl	%edx,%edi
-
-	leal	4259657740(%ebx,%ebp,1),%ebx
-	addl	%edx,%ecx
-	xorl	%eax,%edi
-	xorl	%ecx,%edi
-	movl	4(%esi),%ebp
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$23,%ebx
-	addl	%ecx,%ebx
-
-	xorl	%edx,%edi
-	xorl	%ebx,%edi
-	leal	2763975236(%eax,%ebp,1),%eax
-	addl	%edi,%eax
-	roll	$4,%eax
-	movl	16(%esi),%ebp
-	movl	%ebx,%edi
-
-	leal	1272893353(%edx,%ebp,1),%edx
-	addl	%ebx,%eax
-	xorl	%ecx,%edi
-	xorl	%eax,%edi
-	movl	28(%esi),%ebp
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$11,%edx
-	addl	%eax,%edx
-
-	xorl	%ebx,%edi
-	xorl	%edx,%edi
-	leal	4139469664(%ecx,%ebp,1),%ecx
-	addl	%edi,%ecx
-	roll	$16,%ecx
-	movl	40(%esi),%ebp
-	movl	%edx,%edi
-
-	leal	3200236656(%ebx,%ebp,1),%ebx
-	addl	%edx,%ecx
-	xorl	%eax,%edi
-	xorl	%ecx,%edi
-	movl	52(%esi),%ebp
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$23,%ebx
-	addl	%ecx,%ebx
-
-	xorl	%edx,%edi
-	xorl	%ebx,%edi
-	leal	681279174(%eax,%ebp,1),%eax
-	addl	%edi,%eax
-	roll	$4,%eax
-	movl	(%esi),%ebp
-	movl	%ebx,%edi
-
-	leal	3936430074(%edx,%ebp,1),%edx
-	addl	%ebx,%eax
-	xorl	%ecx,%edi
-	xorl	%eax,%edi
-	movl	12(%esi),%ebp
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$11,%edx
-	addl	%eax,%edx
-
-	xorl	%ebx,%edi
-	xorl	%edx,%edi
-	leal	3572445317(%ecx,%ebp,1),%ecx
-	addl	%edi,%ecx
-	roll	$16,%ecx
-	movl	24(%esi),%ebp
-	movl	%edx,%edi
-
-	leal	76029189(%ebx,%ebp,1),%ebx
-	addl	%edx,%ecx
-	xorl	%eax,%edi
-	xorl	%ecx,%edi
-	movl	36(%esi),%ebp
-	addl	%edi,%ebx
-	movl	%ecx,%edi
-	roll	$23,%ebx
-	addl	%ecx,%ebx
-
-	xorl	%edx,%edi
-	xorl	%ebx,%edi
-	leal	3654602809(%eax,%ebp,1),%eax
-	addl	%edi,%eax
-	roll	$4,%eax
-	movl	48(%esi),%ebp
-	movl	%ebx,%edi
-
-	leal	3873151461(%edx,%ebp,1),%edx
-	addl	%ebx,%eax
-	xorl	%ecx,%edi
-	xorl	%eax,%edi
-	movl	60(%esi),%ebp
-	addl	%edi,%edx
-	movl	%eax,%edi
-	roll	$11,%edx
-	addl	%eax,%edx
-
-	xorl	%ebx,%edi
-	xorl	%edx,%edi
-	leal	530742520(%ecx,%ebp,1),%ecx
-	addl	%edi,%ecx
-	roll	$16,%ecx
-	movl	8(%esi),%ebp
-	movl	%edx,%edi
-
-	leal	3299628645(%ebx,%ebp,1),%ebx
-	addl	%edx,%ecx
-	xorl	%eax,%edi
-	xorl	%ecx,%edi
-	movl	(%esi),%ebp
-	addl	%edi,%ebx
-	movl	$-1,%edi
-	roll	$23,%ebx
-	addl	%ecx,%ebx
-
-
-
-	xorl	%edx,%edi
-	orl	%ebx,%edi
-	leal	4096336452(%eax,%ebp,1),%eax
-	xorl	%ecx,%edi
-	movl	28(%esi),%ebp
-	addl	%edi,%eax
-	movl	$-1,%edi
-	roll	$6,%eax
-	xorl	%ecx,%edi
-	addl	%ebx,%eax
-
-	orl	%eax,%edi
-	leal	1126891415(%edx,%ebp,1),%edx
-	xorl	%ebx,%edi
-	movl	56(%esi),%ebp
-	addl	%edi,%edx
-	movl	$-1,%edi
-	roll	$10,%edx
-	xorl	%ebx,%edi
-	addl	%eax,%edx
-
-	orl	%edx,%edi
-	leal	2878612391(%ecx,%ebp,1),%ecx
-	xorl	%eax,%edi
-	movl	20(%esi),%ebp
-	addl	%edi,%ecx
-	movl	$-1,%edi
-	roll	$15,%ecx
-	xorl	%eax,%edi
-	addl	%edx,%ecx
-
-	orl	%ecx,%edi
-	leal	4237533241(%ebx,%ebp,1),%ebx
-	xorl	%edx,%edi
-	movl	48(%esi),%ebp
-	addl	%edi,%ebx
-	movl	$-1,%edi
-	roll	$21,%ebx
-	xorl	%edx,%edi
-	addl	%ecx,%ebx
-
-	orl	%ebx,%edi
-	leal	1700485571(%eax,%ebp,1),%eax
-	xorl	%ecx,%edi
-	movl	12(%esi),%ebp
-	addl	%edi,%eax
-	movl	$-1,%edi
-	roll	$6,%eax
-	xorl	%ecx,%edi
-	addl	%ebx,%eax
-
-	orl	%eax,%edi
-	leal	2399980690(%edx,%ebp,1),%edx
-	xorl	%ebx,%edi
-	movl	40(%esi),%ebp
-	addl	%edi,%edx
-	movl	$-1,%edi
-	roll	$10,%edx
-	xorl	%ebx,%edi
-	addl	%eax,%edx
-
-	orl	%edx,%edi
-	leal	4293915773(%ecx,%ebp,1),%ecx
-	xorl	%eax,%edi
-	movl	4(%esi),%ebp
-	addl	%edi,%ecx
-	movl	$-1,%edi
-	roll	$15,%ecx
-	xorl	%eax,%edi
-	addl	%edx,%ecx
-
-	orl	%ecx,%edi
-	leal	2240044497(%ebx,%ebp,1),%ebx
-	xorl	%edx,%edi
-	movl	32(%esi),%ebp
-	addl	%edi,%ebx
-	movl	$-1,%edi
-	roll	$21,%ebx
-	xorl	%edx,%edi
-	addl	%ecx,%ebx
-
-	orl	%ebx,%edi
-	leal	1873313359(%eax,%ebp,1),%eax
-	xorl	%ecx,%edi
-	movl	60(%esi),%ebp
-	addl	%edi,%eax
-	movl	$-1,%edi
-	roll	$6,%eax
-	xorl	%ecx,%edi
-	addl	%ebx,%eax
-
-	orl	%eax,%edi
-	leal	4264355552(%edx,%ebp,1),%edx
-	xorl	%ebx,%edi
-	movl	24(%esi),%ebp
-	addl	%edi,%edx
-	movl	$-1,%edi
-	roll	$10,%edx
-	xorl	%ebx,%edi
-	addl	%eax,%edx
-
-	orl	%edx,%edi
-	leal	2734768916(%ecx,%ebp,1),%ecx
-	xorl	%eax,%edi
-	movl	52(%esi),%ebp
-	addl	%edi,%ecx
-	movl	$-1,%edi
-	roll	$15,%ecx
-	xorl	%eax,%edi
-	addl	%edx,%ecx
-
-	orl	%ecx,%edi
-	leal	1309151649(%ebx,%ebp,1),%ebx
-	xorl	%edx,%edi
-	movl	16(%esi),%ebp
-	addl	%edi,%ebx
-	movl	$-1,%edi
-	roll	$21,%ebx
-	xorl	%edx,%edi
-	addl	%ecx,%ebx
-
-	orl	%ebx,%edi
-	leal	4149444226(%eax,%ebp,1),%eax
-	xorl	%ecx,%edi
-	movl	44(%esi),%ebp
-	addl	%edi,%eax
-	movl	$-1,%edi
-	roll	$6,%eax
-	xorl	%ecx,%edi
-	addl	%ebx,%eax
-
-	orl	%eax,%edi
-	leal	3174756917(%edx,%ebp,1),%edx
-	xorl	%ebx,%edi
-	movl	8(%esi),%ebp
-	addl	%edi,%edx
-	movl	$-1,%edi
-	roll	$10,%edx
-	xorl	%ebx,%edi
-	addl	%eax,%edx
-
-	orl	%edx,%edi
-	leal	718787259(%ecx,%ebp,1),%ecx
-	xorl	%eax,%edi
-	movl	36(%esi),%ebp
-	addl	%edi,%ecx
-	movl	$-1,%edi
-	roll	$15,%ecx
-	xorl	%eax,%edi
-	addl	%edx,%ecx
-
-	orl	%ecx,%edi
-	leal	3951481745(%ebx,%ebp,1),%ebx
-	xorl	%edx,%edi
-	movl	24(%esp),%ebp
-	addl	%edi,%ebx
-	addl	$64,%esi
-	roll	$21,%ebx
-	movl	(%ebp),%edi
-	addl	%ecx,%ebx
-	addl	%edi,%eax
-	movl	4(%ebp),%edi
-	addl	%edi,%ebx
-	movl	8(%ebp),%edi
-	addl	%edi,%ecx
-	movl	12(%ebp),%edi
-	addl	%edi,%edx
-	movl	%eax,(%ebp)
-	movl	%ebx,4(%ebp)
-	movl	(%esp),%edi
-	movl	%ecx,8(%ebp)
-	movl	%edx,12(%ebp)
-	cmpl	%esi,%edi
-	jae	.L000start
-	popl	%eax
-	popl	%ebx
-	popl	%ebp
-	popl	%edi
-	popl	%esi
-	ret
-.size	md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
-#endif
-.section	.note.GNU-stack,"",@progbits
--- a/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/sha1-586.S
+++ b/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/sha1-586.S
--- a/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/sha256-586.S
+++ b/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/sha256-586.S
--- a/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/sha512-586.S
+++ b/contrib/boringssl-cmake/linux-x86/crypto/fipsmodule/sha512-586.S
--- a/Show More
+++ b/Show More
				`@ -1 +0,0 @@`
				`Subproject commit aa6d2f865a2eab01cf94f197e11e36b6de47b5b4`