[solve config]

2024-11-10 01:25:21 +00:00 · 2024-04-28 19:20:57 +08:00 · 2024-04-28 19:20:57 +08:00 · 73c6d09eff
commit 73c6d09eff
parent b15141f1b1 5139ef7b91
3557 changed files with 151037 additions and 296701 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@ -5,128 +5,126 @@
 # a) the new check is not controversial (this includes many checks in readability-* and google-*) or
 # b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).

-# TODO: Once clang(-tidy) 17 is the minimum, we can convert this list to YAML
-# See https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/ReleaseNotes.html#improvements-to-clang-tidy
+HeaderFilterRegex: '^.*/(base|src|programs|utils)/.*(h|hpp)$'

-# TODO Let clang-tidy check headers in further directories
-#      --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$'
-HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$'
+Checks: [
+  '*',

-Checks: '*,
-    -abseil-*,
+  '-abseil-*',

-    -altera-*,
+  '-altera-*',

-    -android-*,
+  '-android-*',

-    -bugprone-assignment-in-if-condition,
-    -bugprone-branch-clone,
-    -bugprone-easily-swappable-parameters,
-    -bugprone-exception-escape,
-    -bugprone-implicit-widening-of-multiplication-result,
-    -bugprone-narrowing-conversions,
-    -bugprone-not-null-terminated-result,
-    -bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
-    -bugprone-unchecked-optional-access,
+  '-bugprone-assignment-in-if-condition',
+  '-bugprone-branch-clone',
+  '-bugprone-easily-swappable-parameters',
+  '-bugprone-exception-escape',
+  '-bugprone-forward-declaration-namespace',
+  '-bugprone-implicit-widening-of-multiplication-result',
+  '-bugprone-narrowing-conversions',
+  '-bugprone-not-null-terminated-result',
+  '-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
+  '-bugprone-unchecked-optional-access',

-    -cert-dcl16-c,
-    -cert-dcl37-c,
-    -cert-dcl51-cpp,
-    -cert-err58-cpp,
-    -cert-msc32-c,
-    -cert-msc51-cpp,
-    -cert-oop54-cpp,
-    -cert-oop57-cpp,
+  '-cert-dcl16-c',
+  '-cert-dcl37-c',
+  '-cert-dcl51-cpp',
+  '-cert-err58-cpp',
+  '-cert-msc32-c',
+  '-cert-msc51-cpp',
+  '-cert-oop54-cpp',
+  '-cert-oop57-cpp',

-    -clang-analyzer-unix.Malloc,
+  '-clang-analyzer-optin.performance.Padding',

-    -cppcoreguidelines-*, # impractical in a codebase as large as ClickHouse, also slow
+  '-clang-analyzer-unix.Malloc',

-    -darwin-*,
+  '-cppcoreguidelines-*', # impractical in a codebase as large as ClickHouse, also slow

-    -fuchsia-*,
+  '-darwin-*',

-    -google-build-using-namespace,
-    -google-readability-braces-around-statements,
-    -google-readability-casting,
-    -google-readability-function-size,
-    -google-readability-namespace-comments,
-    -google-readability-todo,
+  '-fuchsia-*',

-    -hicpp-avoid-c-arrays,
-    -hicpp-avoid-goto,
-    -hicpp-braces-around-statements,
-    -hicpp-explicit-conversions,
-    -hicpp-function-size,
-    -hicpp-member-init,
-    -hicpp-move-const-arg,
-    -hicpp-multiway-paths-covered,
-    -hicpp-named-parameter,
-    -hicpp-no-array-decay,
-    -hicpp-no-assembler,
-    -hicpp-no-malloc,
-    -hicpp-signed-bitwise,
-    -hicpp-special-member-functions,
-    -hicpp-uppercase-literal-suffix,
-    -hicpp-use-auto,
-    -hicpp-use-emplace,
-    -hicpp-vararg,
+  '-google-build-using-namespace',
+  '-google-readability-braces-around-statements',
+  '-google-readability-casting',
+  '-google-readability-function-size',
+  '-google-readability-namespace-comments',
+  '-google-readability-todo',

-    -linuxkernel-*,
+  '-hicpp-avoid-c-arrays',
+  '-hicpp-avoid-goto',
+  '-hicpp-braces-around-statements',
+  '-hicpp-explicit-conversions',
+  '-hicpp-function-size',
+  '-hicpp-member-init',
+  '-hicpp-move-const-arg',
+  '-hicpp-multiway-paths-covered',
+  '-hicpp-named-parameter',
+  '-hicpp-no-array-decay',
+  '-hicpp-no-assembler',
+  '-hicpp-no-malloc',
+  '-hicpp-signed-bitwise',
+  '-hicpp-special-member-functions',
+  '-hicpp-uppercase-literal-suffix',
+  '-hicpp-use-auto',
+  '-hicpp-use-emplace',
+  '-hicpp-vararg',

-    -llvm-*,
+  '-linuxkernel-*',

-    -llvmlibc-*,
+  '-llvm-*',

-    -openmp-*,
+  '-llvmlibc-*',

-    -misc-const-correctness,
-    -misc-include-cleaner, # useful but far too many occurrences
-    -misc-no-recursion,
-    -misc-non-private-member-variables-in-classes,
-    -misc-confusable-identifiers, # useful but slooow
-    -misc-use-anonymous-namespace,
+  '-openmp-*',

-    -modernize-avoid-c-arrays,
-    -modernize-concat-nested-namespaces,
-    -modernize-macro-to-enum,
-    -modernize-pass-by-value,
-    -modernize-return-braced-init-list,
-    -modernize-use-auto,
-    -modernize-use-default-member-init,
-    -modernize-use-emplace,
-    -modernize-use-nodiscard,
-    -modernize-use-override,
-    -modernize-use-trailing-return-type,
+  '-misc-const-correctness',
+  '-misc-include-cleaner', # useful but far too many occurrences
+  '-misc-no-recursion',
+  '-misc-non-private-member-variables-in-classes',
+  '-misc-confusable-identifiers', # useful but slooow
+  '-misc-use-anonymous-namespace',

-    -performance-inefficient-string-concatenation,
-    -performance-no-int-to-ptr,
-    -performance-avoid-endl,
-    -performance-unnecessary-value-param,
+  '-modernize-avoid-c-arrays',
+  '-modernize-concat-nested-namespaces',
+  '-modernize-macro-to-enum',
+  '-modernize-pass-by-value',
+  '-modernize-return-braced-init-list',
+  '-modernize-use-auto',
+  '-modernize-use-default-member-init',
+  '-modernize-use-emplace',
+  '-modernize-use-nodiscard',
+  '-modernize-use-trailing-return-type',

-    -portability-simd-intrinsics,
+  '-performance-inefficient-string-concatenation',
+  '-performance-no-int-to-ptr',
+  '-performance-avoid-endl',
+  '-performance-unnecessary-value-param',

-    -readability-avoid-unconditional-preprocessor-if,
-    -readability-braces-around-statements,
-    -readability-convert-member-functions-to-static,
-    -readability-else-after-return,
-    -readability-function-cognitive-complexity,
-    -readability-function-size,
-    -readability-identifier-length,
-    -readability-identifier-naming, # useful but too slow
-    -readability-implicit-bool-conversion,
-    -readability-isolate-declaration,
-    -readability-magic-numbers,
-    -readability-named-parameter,
-    -readability-redundant-declaration,
-    -readability-simplify-boolean-expr,
-    -readability-static-accessed-through-instance,
-    -readability-suspicious-call-argument,
-    -readability-uppercase-literal-suffix,
-    -readability-use-anyofallof,
+  '-portability-simd-intrinsics',

-    -zircon-*,
-'
+  '-readability-avoid-unconditional-preprocessor-if',
+  '-readability-braces-around-statements',
+  '-readability-convert-member-functions-to-static',
+  '-readability-else-after-return',
+  '-readability-function-cognitive-complexity',
+  '-readability-function-size',
+  '-readability-identifier-length',
+  '-readability-identifier-naming', # useful but too slow
+  '-readability-implicit-bool-conversion',
+  '-readability-isolate-declaration',
+  '-readability-magic-numbers',
+  '-readability-named-parameter',
+  '-readability-redundant-declaration',
+  '-readability-simplify-boolean-expr',
+  '-readability-suspicious-call-argument',
+  '-readability-uppercase-literal-suffix',
+  '-readability-use-anyofallof',
+
+  '-zircon-*'
+]

 WarningsAsErrors: '*'

--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -40,3 +40,45 @@ At a minimum, the following information should be added (but add more as needed)


 > Information about CI checks: https://clickhouse.com/docs/en/development/continuous-integration/
+
+---
+### Modify your CI run:
+**NOTE:** If you merge the PR with modified CI you **MUST KNOW** what you are doing
+**NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step
+
+#### Include tests (required builds will be added automatically):
+- [ ] <!---ci_include_fast--> Fast test
+- [ ] <!---ci_include_integration--> Integration Tests
+- [ ] <!---ci_include_stateless--> Stateless tests
+- [ ] <!---ci_include_stateful--> Stateful tests
+- [ ] <!---ci_include_unit--> Unit tests
+- [ ] <!---ci_include_performance--> Performance tests
+- [ ] <!---ci_include_asan--> All with ASAN
+- [ ] <!---ci_include_tsan--> All with TSAN
+- [ ] <!---ci_include_analyzer--> All with Analyzer
+- [ ] <!---ci_include_KEYWORD--> Add your option here
+
+#### Exclude tests:
+- [ ] <!---ci_exclude_fast--> Fast test
+- [ ] <!---ci_exclude_integration--> Integration Tests
+- [ ] <!---ci_exclude_stateless--> Stateless tests
+- [ ] <!---ci_exclude_stateful--> Stateful tests
+- [ ] <!---ci_exclude_performance--> Performance tests
+- [ ] <!---ci_exclude_asan--> All with ASAN
+- [ ] <!---ci_exclude_tsan--> All with TSAN
+- [ ] <!---ci_exclude_msan--> All with MSAN
+- [ ] <!---ci_exclude_ubsan--> All with UBSAN
+- [ ] <!---ci_exclude_coverage--> All with Coverage
+- [ ] <!---ci_exclude_aarch64--> All with Aarch64
+- [ ] <!---ci_exclude_KEYWORD--> Add your option here
+
+#### Extra options:
+- [ ] <!---do_not_test--> do not test (only style check)
+- [ ] <!---no_merge_commit--> disable merge-commit (no merge from master before tests)
+- [ ] <!---no_ci_cache--> disable CI cache (job reuse)
+
+#### Only specified batches in multi-batch jobs:
+- [ ] <!---batch_0--> 1
+- [ ] <!---batch_1--> 2
+- [ ] <!---batch_2--> 3
+- [ ] <!---batch_3--> 4
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@ -23,6 +23,10 @@ jobs:
          clear-repository: true # to ensure correct digests
          fetch-depth: 0 # to get version
          filter: tree:0
+      - name: Merge sync PR
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 sync_pr.py --merge || :
      - name: Python unit tests
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
@ -55,16 +59,17 @@ jobs:
    uses: ./.github/workflows/reusable_docker.yml
    with:
      data: ${{ needs.RunConfig.outputs.data }}
-  StyleCheck:
-    needs: [RunConfig, BuildDockers]
-    if: ${{ !failure() && !cancelled() }}
-    uses: ./.github/workflows/reusable_test.yml
-    with:
-      test_name: Style check
-      runner_type: style-checker
-      data: ${{ needs.RunConfig.outputs.data }}
-      run_command: |
-          python3 style_check.py --no-push
+  # Tested in MQ
+  # StyleCheck:
+  #   needs: [RunConfig, BuildDockers]
+  #   if: ${{ !failure() && !cancelled() }}
+  #   uses: ./.github/workflows/reusable_test.yml
+  #   with:
+  #     test_name: Style check
+  #     runner_type: style-checker
+  #     data: ${{ needs.RunConfig.outputs.data }}
+  #     run_command: |
+  #         python3 style_check.py --no-push
  CompatibilityCheckX86:
    needs: [RunConfig, BuilderDebRelease]
    if: ${{ !failure() && !cancelled() }}
@ -234,14 +239,15 @@ jobs:
      build_name: binary_riscv64
      data: ${{ needs.RunConfig.outputs.data }}
      checkout_depth: 0
-  BuilderBinS390X:
-    needs: [RunConfig, BuilderDebRelease]
-    if: ${{ !failure() && !cancelled() }}
-    uses: ./.github/workflows/reusable_build.yml
-    with:
-      build_name: binary_s390x
-      data: ${{ needs.RunConfig.outputs.data }}
-      checkout_depth: 0
+  # disabled because s390x refused to build in the migration to OpenSSL
+  # BuilderBinS390X:
+  #   needs: [RunConfig, BuilderDebRelease]
+  #   if: ${{ !failure() && !cancelled() }}
+  #   uses: ./.github/workflows/reusable_build.yml
+  #   with:
+  #     build_name: binary_s390x
+  #     data: ${{ needs.RunConfig.outputs.data }}
+  #     checkout_depth: 0
 ############################################################################################
 ##################################### Docker images  #######################################
 ############################################################################################
@ -292,7 +298,7 @@ jobs:
      - BuilderBinFreeBSD
      - BuilderBinPPC64
      - BuilderBinRISCV64
-      - BuilderBinS390X
+      # - BuilderBinS390X # disabled because s390x refused to build in the migration to OpenSSL
      - BuilderBinAmd64Compat
      - BuilderBinAarch64V80Compat
      - BuilderBinClangTidy
@ -374,7 +380,7 @@ jobs:
    if: ${{ !failure() && !cancelled() }}
    uses: ./.github/workflows/reusable_test.yml
    with:
-      test_name: Stateless tests (release, analyzer, s3, DatabaseReplicated)
+      test_name: Stateless tests (release, old analyzer, s3, DatabaseReplicated)
      runner_type: func-tester
      data: ${{ needs.RunConfig.outputs.data }}
  FunctionalStatelessTestS3Debug:
@ -442,6 +448,14 @@ jobs:
      test_name: Stateless tests (debug)
      runner_type: func-tester
      data: ${{ needs.RunConfig.outputs.data }}
+  FunctionalStatelessTestAsanAzure:
+    needs: [RunConfig, BuilderDebAsan]
+    if: ${{ !failure() && !cancelled() }}
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Stateless tests (azure, asan)
+      runner_type: func-tester
+      data: ${{ needs.RunConfig.outputs.data }}
 ##############################################################################################
 ############################ FUNCTIONAl STATEFUL TESTS #######################################
 ##############################################################################################
@ -592,6 +606,14 @@ jobs:
      test_name: Stress test (tsan)
      runner_type: stress-tester
      data: ${{ needs.RunConfig.outputs.data }}
+  StressTestTsanAzure:
+    needs: [RunConfig, BuilderDebTsan]
+    if: ${{ !failure() && !cancelled() }}
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Stress test (azure, tsan)
+      runner_type: stress-tester
+      data: ${{ needs.RunConfig.outputs.data }}
  StressTestMsan:
    needs: [RunConfig, BuilderDebMsan]
    if: ${{ !failure() && !cancelled() }}
@ -632,7 +654,7 @@ jobs:
    if: ${{ !failure() && !cancelled() }}
    uses: ./.github/workflows/reusable_test.yml
    with:
-      test_name: Integration tests (asan, analyzer)
+      test_name: Integration tests (asan, old analyzer)
      runner_type: stress-tester
      data: ${{ needs.RunConfig.outputs.data }}
  IntegrationTestsTsan:
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@ -45,62 +45,3 @@ jobs:
    with:
      data: "${{ needs.RunConfig.outputs.data }}"
      set_latest: true
-  SonarCloud:
-    runs-on: [self-hosted, builder]
-    env:
-      SONAR_SCANNER_VERSION: 4.8.0.2856
-      SONAR_SERVER_URL: "https://sonarcloud.io"
-      BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
-      CC: clang-17
-      CXX: clang++-17
-    steps:
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          fetch-depth: 0  # Shallow clones should be disabled for a better relevancy of analysis
-          filter: tree:0
-          submodules: true
-      - name: Set up JDK 11
-        uses: actions/setup-java@v1
-        with:
-          java-version: 11
-      - name: Download and set up sonar-scanner
-        env:
-          SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip
-        run: |
-          mkdir -p "$HOME/.sonar"
-          curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}"
-          unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/"
-          echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH"
-      - name: Download and set up build-wrapper
-        env:
-          BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip
-        run: |
-          curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}"
-          unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/"
-          echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH"
-      - name: Set Up Build Tools
-        run: |
-          sudo apt-get update
-          sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm
-          sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
-      - name: Run build-wrapper
-        run: |
-          mkdir build
-          cd build
-          cmake ..
-          cd ..
-          build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/
-      - name: Run sonar-scanner
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
-        run: |
-          sonar-scanner \
-            --define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \
-            --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
-            --define sonar.projectKey="ClickHouse_ClickHouse" \
-            --define sonar.organization="clickhouse-java" \
-            --define sonar.cfamily.cpp23.enabled=true \
-            --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql"
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@ -6,6 +6,7 @@ env:
  PYTHONUNBUFFERED: 1

 on:  # yamllint disable-line rule:truthy
+  merge_group:
  pull_request:
    types:
      - synchronize
@ -29,6 +30,7 @@ jobs:
          fetch-depth: 0 # to get version
          filter: tree:0
      - name: Labels check
+        if: ${{ github.event_name != 'merge_group' }}
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 run_check.py
@ -56,16 +58,9 @@ jobs:
              echo 'EOF'
            } >> "$GITHUB_OUTPUT"
      - name: Re-create GH statuses for skipped jobs if any
+        if: ${{ github.event_name != 'merge_group' }}
        run: |
            python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses
-      - name: Style check early
-        # hack to run style check before the docker build job if possible (style-check image not changed)
-        if: contains(fromJson(steps.runconfig.outputs.CI_DATA).jobs_data.jobs_to_do, 'Style check early')
-        run: |
-          DOCKER_TAG=$(echo '${{ toJson(fromJson(steps.runconfig.outputs.CI_DATA).docker_data.images) }}' | tr -d '\n')
-          export DOCKER_TAG=$DOCKER_TAG
-          python3 ./tests/ci/style_check.py --no-push
-          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check'
  BuildDockers:
    needs: [RunConfig]
    if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }}
@ -88,7 +83,7 @@ jobs:
        ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
        RCSK
  FastTest:
-    needs: [RunConfig, StyleCheck]
+    needs: [RunConfig, BuildDockers]
    if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Fast test') }}
    uses: ./.github/workflows/reusable_test.yml
    with:
@ -102,7 +97,7 @@ jobs:
  # for main CI chain
  #
  Builds_1:
-    needs: [RunConfig, FastTest]
+    needs: [RunConfig, StyleCheck, FastTest]
    if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }}
    # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab
    uses: ./.github/workflows/reusable_build_stage.yml
@ -163,15 +158,25 @@ jobs:
  #
  FinishCheck:
    if: ${{ !failure() && !cancelled() }}
-    needs: [Tests_1, Tests_2]
+    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2]
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
+      - name: Check sync status
+        if: ${{ github.event_name == 'merge_group' }}
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 sync_pr.py --status
      - name: Finish label
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
-          python3 finish_check.py
+          python3 finish_check.py ${{ (contains(needs.*.result, 'failure') && github.event_name == 'merge_group') && '--pipeline-failure' || '' }}
+      - name: Auto merge if approved
+        if: ${{ github.event_name != 'merge_group' }}
+        run: |
+          cd "$GITHUB_WORKSPACE/tests/ci"
+          python3 merge_pr.py --check-approved


 #############################################################################################
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@ -436,7 +436,7 @@ jobs:
    if: ${{ !failure() && !cancelled() }}
    uses: ./.github/workflows/reusable_test.yml
    with:
-      test_name: Integration tests (asan, analyzer)
+      test_name: Integration tests (asan, old analyzer)
      runner_type: stress-tester
      data: ${{ needs.RunConfig.outputs.data }}
  IntegrationTestsTsan:
--- a/.github/workflows/reusable_build.yml
+++ b/.github/workflows/reusable_build.yml
@ -43,8 +43,7 @@ jobs:
    runs-on: [self-hosted, '${{inputs.runner_type}}']
    steps:
      - name: Check out repository code
-        # WIP: temporary try commit with limited perallelization of checkout
-        uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232
+        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
          ref: ${{ fromJson(inputs.data).git_ref }}
--- a/.gitignore
+++ b/.gitignore
@ -164,6 +164,9 @@ tests/queries/0_stateless/*.generated-expect
 tests/queries/0_stateless/*.expect.history
 tests/integration/**/_gen

+# pytest --pdb history
+.pdb_history
+
 # rust
 /rust/**/target*
 # It is autogenerated from *.in
--- a/.gitmessage
+++ b/.gitmessage
@ -16,7 +16,7 @@
 #ci_set_reduced
 #ci_set_arm
 #ci_set_integration
-#ci_set_analyzer
+#ci_set_old_analyzer

 ## To run specified job in CI:
 #job_<JOB NAME>
@ -26,4 +26,4 @@

 ## To run only specified batches for multi-batch job(s)
 #batch_2
-#btach_1_2_3
+#batch_1_2_3
--- a/.gitmodules
+++ b/.gitmodules
@ -22,9 +22,6 @@
 [submodule "contrib/capnproto"]
 	path = contrib/capnproto
 	url = https://github.com/ClickHouse/capnproto
-[submodule "contrib/double-conversion"]
-	path = contrib/double-conversion
-	url = https://github.com/google/double-conversion
 [submodule "contrib/re2"]
 	path = contrib/re2
 	url = https://github.com/google/re2
@ -176,9 +173,6 @@
 [submodule "contrib/libpq"]
 	path = contrib/libpq
 	url = https://github.com/ClickHouse/libpq
-[submodule "contrib/boringssl"]
-	path = contrib/boringssl
-	url = https://github.com/ClickHouse/boringssl
 [submodule "contrib/NuRaft"]
 	path = contrib/NuRaft
 	url = https://github.com/ClickHouse/NuRaft
@ -278,9 +272,6 @@
 [submodule "contrib/crc32-s390x"]
 	path = contrib/crc32-s390x
 	url = https://github.com/linux-on-ibm-z/crc32-s390x
-[submodule "contrib/openssl"]
-	path = contrib/openssl
-	url = https://github.com/openssl/openssl
 [submodule "contrib/google-benchmark"]
 	path = contrib/google-benchmark
 	url = https://github.com/google/benchmark
@ -326,6 +317,9 @@
 [submodule "contrib/crc32-vpmsum"]
 	path = contrib/crc32-vpmsum
 	url = https://github.com/antonblanchard/crc32-vpmsum.git
+[submodule "contrib/expected"]
+	path = contrib/expected
+	url = https://github.com/TartanLlama/expected
 [submodule "contrib/liburing"]
 	path = contrib/liburing
 	url = https://github.com/axboe/liburing
@ -369,3 +363,12 @@
 [submodule "contrib/idna"]
 	path = contrib/idna
 	url = https://github.com/ada-url/idna.git
+[submodule "contrib/rust_vendor"]
+	path = contrib/rust_vendor
+	url = https://github.com/ClickHouse/rust_vendor.git
+[submodule "contrib/openssl"]
+	path = contrib/openssl
+	url = https://github.com/ClickHouse/openssl.git
+[submodule "contrib/double-conversion"]
+	path = contrib/double-conversion
+	url = https://github.com/ClickHouse/double-conversion.git
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,10 +1,183 @@
 ### Table of Contents
+**[ClickHouse release v24.3 LTS, 2024-03-27](#243)**<br/>
 **[ClickHouse release v24.2, 2024-02-29](#242)**<br/>
 **[ClickHouse release v24.1, 2024-01-30](#241)**<br/>
 **[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**<br/>

 # 2024 Changelog

+### <a id="243"></a> ClickHouse release 24.3 LTS, 2024-03-27
+
+#### Upgrade Notes
+* The setting `allow_experimental_analyzer` is enabled by default and it switches the query analysis to a new implementation, which has better compatibility and feature completeness. The feature "analyzer" is considered beta instead of experimental. You can restore the old behavior by setting `compatibility` to `24.2` or disabling the `allow_experimental_analyzer` setting. Watch the [video on YouTube](https://www.youtube.com/watch?v=zhrOYQpgvkk).
+* ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. This is controlled by the settings, `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`, `output_format_arrow_string_as_string`. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases. Parquet/ORC/Arrow supports many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools lack support for the faster `lz4` compression method, that's why we set `zstd` by default. This is controlled by the settings `output_format_parquet_compression_method`, `output_format_orc_compression_method`, and `output_format_arrow_compression_method`. We changed the default to `zstd` for Parquet and ORC, but not Arrow (it is emphasized for low-level usages). [#61817](https://github.com/ClickHouse/ClickHouse/pull/61817) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* In the new ClickHouse version, the functions `geoDistance`, `greatCircleDistance`, and `greatCircleAngle` will use 64-bit double precision floating point data type for internal calculations and return type if all the arguments are Float64. This closes [#58476](https://github.com/ClickHouse/ClickHouse/issues/58476). In previous versions, the function always used Float32. You can switch to the old behavior by setting `geo_distance_returns_float64_on_float64_arguments` to `false` or setting `compatibility` to `24.2` or earlier. [#61848](https://github.com/ClickHouse/ClickHouse/pull/61848) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Co-authored with [Geet Patel](https://github.com/geetptl).
+* The obsolete in-memory data parts have been deprecated since version 23.5 and have not been supported since version 23.10. Now the remaining code is removed. Continuation of [#55186](https://github.com/ClickHouse/ClickHouse/issues/55186) and [#45409](https://github.com/ClickHouse/ClickHouse/issues/45409). It is unlikely that you have used in-memory data parts because they were available only before version 23.5 and only when you enabled them manually by specifying the corresponding SETTINGS for a MergeTree table. To check if you have in-memory data parts, run the following query: `SELECT part_type, count() FROM system.parts GROUP BY part_type ORDER BY part_type`. To disable the usage of in-memory data parts, do `ALTER TABLE ... MODIFY SETTING min_bytes_for_compact_part = DEFAULT, min_rows_for_compact_part = DEFAULT`. Before upgrading from old ClickHouse releases, first check that you don't have in-memory data parts. If there are in-memory data parts, disable them first, then wait while there are no in-memory data parts and continue the upgrade. [#61127](https://github.com/ClickHouse/ClickHouse/pull/61127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Changed the column name from `duration_ms` to `duration_microseconds` in the `system.zookeeper` table to reflect the reality that the duration is in the microsecond resolution. [#60774](https://github.com/ClickHouse/ClickHouse/pull/60774) ([Duc Canh Le](https://github.com/canhld94)).
+* Reject incoming INSERT queries in case when query-level settings `async_insert` and `deduplicate_blocks_in_dependent_materialized_views` are enabled together. This behaviour is controlled by a setting `throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert` and enabled by default. This is a continuation of https://github.com/ClickHouse/ClickHouse/pull/59699 needed to unblock https://github.com/ClickHouse/ClickHouse/pull/59915. [#60888](https://github.com/ClickHouse/ClickHouse/pull/60888) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Utility `clickhouse-copier` is moved to a separate repository on GitHub: https://github.com/ClickHouse/copier. It is no longer included in the bundle but is still available as a separate download. This closes: [#60734](https://github.com/ClickHouse/ClickHouse/issues/60734) This closes: [#60540](https://github.com/ClickHouse/ClickHouse/issues/60540) This closes: [#60250](https://github.com/ClickHouse/ClickHouse/issues/60250) This closes: [#52917](https://github.com/ClickHouse/ClickHouse/issues/52917) This closes: [#51140](https://github.com/ClickHouse/ClickHouse/issues/51140) This closes: [#47517](https://github.com/ClickHouse/ClickHouse/issues/47517) This closes: [#47189](https://github.com/ClickHouse/ClickHouse/issues/47189) This closes: [#46598](https://github.com/ClickHouse/ClickHouse/issues/46598) This closes: [#40257](https://github.com/ClickHouse/ClickHouse/issues/40257) This closes: [#36504](https://github.com/ClickHouse/ClickHouse/issues/36504) This closes: [#35485](https://github.com/ClickHouse/ClickHouse/issues/35485) This closes: [#33702](https://github.com/ClickHouse/ClickHouse/issues/33702) This closes: [#26702](https://github.com/ClickHouse/ClickHouse/issues/26702).
+* To increase compatibility with MySQL, the compatibility alias `locate` now accepts arguments `(needle, haystack[, start_pos])` by default. The previous behavior `(haystack, needle, [, start_pos])` can be restored by setting `function_locate_has_mysql_compatible_argument_order = 0`. [#61092](https://github.com/ClickHouse/ClickHouse/pull/61092) ([Robert Schulze](https://github.com/rschu1ze)).
+* Forbid `SimpleAggregateFunction` in `ORDER BY` of `MergeTree` tables (like `AggregateFunction` is forbidden, but they are forbidden because they are not comparable) by default (use `allow_suspicious_primary_key` to allow them). [#61399](https://github.com/ClickHouse/ClickHouse/pull/61399) ([Azat Khuzhin](https://github.com/azat)).
+* The `Ordinary` database engine is deprecated. You will receive a warning in clickhouse-client if your server is using it. This closes [#52229](https://github.com/ClickHouse/ClickHouse/issues/52229). [#56942](https://github.com/ClickHouse/ClickHouse/pull/56942) ([shabroo](https://github.com/shabroo)).
+
+#### New Feature
+* Support reading and writing backups as `tar` (in addition to `zip`). [#59535](https://github.com/ClickHouse/ClickHouse/pull/59535) ([josh-hildred](https://github.com/josh-hildred)).
+* Implemented support for S3 Express buckets. [#59965](https://github.com/ClickHouse/ClickHouse/pull/59965) ([Nikita Taranov](https://github.com/nickitat)).
+* Allow to attach parts from a different disk (using copy instead of hard link). [#60112](https://github.com/ClickHouse/ClickHouse/pull/60112) ([Unalian](https://github.com/Unalian)).
+* Size-capped `Memory` tables: controlled by their settings, `min_bytes_to_keep, max_bytes_to_keep, min_rows_to_keep` and `max_rows_to_keep`. [#60612](https://github.com/ClickHouse/ClickHouse/pull/60612) ([Jake Bamrah](https://github.com/JakeBamrah)).
+* Separate limits on number of waiting and executing queries. Added new server setting `max_waiting_queries` that limits the number of queries waiting due to `async_load_databases`. Existing limits on number of executing queries no longer count waiting queries. [#61053](https://github.com/ClickHouse/ClickHouse/pull/61053) ([Sergei Trifonov](https://github.com/serxa)).
+* Added a table `system.keywords` which contains all the keywords from parser. Mostly needed and will be used for better fuzzing and syntax highlighting. [#51808](https://github.com/ClickHouse/ClickHouse/pull/51808) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Add support for `ATTACH PARTITION ALL`. [#61107](https://github.com/ClickHouse/ClickHouse/pull/61107) ([Kirill Nikiforov](https://github.com/allmazz)).
+* Add a new function, `getClientHTTPHeader`. This closes [#54665](https://github.com/ClickHouse/ClickHouse/issues/54665). Co-authored with @lingtaolf. [#61820](https://github.com/ClickHouse/ClickHouse/pull/61820) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add `generate_series` as a table function (compatibility alias for PostgreSQL to the existing `numbers` function). This function generates a table with an arithmetic progression of natural numbers. [#59390](https://github.com/ClickHouse/ClickHouse/pull/59390) ([divanik](https://github.com/divanik)).
+* Add a mode for `topK`/`topKWeighted` that returns the count of values and its error. [#54508](https://github.com/ClickHouse/ClickHouse/pull/54508) ([UnamedRus](https://github.com/UnamedRus)).
+* Added function `toMillisecond` which returns the millisecond component for values of type `DateTime` or `DateTime64`. [#60281](https://github.com/ClickHouse/ClickHouse/pull/60281) ([Shaun Struwig](https://github.com/Blargian)).
+* Allow configuring HTTP redirect handlers for clickhouse-server. For example, you can make `/` redirect to the Play UI. [#60390](https://github.com/ClickHouse/ClickHouse/pull/60390) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Performance Improvement
+* Optimized function `dotProduct` to omit unnecessary and expensive memory copies. [#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)).
+* 30x faster printing for 256-bit integers. [#61100](https://github.com/ClickHouse/ClickHouse/pull/61100) ([Raúl Marín](https://github.com/Algunenano)).
+* If the table's primary key contains mostly useless columns, don't keep them in memory. This is controlled by a new setting `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` with the value `0.9` by default, which means: for a composite primary key, if a column changes its value at least 0.9 of the time, the columns after it will not be loaded. [#60255](https://github.com/ClickHouse/ClickHouse/pull/60255) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Improve the performance of serialized aggregation methods when involving multiple `Nullable` columns. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)).
+* Lazy builds JSON's output to improve performance of ALL JOIN. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)).
+* Make HTTP/HTTPs connections with external services, such as AWS S3 reusable for all use cases. Even when the response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)).
+* Improvements to aggregate functions `argMin` / `argMax` / `any` / `anyLast` / `anyHeavy`, as well as `ORDER BY {u8/u16/u32/u64/i8/i16/i32/i64} LIMIT 1` queries. [#58640](https://github.com/ClickHouse/ClickHouse/pull/58640) ([Raúl Marín](https://github.com/Algunenano)).
+* Trivial optimization for column's filter. Peak memory can be reduced to 44% of the original in some cases. [#59698](https://github.com/ClickHouse/ClickHouse/pull/59698) ([李扬](https://github.com/taiyang-li)).
+* Execute `multiIf` function in a columnar fashion when the result type's underlying type is a number. [#60384](https://github.com/ClickHouse/ClickHouse/pull/60384) ([李扬](https://github.com/taiyang-li)).
+* Faster (almost 2x) mutexes. [#60823](https://github.com/ClickHouse/ClickHouse/pull/60823) ([Azat Khuzhin](https://github.com/azat)).
+* Drain multiple connections in parallel when a distributed query is finishing. [#60845](https://github.com/ClickHouse/ClickHouse/pull/60845) ([lizhuoyu5](https://github.com/lzydmxy)).
+* Optimize data movement between columns of a Nullable number or a Nullable string, which improves some micro-benchmarks. [#60846](https://github.com/ClickHouse/ClickHouse/pull/60846) ([李扬](https://github.com/taiyang-li)).
+* Operations with the filesystem cache will suffer less from the lock contention. [#61066](https://github.com/ClickHouse/ClickHouse/pull/61066) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Optimize array join and other JOINs by preventing a wrong compiler's optimization. Close [#61074](https://github.com/ClickHouse/ClickHouse/issues/61074).  [#61075](https://github.com/ClickHouse/ClickHouse/pull/61075) ([李扬](https://github.com/taiyang-li)).
+* If a query with a syntax error contained the `COLUMNS` matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error. The compiled regexp was put to AST. But the letter A in AST means "abstract" which means it should not contain heavyweight objects. Parts of AST can be created and discarded during parsing, including a large number of backtracking. This leads to slowness on the parsing side and consequently allows DoS by a readonly user. But the main problem is that it prevents progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a new analyzer pass to optimize the IN operator for a single value. [#61564](https://github.com/ClickHouse/ClickHouse/pull/61564) ([LiuNeng](https://github.com/liuneng1994)).
+* DNSResolver shuffles set of resolved IPs which is needed to uniformly utilize multiple endpoints of AWS S3. [#60965](https://github.com/ClickHouse/ClickHouse/pull/60965) ([Sema Checherinda](https://github.com/CheSema)).
+
+#### Experimental Feature
+* Support parallel reading for Azure blob storage. This improves the performance of the experimental Azure object storage. [#61503](https://github.com/ClickHouse/ClickHouse/pull/61503) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Add asynchronous WriteBuffer for Azure blob storage similar to S3. This improves the performance of the experimental Azure object storage. [#59929](https://github.com/ClickHouse/ClickHouse/pull/59929) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Use managed identity for backups IO when using Azure Blob Storage. Add a setting to prevent ClickHouse from attempting to create a non-existent container, which requires permissions at the storage account level. [#61785](https://github.com/ClickHouse/ClickHouse/pull/61785) ([Daniel Pozo Escalona](https://github.com/danipozo)).
+* Add a setting `parallel_replicas_allow_in_with_subquery = 1` which allows subqueries for IN work with parallel replicas. [#60950](https://github.com/ClickHouse/ClickHouse/pull/60950) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* A change for the "zero-copy" replication: all zero copy locks related to a table have to be dropped when the table is dropped. The directory which contains these locks has to be removed also. [#57575](https://github.com/ClickHouse/ClickHouse/pull/57575) ([Sema Checherinda](https://github.com/CheSema)).
+
+#### Improvement
+* Use `MergeTree` as a default table engine. [#60524](https://github.com/ClickHouse/ClickHouse/pull/60524) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Enable `output_format_pretty_row_numbers` by default. It is better for usability. [#61791](https://github.com/ClickHouse/ClickHouse/pull/61791) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* In the previous version, some numbers in Pretty formats were not pretty enough. [#61794](https://github.com/ClickHouse/ClickHouse/pull/61794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* A long value in Pretty formats won't be cut if it is the single value in the resultset, such as in the result of the `SHOW CREATE TABLE` query. [#61795](https://github.com/ClickHouse/ClickHouse/pull/61795) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Similarly to `clickhouse-local`, `clickhouse-client` will accept the `--output-format` option as a synonym to the `--format` option. This closes [#59848](https://github.com/ClickHouse/ClickHouse/issues/59848). [#61797](https://github.com/ClickHouse/ClickHouse/pull/61797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* If stdout is a terminal and the output format is not specified, `clickhouse-client` and similar tools will use `PrettyCompact` by default, similarly to the interactive mode. `clickhouse-client` and `clickhouse-local` will handle command line arguments for input and output formats in a unified fashion. This closes [#61272](https://github.com/ClickHouse/ClickHouse/issues/61272). [#61800](https://github.com/ClickHouse/ClickHouse/pull/61800) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Underscore digit groups in Pretty formats for better readability. This is controlled by a new setting, `output_format_pretty_highlight_digit_groups`. [#61802](https://github.com/ClickHouse/ClickHouse/pull/61802) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add ability to override initial INSERT settings via `SYSTEM FLUSH DISTRIBUTED`. [#61832](https://github.com/ClickHouse/ClickHouse/pull/61832) ([Azat Khuzhin](https://github.com/azat)).
+* Enable processors profiling (time spent/in and out bytes for sorting, aggregation, ...) by default. [#61096](https://github.com/ClickHouse/ClickHouse/pull/61096) ([Azat Khuzhin](https://github.com/azat)).
+* Support files without format extension in Filesystem database. [#60795](https://github.com/ClickHouse/ClickHouse/pull/60795) ([Kruglov Pavel](https://github.com/Avogar)).
+* Make all format names case insensitive, like Tsv, or TSV, or tsv, or even rowbinary. [#60420](https://github.com/ClickHouse/ClickHouse/pull/60420) ([豪肥肥](https://github.com/HowePa)). I appreciate if you will continue to write it correctly, e.g., `JSON` 😇, not `Json` 🤮, but we don't mind if you spell it as you prefer.
+* Added `none_only_active` mode for `distributed_ddl_output_mode` setting. [#60340](https://github.com/ClickHouse/ClickHouse/pull/60340) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* The advanced dashboard has slightly better colors for multi-line graphs. [#60391](https://github.com/ClickHouse/ClickHouse/pull/60391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The Advanced dashboard now has controls always visible on scrolling. This allows you to add a new chart without scrolling up. [#60692](https://github.com/ClickHouse/ClickHouse/pull/60692) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* While running the `MODIFY COLUMN` query for materialized views, check the inner table's structure to ensure every column exists. [#47427](https://github.com/ClickHouse/ClickHouse/pull/47427) ([sunny](https://github.com/sunny19930321)).
+* String types and Enums can be used in the same context, such as: arrays, UNION queries, conditional expressions. This closes [#60726](https://github.com/ClickHouse/ClickHouse/issues/60726). [#60727](https://github.com/ClickHouse/ClickHouse/pull/60727) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Allow declaring Enums in the structure of external data for query processing (this is an immediate temporary table that you can provide for your query). [#57857](https://github.com/ClickHouse/ClickHouse/pull/57857) ([Duc Canh Le](https://github.com/canhld94)).
+* Consider lightweight deleted rows when selecting parts to merge, so the disk size of the resulting part will be estimated better. [#58223](https://github.com/ClickHouse/ClickHouse/pull/58223) ([Zhuo Qiu](https://github.com/jewelzqiu)).
+* Added comments for columns for more system tables. Continuation of https://github.com/ClickHouse/ClickHouse/pull/58356. [#59016](https://github.com/ClickHouse/ClickHouse/pull/59016) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Now we can use virtual columns in PREWHERE. It's worthwhile for non-const virtual columns like `_part_offset`. [#59033](https://github.com/ClickHouse/ClickHouse/pull/59033) ([Amos Bird](https://github.com/amosbird)). Improved overall usability of virtual columns. Now it is allowed to use virtual columns in `PREWHERE` (it's worthwhile for non-const virtual columns like `_part_offset`). Now a builtin documentation is available for virtual columns as a comment of column in `DESCRIBE` query with enabled setting `describe_include_virtual_columns`. [#60205](https://github.com/ClickHouse/ClickHouse/pull/60205) ([Anton Popov](https://github.com/CurtizJ)).
+* Instead of using a constant key, now object storage generates key for determining remove objects capability. [#59495](https://github.com/ClickHouse/ClickHouse/pull/59495) ([Sema Checherinda](https://github.com/CheSema)).
+* Allow "local" as object storage type instead of "local_blob_storage". [#60165](https://github.com/ClickHouse/ClickHouse/pull/60165) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Parallel flush of pending INSERT blocks of Distributed engine on `DETACH`/server shutdown and `SYSTEM FLUSH DISTRIBUTED` (Parallelism will work only if you have multi-disk policy for a table (like everything in the Distributed engine right now)). [#60225](https://github.com/ClickHouse/ClickHouse/pull/60225) ([Azat Khuzhin](https://github.com/azat)).
+* Add a setting to force read-through cache for merges. [#60308](https://github.com/ClickHouse/ClickHouse/pull/60308) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* An improvement for the MySQL compatibility protocol. The issue [#57598](https://github.com/ClickHouse/ClickHouse/issues/57598) mentions a variant behaviour regarding transaction handling. An issued COMMIT/ROLLBACK when no transaction is active is reported as an error contrary to MySQL behaviour. [#60338](https://github.com/ClickHouse/ClickHouse/pull/60338) ([PapaToemmsn](https://github.com/PapaToemmsn)).
+* Function `substring` now has a new alias `byteSlice`. [#60494](https://github.com/ClickHouse/ClickHouse/pull/60494) ([Robert Schulze](https://github.com/rschu1ze)).
+* Renamed server setting `dns_cache_max_size` to `dns_cache_max_entries` to reduce ambiguity. [#60500](https://github.com/ClickHouse/ClickHouse/pull/60500) ([Kirill Nikiforov](https://github.com/allmazz)).
+* `SHOW INDEX | INDEXES | INDICES | KEYS` no longer sorts by the primary key columns (which was unintuitive). [#60514](https://github.com/ClickHouse/ClickHouse/pull/60514) ([Robert Schulze](https://github.com/rschu1ze)).
+* Keeper improvement: abort during startup if an invalid snapshot is detected to avoid data loss. [#60537](https://github.com/ClickHouse/ClickHouse/pull/60537) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
+* Keeper improvement: support `leadership_expiry_ms` in Keeper's settings. [#60806](https://github.com/ClickHouse/ClickHouse/pull/60806) ([Brokenice0415](https://github.com/Brokenice0415)).
+* Always infer exponential numbers in JSON formats regardless of the setting `input_format_try_infer_exponent_floats`. Add setting `input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects` that allows to use String type for ambiguous paths instead of an exception during named Tuples inference from JSON objects. [#60808](https://github.com/ClickHouse/ClickHouse/pull/60808) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add support for `START TRANSACTION` syntax typically used in MySQL syntax, resolving https://github.com/ClickHouse/ClickHouse/discussions/60865. [#60886](https://github.com/ClickHouse/ClickHouse/pull/60886) ([Zach Naimon](https://github.com/ArctypeZach)).
+* Add a flag for the full-sorting merge join algorithm to treat null as biggest/smallest, so the behavior can be compatible with other SQL systems, like Apache Spark. [#60896](https://github.com/ClickHouse/ClickHouse/pull/60896) ([loudongfeng](https://github.com/loudongfeng)).
+* Support detecting the output format by file extension in `clickhouse-client` and `clickhouse-local`. [#61036](https://github.com/ClickHouse/ClickHouse/pull/61036) ([豪肥肥](https://github.com/HowePa)).
+* Update memory limit in runtime when Linux's CGroups value changed. [#61049](https://github.com/ClickHouse/ClickHouse/pull/61049) ([Han Fei](https://github.com/hanfei1991)).
+* Add the function `toUInt128OrZero`, which was missed by mistake (the mistake is related to https://github.com/ClickHouse/ClickHouse/pull/945). The compatibility aliases `FROM_UNIXTIME` and `DATE_FORMAT` (they are not ClickHouse-native and only exist for MySQL compatibility) have been made case insensitive, as expected for SQL-compatibility aliases. [#61114](https://github.com/ClickHouse/ClickHouse/pull/61114) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Improvements for the access checks, allowing the revocation of unpossessed rights in case the target user doesn't have the revoking grants either. Example: `GRANT SELECT ON *.* TO user1; REVOKE SELECT ON system.* FROM user1;`. [#61115](https://github.com/ClickHouse/ClickHouse/pull/61115) ([pufit](https://github.com/pufit)).
+* Fix `has()` function with `Nullable` column (fixes [#60214](https://github.com/ClickHouse/ClickHouse/issues/60214)). [#61249](https://github.com/ClickHouse/ClickHouse/pull/61249) ([Mikhail Koviazin](https://github.com/mkmkme)).
+* Now it's possible to specify the attribute `merge="true"` in config substitutions for subtrees `<include from_zk="/path" merge="true">`. If this attribute is specified, ClickHouse will merge the subtree with the existing configuration; otherwise, the default behavior is to append the new content to the configuration. [#61299](https://github.com/ClickHouse/ClickHouse/pull/61299) ([alesapin](https://github.com/alesapin)).
+* Add async metrics for virtual memory mappings: `VMMaxMapCount` & `VMNumMaps`. Closes [#60662](https://github.com/ClickHouse/ClickHouse/issues/60662). [#61354](https://github.com/ClickHouse/ClickHouse/pull/61354) ([Tuan Pham Anh](https://github.com/tuanpavn)).
+* Use `temporary_files_codec` setting in all places where we create temporary data, for example external memory sorting and external memory GROUP BY. Before it worked only in `partial_merge` JOIN algorithm. [#61456](https://github.com/ClickHouse/ClickHouse/pull/61456) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add a new setting `max_parser_backtracks` which allows to limit the complexity of query parsing. [#61502](https://github.com/ClickHouse/ClickHouse/pull/61502) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Less contention during dynamic resize of the filesystem cache. [#61524](https://github.com/ClickHouse/ClickHouse/pull/61524) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Disallow sharded mode of StorageS3 queue, because it will be rewritten. [#61537](https://github.com/ClickHouse/ClickHouse/pull/61537) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed typo: from `use_leagcy_max_level` to `use_legacy_max_level`. [#61545](https://github.com/ClickHouse/ClickHouse/pull/61545) ([William Schoeffel](https://github.com/wiledusc)).
+* Remove some duplicate entries in `system.blob_storage_log`. [#61622](https://github.com/ClickHouse/ClickHouse/pull/61622) ([YenchangChan](https://github.com/YenchangChan)).
+* Added `current_user` function as a compatibility alias for MySQL. [#61770](https://github.com/ClickHouse/ClickHouse/pull/61770) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Fix inconsistent floating point aggregate function states in mixed x86-64 / ARM clusters [#60610](https://github.com/ClickHouse/ClickHouse/pull/60610) ([Harry Lee](https://github.com/HarryLeeIBM)).
+
+#### Build/Testing/Packaging Improvement
+* The real-time query profiler now works on AArch64. In previous versions, it worked only when a program didn't spend time inside a syscall. [#60807](https://github.com/ClickHouse/ClickHouse/pull/60807) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* ClickHouse version has been added to docker labels. Closes [#54224](https://github.com/ClickHouse/ClickHouse/issues/54224). [#60949](https://github.com/ClickHouse/ClickHouse/pull/60949) ([Nikolay Monkov](https://github.com/nikmonkov)).
+* Upgrade `prqlc` to 0.11.3. [#60616](https://github.com/ClickHouse/ClickHouse/pull/60616) ([Maximilian Roos](https://github.com/max-sixty)).
+* Add generic query text fuzzer in `clickhouse-local`. [#61508](https://github.com/ClickHouse/ClickHouse/pull/61508) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Fix finished_mutations_to_keep=0 for MergeTree (as the docs say, 0 means to keep everything) [#60031](https://github.com/ClickHouse/ClickHouse/pull/60031) ([Azat Khuzhin](https://github.com/azat)).
+* Something was wrong with the FINAL optimization, here is how the author describes it: "PartsSplitter invalid ranges for the same part". [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
+* Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)).
+* An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)).
+* Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
+* Something was wrong with experimental KQL (Kusto) support: fix `max_query_size_for_kql_compound_operator`: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
+* Keeper fix: add timeouts when waiting for commit logs [#60544](https://github.com/ClickHouse/ClickHouse/pull/60544) ([Antonio Andelic](https://github.com/antonio2368)).
+* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix reading from MergeTree with non-deterministic functions in filter [#60586](https://github.com/ClickHouse/ClickHouse/pull/60586) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix logical error on bad compatibility setting value type [#60596](https://github.com/ClickHouse/ClickHouse/pull/60596) ([Kruglov Pavel](https://github.com/Avogar)).
+* fix(prql): Robust panic handler [#60615](https://github.com/ClickHouse/ClickHouse/pull/60615) ([Maximilian Roos](https://github.com/max-sixty)).
+* Fix `intDiv` for decimal and date arguments [#60672](https://github.com/ClickHouse/ClickHouse/pull/60672) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Fix: expand CTE in alter modify query [#60682](https://github.com/ClickHouse/ClickHouse/pull/60682) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix system.parts for non-Atomic/Ordinary database engine (i.e. Memory) [#60689](https://github.com/ClickHouse/ClickHouse/pull/60689) ([Azat Khuzhin](https://github.com/azat)).
+* Fix "Invalid storage definition in metadata file" for parameterized views [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)).
+* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove wrong assertion in aggregate function quantileGK [#60740](https://github.com/ClickHouse/ClickHouse/pull/60740) ([李扬](https://github.com/taiyang-li)).
+* Fix insert-select + insert_deduplication_token bug by setting streams to 1 [#60745](https://github.com/ClickHouse/ClickHouse/pull/60745) ([Jordi Villar](https://github.com/jrdi)).
+* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
+* Fix toStartOfInterval [#60763](https://github.com/ClickHouse/ClickHouse/pull/60763) ([Andrey Zvonov](https://github.com/zvonand)).
+* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
+* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix a possible hang on error in HashedDictionaryParallelLoader [#60926](https://github.com/ClickHouse/ClickHouse/pull/60926) ([vdimir](https://github.com/vdimir)).
+* Fix async RESTORE with Replicated database (experimental feature) [#60934](https://github.com/ClickHouse/ClickHouse/pull/60934) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix deadlock in async inserts to `Log` tables via native protocol [#61055](https://github.com/ClickHouse/ClickHouse/pull/61055) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix lazy execution of default argument in dictGetOrDefault for RangeHashedDictionary [#61196](https://github.com/ClickHouse/ClickHouse/pull/61196) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix usage of session_token in S3 engine [#61234](https://github.com/ClickHouse/ClickHouse/pull/61234) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix possible incorrect result of aggregate function `uniqExact` [#61257](https://github.com/ClickHouse/ClickHouse/pull/61257) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix bugs in show database [#61269](https://github.com/ClickHouse/ClickHouse/pull/61269) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix logical error in RabbitMQ storage with MATERIALIZED columns [#61320](https://github.com/ClickHouse/ClickHouse/pull/61320) ([vdimir](https://github.com/vdimir)).
+* Fix CREATE OR REPLACE DICTIONARY [#61356](https://github.com/ClickHouse/ClickHouse/pull/61356) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix consecutive keys optimization for nullable keys [#61393](https://github.com/ClickHouse/ClickHouse/pull/61393) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix an issue with splitting the actions DAG [#61458](https://github.com/ClickHouse/ClickHouse/pull/61458) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix finishing a failed RESTORE [#61466](https://github.com/ClickHouse/ClickHouse/pull/61466) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Disable async_insert_use_adaptive_busy_timeout correctly with compatibility settings [#61468](https://github.com/ClickHouse/ClickHouse/pull/61468) ([Raúl Marín](https://github.com/Algunenano)).
+* Allow queuing in restore pool [#61475](https://github.com/ClickHouse/ClickHouse/pull/61475) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix an inconsistency when reading system.parts using UUID. [#61479](https://github.com/ClickHouse/ClickHouse/pull/61479) ([Dan Wu](https://github.com/wudanzy)).
+* Fix ALTER QUERY MODIFY SQL SECURITY [#61480](https://github.com/ClickHouse/ClickHouse/pull/61480) ([pufit](https://github.com/pufit)).
+* Fix a crash in window view (experimental feature) [#61526](https://github.com/ClickHouse/ClickHouse/pull/61526) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix `repeat` with non-native integers [#61527](https://github.com/ClickHouse/ClickHouse/pull/61527) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix client's `-s` argument [#61530](https://github.com/ClickHouse/ClickHouse/pull/61530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix crash in arrayPartialReverseSort [#61539](https://github.com/ClickHouse/ClickHouse/pull/61539) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix string search with const position [#61547](https://github.com/ClickHouse/ClickHouse/pull/61547) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix `addDays` causing an error when used with DateTime64 [#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)).
+* Disallow LowCardinality input type for JSONExtract [#61617](https://github.com/ClickHouse/ClickHouse/pull/61617) ([Julia Kartseva](https://github.com/jkartseva)).
+* Fix `system.part_log` for async insert with deduplication [#61620](https://github.com/ClickHouse/ClickHouse/pull/61620) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix a `Non-ready set` exception for system.parts. [#61666](https://github.com/ClickHouse/ClickHouse/pull/61666) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix actual_part_name for REPLACE_RANGE (`Entry actual part isn't empty yet`) [#61675](https://github.com/ClickHouse/ClickHouse/pull/61675) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix a sanitizer report in `multiSearchAllPositionsCaseInsensitiveUTF8` for incorrect UTF-8 [#61749](https://github.com/ClickHouse/ClickHouse/pull/61749) ([pufit](https://github.com/pufit)).
+* Fix an observation that the RANGE frame is not supported for Nullable columns. [#61766](https://github.com/ClickHouse/ClickHouse/pull/61766) ([YuanLiu](https://github.com/ditgittube)).
+
 ### <a id="242"></a> ClickHouse release 24.2, 2024-02-29

 #### Backward Incompatible Change
@ -198,7 +371,6 @@
 * Added `FROM <Replicas>` modifier for `SYSTEM SYNC REPLICA LIGHTWEIGHT` query. With the `FROM` modifier ensures we wait for fetches and drop-ranges only for the specified source replicas, as well as any replica not in zookeeper or with an empty source_replica. [#58393](https://github.com/ClickHouse/ClickHouse/pull/58393) ([Jayme Bird](https://github.com/jaymebrd)).
 * Added setting `update_insert_deduplication_token_in_dependent_materialized_views`. This setting allows to update insert deduplication token with table identifier during insert in dependent materialized views. Closes [#59165](https://github.com/ClickHouse/ClickHouse/issues/59165). [#59238](https://github.com/ClickHouse/ClickHouse/pull/59238) ([Maksim Kita](https://github.com/kitaisreal)).
 * Added statement `SYSTEM RELOAD ASYNCHRONOUS METRICS` which updates the asynchronous metrics. Mostly useful for testing and development. [#53710](https://github.com/ClickHouse/ClickHouse/pull/53710) ([Robert Schulze](https://github.com/rschu1ze)).
-* Attach parts from a different disk `ALTER TABLE destination ATTACH PARTITION tuple() FROM source` where source is an [instant table](https://github.com/ClickHouse/web-tables-demo). [#60112](https://github.com/ClickHouse/ClickHouse/pull/60112)([Unalian](https://github.com/Unalian)).

 #### Performance Improvement
 * Coordination for parallel replicas is rewritten for better parallelism and cache locality. It has been tested for linear scalability on hundreds of replicas. It also got support for reading in order. [#57968](https://github.com/ClickHouse/ClickHouse/pull/57968) ([Nikita Taranov](https://github.com/nickitat)).
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -56,16 +56,21 @@ option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile t
 if (ENABLE_CHECK_HEAVY_BUILDS)
    # set DATA (since RSS does not work since 2.6.x+) to 5G
    set (RLIMIT_DATA 5000000000)
-    # set VIRT (RLIMIT_AS) to 10G (DATA*10)
+    # set VIRT (RLIMIT_AS) to 10G (DATA*2)
    set (RLIMIT_AS 10000000000)
    # set CPU time limit to 1000 seconds
    set (RLIMIT_CPU 1000)

-    # -fsanitize=memory and address are too heavy
-    if (SANITIZE)
+    # Sanitizers are too heavy
+    if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE)
       set (RLIMIT_DATA 10000000000) # 10G
    endif()

+    # Compiling some files for RISCV64 is currently too slow for the default CPU time limit. TODO: Improve compilation times per file
+    if (ARCH_RISCV64)
+        set (RLIMIT_CPU 1800)
+    endif()
+
    set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=${RLIMIT_CPU} ${CMAKE_CXX_COMPILER_LAUNCHER})
 endif ()

@ -102,6 +107,8 @@ if (ENABLE_FUZZING)

    # For codegen_select_fuzzer
    set (ENABLE_PROTOBUF 1)
+
+    add_compile_definitions(FUZZING_MODE=1)
 endif()

 # Global libraries
@ -110,11 +117,6 @@ endif()
 # - sanitize.cmake
 add_library(global-libs INTERFACE)

-# We don't want to instrument everything with fuzzer, but only specific targets (see below),
-# also, since we build our own llvm, we specifically don't want to instrument
-# libFuzzer library itself - it would result in infinite recursion
-#include (cmake/fuzzer.cmake)
-
 include (cmake/sanitize.cmake)

 option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON)
@ -453,8 +455,6 @@ endif ()

 enable_testing() # Enable for tests without binary

-option(ENABLE_OPENSSL "This option performs a build with OpenSSL. NOTE! This option is insecure and should never be used. By default, ClickHouse uses and only supports BoringSSL" OFF)
-
 if (ARCH_S390X)
    set(ENABLE_OPENSSL_DYNAMIC_DEFAULT ON)
 else ()
@ -554,7 +554,9 @@ if (ENABLE_RUST)
    endif()
 endif()

-if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
+if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
+    AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND NOT ENABLE_FUZZING
+    AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
    set(CHECK_LARGE_OBJECT_SIZES_DEFAULT ON)
 else ()
    set(CHECK_LARGE_OBJECT_SIZES_DEFAULT OFF)
@ -577,9 +579,6 @@ if (FUZZER)
            if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY"))
                target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link")
            endif()
-            # clickhouse fuzzer isn't working correctly
-            # initial PR https://github.com/ClickHouse/ClickHouse/pull/27526
-            #if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse")
            if (target_type STREQUAL "EXECUTABLE" AND target MATCHES ".+_fuzzer")
                message(STATUS "${target} instrumented with fuzzer")
                target_link_libraries(${target} PUBLIC ch_contrib::fuzzer)
@ -589,6 +588,12 @@ if (FUZZER)
                get_target_property(target_bin_dir ${target} BINARY_DIR)
                add_custom_command(TARGET fuzzers POST_BUILD COMMAND mv "${target_bin_dir}/${target_bin_name}" "${CMAKE_CURRENT_BINARY_DIR}/programs/" VERBATIM)
            endif()
+            if (target STREQUAL "clickhouse")
+                message(STATUS "${target} instrumented with fuzzer")
+                target_link_libraries(${target} PUBLIC ch_contrib::fuzzer_no_main)
+                # Add to fuzzers bundle
+                add_dependencies(fuzzers ${target})
+            endif()
        endif()
    endforeach()
    add_custom_command(TARGET fuzzers POST_BUILD COMMAND SRC=${CMAKE_SOURCE_DIR} BIN=${CMAKE_BINARY_DIR} OUT=${CMAKE_BINARY_DIR}/programs ${CMAKE_SOURCE_DIR}/tests/fuzz/build.sh VERBATIM)
--- a/README.md
+++ b/README.md
@ -28,7 +28,6 @@ curl https://clickhouse.com/ | sh
 * [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
 * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
 * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev.
-* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
 * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.

 ## Monthly Release & Community Call
@ -40,15 +39,8 @@ Every month we get together with the community (users, contributors, customers,

 ## Upcoming Events

-Keep an eye out for upcoming meetups and eventsaround the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
+Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.

-* [ClickHouse Meetup in Bellevue](https://www.meetup.com/clickhouse-seattle-user-group/events/298650371/) - Mar 11
-* [ClickHouse Meetup at Ramp's Offices in NYC](https://www.meetup.com/clickhouse-new-york-user-group/events/298640542/) - Mar 19
-* [ClickHouse Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/299479750/) - Mar 20
-* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/298997115/) - Mar 21
-* [ClickHouse Meetup in Bengaluru](https://www.meetup.com/clickhouse-bangalore-user-group/events/299479850/) - Mar 23
-* [ClickHouse Meetup in Zurich](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/299628922/) - Apr 16
-* [ClickHouse Meetup in Copenhagen](https://www.meetup.com/clickhouse-denmark-meetup-group/events/299629133/) - Apr 23
 * [ClickHouse Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28


--- a/SECURITY.md
+++ b/SECURITY.md
@ -13,18 +13,16 @@ The following versions of ClickHouse server are currently being supported with s

 | Version | Supported |
 |:-|:-|
+| 24.3 | ✔️ |
 | 24.2 | ✔️ |
 | 24.1 | ✔️ |
-| 23.12 | ✔️ |
-| 23.11 | ❌ |
-| 23.10 | ❌ |
-| 23.9 | ❌ |
+| 23.* | ❌ |
 | 23.8 | ✔️ |
 | 23.7 | ❌ |
 | 23.6 | ❌ |
 | 23.5 | ❌ |
 | 23.4 | ❌ |
-| 23.3 | ✔️ |
+| 23.3 | ❌ |
 | 23.2 | ❌ |
 | 23.1 | ❌ |
 | 22.* | ❌ |
--- a/base/base/CMakeLists.txt
+++ b/base/base/CMakeLists.txt
@ -20,6 +20,7 @@ set (SRCS
    getPageSize.cpp
    getThreadId.cpp
    int8_to_string.cpp
+    itoa.cpp
    JSON.cpp
    mremap.cpp
    phdr_cache.cpp
--- a/base/base/IPv4andIPv6.h
+++ b/base/base/IPv4andIPv6.h
@ -1,8 +1,7 @@
 #pragma once

-#include <base/strong_typedef.h>
 #include <base/extended_types.h>
-#include <Common/formatIPv6.h>
+#include <base/strong_typedef.h>
 #include <Common/memcmpSmall.h>

 namespace DB
@ -62,7 +61,8 @@ namespace std
    {
        size_t operator()(const DB::IPv6 & x) const
        {
-            return std::hash<std::string_view>{}(std::string_view(reinterpret_cast<const char*>(&x.toUnderType()), IPV6_BINARY_LENGTH));
+            return std::hash<std::string_view>{}(
+                std::string_view(reinterpret_cast<const char *>(&x.toUnderType()), sizeof(DB::IPv6::UnderlyingType)));
        }
    };

--- a/base/base/coverage.cpp
+++ b/base/base/coverage.cpp
@ -1,7 +1,7 @@
 #include "coverage.h"
 #include <sys/mman.h>

-#pragma GCC diagnostic ignored "-Wreserved-identifier"
+#pragma clang diagnostic ignored "-Wreserved-identifier"


 /// WITH_COVERAGE enables the default implementation of code coverage,
--- a/base/base/defines.h
+++ b/base/base/defines.h
@ -108,16 +108,22 @@
        {
            [[noreturn]] void abortOnFailedAssertion(const String & description);
        }
-        #define chassert(x) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
+        #define chassert_1(x, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
+        #define chassert_2(x, comment, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(comment); } while (0)
        #define UNREACHABLE() abort()
        // clang-format off
    #else
        /// Here sizeof() trick is used to suppress unused warning for result,
        /// since simple "(void)x" will evaluate the expression, while
        /// "sizeof(!(x))" will not.
-        #define chassert(x) (void)sizeof(!(x))
+        #define chassert_1(x, ...) (void)sizeof(!(x))
+        #define chassert_2(x, comment, ...) (void)sizeof(!(x))
        #define UNREACHABLE() __builtin_unreachable()
    #endif
+        #define CHASSERT_DISPATCH(_1,_2, N,...) N(_1, _2)
+        #define CHASSERT_INVOKE(tuple) CHASSERT_DISPATCH tuple
+        #define chassert(...) CHASSERT_INVOKE((__VA_ARGS__, chassert_2, chassert_1))
+
 #endif

 /// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
--- a/base/base/getMemoryAmount.cpp
+++ b/base/base/getMemoryAmount.cpp
@ -50,9 +50,6 @@ std::optional<uint64_t> getCgroupsV2MemoryLimit()

 }

-/** Returns the size of physical memory (RAM) in bytes.
-  * Returns 0 on unsupported platform
-  */
 uint64_t getMemoryAmountOrZero()
 {
    int64_t num_pages = sysconf(_SC_PHYS_PAGES);
--- a/base/base/getMemoryAmount.h
+++ b/base/base/getMemoryAmount.h
@ -2,11 +2,10 @@

 #include <cstdint>

-/** Returns the size of physical memory (RAM) in bytes.
-  * Returns 0 on unsupported platform or if it cannot determine the size of physical memory.
-  */
+/// Returns the size in bytes of physical memory (RAM) available to the process. The value can
+/// be smaller than the total RAM available to the system due to cgroups settings.
+/// Returns 0 on unsupported platform or if it cannot determine the size of physical memory.
 uint64_t getMemoryAmountOrZero();

-/** Throws exception if it cannot determine the size of physical memory.
-  */
+/// Throws exception if it cannot determine the size of physical memory.
 uint64_t getMemoryAmount();
--- a/base/base/itoa.cpp
+++ b/base/base/itoa.cpp
@ -0,0 +1,503 @@
+// Based on https://github.com/amdn/itoa and combined with our optimizations
+//
+//=== itoa.cpp - Fast integer to ascii conversion                 --*- C++ -*-//
+//
+// The MIT License (MIT)
+// Copyright (c) 2016 Arturo Martin-de-Nicolas
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+//     The above copyright notice and this permission notice shall be included
+//     in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//===----------------------------------------------------------------------===//
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <type_traits>
+#include <base/defines.h>
+#include <base/extended_types.h>
+#include <base/itoa.h>
+
+namespace
+{
+template <typename T>
+ALWAYS_INLINE inline constexpr T pow10(size_t x) // returns 10^x as T; the recursion ends at x == 0
+{
+    return x ? 10 * pow10<T>(x - 1) : 1; // no overflow check is done here
+}
+
+// Division by a power of 10 is implemented using a multiplicative inverse.
+// This strength reduction is also done by optimizing compilers, but
+// presently the fastest results are produced by using the values
+// for the multiplication and the shift as given by the algorithm
+// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
+//
+// http://www.agner.org/optimize/optimizing_assembly.pdf
+//
+// "Integer division by a constant (all processors)
+// A floating point number can be divided by a constant by multiplying
+// with the reciprocal. If we want to do the same with integers, we have
+// to scale the reciprocal by 2^n and then shift the product to the right
+// by n. There are various algorithms for finding a suitable value of n
+// and compensating for rounding errors. The algorithm described below
+// was invented by Terje Mathisen, Norway, and not published elsewhere."
+
+/// Division by constant is performed by:
+/// 1. Adding 1 if needed;
+/// 2. Multiplying by another constant;
+/// 3. Shifting right by another constant.
+template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
+struct Division
+{
+    static constexpr bool add{add_}; // step 1: whether 1 is added to the dividend
+    static constexpr UInt multiplier{multiplier_}; // step 2: the constant to multiply by
+    static constexpr unsigned shift{shift_}; // step 3: number of bits to shift right
+};
+
+/// Select a type with appropriate number of bytes from the list of types.
+/// First parameter is the number of bytes requested. Then comes a list of types with 1, 2, 4, ... number of bytes.
+/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
+template <size_t N, typename T, typename... Ts>
+struct SelectType
+{
+    using Result = typename SelectType<N / 2, Ts...>::Result; // halve N and drop the first type until N reaches 1
+};
+
+template <typename T, typename... Ts>
+struct SelectType<1, T, Ts...>
+{
+    using Result = T; // N == 1: the first remaining type is the answer
+};
+
+
+/// Division by 10^N where N is the size of the type in bytes.
+template <size_t N>
+using DivisionBy10PowN = typename SelectType<
+    N,
+    Division<uint8_t, false, 205U, 11>, /// divide by 10
+    Division<uint16_t, true, 41943U, 22>, /// divide by 100
+    Division<uint32_t, false, 3518437209U, 45>, /// divide by 10000
+    Division<uint64_t, false, 12379400392853802749ULL, 90> /// divide by 100000000
+    >::Result;
+
+template <size_t N>
+using UnsignedOfSize = typename SelectType<N, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>::Result; // unsigned type of N bytes, N in {1, 2, 4, 8, 16}
+
+/// Holds the result of dividing an unsigned N-byte variable by 10^N: a quotient and a remainder.
+template <size_t N>
+struct QuotientAndRemainder
+{
+    UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
+    UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
+};
+
+template <size_t N>
+QuotientAndRemainder<N> inline split(UnsignedOfSize<N> value) // splits value into value / 10^N and value % 10^N using multiply-and-shift
+{
+    constexpr DivisionBy10PowN<N> division; // add / multiplier / shift constants for 10^N
+
+    UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; // value is widened to 2*N bytes before multiplying
+    UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N)); // value - quotient * 10^N
+
+    return {quotient, remainder};
+}
+
+ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value) // writes one decimal digit at p and returns p + 1
+{
+    *p = '0' + value; // NOTE(review): value is expected to be 0..9; this is not checked here
+    ++p;
+    return p;
+}
+
+// Using a lookup table to convert binary numbers from 0 to 99
+// into ASCII characters as described by Andrei Alexandrescu in
+// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
+
+const char digits[201] = "00010203040506070809"
+                         "10111213141516171819"
+                         "20212223242526272829"
+                         "30313233343536373839"
+                         "40414243444546474849"
+                         "50515253545556575859"
+                         "60616263646566676869"
+                         "70717273747576777879"
+                         "80818283848586878889"
+                         "90919293949596979899"; // 100 two-character pairs plus the terminating NUL
+
+ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) // writes two decimal digits at p and returns p + 2
+{
+    memcpy(p, &digits[value * 2], 2); // NOTE(review): value is expected to be 0..99; this is not checked here
+    p += 2;
+    return p;
+}
+
+namespace convert
+{
+template <typename UInt, size_t N = sizeof(UInt)>
+char * head(char * p, UInt u);
+template <typename UInt, size_t N = sizeof(UInt)>
+char * tail(char * p, UInt u);
+
+//===----------------------------------------------------------===//
+//     head: find most significant digit, skip leading zeros
+//===----------------------------------------------------------===//
+
+// "x" contains quotient and remainder after division by 10^N
+// quotient is less than 10^N
+template <size_t N>
+ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder<N> x)
+{
+    p = head(p, UnsignedOfSize<N / 2>(x.quotient)); // leading part: leading zeros are skipped
+    p = tail(p, x.remainder); // trailing part: printed with leading zeros
+    return p;
+}
+
+// "u" is less than 10^(2*N)
+template <typename UInt, size_t N>
+ALWAYS_INLINE inline char * head(char * p, UInt u)
+{
+    return u < pow10<UnsignedOfSize<N>>(N) ? head(p, UnsignedOfSize<N / 2>(u)) : head<N>(p, split<N>(u)); // below 10^N: narrow to the half-size type; otherwise split by 10^N first
+}
+
+// recursion base case, selected when "u" is one byte
+template <>
+ALWAYS_INLINE inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
+{
+    return u < 10 ? outDigit(p, u) : outTwoDigits(p, u); // a single digit gets no leading zero
+}
+
+//===----------------------------------------------------------===//
+//     tail: produce all digits including leading zeros
+//===----------------------------------------------------------===//
+
+// recursive step, "u" is less than 10^(2*N)
+template <typename UInt, size_t N>
+ALWAYS_INLINE inline char * tail(char * p, UInt u)
+{
+    QuotientAndRemainder<N> x = split<N>(u);
+    p = tail(p, UnsignedOfSize<N / 2>(x.quotient)); // both halves are printed with leading zeros
+    p = tail(p, x.remainder);
+    return p;
+}
+
+// recursion base case, selected when "u" is one byte
+template <>
+ALWAYS_INLINE inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
+{
+    return outTwoDigits(p, u); // always two characters, so values below 10 get a leading zero
+}
+
+//===----------------------------------------------------------===//
+// large values are >= 10^(2*N)
+// where x contains quotient and remainder after division by 10^N
+//===----------------------------------------------------------===//
+template <size_t N>
+ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder<N> x)
+{
+    QuotientAndRemainder<N> y = split<N>(x.quotient); // x.quotient is >= 10^N here, so it is split once more
+    p = head(p, UnsignedOfSize<N / 2>(y.quotient)); // most significant part, leading zeros skipped
+    p = tail(p, y.remainder);
+    p = tail(p, x.remainder);
+    return p;
+}
+
+//===----------------------------------------------------------===//
+// handle values of "u" that might be >= 10^(2*N)
+// where N is the size of "u" in bytes
+//===----------------------------------------------------------===//
+template <typename UInt, size_t N = sizeof(UInt)>
+ALWAYS_INLINE inline char * uitoa(char * p, UInt u)
+{
+    if (u < pow10<UnsignedOfSize<N>>(N))
+        return head(p, UnsignedOfSize<N / 2>(u)); // at most N digits: narrow to the half-size type
+    QuotientAndRemainder<N> x = split<N>(u);
+
+    return u < pow10<UnsignedOfSize<N>>(2 * N) ? head<N>(p, x) : large<N>(p, x); // at most 2*N digits, or more than that
+}
+
+// selected when "u" is one byte
+template <>
+ALWAYS_INLINE inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
+{
+    if (u < 10)
+        return outDigit(p, u);
+    else if (u < 100)
+        return outTwoDigits(p, u);
+    else
+    {
+        p = outDigit(p, u / 100); // three-digit value: hundreds digit first
+        p = outTwoDigits(p, u % 100); // then the remaining two digits
+        return p;
+    }
+}
+
+//===----------------------------------------------------------===//
+//     handle unsigned and signed integral operands
+//===----------------------------------------------------------===//
+
+// itoa: handle unsigned integral operands (selected by SFINAE)
+template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
+ALWAYS_INLINE inline char * itoa(U u, char * p)
+{
+    return convert::uitoa(p, u); // note the swapped argument order: uitoa takes the output pointer first
+}
+
+// itoa: handle signed integral operands (selected by SFINAE)
+template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
+ALWAYS_INLINE inline char * itoa(I i, char * p)
+{
+    // Need "mask" to be filled with a copy of the sign bit.
+    // If "i" is a negative value, then the result of "operator >>"
+    // is implementation-defined, though usually it is an arithmetic
+    // right shift that replicates the sign bit.
+    // Use a conditional expression to be portable,
+    // a good optimizing compiler generates an arithmetic right shift
+    // and avoids the conditional branch.
+    UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
+    // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
+    // Cannot use std::abs() because the result is undefined
+    // in 2's complement systems for the most-negative value.
+    // Want to avoid conditional branch for performance reasons since
+    // CPU branch prediction will be ineffective when negative values
+    // occur randomly.
+    // Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
+    // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
+    // This yields the absolute value with the desired type without
+    // using a conditional branch and without invoking undefined or
+    // implementation defined behavior:
+    UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
+    // Unconditionally store a minus sign when producing digits
+    // in a forward direction and increment the pointer only if
+    // the value is in fact negative.
+    // This avoids a conditional branch and is safe because we will
+    // always produce at least one digit and it will overwrite the
+    // minus sign when the value is not negative.
+    *p = '-';
+    p += (mask & 1); // mask is all ones for negative "i", so this adds 1; otherwise it adds 0
+    p = convert::uitoa(p, u);
+    return p;
+}
+}
+
+const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; // 10^18 == 100^9
+const int max_multiple_of_hundred_blocks = 9; // number of two-digit blocks in a remainder modulo 100^9
+static_assert(max_multiple_of_hundred_that_fits_in_64_bits % 100 == 0);
+
+ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
+{
+    /// If the highest 64-bit item is empty, we can print just the lowest item as u64
+    if (_x.items[UInt128::_impl::little(1)] == 0)
+        return convert::itoa(_x.items[UInt128::_impl::little(0)], p);
+
+    /// Doing operations using __int128 is faster and we already rely on this feature
+    using T = unsigned __int128;
+    T x = (T(_x.items[UInt128::_impl::little(1)]) << 64) + T(_x.items[UInt128::_impl::little(0)]);
+
+    /// We are going to accumulate blocks of 2 digits to print until the number is small enough to be printed as u64
+    /// To do this we could do: x / 100, x % 100
+    /// But these would mean doing many iterations with long integers, so instead we divide by a much longer integer
+    /// multiple of 100 (100^9) and then get the blocks out of it (as u64)
+    /// Once we reach u64::max we can stop and use the fast method to print that in the front
+    static const T large_divisor = max_multiple_of_hundred_that_fits_in_64_bits;
+    static const T largest_uint64 = std::numeric_limits<uint64_t>::max();
+    uint8_t two_values[20] = {0}; // 39 Max characters / 2
+
+    int current_block = 0;
+    while (x > largest_uint64)
+    {
+        uint64_t u64_remainder = uint64_t(x % large_divisor);
+        x /= large_divisor;
+
+        int pos = current_block;
+        while (u64_remainder) // slots not reached here keep their initial 0 and are later printed as "00"
+        {
+            two_values[pos] = uint8_t(u64_remainder % 100);
+            pos++;
+            u64_remainder /= 100;
+        }
+        current_block += max_multiple_of_hundred_blocks;
+    }
+
+    char * highest_part_print = convert::itoa(uint64_t(x), p); // the remaining high part fits in u64 and is printed first
+    for (int i = 0; i < current_block; i++)
+    {
+        outTwoDigits(highest_part_print, two_values[current_block - 1 - i]); // blocks were stored least significant first, so emit them in reverse
+        highest_part_print += 2;
+    }
+
+    return highest_part_print;
+}
+
+/// Writes the decimal representation of `_x` starting at `p` and returns the pointer just past the last character written.
+ALWAYS_INLINE inline char * writeUIntText(UInt256 _x, char * p)
+{
+    /// If possible, treat it as a smaller integer as they are much faster to print
+    if (_x.items[UInt256::_impl::little(3)] == 0 && _x.items[UInt256::_impl::little(2)] == 0)
+        return writeUIntText(UInt128{_x.items[UInt256::_impl::little(0)], _x.items[UInt256::_impl::little(1)]}, p);
+
+    /// If available (x86) we transform from our custom class to _BitInt(256) which has better support in the compiler
+    /// and produces better code
+    using T =
+#if defined(__x86_64__)
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored "-Wbit-int-extension"
+        unsigned _BitInt(256)
+#    pragma clang diagnostic pop
+#else
+        UInt256
+#endif
+        ;
+
+#if defined(__x86_64__)
+    T x = (T(_x.items[UInt256::_impl::little(3)]) << 192) + (T(_x.items[UInt256::_impl::little(2)]) << 128)
+        + (T(_x.items[UInt256::_impl::little(1)]) << 64) + T(_x.items[UInt256::_impl::little(0)]);
+#else
+    T x = _x;
+#endif
+
+    /// Similar to writeUIntText(UInt128), except that here we stop as soon as the value fits in a u128
+    /// and switch to that function
+    uint8_t two_values[39] = {0}; // 78 max characters / 2; zero-filled so short remainders keep their leading "00" pairs
+    int current_pos = 0;
+
+    static const T large_divisor = max_multiple_of_hundred_that_fits_in_64_bits;
+    static const T largest_uint128 = T(std::numeric_limits<uint64_t>::max()) << 64 | T(std::numeric_limits<uint64_t>::max());
+
+    while (x > largest_uint128)
+    {
+        uint64_t u64_remainder = uint64_t(x % large_divisor);
+        x /= large_divisor;
+
+        /// Store the 2-digit pairs of this remainder least-significant first; they are emitted in reverse order below.
+        int pos = current_pos;
+        while (u64_remainder)
+        {
+            two_values[pos] = uint8_t(u64_remainder % 100);
+            pos++;
+            u64_remainder /= 100;
+        }
+        current_pos += max_multiple_of_hundred_blocks;
+    }
+
+    /// What is left of x now fits in 128 bits: rebuild a UInt128 from its two low 64-bit items.
+#if defined(__x86_64__)
+    UInt128 pending{uint64_t(x), uint64_t(x >> 64)};
+#else
+    UInt128 pending{x.items[UInt256::_impl::little(0)], x.items[UInt256::_impl::little(1)]};
+#endif
+
+    /// Print the high part first, then append the stored pairs from most to least significant.
+    char * highest_part_print = writeUIntText(pending, p);
+    for (int i = 0; i < current_pos; i++)
+    {
+        outTwoDigits(highest_part_print, two_values[current_pos - 1 - i]);
+        highest_part_print += 2;
+    }
+
+    return highest_part_print;
+}
+
+/// Stores a '-' at `pos` and returns the pointer to the next character.
+ALWAYS_INLINE inline char * writeLeadingMinus(char * pos)
+{
+    *pos = '-';
+    return pos + 1;
+}
+
+/// Writes the decimal representation of a signed wide integer (Int128 or Int256) starting at `pos`
+/// and returns the pointer just past the last character written.
+template <typename T>
+ALWAYS_INLINE inline char * writeSIntText(T x, char * pos)
+{
+    static_assert(std::is_same_v<T, Int128> || std::is_same_v<T, Int256>);
+
+    using UnsignedT = make_unsigned_t<T>;
+    /// Only the top bit set: the most negative value of T.
+    static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1);
+
+    /// The most negative value is handled separately (before the `x = -x` below) by copying a precomputed literal.
+    if (unlikely(x == min_int))
+    {
+        if constexpr (std::is_same_v<T, Int128>)
+        {
+            const char * res = "-170141183460469231731687303715884105728";
+            memcpy(pos, res, strlen(res));
+            return pos + strlen(res);
+        }
+        else if constexpr (std::is_same_v<T, Int256>)
+        {
+            const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968";
+            memcpy(pos, res, strlen(res));
+            return pos + strlen(res);
+        }
+    }
+
+    /// For any other negative value: emit the sign, then print the magnitude as unsigned.
+    if (x < 0)
+    {
+        x = -x;
+        pos = writeLeadingMinus(pos);
+    }
+    return writeUIntText(UnsignedT(x), pos);
+}
+}
+
+/// Non-template itoa overloads for the 8-bit and the wide (128/256-bit) integer types.
+/// UInt8/Int8 are converted to uint8_t/int8_t before being handed to convert::itoa.
+char * itoa(UInt8 i, char * p)
+{
+    return convert::itoa(uint8_t(i), p);
+}
+
+char * itoa(Int8 i, char * p)
+{
+    return convert::itoa(int8_t(i), p);
+}
+
+/// Wide unsigned types go through writeUIntText, wide signed types through writeSIntText.
+char * itoa(UInt128 i, char * p)
+{
+    return writeUIntText(i, p);
+}
+
+char * itoa(Int128 i, char * p)
+{
+    return writeSIntText(i, p);
+}
+
+char * itoa(UInt256 i, char * p)
+{
+    return writeUIntText(i, p);
+}
+
+char * itoa(Int256 i, char * p)
+{
+    return writeSIntText(i, p);
+}
+
+/// Defines an `itoa(T, char *)` overload that forwards directly to convert::itoa.
+#define DEFAULT_ITOA(T) \
+    char * itoa(T i, char * p) \
+    { \
+        return convert::itoa(i, p); \
+    }
+
+/// Integer types that have no hand-written overload in this file; each one gets a DEFAULT_ITOA definition.
+#define FOR_MISSING_INTEGER_TYPES(M) \
+    M(uint8_t) \
+    M(UInt16) \
+    M(UInt32) \
+    M(UInt64) \
+    M(int8_t) \
+    M(Int16) \
+    M(Int32) \
+    M(Int64)
+
+FOR_MISSING_INTEGER_TYPES(DEFAULT_ITOA)
+
+/// NOTE(review): presumably `long` / `unsigned long` are distinct from the fixed-width aliases on Darwin — confirm.
+#if defined(OS_DARWIN)
+DEFAULT_ITOA(unsigned long)
+DEFAULT_ITOA(long)
+#endif
+
+/// Undefine the helper macros so they do not leak past this point.
+#undef FOR_MISSING_INTEGER_TYPES
+#undef DEFAULT_ITOA
--- a/base/base/itoa.h
+++ b/base/base/itoa.h
@ -1,446 +1,30 @@
 #pragma once

-// Based on https://github.com/amdn/itoa and combined with our optimizations
-//
-//=== itoa.h - Fast integer to ascii conversion                   --*- C++ -*-//
-//
-// The MIT License (MIT)
-// Copyright (c) 2016 Arturo Martin-de-Nicolas
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-//     The above copyright notice and this permission notice shall be included
-//     in all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-//===----------------------------------------------------------------------===//
-
-#include <cstdint>
-#include <cstddef>
-#include <cstring>
-#include <type_traits>
 #include <base/extended_types.h>

+/// X-macro listing every integer type for which this header declares an `itoa(T, char *)` overload.
+#define FOR_INTEGER_TYPES(M) \
+    M(uint8_t) \
+    M(UInt8) \
+    M(UInt16) \
+    M(UInt32) \
+    M(UInt64) \
+    M(UInt128) \
+    M(UInt256) \
+    M(int8_t) \
+    M(Int8) \
+    M(Int16) \
+    M(Int32) \
+    M(Int64) \
+    M(Int128) \
+    M(Int256)

-template <typename T>
-inline int digits10(T x)
-{
-    if (x < 10ULL)
-        return 1;
-    if (x < 100ULL)
-        return 2;
-    if (x < 1000ULL)
-        return 3;
+#define INSTANTIATION(T) char * itoa(T i, char * p);
+FOR_INTEGER_TYPES(INSTANTIATION)

-    if (x < 1000000000000ULL)
-    {
-        if (x < 100000000ULL)
-        {
-            if (x < 1000000ULL)
-            {
-                if (x < 10000ULL)
-                    return 4;
-                else
-                    return 5 + (x >= 100000ULL);
-            }
+#if defined(OS_DARWIN)
+INSTANTIATION(unsigned long)
+INSTANTIATION(long)
+#endif

-            return 7 + (x >= 10000000ULL);
-        }
-
-        if (x < 10000000000ULL)
-            return 9 + (x >= 1000000000ULL);
-
-        return 11 + (x >= 100000000000ULL);
-    }
-
-    return 12 + digits10(x / 1000000000000ULL);
-}
-
-
-namespace impl
-{
-
-template <typename T>
-static constexpr T pow10(size_t x)
-{
-    return x ? 10 * pow10<T>(x - 1) : 1;
-}
-
-// Division by a power of 10 is implemented using a multiplicative inverse.
-// This strength reduction is also done by optimizing compilers, but
-// presently the fastest results are produced by using the values
-// for the multiplication and the shift as given by the algorithm
-// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
-//
-// http://www.agner.org/optimize/optimizing_assembly.pdf
-//
-// "Integer division by a constant (all processors)
-// A floating point number can be divided by a constant by multiplying
-// with the reciprocal. If we want to do the same with integers, we have
-// to scale the reciprocal by 2n and then shift the product to the right
-// by n. There are various algorithms for finding a suitable value of n
-// and compensating for rounding errors. The algorithm described below
-// was invented by Terje Mathisen, Norway, and not published elsewhere."
-
-/// Division by constant is performed by:
-/// 1. Adding 1 if needed;
-/// 2. Multiplying by another constant;
-/// 3. Shifting right by another constant.
-template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
-struct Division
-{
-    static constexpr bool add{add_};
-    static constexpr UInt multiplier{multiplier_};
-    static constexpr unsigned shift{shift_};
-};
-
-/// Select a type with appropriate number of bytes from the list of types.
-/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
-/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
-template <size_t N, typename T, typename... Ts>
-struct SelectType
-{
-    using Result = typename SelectType<N / 2, Ts...>::Result;
-};
-
-template <typename T, typename... Ts>
-struct SelectType<1, T, Ts...>
-{
-    using Result = T;
-};
-
-
-/// Division by 10^N where N is the size of the type.
-template <size_t N>
-using DivisionBy10PowN = typename SelectType
-<
-    N,
-    Division<uint8_t, false, 205U, 11>,                           /// divide by 10
-    Division<uint16_t, true, 41943U, 22>,                         /// divide by 100
-    Division<uint32_t, false, 3518437209U, 45>,                   /// divide by 10000
-    Division<uint64_t, false, 12379400392853802749ULL, 90>        /// divide by 100000000
->::Result;
-
-template <size_t N>
-using UnsignedOfSize = typename SelectType
-<
-    N,
-    uint8_t,
-    uint16_t,
-    uint32_t,
-    uint64_t,
-    __uint128_t
->::Result;
-
-/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
-template <size_t N>
-struct QuotientAndRemainder
-{
-    UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
-    UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
-};
-
-template <size_t N>
-QuotientAndRemainder<N> static inline split(UnsignedOfSize<N> value)
-{
-    constexpr DivisionBy10PowN<N> division;
-
-    UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
-    UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
-
-    return {quotient, remainder};
-}
-
-
-static inline char * outDigit(char * p, uint8_t value)
-{
-    *p = '0' + value;
-    ++p;
-    return p;
-}
-
-// Using a lookup table to convert binary numbers from 0 to 99
-// into ascii characters as described by Andrei Alexandrescu in
-// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
-
-static const char digits[201] = "00010203040506070809"
-                                "10111213141516171819"
-                                "20212223242526272829"
-                                "30313233343536373839"
-                                "40414243444546474849"
-                                "50515253545556575859"
-                                "60616263646566676869"
-                                "70717273747576777879"
-                                "80818283848586878889"
-                                "90919293949596979899";
-
-static inline char * outTwoDigits(char * p, uint8_t value)
-{
-    memcpy(p, &digits[value * 2], 2);
-    p += 2;
-    return p;
-}
-
-
-namespace convert
-{
-    template <typename UInt, size_t N = sizeof(UInt)> static char * head(char * p, UInt u);
-    template <typename UInt, size_t N = sizeof(UInt)> static char * tail(char * p, UInt u);
-
-    //===----------------------------------------------------------===//
-    //     head: find most significant digit, skip leading zeros
-    //===----------------------------------------------------------===//
-
-    // "x" contains quotient and remainder after division by 10^N
-    // quotient is less than 10^N
-    template <size_t N>
-    static inline char * head(char * p, QuotientAndRemainder<N> x)
-    {
-        p = head(p, UnsignedOfSize<N / 2>(x.quotient));
-        p = tail(p, x.remainder);
-        return p;
-    }
-
-    // "u" is less than 10^2*N
-    template <typename UInt, size_t N>
-    static inline char * head(char * p, UInt u)
-    {
-        return u < pow10<UnsignedOfSize<N>>(N)
-            ? head(p, UnsignedOfSize<N / 2>(u))
-            : head<N>(p, split<N>(u));
-    }
-
-    // recursion base case, selected when "u" is one byte
-    template <>
-    inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
-    {
-        return u < 10
-            ? outDigit(p, u)
-            : outTwoDigits(p, u);
-    }
-
-    //===----------------------------------------------------------===//
-    //     tail: produce all digits including leading zeros
-    //===----------------------------------------------------------===//
-
-    // recursive step, "u" is less than 10^2*N
-    template <typename UInt, size_t N>
-    static inline char * tail(char * p, UInt u)
-    {
-        QuotientAndRemainder<N> x = split<N>(u);
-        p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
-        p = tail(p, x.remainder);
-        return p;
-    }
-
-    // recursion base case, selected when "u" is one byte
-    template <>
-    inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
-    {
-        return outTwoDigits(p, u);
-    }
-
-    //===----------------------------------------------------------===//
-    // large values are >= 10^2*N
-    // where x contains quotient and remainder after division by 10^N
-    //===----------------------------------------------------------===//
-
-    template <size_t N>
-    static inline char * large(char * p, QuotientAndRemainder<N> x)
-    {
-        QuotientAndRemainder<N> y = split<N>(x.quotient);
-        p = head(p, UnsignedOfSize<N / 2>(y.quotient));
-        p = tail(p, y.remainder);
-        p = tail(p, x.remainder);
-        return p;
-    }
-
-    //===----------------------------------------------------------===//
-    // handle values of "u" that might be >= 10^2*N
-    // where N is the size of "u" in bytes
-    //===----------------------------------------------------------===//
-
-    template <typename UInt, size_t N = sizeof(UInt)>
-    static inline char * uitoa(char * p, UInt u)
-    {
-        if (u < pow10<UnsignedOfSize<N>>(N))
-            return head(p, UnsignedOfSize<N / 2>(u));
-        QuotientAndRemainder<N> x = split<N>(u);
-
-        return u < pow10<UnsignedOfSize<N>>(2 * N)
-            ? head<N>(p, x)
-            : large<N>(p, x);
-    }
-
-    // selected when "u" is one byte
-    template <>
-    inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
-    {
-        if (u < 10)
-            return outDigit(p, u);
-        else if (u < 100)
-            return outTwoDigits(p, u);
-        else
-        {
-            p = outDigit(p, u / 100);
-            p = outTwoDigits(p, u % 100);
-            return p;
-        }
-    }
-
-    //===----------------------------------------------------------===//
-    //     handle unsigned and signed integral operands
-    //===----------------------------------------------------------===//
-
-    // itoa: handle unsigned integral operands (selected by SFINAE)
-    template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
-    static inline char * itoa(U u, char * p)
-    {
-        return convert::uitoa(p, u);
-    }
-
-    // itoa: handle signed integral operands (selected by SFINAE)
-    template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
-    static inline char * itoa(I i, char * p)
-    {
-        // Need "mask" to be filled with a copy of the sign bit.
-        // If "i" is a negative value, then the result of "operator >>"
-        // is implementation-defined, though usually it is an arithmetic
-        // right shift that replicates the sign bit.
-        // Use a conditional expression to be portable,
-        // a good optimizing compiler generates an arithmetic right shift
-        // and avoids the conditional branch.
-        UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
-        // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
-        // Cannot use std::abs() because the result is undefined
-        // in 2's complement systems for the most-negative value.
-        // Want to avoid conditional branch for performance reasons since
-        // CPU branch prediction will be ineffective when negative values
-        // occur randomly.
-        // Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
-        // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
-        // This yields the absolute value with the desired type without
-        // using a conditional branch and without invoking undefined or
-        // implementation defined behavior:
-        UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
-        // Unconditionally store a minus sign when producing digits
-        // in a forward direction and increment the pointer only if
-        // the value is in fact negative.
-        // This avoids a conditional branch and is safe because we will
-        // always produce at least one digit and it will overwrite the
-        // minus sign when the value is not negative.
-        *p = '-';
-        p += (mask & 1);
-        p = convert::uitoa(p, u);
-        return p;
-    }
-}
-
-
-template <typename T>
-static inline char * writeUIntText(T x, char * p)
-{
-    static_assert(is_unsigned_v<T>);
-
-    int len = digits10(x);
-    auto * pp = p + len;
-    while (x >= 100)
-    {
-        const auto i = x % 100;
-        x /= 100;
-        pp -= 2;
-        outTwoDigits(pp, i);
-    }
-    if (x < 10)
-        *p = '0' + x;
-    else
-        outTwoDigits(p, x);
-    return p + len;
-}
-
-static inline char * writeLeadingMinus(char * pos)
-{
-    *pos = '-';
-    return pos + 1;
-}
-
-template <typename T>
-static inline char * writeSIntText(T x, char * pos)
-{
-    static_assert(std::is_same_v<T, Int128> || std::is_same_v<T, Int256>);
-
-    using UnsignedT = make_unsigned_t<T>;
-    static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1);
-
-    if (unlikely(x == min_int))
-    {
-        if constexpr (std::is_same_v<T, Int128>)
-        {
-            const char * res = "-170141183460469231731687303715884105728";
-            memcpy(pos, res, strlen(res));
-            return pos + strlen(res);
-        }
-        else if constexpr (std::is_same_v<T, Int256>)
-        {
-            const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968";
-            memcpy(pos, res, strlen(res));
-            return pos + strlen(res);
-        }
-    }
-
-    if (x < 0)
-    {
-        x = -x;
-        pos = writeLeadingMinus(pos);
-    }
-    return writeUIntText(UnsignedT(x), pos);
-}
-
-}
-
-template <typename I>
-char * itoa(I i, char * p)
-{
-    return impl::convert::itoa(i, p);
-}
-
-template <>
-inline char * itoa(char8_t i, char * p)
-{
-    return impl::convert::itoa(uint8_t(i), p);
-}
-
-template <>
-inline char * itoa(UInt128 i, char * p)
-{
-    return impl::writeUIntText(i, p);
-}
-
-template <>
-inline char * itoa(Int128 i, char * p)
-{
-    return impl::writeSIntText(i, p);
-}
-
-template <>
-inline char * itoa(UInt256 i, char * p)
-{
-    return impl::writeUIntText(i, p);
-}
-
-template <>
-inline char * itoa(Int256 i, char * p)
-{
-    return impl::writeSIntText(i, p);
-}
+#undef FOR_INTEGER_TYPES
+#undef INSTANTIATION
--- a/base/base/scope_guard.h
+++ b/base/base/scope_guard.h
@ -29,11 +29,13 @@ public:
    requires std::is_convertible_v<G, F>
    constexpr BasicScopeGuard & operator=(BasicScopeGuard<G> && src) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations)
    {
-        if (this != &src)
+        if constexpr (std::is_same_v<G, F>)
        {
-            invoke();
-            function = src.release();
+            if (this == &src)
+                return *this;
        }
+        invoke();
+        function = src.release();
        return *this;
    }

--- a/base/base/sort.h
+++ b/base/base/sort.h
@ -59,8 +59,8 @@ using ComparatorWrapper = Comparator;

 #endif

-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wold-style-cast"
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wold-style-cast"

 #include <miniselect/floyd_rivest_select.h>

@ -115,7 +115,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
    ::partial_sort(first, middle, last, comparator());
 }

-#pragma GCC diagnostic pop
+#pragma clang diagnostic pop

 template <typename RandomIt, typename Compare>
 void sort(RandomIt first, RandomIt last, Compare compare)
--- a/base/base/wide_integer_impl.h
+++ b/base/base/wide_integer_impl.h
@ -13,8 +13,6 @@
 #include <tuple>
 #include <limits>

-#include <boost/math/special_functions/fpclassify.hpp>
-
 // NOLINTBEGIN(*)

 /// Use same extended double for all platforms
@ -22,6 +20,7 @@
 #define CONSTEXPR_FROM_DOUBLE constexpr
 using FromDoubleIntermediateType = long double;
 #else
+#include <boost/math/special_functions/fpclassify.hpp>
 #include <boost/multiprecision/cpp_bin_float.hpp>
 /// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended`
 #define CONSTEXPR_FROM_DOUBLE
@ -309,6 +308,13 @@ struct integer<Bits, Signed>::_impl
        constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max();
        static_assert(std::is_same_v<T, double> || std::is_same_v<T, FromDoubleIntermediateType>);
        /// Implementation specific behaviour on overflow (if we don't check here, stack overflow will triggered in bigint_cast).
+#if (LDBL_MANT_DIG == 64)
+        if (!std::isfinite(t))
+        {
+            self = 0;
+            return;
+        }
+#else
        if constexpr (std::is_same_v<T, double>)
        {
            if (!std::isfinite(t))
@ -325,6 +331,7 @@ struct integer<Bits, Signed>::_impl
                return;
            }
        }
+#endif

        const T alpha = t / static_cast<T>(max_int);

--- a/base/poco/Crypto/src/OpenSSLInitializer.cpp
+++ b/base/poco/Crypto/src/OpenSSLInitializer.cpp
@ -23,6 +23,9 @@
 #include <openssl/conf.h>
 #endif

+#if __has_feature(address_sanitizer)
+#include <sanitizer/lsan_interface.h>
+#endif

 using Poco::RandomInputStream;
 using Poco::Thread;
@ -67,12 +70,18 @@ void OpenSSLInitializer::initialize()
 		SSL_library_init();
 		SSL_load_error_strings();
 		OpenSSL_add_all_algorithms();
-		
+
 		char seed[SEEDSIZE];
 		RandomInputStream rnd;
 		rnd.read(seed, sizeof(seed));
-		RAND_seed(seed, SEEDSIZE);
-		
+        {
+#   if __has_feature(address_sanitizer)
+            /// Leak sanitizer (part of address sanitizer) thinks that a few bytes of memory in OpenSSL are allocated during RAND_seed() but never released.
+            __lsan::ScopedDisabler lsan_disabler;
+#endif
+		    RAND_seed(seed, SEEDSIZE);
+        }
+
 		int nMutexes = CRYPTO_num_locks();
 		_mutexes = new Poco::FastMutex[nMutexes];
 		CRYPTO_set_locking_callback(&OpenSSLInitializer::lock);
@ -80,8 +89,8 @@ void OpenSSLInitializer::initialize()
 // https://sourceforge.net/p/poco/bugs/110/
 //
 // From http://www.openssl.org/docs/crypto/threads.html :
-// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(), 
-//  then a default implementation is used - on Windows and BeOS this uses the system's 
+// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(),
+//  then a default implementation is used - on Windows and BeOS this uses the system's
 //  default thread identifying APIs"
 		CRYPTO_set_id_callback(&OpenSSLInitializer::id);
 		CRYPTO_set_dynlock_create_callback(&OpenSSLInitializer::dynlockCreate);
@ -100,7 +109,7 @@ void OpenSSLInitializer::uninitialize()
 		CRYPTO_set_locking_callback(0);
 		CRYPTO_set_id_callback(0);
 		delete [] _mutexes;
-		
+
 		CONF_modules_free();
 	}
 }
--- a/base/poco/Foundation/include/Poco/FPEnvironment_SUN.h
+++ b/base/poco/Foundation/include/Poco/FPEnvironment_SUN.h
@ -0,0 +1,75 @@
+//
+// FPEnvironment_SUN.h
+//
+// Library: Foundation
+// Package: Core
+// Module:  FPEnvironment
+//
+// Definitions of class FPEnvironmentImpl for Solaris.
+//
+// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
+// and Contributors.
+//
+// SPDX-License-Identifier:	BSL-1.0
+//
+
+
+#ifndef Foundation_FPEnvironment_SUN_INCLUDED
+#define Foundation_FPEnvironment_SUN_INCLUDED
+
+
+#include <ieeefp.h>
+#include "Poco/Foundation.h"
+
+
+namespace Poco
+{
+
+
+/// Solaris implementation of the floating-point environment, built on the <ieeefp.h> interface.
+/// All members are protected or private.
+class FPEnvironmentImpl
+{
+protected:
+    /// Rounding modes, expressed with the <ieeefp.h> FP_R* constants.
+    enum RoundingModeImpl
+    {
+        FP_ROUND_DOWNWARD_IMPL = FP_RM,
+        FP_ROUND_UPWARD_IMPL = FP_RP,
+        FP_ROUND_TONEAREST_IMPL = FP_RN,
+        FP_ROUND_TOWARDZERO_IMPL = FP_RZ
+    };
+    /// Floating-point exception flags, expressed with the <ieeefp.h> FP_X_* constants.
+    enum FlagImpl
+    {
+        FP_DIVIDE_BY_ZERO_IMPL = FP_X_DZ,
+        FP_INEXACT_IMPL = FP_X_IMP,
+        FP_OVERFLOW_IMPL = FP_X_OFL,
+        FP_UNDERFLOW_IMPL = FP_X_UFL,
+        FP_INVALID_IMPL = FP_X_INV
+    };
+    FPEnvironmentImpl();
+    FPEnvironmentImpl(const FPEnvironmentImpl & env);
+    ~FPEnvironmentImpl();
+    FPEnvironmentImpl & operator=(const FPEnvironmentImpl & env);
+    void keepCurrentImpl();
+    static void clearFlagsImpl();
+    static bool isFlagImpl(FlagImpl flag);
+    static void setRoundingModeImpl(RoundingModeImpl mode);
+    static RoundingModeImpl getRoundingModeImpl();
+    static bool isInfiniteImpl(float value);
+    static bool isInfiniteImpl(double value);
+    static bool isInfiniteImpl(long double value);
+    static bool isNaNImpl(float value);
+    static bool isNaNImpl(double value);
+    static bool isNaNImpl(long double value);
+    static float copySignImpl(float target, float source);
+    static double copySignImpl(double target, double source);
+    static long double copySignImpl(long double target, long double source);
+
+private:
+    /// Rounding mode stored with this object.
+    fp_rnd _rnd;
+    /// Exception mask stored with this object.
+    fp_except _exc;
+};
+
+
+} // namespace Poco
+
+
+#endif // Foundation_FPEnvironment_SUN_INCLUDED
--- a/base/poco/Foundation/include/Poco/Message.h
+++ b/base/poco/Foundation/include/Poco/Message.h
@ -66,9 +66,11 @@ public:
    /// The thread and process ids are set.

    Message(
-        const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {});
+        const std::string & source, const std::string & text, Priority prio, const char * file, int line,
+        std::string_view fmt_str = {}, const std::vector<std::string> & fmt_str_args = {});
    Message(
-        std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str);
+        std::string && source, std::string && text, Priority prio, const char * file, int line,
+        std::string_view fmt_str, std::vector<std::string> && fmt_str_args);
    /// Creates a Message with the given source, text, priority,
    /// source file path and line.
    ///
@ -161,6 +163,9 @@ public:
    std::string_view getFormatString() const;
    void setFormatString(std::string_view fmt_str);

+    const std::vector<std::string> & getFormatStringArgs() const;
+    void setFormatStringArgs(const std::vector<std::string> & fmt_str_args);
+
    int getSourceLine() const;
    /// Returns the source file line of the statement
    /// generating the log message. May be 0
@ -210,6 +215,7 @@ private:
    int _line;
    StringMap * _pMap;
    std::string_view _fmt_str;
+    std::vector<std::string> _fmt_str_args;
 };


--- a/base/poco/Foundation/src/Environment_UNIX.cpp
+++ b/base/poco/Foundation/src/Environment_UNIX.cpp
@ -281,15 +281,15 @@ void EnvironmentImpl::nodeIdImpl(NodeId& id)
 /// #include <sys/ioctl.h>
 #if defined(sun) || defined(__sun)
 #include <sys/sockio.h>
+#include <netdb.h>
+#include <net/if.h>
+#include <net/if_arp.h>
 #endif
 /// #include <sys/socket.h>
 /// #include <sys/types.h>
 /// #include <netinet/in.h>
 /// #include <net/if.h>
 /// #include <arpa/inet.h>
-/// #include <netdb.h>
-/// #include <net/if.h>
-/// #include <net/if_arp.h>
 /// #include <unistd.h>


--- a/base/poco/Foundation/src/FPEnvironment_SUN.cpp
+++ b/base/poco/Foundation/src/FPEnvironment_SUN.cpp
@ -0,0 +1,139 @@
+//
+// FPEnvironment_SUN.cpp
+//
+// Library: Foundation
+// Package: Core
+// Module:  FPEnvironment
+//
+// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
+// and Contributors.
+//
+// SPDX-License-Identifier:	BSL-1.0
+//
+
+
+#include <math.h>
+#include "Poco/FPEnvironment_SUN.h"
+
+
+namespace Poco {
+
+
+// Captures the current rounding mode and exception mask.
+FPEnvironmentImpl::FPEnvironmentImpl()
+{
+	_rnd = fpgetround();
+	_exc = fpgetmask();
+}
+
+
+// Copies the saved rounding mode and exception mask from env.
+FPEnvironmentImpl::FPEnvironmentImpl(const FPEnvironmentImpl& env)
+{
+	_rnd = env._rnd;
+	_exc = env._exc;
+}
+
+
+// Restores the saved rounding mode and exception mask.
+FPEnvironmentImpl::~FPEnvironmentImpl()
+{
+	fpsetround(_rnd);
+	fpsetmask(_exc);
+}
+
+
+// Copies the saved rounding mode and exception mask from env.
+FPEnvironmentImpl& FPEnvironmentImpl::operator = (const FPEnvironmentImpl& env)
+{
+	_rnd = env._rnd;
+	_exc = env._exc;
+	return *this;
+}
+
+
+// Returns true if value classifies as positive or negative infinity.
+bool FPEnvironmentImpl::isInfiniteImpl(float value)
+{
+	int cls = fpclass(value);
+	return cls == FP_PINF || cls == FP_NINF;
+}
+
+
+// Returns true if value classifies as positive or negative infinity.
+bool FPEnvironmentImpl::isInfiniteImpl(double value)
+{
+	int cls = fpclass(value);
+	return cls == FP_PINF || cls == FP_NINF;
+}
+
+
+// Returns true if value classifies as positive or negative infinity.
+// NOTE(review): if fpclass() takes a double, a finite long double beyond the
+// double range may be converted to infinity first and reported as infinite — confirm.
+bool FPEnvironmentImpl::isInfiniteImpl(long double value)
+{
+	int cls = fpclass(value);
+	return cls == FP_PINF || cls == FP_NINF;
+}
+
+
+// Returns true if value is NaN.
+bool FPEnvironmentImpl::isNaNImpl(float value)
+{
+	return isnanf(value) != 0;
+}
+
+
+// Returns true if value is NaN.
+bool FPEnvironmentImpl::isNaNImpl(double value)
+{
+	return isnan(value) != 0;
+}
+
+
+// Returns true if value is NaN (checked after narrowing to double).
+bool FPEnvironmentImpl::isNaNImpl(long double value)
+{
+	return isnan((double) value) != 0;
+}
+
+
+// Returns target with the sign of source.
+float FPEnvironmentImpl::copySignImpl(float target, float source)
+{
+	return (float) copysign(target, source);
+}
+
+
+// Returns target with the sign of source.
+// The result is returned at full double precision; it must not be narrowed to float.
+double FPEnvironmentImpl::copySignImpl(double target, double source)
+{
+	return copysign(target, source);
+}
+
+
+// Returns target with the sign of source.
+// copysignl() also handles zero operands and signed zeros, which a comparison
+// against 0 cannot distinguish.
+long double FPEnvironmentImpl::copySignImpl(long double target, long double source)
+{
+	return copysignl(target, source);
+}
+
+
+// Re-applies the saved rounding mode and exception mask.
+void FPEnvironmentImpl::keepCurrentImpl()
+{
+	fpsetround(_rnd);
+	fpsetmask(_exc);
+}
+
+
+// Clears all sticky exception flags.
+void FPEnvironmentImpl::clearFlagsImpl()
+{
+	fpsetsticky(0);
+}
+
+
+// Returns true if the given sticky exception flag is set.
+bool FPEnvironmentImpl::isFlagImpl(FlagImpl flag)
+{
+	return (fpgetsticky() & flag) != 0;
+}
+
+
+// Sets the current rounding mode.
+void FPEnvironmentImpl::setRoundingModeImpl(RoundingModeImpl mode)
+{
+	fpsetround((fp_rnd) mode);
+}
+
+
+// Returns the current rounding mode.
+FPEnvironmentImpl::RoundingModeImpl FPEnvironmentImpl::getRoundingModeImpl()
+{
+	return (FPEnvironmentImpl::RoundingModeImpl) fpgetround();
+}
+
+
+} // namespace Poco
--- a/base/poco/Foundation/src/Message.cpp
+++ b/base/poco/Foundation/src/Message.cpp
@ -46,7 +46,9 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
 }


-Message::Message(const std::string& source, const std::string& text, Priority prio, const char* file, int line, std::string_view fmt_str):
+Message::Message(
+        const std::string& source, const std::string& text, Priority prio, const char* file, int line,
+        std::string_view fmt_str, const std::vector<std::string>& fmt_str_args):
 	_source(source), 
 	_text(text), 
 	_prio(prio), 
@ -54,13 +56,16 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
 	_file(file),
 	_line(line),
 	_pMap(0),
-    _fmt_str(fmt_str)
+	_fmt_str(fmt_str),
+	_fmt_str_args(fmt_str_args)
 {
 	init();
 }


-Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str):
+Message::Message(
+        std::string && source, std::string && text, Priority prio, const char * file, int line,
+        std::string_view fmt_str, std::vector<std::string> && fmt_str_args):
    _source(std::move(source)),
    _text(std::move(text)),
    _prio(prio),
@ -68,7 +73,8 @@ Message::Message(std::string && source, std::string && text, Priority prio, cons
    _file(file),
    _line(line),
    _pMap(0),
-    _fmt_str(fmt_str)
+    _fmt_str(fmt_str),
+    _fmt_str_args(std::move(fmt_str_args))
 {
    init();
 }
@ -83,7 +89,8 @@ Message::Message(const Message& msg):
 	_pid(msg._pid),
 	_file(msg._file),
 	_line(msg._line),
-    _fmt_str(msg._fmt_str)
+	_fmt_str(msg._fmt_str),
+	_fmt_str_args(msg._fmt_str_args)
 {
 	if (msg._pMap)
 		_pMap = new StringMap(*msg._pMap);
@ -102,7 +109,8 @@ Message::Message(const Message& msg, const std::string& text):
 	_pid(msg._pid),
 	_file(msg._file),
 	_line(msg._line),
-    _fmt_str(msg._fmt_str)
+	_fmt_str(msg._fmt_str),
+	_fmt_str_args(msg._fmt_str_args)
 {
 	if (msg._pMap)
 		_pMap = new StringMap(*msg._pMap);
@ -154,6 +162,7 @@ void Message::swap(Message& msg)
 	swap(_line, msg._line);
 	swap(_pMap, msg._pMap);
 	swap(_fmt_str, msg._fmt_str);
+	swap(_fmt_str_args, msg._fmt_str_args);
 }


@ -227,6 +236,17 @@ void Message::setFormatString(std::string_view fmt_str)
 }


+const std::vector<std::string>& Message::getFormatStringArgs() const
+{
+    return _fmt_str_args;
+}
+
+void Message::setFormatStringArgs(const std::vector<std::string>& fmt_str_args)
+{
+    _fmt_str_args = fmt_str_args;
+}
+
+
 bool Message::has(const std::string& param) const
 {
 	return _pMap && (_pMap->find(param) != _pMap->end());
--- a/base/poco/Foundation/src/NamedEvent_UNIX.cpp
+++ b/base/poco/Foundation/src/NamedEvent_UNIX.cpp
@ -31,7 +31,7 @@
 namespace Poco {


-#if (POCO_OS == POCO_OS_LINUX) || (POCO_OS == POCO_OS_ANDROID) || (POCO_OS == POCO_OS_CYGWIN) || (POCO_OS == POCO_OS_FREE_BSD)
+#if (POCO_OS == POCO_OS_LINUX) || (POCO_OS == POCO_OS_ANDROID) || (POCO_OS == POCO_OS_CYGWIN) || (POCO_OS == POCO_OS_FREE_BSD) || (POCO_OS == POCO_OS_SOLARIS)
 	union semun
 	{
 		int                 val;
--- a/base/poco/Foundation/src/NamedMutex_UNIX.cpp
+++ b/base/poco/Foundation/src/NamedMutex_UNIX.cpp
@ -31,7 +31,7 @@
 namespace Poco {


-#if (POCO_OS == POCO_OS_LINUX) || (POCO_OS == POCO_OS_ANDROID) || (POCO_OS == POCO_OS_CYGWIN) || (POCO_OS == POCO_OS_FREE_BSD)
+#if (POCO_OS == POCO_OS_LINUX) || (POCO_OS == POCO_OS_ANDROID) || (POCO_OS == POCO_OS_CYGWIN) || (POCO_OS == POCO_OS_FREE_BSD) || (POCO_OS == POCO_OS_SOLARIS)
 	union semun
 	{
 		int                 val;
--- a/base/poco/Foundation/src/pcre_compile.c
+++ b/base/poco/Foundation/src/pcre_compile.c
@ -4835,7 +4835,7 @@ for (;; ptr++)

    If the class contains characters outside the 0-255 range, a different
    opcode is compiled. It may optionally have a bit map for characters < 256,
-    but those above are are explicitly listed afterwards. A flag byte tells
+    but those above are explicitly listed afterwards. A flag byte tells
    whether the bitmap is present, and whether this is a negated class or not.

    In JavaScript compatibility mode, an isolated ']' causes an error. In
--- a/base/poco/JSON/src/pdjson.c
+++ b/base/poco/JSON/src/pdjson.c
@ -314,13 +314,13 @@ static int read_unicode(json_stream *json)

        if (l < 0xdc00 || l > 0xdfff) {
            json_error(json, "invalid surrogate pair continuation \\u%04lx out "
-                             "of range (dc00-dfff)", l);
+                             "of range (dc00-dfff)", (unsigned long)l);
            return -1;
        }

        cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000);
    } else if (cp >= 0xdc00 && cp <= 0xdfff) {
-            json_error(json, "dangling surrogate \\u%04lx", cp);
+            json_error(json, "dangling surrogate \\u%04lx", (unsigned long)cp);
            return -1;
    }

--- a/base/poco/Net/CMakeLists.txt
+++ b/base/poco/Net/CMakeLists.txt
@ -9,6 +9,10 @@ elseif (OS_DARWIN OR OS_FREEBSD)
    target_compile_definitions (_poco_net PUBLIC POCO_HAVE_FD_POLL)
 endif ()

+if (OS_SUNOS)
+    target_link_libraries (_poco_net PUBLIC socket nsl)
+endif ()
+
 # TODO: remove these warning exclusions
 target_compile_options (_poco_net
    PRIVATE
--- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
@ -213,6 +213,19 @@ namespace Net
        Poco::Timespan getKeepAliveTimeout() const;
        /// Returns the connection timeout for HTTP connections.

+        void setKeepAliveMaxRequests(int max_requests);
+
+        int getKeepAliveMaxRequests() const;
+
+        int getKeepAliveRequest() const;
+
+        bool isKeepAliveExpired(double reliability = 1.0) const;
+        /// Returns true if at least `reliability` * keep-alive timeout has elapsed since the last request, or the keep-alive request limit has been exceeded
+
+        double getKeepAliveReliability() const;
+        /// Returns the fraction of the keep-alive timeout during which the connection is considered safe to reuse.
+        /// This helps to avoid the situation where a client uses a nearly expired connection and receives NoMessageException.
+
        virtual std::ostream & sendRequest(HTTPRequest & request);
        /// Sends the header for the given HTTP request to
        /// the server.
@ -345,6 +358,8 @@ namespace Net

        void assign(HTTPClientSession & session);

+        void setKeepAliveRequest(int request);
+
        HTTPSessionFactory _proxySessionFactory;
        /// Factory to create HTTPClientSession to proxy.
    private:
@ -353,6 +368,8 @@ namespace Net
        Poco::UInt16 _port;
        ProxyConfig _proxyConfig;
        Poco::Timespan _keepAliveTimeout;
+        int _keepAliveCurrentRequest = 0;
+        int _keepAliveMaxRequests = 1000;
        Poco::Timestamp _lastRequest;
        bool _reconnect;
        bool _mustReconnect;
@ -361,6 +378,7 @@ namespace Net
        Poco::SharedPtr<std::ostream> _pRequestStream;
        Poco::SharedPtr<std::istream> _pResponseStream;

+        static const double _defaultKeepAliveReliabilityLevel;
        static ProxyConfig _globalProxyConfig;

        HTTPClientSession(const HTTPClientSession &);
@ -450,9 +468,19 @@ namespace Net
        return _lastRequest;
    }

-    inline void HTTPClientSession::setLastRequest(Poco::Timestamp time)
+    inline double HTTPClientSession::getKeepAliveReliability() const
    {
-        _lastRequest = time;
+        return _defaultKeepAliveReliabilityLevel;
+    }
+
+    inline int HTTPClientSession::getKeepAliveMaxRequests() const
+    {
+        return _keepAliveMaxRequests;
+    }
+
+    inline int HTTPClientSession::getKeepAliveRequest() const
+    {
+        return _keepAliveCurrentRequest;
    }

 }
--- a/base/poco/Net/include/Poco/Net/HTTPMessage.h
+++ b/base/poco/Net/include/Poco/Net/HTTPMessage.h
@ -120,6 +120,10 @@ namespace Net
        /// The value is set to "Keep-Alive" if keepAlive is
        /// true, or to "Close" otherwise.

+        void setKeepAliveTimeout(int timeout, int max_requests);
+        int getKeepAliveTimeout() const;
+        int getKeepAliveMaxRequests() const;
+
        bool getKeepAlive() const;
        /// Returns true if
        ///   * the message has a Connection header field and its value is "Keep-Alive"
--- a/base/poco/Net/include/Poco/Net/HTTPRequestHandlerFactory.h
+++ b/base/poco/Net/include/Poco/Net/HTTPRequestHandlerFactory.h
@ -30,7 +30,6 @@ namespace Net


    class HTTPServerRequest;
-    class HTTPServerResponse;
    class HTTPRequestHandler;


--- a/base/poco/Net/include/Poco/Net/HTTPServerParams.h
+++ b/base/poco/Net/include/Poco/Net/HTTPServerParams.h
@ -44,7 +44,7 @@ namespace Net
        ///   - timeout:              60 seconds
        ///   - keepAlive:            true
        ///   - maxKeepAliveRequests: 0
-        ///   - keepAliveTimeout:     10 seconds
+        ///   - keepAliveTimeout:     15 seconds

        void setServerName(const std::string & serverName);
        /// Sets the name and port (name:port) that the server uses to identify itself.
--- a/base/poco/Net/include/Poco/Net/HTTPServerSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPServerSession.h
@ -56,6 +56,8 @@ namespace Net
        SocketAddress serverAddress();
        /// Returns the server's address.

+        void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout);
+
    private:
        bool _firstRequest;
        Poco::Timespan _keepAliveTimeout;
--- a/base/poco/Net/include/Poco/Net/NameValueCollection.h
+++ b/base/poco/Net/include/Poco/Net/NameValueCollection.h
@ -79,6 +79,11 @@ namespace Net
        /// Returns the value of the first name-value pair with the given name.
        /// If no value with the given name has been found, the defaultValue is returned.

+        const std::vector<std::reference_wrapper<const std::string>> getAll(const std::string & name) const;
+        /// Returns all values of all name-value pairs with the given name.
+        ///
+        /// Returns an empty vector if there are no name-value pairs with the given name.
+
        bool has(const std::string & name) const;
        /// Returns true if there is at least one name-value pair
        /// with the given name.
--- a/base/poco/Net/src/HTTPClientSession.cpp
+++ b/base/poco/Net/src/HTTPClientSession.cpp
@ -37,6 +37,7 @@ namespace Net {


 HTTPClientSession::ProxyConfig HTTPClientSession::_globalProxyConfig;
+const double HTTPClientSession::_defaultKeepAliveReliabilityLevel = 0.9;


 HTTPClientSession::HTTPClientSession():
@ -220,7 +221,41 @@ void HTTPClientSession::setGlobalProxyConfig(const ProxyConfig& config)

 void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout)
 {
-	_keepAliveTimeout = timeout;
+    if (connected())
+    {
+        throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection, "
+                                          "That value is managed privately after connection is established.");
+    }
+    _keepAliveTimeout = timeout;
+}
+
+
+void HTTPClientSession::setKeepAliveMaxRequests(int max_requests)
+{
+    if (connected())
+    {
+        throw Poco::IllegalStateException("cannot change keep alive max requests on initiated connection, "
+                                          "That value is managed privately after connection is established.");
+    }
+    _keepAliveMaxRequests = max_requests;
+}
+
+
+void HTTPClientSession::setKeepAliveRequest(int request)
+{
+    _keepAliveCurrentRequest = request;
+}
+
+
+
+void HTTPClientSession::setLastRequest(Poco::Timestamp time)
+{
+    if (connected())
+    {
+        throw Poco::IllegalStateException("cannot change last request on initiated connection, "
+                                          "That value is managed privately after connection is established.");
+    }
+    _lastRequest = time;
 }


@ -231,6 +266,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
 	clearException();
 	_responseReceived = false;

+    _keepAliveCurrentRequest += 1;
+
 	bool keepAlive = getKeepAlive();
 	if (((connected() && !keepAlive) || mustReconnect()) && !_host.empty())
 	{
@ -241,8 +278,10 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request)
 	{
 		if (!connected())
 			reconnect();
-		if (!keepAlive)
-			request.setKeepAlive(false);
+        if (!request.has(HTTPMessage::CONNECTION))
+            request.setKeepAlive(keepAlive);
+        if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0)
+            request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds(), _keepAliveMaxRequests);
 		if (!request.has(HTTPRequest::HOST) && !_host.empty())
 			request.setHost(_host, _port);
 		if (!_proxyConfig.host.empty() && !bypassProxy())
@ -324,6 +363,17 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response)

 	_mustReconnect = getKeepAlive() && !response.getKeepAlive();

+    if (!_mustReconnect)
+    {
+        /// when server sends its keep alive timeout, client has to follow that value
+        auto timeout = response.getKeepAliveTimeout();
+        if (timeout > 0)
+            _keepAliveTimeout = std::min(_keepAliveTimeout, Poco::Timespan(timeout, 0));
+        auto max_requests = response.getKeepAliveMaxRequests();
+        if (max_requests > 0)
+            _keepAliveMaxRequests = std::min(_keepAliveMaxRequests, max_requests);
+    }
+
 	if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED)
 		_pResponseStream = new HTTPFixedLengthInputStream(*this, 0);
 	else if (response.getChunkedTransferEncoding())
@ -430,15 +480,18 @@ std::string HTTPClientSession::proxyRequestPrefix() const
 	return result;
 }

+bool HTTPClientSession::isKeepAliveExpired(double reliability) const
+{
+    Poco::Timestamp now;
+    return Timespan(Timestamp::TimeDiff(reliability *_keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest
+            || _keepAliveCurrentRequest > _keepAliveMaxRequests;
+}

 bool HTTPClientSession::mustReconnect() const
 {
 	if (!_mustReconnect)
-	{
-		Poco::Timestamp now;
-		return _keepAliveTimeout <= now - _lastRequest;
-	}
-	else return true;
+        return isKeepAliveExpired(_defaultKeepAliveReliabilityLevel);
+    return true;
 }


@ -511,14 +564,21 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session)
    if (buffered())
        throw Poco::LogicException("assign to a session with not empty buffered data");

-    attachSocket(session.detachSocket());
-    setLastRequest(session.getLastRequest());
+    poco_assert(!connected());
+
    setResolvedHost(session.getResolvedHost());
-    setKeepAlive(session.getKeepAlive());
+    setProxyConfig(session.getProxyConfig());

    setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout());
+    setKeepAlive(session.getKeepAlive());
+
+    setLastRequest(session.getLastRequest());
    setKeepAliveTimeout(session.getKeepAliveTimeout());
-    setProxyConfig(session.getProxyConfig());
+
+    _keepAliveMaxRequests = session._keepAliveMaxRequests;
+    _keepAliveCurrentRequest = session._keepAliveCurrentRequest;
+
+    attachSocket(session.detachSocket());

    session.reset();
 }
--- a/base/poco/Net/src/HTTPMessage.cpp
+++ b/base/poco/Net/src/HTTPMessage.cpp
@ -17,6 +17,7 @@
 #include "Poco/NumberFormatter.h"
 #include "Poco/NumberParser.h"
 #include "Poco/String.h"
+#include <format>


 using Poco::NumberFormatter;
@ -179,4 +180,51 @@ bool HTTPMessage::getKeepAlive() const
 }


+void HTTPMessage::setKeepAliveTimeout(int timeout, int max_requests)
+{
+    add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max={}", timeout, max_requests));
+}
+
+
+/// Extracts the integer value of `param_name` (which includes the trailing '=', e.g. "timeout=")
+/// from a comma-separated header value such as "timeout=10, max=100".
+/// Returns -1 when the parameter is absent, has an empty value, or its value does not start with a number.
+int parseFromHeaderValues(const std::string_view header_value, const std::string_view param_name)
+{
+    const auto param_pos = header_value.find(param_name);
+    if (param_pos == std::string_view::npos)
+        return -1;
+
+    /// The value spans from just after the parameter name up to the next ',' (or the end of the header);
+    const auto value_begin = param_pos + param_name.size();
+    const auto value_end = header_value.find(',', value_begin);
+    const auto value = header_value.substr(value_begin, value_end - value_begin); /// substr clamps the length when value_end is npos
+
+    /// std::from_chars takes raw character pointers; string_view iterators are not guaranteed to be pointers.
+    /// An empty value is reported by from_chars as an error and therefore yields -1 below.
+    int result = 0;
+    const auto [ptr, ec] = std::from_chars(value.data(), value.data() + value.size(), result);
+
+    if (ec == std::errc())
+        return result;
+
+    return -1;
+}
+
+
+/// Returns the "timeout=" parameter of the CONNECTION_KEEP_ALIVE header, or -1 when parseFromHeaderValues finds no usable value.
+int HTTPMessage::getKeepAliveTimeout() const
+{
+    static const std::string_view timeout_param = "timeout=";
+    return parseFromHeaderValues(get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY), timeout_param);
+}
+
+
+/// Returns the "max=" parameter of the CONNECTION_KEEP_ALIVE header, or -1 when parseFromHeaderValues finds no usable value.
+int HTTPMessage::getKeepAliveMaxRequests() const
+{
+    static const std::string_view max_param = "max=";
+    return parseFromHeaderValues(get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY), max_param);
+}
+
 } } // namespace Poco::Net
--- a/base/poco/Net/src/HTTPServerConnection.cpp
+++ b/base/poco/Net/src/HTTPServerConnection.cpp
@ -88,7 +88,18 @@ void HTTPServerConnection::run()
 					
 						pHandler->handleRequest(request, response);
 						session.setKeepAlive(_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive());
-					}
+
+                        /// All this fuss is needed to let the session close with a timeout shorter than the default 15s (set in the HTTPServerParams c-tor)
+                        if (_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive())
+                        {
+                            int value = response.getKeepAliveTimeout();
+                            if (value < 0)
+                                value = request.getKeepAliveTimeout();
+                            if (value > 0)
+                                session.setKeepAliveTimeout(Poco::Timespan(value, 0));
+                        }
+
+                    }
 					else sendErrorResponse(session, HTTPResponse::HTTP_NOT_IMPLEMENTED);
 				}
 				catch (Poco::Exception&)
--- a/base/poco/Net/src/HTTPServerSession.cpp
+++ b/base/poco/Net/src/HTTPServerSession.cpp
@ -33,6 +33,12 @@ HTTPServerSession::~HTTPServerSession()
 {
 }

+void HTTPServerSession::setKeepAliveTimeout(Poco::Timespan keepAliveTimeout)
+{
+    _keepAliveTimeout = keepAliveTimeout;
+}
+
+

 bool HTTPServerSession::hasMoreRequests()
 {
--- a/base/poco/Net/src/NameValueCollection.cpp
+++ b/base/poco/Net/src/NameValueCollection.cpp
@ -15,6 +15,7 @@
 #include "Poco/Net/NameValueCollection.h"
 #include "Poco/Exception.h"
 #include <algorithm>
+#include <functional>


 using Poco::NotFoundException;
@ -55,7 +56,7 @@ void NameValueCollection::swap(NameValueCollection& nvc)
 	std::swap(_map, nvc._map);
 }

-	
+
 const std::string& NameValueCollection::operator [] (const std::string& name) const
 {
 	ConstIterator it = _map.find(name);
@ -65,8 +66,8 @@ const std::string& NameValueCollection::operator [] (const std::string& name) co
 		throw NotFoundException(name);
 }

-	
-void NameValueCollection::set(const std::string& name, const std::string& value)	
+
+void NameValueCollection::set(const std::string& name, const std::string& value)
 {
 	Iterator it = _map.find(name);
 	if (it != _map.end())
@ -75,13 +76,13 @@ void NameValueCollection::set(const std::string& name, const std::string& value)
 		_map.insert(HeaderMap::ValueType(name, value));
 }

-	
+
 void NameValueCollection::add(const std::string& name, const std::string& value)
 {
 	_map.insert(HeaderMap::ValueType(name, value));
 }

-	
+
 const std::string& NameValueCollection::get(const std::string& name) const
 {
 	ConstIterator it = _map.find(name);
@ -101,6 +102,15 @@ const std::string& NameValueCollection::get(const std::string& name, const std::
 		return defaultValue;
 }

+const std::vector<std::reference_wrapper<const std::string>> NameValueCollection::getAll(const std::string& name) const
+{
+    /// Match names case-insensitively so the filter agrees with _map.find() (presumably a case-insensitive Poco::ListMap lookup - confirm).
+    std::vector<std::reference_wrapper<const std::string>> values;
+    for (ConstIterator it = _map.find(name); it != _map.end(); ++it)
+        if (Poco::icompare(it->first, name) == 0) values.push_back(it->second);
+    return values;
+}
+

 bool NameValueCollection::has(const std::string& name) const
 {
@ -113,19 +123,19 @@ NameValueCollection::ConstIterator NameValueCollection::find(const std::string&
 	return _map.find(name);
 }

-	
+
 NameValueCollection::ConstIterator NameValueCollection::begin() const
 {
 	return _map.begin();
 }

-	
+
 NameValueCollection::ConstIterator NameValueCollection::end() const
 {
 	return _map.end();
 }

-	
+
 bool NameValueCollection::empty() const
 {
 	return _map.empty();
--- a/base/poco/Net/src/TCPServerDispatcher.cpp
+++ b/base/poco/Net/src/TCPServerDispatcher.cpp
@ -93,7 +93,7 @@ void TCPServerDispatcher::release()

 void TCPServerDispatcher::run()
 {
-	AutoPtr<TCPServerDispatcher> guard(this, true); // ensure object stays alive
+	AutoPtr<TCPServerDispatcher> guard(this); // ensure object stays alive

 	int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds();

@ -149,11 +149,13 @@ void TCPServerDispatcher::enqueue(const StreamSocket& socket)
 		{
 			try
 			{
+                this->duplicate();
 				_threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName);
 				++_currentThreads;
 			}
 			catch (Poco::Exception& exc)
 			{
+                this->release();
 				++_refusedConnections;
 				std::cerr << "Got exception while starting thread for connection. Error code: "
 						  << exc.code() << ", message: '" << exc.displayText() << "'" << std::endl;
--- a/base/poco/NetSSL_OpenSSL/src/Context.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/Context.cpp
@ -592,6 +592,7 @@ void Context::createSSLContext()
 	SSL_CTX_set_default_passwd_cb(_pSSLContext, &SSLManager::privateKeyPassphraseCallback);
 	Utility::clearErrorStack();
 	SSL_CTX_set_options(_pSSLContext, SSL_OP_ALL);
+	SSL_CTX_set_options(_pSSLContext, SSL_OP_IGNORE_UNEXPECTED_EOF);
 }


--- a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp
@ -125,7 +125,7 @@ void SSLManager::initializeClient(PrivateKeyPassphraseHandlerPtr ptrPassphraseHa
 Context::Ptr SSLManager::defaultServerContext()
 {
 	Poco::FastMutex::ScopedLock lock(_mutex);
-	
+
 	if (!_ptrDefaultServerContext)
 		initDefaultContext(true);

@ -150,7 +150,7 @@ Context::Ptr SSLManager::defaultClientContext()
 			_ptrDefaultClientContext->disableProtocols(Context::PROTO_SSLV2 | Context::PROTO_SSLV3);
 		}
 	}
-		
+
 	return _ptrDefaultClientContext;
 }

@ -256,7 +256,7 @@ void SSLManager::initDefaultContext(bool server)
 	Context::Params params;
 	// mandatory options
 	params.privateKeyFile = config.getString(prefix + CFG_PRIV_KEY_FILE, "");
-	params.certificateFile = config.getString(prefix + CFG_CERTIFICATE_FILE, params.privateKeyFile);	
+	params.certificateFile = config.getString(prefix + CFG_CERTIFICATE_FILE, params.privateKeyFile);
 	params.caLocation = config.getString(prefix + CFG_CA_LOCATION, "");

 	if (server && params.certificateFile.empty() && params.privateKeyFile.empty())
@ -283,7 +283,7 @@ void SSLManager::initDefaultContext(bool server)
 	params.ecdhCurve    = config.getString(prefix + CFG_ECDH_CURVE, "");

 	Context::Usage usage;
-	
+
 	if (server)
 	{
 		if (requireTLSv1_2)
@ -308,7 +308,7 @@ void SSLManager::initDefaultContext(bool server)
 			usage = Context::CLIENT_USE;
 		_ptrDefaultClientContext = new Context(usage, params);
 	}
-	
+
 	std::string disabledProtocolsList = config.getString(prefix + CFG_DISABLE_PROTOCOLS, "");
 	Poco::StringTokenizer dpTok(disabledProtocolsList, ";,", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
 	int disabledProtocols = 0;
@ -329,27 +329,28 @@ void SSLManager::initDefaultContext(bool server)
 		_ptrDefaultServerContext->disableProtocols(disabledProtocols);
 	else
 		_ptrDefaultClientContext->disableProtocols(disabledProtocols);
-		
-	bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false);
-	if (server)
-	{
-		std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", ""));
-		_ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext);
-		if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE))
-		{
-			int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE);
-			_ptrDefaultServerContext->setSessionCacheSize(cacheSize);
-		}
-		if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT))
-		{
-			int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT);
-			_ptrDefaultServerContext->setSessionTimeout(timeout);
-		}
-	}
-	else
-	{
-		_ptrDefaultClientContext->enableSessionCache(cacheSessions);
-	}
+
+    /// Temporarily disabled during the transition from boringssl to OpenSSL due to tsan issues.
+	/// bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false);
+	/// if (server)
+	/// {
+	/// 	std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", ""));
+	/// 	_ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext);
+	/// 	if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE))
+	/// 	{
+	/// 		int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE);
+	/// 		_ptrDefaultServerContext->setSessionCacheSize(cacheSize);
+	/// 	}
+	/// 	if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT))
+	/// 	{
+	/// 		int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT);
+	/// 		_ptrDefaultServerContext->setSessionTimeout(timeout);
+	/// 	}
+	/// }
+	/// else
+	/// {
+	/// 	_ptrDefaultClientContext->enableSessionCache(cacheSessions);
+	/// }
 	bool extendedVerification = config.getBool(prefix + CFG_EXTENDED_VERIFICATION, false);
 	if (server)
 		_ptrDefaultServerContext->enableExtendedCertificateVerification(extendedVerification);
@ -378,7 +379,7 @@ void SSLManager::initPassphraseHandler(bool server)
 {
 	if (server && _ptrServerPassphraseHandler) return;
 	if (!server && _ptrClientPassphraseHandler) return;
-	
+
 	std::string prefix = server ? CFG_SERVER_PREFIX : CFG_CLIENT_PREFIX;
 	Poco::Util::AbstractConfiguration& config = appConfig();

@ -399,7 +400,7 @@ void SSLManager::initPassphraseHandler(bool server)
 	}
 	else throw Poco::Util::UnknownOptionException(std::string("No passphrase handler known with the name ") + className);
 }
-	
+

 void SSLManager::initCertificateHandler(bool server)
 {
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@ -2,11 +2,11 @@

 # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
 # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54484)
+SET(VERSION_REVISION 54485)
 SET(VERSION_MAJOR 24)
-SET(VERSION_MINOR 3)
+SET(VERSION_MINOR 4)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 891689a41506d00aa169548f5b4a8774351242c4)
-SET(VERSION_DESCRIBE v24.3.1.1-testing)
-SET(VERSION_STRING 24.3.1.1)
+SET(VERSION_GITHASH 2c5c589a882ceec35439650337b92db3e76f0081)
+SET(VERSION_DESCRIBE v24.4.1.1-testing)
+SET(VERSION_STRING 24.4.1.1)
 # end of autochange
--- a/cmake/fuzzer.cmake
+++ b/cmake/fuzzer.cmake
@ -1,17 +0,0 @@
-# see ./CMakeLists.txt for variable declaration
-if (FUZZER)
-    if (FUZZER STREQUAL "libfuzzer")
-        # NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind another possible fuzzer backends.
-        # NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them
-        # (tests) have entry point for fuzzer and it's not checked.
-        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1")
-        set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1")
-
-        # NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable
-        if (NOT LIB_FUZZING_ENGINE)
-            set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer")
-        endif ()
-    else ()
-        message (FATAL_ERROR "Unknown fuzzer type: ${FUZZER}")
-    endif ()
-endif()
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@ -8,9 +8,6 @@ option (SANITIZE "Enable one of the code sanitizers" "")

 set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")

-# It's possible to pass an ignore list to sanitizers (-fsanitize-ignorelist). Intentionally not doing this because
-# 1. out-of-source suppressions are awkward 2. it seems ignore lists don't work after the Clang v16 upgrade (#49829)
-
 if (SANITIZE)
    if (SANITIZE STREQUAL "address")
        set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
@ -30,7 +27,7 @@ if (SANITIZE)
    elseif (SANITIZE STREQUAL "thread")
        set (TSAN_FLAGS "-fsanitize=thread")
        if (COMPILER_CLANG)
-            set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/tsan_suppressions.txt")
+            set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/tsan_ignorelist.txt")
        endif()

        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
@ -48,7 +45,7 @@ if (SANITIZE)
            set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow")
        endif()
        if (COMPILER_CLANG)
-            set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/ubsan_suppressions.txt")
+            set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/ubsan_ignorelist.txt")
        endif()

        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@ -37,11 +37,7 @@ function(add_contrib cmake_folder)
    message(STATUS "Adding contrib module ${base_folders} (configuring with ${cmake_folder})")
    add_subdirectory (${cmake_folder})
 endfunction()
-if (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)
-    add_contrib (openssl-cmake openssl)
-else ()
-    add_contrib (boringssl-cmake boringssl)
-endif ()
+add_contrib (openssl-cmake openssl)
 add_contrib (miniselect-cmake miniselect)
 add_contrib (pdqsort-cmake pdqsort)
 add_contrib (pocketfft-cmake pocketfft)
@ -223,6 +219,8 @@ endif ()

 add_contrib (xxHash-cmake xxHash)

+add_contrib (expected-cmake expected)
+
 add_contrib (libbcrypt-cmake libbcrypt)

 add_contrib (google-benchmark-cmake google-benchmark)
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@ -1 +1 @@
-Subproject commit 4a12f99dfc9d47c687ff7700b927cc76856225d1
+Subproject commit cb5dc3c906e80f253e9ce9535807caef827cc2e0
--- a/contrib/arrow
+++ b/contrib/arrow
@ -1 +1 @@
-Subproject commit ba5c67934e8274d649befcffab56731632dc5253
+Subproject commit 8f36d71d18587f1f315ec832f424183cb6519cbb
--- a/contrib/avro-cmake/CMakeLists.txt
+++ b/contrib/avro-cmake/CMakeLists.txt
@ -59,12 +59,3 @@ target_link_libraries (_avrocpp PRIVATE boost::headers_only boost::iostreams)
 target_compile_definitions (_avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
 target_include_directories (_avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
 target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy)
-
-# create a symlink to include headers with <avro/...>
-set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
-ADD_CUSTOM_TARGET(avro_symlink_headers ALL
-    COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
-    COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
-)
-add_dependencies(_avrocpp avro_symlink_headers)
-target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")
--- a/contrib/aws
+++ b/contrib/aws
@ -1 +1 @@
-Subproject commit 32870e234cac03e0ac46370c26858b0ffdf14200
+Subproject commit 2e12d7c6dafa81311ee3d73ac6a178550ffa75be
--- a/contrib/aws-c-cal
+++ b/contrib/aws-c-cal
@ -1 +1 @@
-Subproject commit 9453687ff5493ba94eaccf8851200565c4364c77
+Subproject commit 1586846816e6d7d5ff744a2db943107a3a74a082
--- a/contrib/azure
+++ b/contrib/azure
@ -1 +1 @@
-Subproject commit e71395e44f309f97b5a486f5c2c59b82f85dd2d2
+Subproject commit b90fd3c6ef3185f5be3408056567bca0854129b6
--- a/contrib/azure-cmake/CMakeLists.txt
+++ b/contrib/azure-cmake/CMakeLists.txt
@ -10,6 +10,7 @@ set(AZURE_SDK_LIBRARY_DIR "${AZURE_DIR}/sdk")

 file(GLOB AZURE_SDK_SRC
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/*.cpp"
+    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/credentials/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/cryptography/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/*.cpp"
    "${AZURE_SDK_LIBRARY_DIR}/core/azure-core/src/http/curl/*.cpp"
--- a/contrib/boringssl-cmake/CMakeLists.txt
+++ b/contrib/boringssl-cmake/CMakeLists.txt
@ -1,799 +0,0 @@
-# Needed for:
-# - securely connecting to an external server, e.g. clickhouse-client --host ... --secure
-# - lots of thirdparty libraries
-
-# Actually, so many 3rd party libraries + unit tests need SSL that we cannot disable it
-# without breaking the build ...
-option(ENABLE_SSL "Enable ssl" ON) # breaks if OFF
-# TODO: Making SSL dependent on ENABLE_LIBRARIES is desirable but needs fixing dependent libs + tests.
-# option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES})
-
-if(NOT ENABLE_SSL)
-  message(STATUS "Not using openssl")
-  return()
-endif()
-
-# Copyright (c) 2019 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# This file is created by generate_build_files.py and edited accordingly.
-
-cmake_minimum_required(VERSION 3.5)
-
-project(BoringSSL LANGUAGES C CXX)
-
-set(BORINGSSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/boringssl")
-
-if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-  set(CLANG 1)
-endif()
-
-if(CMAKE_COMPILER_IS_GNUCXX OR CLANG)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-common -fno-exceptions -fno-rtti")
-  if(APPLE AND CLANG)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
-  endif()
-
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common -std=c11")
-endif()
-
-# pthread_rwlock_t requires a feature flag.
-if(NOT WIN32)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_XOPEN_SOURCE=700")
-endif()
-
-if(WIN32)
-  add_definitions(-D_HAS_EXCEPTIONS=0)
-  add_definitions(-DWIN32_LEAN_AND_MEAN)
-  add_definitions(-DNOMINMAX)
-  # Allow use of fopen.
-  add_definitions(-D_CRT_SECURE_NO_WARNINGS)
-  # VS 2017 and higher supports STL-only warning suppressions.
-  # A bug in CMake < 3.13.0 may cause the space in this value to
-  # cause issues when building with NASM. In that case, update CMake.
-  add_definitions("-D_STL_EXTRA_DISABLED_WARNINGS=4774 4987")
-endif()
-
-add_definitions(-DBORINGSSL_IMPLEMENTATION)
-
-# CMake's iOS support uses Apple's multiple-architecture toolchain. It takes an
-# architecture list from CMAKE_OSX_ARCHITECTURES, leaves CMAKE_SYSTEM_PROCESSOR
-# alone, and expects all architecture-specific logic to be conditioned within
-# the source files rather than the build. This does not work for our assembly
-# files, so we fix CMAKE_SYSTEM_PROCESSOR and only support single-architecture
-# builds.
-if(NOT OPENSSL_NO_ASM AND CMAKE_OSX_ARCHITECTURES)
-  list(LENGTH CMAKE_OSX_ARCHITECTURES NUM_ARCHES)
-  if(NOT NUM_ARCHES EQUAL 1)
-    message(FATAL_ERROR "Universal binaries not supported.")
-  endif()
-  list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR)
-endif()
-
-if(OPENSSL_NO_ASM)
-  add_definitions(-DOPENSSL_NO_ASM)
-  set(ARCH "generic")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
-  set(ARCH "x86_64")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64")
-  set(ARCH "x86_64")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
-  # cmake reports AMD64 on Windows, but we might be building for 32-bit.
-  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
-    set(ARCH "x86_64")
-  else()
-    set(ARCH "x86")
-  endif()
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86")
-  set(ARCH "x86")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386")
-  # cmake uses `uname -p` to set the system processor, but Solaris
-  # systems support multiple architectures.
-  if((${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") AND CMAKE_SIZEOF_VOID_P EQUAL 8)
-    set(ARCH "x86_64")
-  else()
-    set(ARCH "x86")
-  endif()
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686")
-  set(ARCH "x86")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
-  set(ARCH "aarch64")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64")
-  set(ARCH "aarch64")
-# Apple A12 Bionic chipset which is added in iPhone XS/XS Max/XR uses arm64e architecture.
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64e")
-  set(ARCH "aarch64")
-elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm*")
-  set(ARCH "arm")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips")
-  # Just to avoid the “unknown processor” error.
-  set(ARCH "generic")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc64le")
-  set(ARCH "ppc64le")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
-  set(ARCH "riscv64")
-else()
-  message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
-endif()
-
-if(NOT OPENSSL_NO_ASM)
-  if(UNIX)
-    enable_language(ASM)
-
-    # Clang's integerated assembler does not support debug symbols.
-    if(NOT CMAKE_ASM_COMPILER_ID MATCHES "Clang")
-      set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,-g")
-    endif()
-
-    # CMake does not add -isysroot and -arch flags to assembly.
-    if(APPLE)
-      if(CMAKE_OSX_SYSROOT)
-        set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -isysroot \"${CMAKE_OSX_SYSROOT}\"")
-      endif()
-      foreach(arch ${CMAKE_OSX_ARCHITECTURES})
-        set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -arch ${arch}")
-      endforeach()
-    endif()
-  else()
-    set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -gcv8")
-    enable_language(ASM_NASM)
-  endif()
-endif()
-
-set(
-  CRYPTO_ios_aarch64_SOURCES
-
-  ios-aarch64/crypto/chacha/chacha-armv8.S
-  ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
-  ios-aarch64/crypto/fipsmodule/armv8-mont.S
-  ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
-  ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
-  ios-aarch64/crypto/fipsmodule/sha1-armv8.S
-  ios-aarch64/crypto/fipsmodule/sha256-armv8.S
-  ios-aarch64/crypto/fipsmodule/sha512-armv8.S
-  ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
-  ios-aarch64/crypto/test/trampoline-armv8.S
-)
-
-set(
-  CRYPTO_ios_arm_SOURCES
-
-  ios-arm/crypto/chacha/chacha-armv4.S
-  ios-arm/crypto/fipsmodule/aesv8-armx32.S
-  ios-arm/crypto/fipsmodule/armv4-mont.S
-  ios-arm/crypto/fipsmodule/bsaes-armv7.S
-  ios-arm/crypto/fipsmodule/ghash-armv4.S
-  ios-arm/crypto/fipsmodule/ghashv8-armx32.S
-  ios-arm/crypto/fipsmodule/sha1-armv4-large.S
-  ios-arm/crypto/fipsmodule/sha256-armv4.S
-  ios-arm/crypto/fipsmodule/sha512-armv4.S
-  ios-arm/crypto/fipsmodule/vpaes-armv7.S
-  ios-arm/crypto/test/trampoline-armv4.S
-)
-
-set(
-  CRYPTO_linux_aarch64_SOURCES
-
-  linux-aarch64/crypto/chacha/chacha-armv8.S
-  linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
-  linux-aarch64/crypto/fipsmodule/armv8-mont.S
-  linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
-  linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
-  linux-aarch64/crypto/fipsmodule/sha1-armv8.S
-  linux-aarch64/crypto/fipsmodule/sha256-armv8.S
-  linux-aarch64/crypto/fipsmodule/sha512-armv8.S
-  linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
-  linux-aarch64/crypto/test/trampoline-armv8.S
-)
-
-set(
-  CRYPTO_linux_arm_SOURCES
-
-  linux-arm/crypto/chacha/chacha-armv4.S
-  linux-arm/crypto/fipsmodule/aesv8-armx32.S
-  linux-arm/crypto/fipsmodule/armv4-mont.S
-  linux-arm/crypto/fipsmodule/bsaes-armv7.S
-  linux-arm/crypto/fipsmodule/ghash-armv4.S
-  linux-arm/crypto/fipsmodule/ghashv8-armx32.S
-  linux-arm/crypto/fipsmodule/sha1-armv4-large.S
-  linux-arm/crypto/fipsmodule/sha256-armv4.S
-  linux-arm/crypto/fipsmodule/sha512-armv4.S
-  linux-arm/crypto/fipsmodule/vpaes-armv7.S
-  linux-arm/crypto/test/trampoline-armv4.S
-  "${BORINGSSL_SOURCE_DIR}/crypto/curve25519/asm/x25519-asm-arm.S"
-  "${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm_asm.S"
-)
-
-set(
-  CRYPTO_linux_ppc64le_SOURCES
-
-  linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S
-  linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S
-  linux-ppc64le/crypto/test/trampoline-ppc.S
-)
-
-set(
-  CRYPTO_linux_x86_SOURCES
-
-  linux-x86/crypto/chacha/chacha-x86.S
-  linux-x86/crypto/fipsmodule/aesni-x86.S
-  linux-x86/crypto/fipsmodule/bn-586.S
-  linux-x86/crypto/fipsmodule/co-586.S
-  linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S
-  linux-x86/crypto/fipsmodule/ghash-x86.S
-  linux-x86/crypto/fipsmodule/md5-586.S
-  linux-x86/crypto/fipsmodule/sha1-586.S
-  linux-x86/crypto/fipsmodule/sha256-586.S
-  linux-x86/crypto/fipsmodule/sha512-586.S
-  linux-x86/crypto/fipsmodule/vpaes-x86.S
-  linux-x86/crypto/fipsmodule/x86-mont.S
-  linux-x86/crypto/test/trampoline-x86.S
-)
-
-set(
-  CRYPTO_linux_x86_64_SOURCES
-
-  linux-x86_64/crypto/chacha/chacha-x86_64.S
-  linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
-  linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
-  linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
-  linux-x86_64/crypto/fipsmodule/aesni-x86_64.S
-  linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
-  linux-x86_64/crypto/fipsmodule/ghash-x86_64.S
-  linux-x86_64/crypto/fipsmodule/md5-x86_64.S
-  linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
-  linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
-  linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S
-  linux-x86_64/crypto/fipsmodule/rsaz-avx2.S
-  linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
-  linux-x86_64/crypto/fipsmodule/sha256-x86_64.S
-  linux-x86_64/crypto/fipsmodule/sha512-x86_64.S
-  linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S
-  linux-x86_64/crypto/fipsmodule/x86_64-mont.S
-  linux-x86_64/crypto/fipsmodule/x86_64-mont5.S
-  linux-x86_64/crypto/test/trampoline-x86_64.S
-  "${BORINGSSL_SOURCE_DIR}/crypto/hrss/asm/poly_rq_mul.S"
-)
-
-set(
-  CRYPTO_mac_x86_SOURCES
-
-  mac-x86/crypto/chacha/chacha-x86.S
-  mac-x86/crypto/fipsmodule/aesni-x86.S
-  mac-x86/crypto/fipsmodule/bn-586.S
-  mac-x86/crypto/fipsmodule/co-586.S
-  mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S
-  mac-x86/crypto/fipsmodule/ghash-x86.S
-  mac-x86/crypto/fipsmodule/md5-586.S
-  mac-x86/crypto/fipsmodule/sha1-586.S
-  mac-x86/crypto/fipsmodule/sha256-586.S
-  mac-x86/crypto/fipsmodule/sha512-586.S
-  mac-x86/crypto/fipsmodule/vpaes-x86.S
-  mac-x86/crypto/fipsmodule/x86-mont.S
-  mac-x86/crypto/test/trampoline-x86.S
-)
-
-set(
-  CRYPTO_mac_x86_64_SOURCES
-
-  mac-x86_64/crypto/chacha/chacha-x86_64.S
-  mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S
-  mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
-  mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S
-  mac-x86_64/crypto/fipsmodule/aesni-x86_64.S
-  mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S
-  mac-x86_64/crypto/fipsmodule/ghash-x86_64.S
-  mac-x86_64/crypto/fipsmodule/md5-x86_64.S
-  mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S
-  mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S
-  mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S
-  mac-x86_64/crypto/fipsmodule/rsaz-avx2.S
-  mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
-  mac-x86_64/crypto/fipsmodule/sha256-x86_64.S
-  mac-x86_64/crypto/fipsmodule/sha512-x86_64.S
-  mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S
-  mac-x86_64/crypto/fipsmodule/x86_64-mont.S
-  mac-x86_64/crypto/fipsmodule/x86_64-mont5.S
-  mac-x86_64/crypto/test/trampoline-x86_64.S
-)
-
-set(
-  CRYPTO_win_aarch64_SOURCES
-
-  win-aarch64/crypto/chacha/chacha-armv8.S
-  win-aarch64/crypto/fipsmodule/aesv8-armx64.S
-  win-aarch64/crypto/fipsmodule/armv8-mont.S
-  win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
-  win-aarch64/crypto/fipsmodule/ghashv8-armx64.S
-  win-aarch64/crypto/fipsmodule/sha1-armv8.S
-  win-aarch64/crypto/fipsmodule/sha256-armv8.S
-  win-aarch64/crypto/fipsmodule/sha512-armv8.S
-  win-aarch64/crypto/fipsmodule/vpaes-armv8.S
-  win-aarch64/crypto/test/trampoline-armv8.S
-)
-
-set(
-  CRYPTO_win_x86_SOURCES
-
-  win-x86/crypto/chacha/chacha-x86.asm
-  win-x86/crypto/fipsmodule/aesni-x86.asm
-  win-x86/crypto/fipsmodule/bn-586.asm
-  win-x86/crypto/fipsmodule/co-586.asm
-  win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm
-  win-x86/crypto/fipsmodule/ghash-x86.asm
-  win-x86/crypto/fipsmodule/md5-586.asm
-  win-x86/crypto/fipsmodule/sha1-586.asm
-  win-x86/crypto/fipsmodule/sha256-586.asm
-  win-x86/crypto/fipsmodule/sha512-586.asm
-  win-x86/crypto/fipsmodule/vpaes-x86.asm
-  win-x86/crypto/fipsmodule/x86-mont.asm
-  win-x86/crypto/test/trampoline-x86.asm
-)
-
-set(
-  CRYPTO_win_x86_64_SOURCES
-
-  win-x86_64/crypto/chacha/chacha-x86_64.asm
-  win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
-  win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
-  win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
-  win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
-  win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
-  win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
-  win-x86_64/crypto/fipsmodule/md5-x86_64.asm
-  win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm
-  win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm
-  win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm
-  win-x86_64/crypto/fipsmodule/rsaz-avx2.asm
-  win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
-  win-x86_64/crypto/fipsmodule/sha256-x86_64.asm
-  win-x86_64/crypto/fipsmodule/sha512-x86_64.asm
-  win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
-  win-x86_64/crypto/fipsmodule/x86_64-mont.asm
-  win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
-  win-x86_64/crypto/test/trampoline-x86_64.asm
-)
-
-if(APPLE AND ARCH STREQUAL "aarch64")
-  set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_aarch64_SOURCES})
-elseif(APPLE AND ARCH STREQUAL "arm")
-  set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_arm_SOURCES})
-elseif(APPLE)
-  set(CRYPTO_ARCH_SOURCES ${CRYPTO_mac_${ARCH}_SOURCES})
-elseif(UNIX)
-  set(CRYPTO_ARCH_SOURCES ${CRYPTO_linux_${ARCH}_SOURCES})
-elseif(WIN32)
-  set(CRYPTO_ARCH_SOURCES ${CRYPTO_win_${ARCH}_SOURCES})
-endif()
-
-add_library(
-  _crypto
-
-  ${CRYPTO_ARCH_SOURCES}
-  err_data.c
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bitstr.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_bool.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_d2i_fp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_dup.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_enum.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_gentm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_i2d_fp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_int.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_mbstr.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utctm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_utf8.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_lib.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn1_par.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/asn_pack.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_enum.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_int.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/f_string.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_dec.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_enc.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_fre.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_new.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_typ.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/tasn_utl.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/asn1/time_support.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/base64/base64.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/bio.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/bio_mem.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/connect.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/fd.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/file.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/hexdump.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/pair.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bytestring/asn1_compat.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bytestring/ber.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbb.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bytestring/cbs.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/bytestring/unicode.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/chacha/chacha.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/cipher_extra.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/derive_key.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesccm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesctrhmac.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_aesgcmsiv.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_chacha20poly1305.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_null.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc2.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_rc4.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/e_tls.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cipher_extra/tls_cbc.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cmac/cmac.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/cpu-ppc64le.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/crypto.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/curve25519/curve25519.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/curve25519/spake25519.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/dh_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/dh_extra/params.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/digest_extra/digest_extra.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/dsa/dsa_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/ec_derive.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ec_extra/hash_to_curve.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ecdh_extra/ecdh_extra.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ecdsa_extra/ecdsa_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/engine/engine.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/err/err.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/digestsign.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/evp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/evp_ctx.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_dsa_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ec_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_ed25519_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_rsa_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/p_x25519_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/pbkdf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/print.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/scrypt.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/evp/sign.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/lhash/lhash.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/mem.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/obj/obj.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/obj/obj_xref.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_all.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_info.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_lib.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_oth.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pk8.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_pkey.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pem/pem_xaux.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pkcs7/pkcs7_x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/p5_pbev2.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pkcs8/pkcs8_x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_arm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/poly1305/poly1305_vec.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/pool/pool.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/deterministic.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/forkunsafe.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/fuchsia.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/passive.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/rand_extra.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rand_extra/windows.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rc4/rc4.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/refcount_c11.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/refcount_lock.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_asn1.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/rsa_extra/rsa_print.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/siphash/siphash.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/stack/stack.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/thread.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/thread_none.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/thread_pthread.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/thread_win.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/trust_token/pmbtoken.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/trust_token/trust_token.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_x509a.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_att.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_cmp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_d2.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_def.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_ext.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_lu.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_obj.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_req.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_set.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_trs.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_txt.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_v3.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vfy.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_vpm.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509cset.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509name.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509rset.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509spki.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_algor.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_all.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_attrib.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_crl.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_exten.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_info.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_name.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pkey.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_pubkey.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_req.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_sig.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_spki.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_val.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509/x_x509a.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_cache.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_data.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_lib.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_map.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_node.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/pcy_tree.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akey.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_akeya.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_alt.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bcons.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_bitst.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_conf.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_cpols.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_crld.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_enum.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_extku.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_genn.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ia5.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_info.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_int.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_lib.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ncons.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_ocsp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pci.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcia.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pcons.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_pmaps.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_prn.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_purp.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_skey.c"
-  "${BORINGSSL_SOURCE_DIR}/crypto/x509v3/v3_utl.c"
-)
-
-add_library(
-  _ssl
-
-  "${BORINGSSL_SOURCE_DIR}/ssl/bio_ssl.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/d1_both.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/d1_lib.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/d1_pkt.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/handshake_server.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/s3_both.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/s3_lib.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/s3_pkt.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_aead_ctx.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_asn1.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_buffer.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_cert.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_cipher.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_file.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_key_share.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_lib.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_privkey.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_session.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_stat.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_transcript.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls13_server.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls_method.cc"
-  "${BORINGSSL_SOURCE_DIR}/ssl/tls_record.cc"
-
-  "${BORINGSSL_SOURCE_DIR}/decrepit/ssl/ssl_decrepit.c"
-  "${BORINGSSL_SOURCE_DIR}/decrepit/cfb/cfb.c"
-  "${BORINGSSL_SOURCE_DIR}/decrepit/bio/base64_bio.c"
-)
-
-add_executable(
-  bssl
-
-  "${BORINGSSL_SOURCE_DIR}/tool/args.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/ciphers.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/client.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/const.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/digest.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/fd.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/file.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/rand.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/server.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/sign.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/speed.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/tool.cc"
-  "${BORINGSSL_SOURCE_DIR}/tool/transport_common.cc"
-)
-
-target_link_libraries(_ssl _crypto)
-target_link_libraries(bssl _ssl)
-
-if(NOT WIN32 AND NOT ANDROID)
-  target_link_libraries(_crypto pthread)
-endif()
-
-# NOTE: that ClickHouse does not support WIN32 anyway.
-if(WIN32)
-  target_link_libraries(bssl ws2_32)
-endif()
-
-target_include_directories(_crypto SYSTEM PUBLIC "${BORINGSSL_SOURCE_DIR}/include")
-target_include_directories(_ssl SYSTEM PUBLIC "${BORINGSSL_SOURCE_DIR}/include")
-
-target_compile_options(_crypto PRIVATE -Wno-gnu-anonymous-struct)
-
-add_library(OpenSSL::Crypto ALIAS _crypto)
-add_library(OpenSSL::SSL ALIAS _ssl)
-
-# Helper function used in the populate_openssl_vars function below
-function(from_hex HEX DEC)
-  string(TOUPPER "${HEX}" HEX)
-  set(_res 0)
-  string(LENGTH "${HEX}" _strlen)
-
-  while (_strlen GREATER 0)
-    math(EXPR _res "${_res} * 16")
-    string(SUBSTRING "${HEX}" 0 1 NIBBLE)
-    string(SUBSTRING "${HEX}" 1 -1 HEX)
-    if (NIBBLE STREQUAL "A")
-      math(EXPR _res "${_res} + 10")
-    elseif (NIBBLE STREQUAL "B")
-      math(EXPR _res "${_res} + 11")
-    elseif (NIBBLE STREQUAL "C")
-      math(EXPR _res "${_res} + 12")
-    elseif (NIBBLE STREQUAL "D")
-      math(EXPR _res "${_res} + 13")
-    elseif (NIBBLE STREQUAL "E")
-      math(EXPR _res "${_res} + 14")
-    elseif (NIBBLE STREQUAL "F")
-      math(EXPR _res "${_res} + 15")
-    else ()
-      math(EXPR _res "${_res} + ${NIBBLE}")
-    endif ()
-
-    string(LENGTH "${HEX}" _strlen)
-  endwhile ()
-
-  set(${DEC} ${_res} PARENT_SCOPE)
-endfunction()
-
-# ClickHouse uses BoringSSL which is a fork of OpenSSL.
-# This populates CMAKE var OPENSSL_VERSION from the OPENSSL_VERSION_NUMBER defined
-# in contrib/boringssl/include/openssl/base.h. It also sets the CMAKE var OPENSSL_IS_BORING_SSL
-# if it's defined in the file. Both OPENSSL_VERSION and OPENSSL_IS_BORING_SSL variables will be
-# used to populate flags in the `system.build_options` table for more context on ssl version used.
-# This cmake script is adopted from FindOpenSSL cmake module and slightly modified for this use-case .
-if (EXISTS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h")
-  file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" openssl_version_str
-          REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
-
-  file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" openssl_is_boringssl
-          REGEX "^#[\t ]*define[\t ]+OPENSSL_IS_BORINGSSL.*")
-
-  # Set to true if OPENSSL_IS_BORING_SSL is defined
-  if (openssl_is_boringssl)
-    set(OPENSSL_IS_BORING_SSL 1)
-  endif ()
-
-  # If openssl_version_str is defined extrapolate and set OPENSSL_VERSION
-  if (openssl_version_str)
-    # The version number is encoded as 0xMNNFFPPS: major minor fix patch status
-    # The status gives if this is a developer or prerelease and is ignored here.
-    # Major, minor, and fix directly translate into the version numbers shown in
-    # the string. The patch field translates to the single character suffix that
-    # indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
-    # on.
-
-    string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
-            "\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
-    list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
-    list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
-    from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
-    list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
-    from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
-    list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
-
-    if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
-      from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
-      # 96 is the ASCII code of 'a' minus 1
-      math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
-      unset(_tmp)
-      # Once anyone knows how OpenSSL would call the patch versions beyond 'z'
-      # this should be updated to handle that, too. This has not happened yet
-      # so it is simply ignored here for now.
-      string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
-    endif ()
-
-    set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
-  else ()
-    # Since OpenSSL 3.0.0, the new version format is MAJOR.MINOR.PATCH and
-    # a new OPENSSL_VERSION_STR macro contains exactly that
-    file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" OPENSSL_VERSION_STR
-            REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_STR[\t ]+\"([0-9])+\\.([0-9])+\\.([0-9])+\".*")
-    string(REGEX REPLACE "^.*OPENSSL_VERSION_STR[\t ]+\"([0-9]+\\.[0-9]+\\.[0-9]+)\".*$"
-            "\\1" OPENSSL_VERSION_STR "${OPENSSL_VERSION_STR}")
-
-    set(OPENSSL_VERSION "${OPENSSL_VERSION_STR}")
-
-    # Setting OPENSSL_VERSION_MAJOR OPENSSL_VERSION_MINOR and OPENSSL_VERSION_FIX
-    string(REGEX MATCHALL "([0-9])+" OPENSSL_VERSION_NUMBER "${OPENSSL_VERSION}")
-    list(POP_FRONT OPENSSL_VERSION_NUMBER
-            OPENSSL_VERSION_MAJOR
-            OPENSSL_VERSION_MINOR
-            OPENSSL_VERSION_FIX)
-
-    unset(OPENSSL_VERSION_NUMBER)
-    unset(OPENSSL_VERSION_STR)
-  endif ()
-endif ()
-
-# Set CMAKE variables so that they can be referenced properly from everywhere
-set(OPENSSL_VERSION "${OPENSSL_VERSION}" CACHE INTERNAL "")
-set(OPENSSL_IS_BORING_SSL "${OPENSSL_IS_BORING_SSL}" CACHE INTERNAL 0)
--- a/contrib/boringssl-cmake/crypto_test_data.cc
+++ b/contrib/boringssl-cmake/crypto_test_data.cc
--- a/contrib/boringssl-cmake/err_data.c
+++ b/contrib/boringssl-cmake/err_data.c
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/chacha/chacha-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/chacha/chacha-armv8.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
@ -1,782 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-#if __ARM_MAX_ARCH__>=7
-.text
-
-.section	__TEXT,__const
-.align	5
-Lrcon:
-.long	0x01,0x01,0x01,0x01
-.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
-.long	0x1b,0x1b,0x1b,0x1b
-
-.text
-
-.globl	_aes_hw_set_encrypt_key
-.private_extern	_aes_hw_set_encrypt_key
-
-.align	5
-_aes_hw_set_encrypt_key:
-Lenc_key:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	mov	x3,#-1
-	cmp	x0,#0
-	b.eq	Lenc_key_abort
-	cmp	x2,#0
-	b.eq	Lenc_key_abort
-	mov	x3,#-2
-	cmp	w1,#128
-	b.lt	Lenc_key_abort
-	cmp	w1,#256
-	b.gt	Lenc_key_abort
-	tst	w1,#0x3f
-	b.ne	Lenc_key_abort
-
-	adrp	x3,Lrcon@PAGE
-	add	x3,x3,Lrcon@PAGEOFF
-	cmp	w1,#192
-
-	eor	v0.16b,v0.16b,v0.16b
-	ld1	{v3.16b},[x0],#16
-	mov	w1,#8		// reuse w1
-	ld1	{v1.4s,v2.4s},[x3],#32
-
-	b.lt	Loop128
-	b.eq	L192
-	b	L256
-
-.align	4
-Loop128:
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-	b.ne	Loop128
-
-	ld1	{v1.4s},[x3]
-
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	eor	v3.16b,v3.16b,v6.16b
-	st1	{v3.4s},[x2]
-	add	x2,x2,#0x50
-
-	mov	w12,#10
-	b	Ldone
-
-.align	4
-L192:
-	ld1	{v4.8b},[x0],#8
-	movi	v6.16b,#8			// borrow v6.16b
-	st1	{v3.4s},[x2],#16
-	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
-
-Loop192:
-	tbl	v6.16b,{v4.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v4.8b},[x2],#8
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-
-	dup	v5.4s,v3.s[3]
-	eor	v5.16b,v5.16b,v4.16b
-	eor	v6.16b,v6.16b,v1.16b
-	ext	v4.16b,v0.16b,v4.16b,#12
-	shl	v1.16b,v1.16b,#1
-	eor	v4.16b,v4.16b,v5.16b
-	eor	v3.16b,v3.16b,v6.16b
-	eor	v4.16b,v4.16b,v6.16b
-	st1	{v3.4s},[x2],#16
-	b.ne	Loop192
-
-	mov	w12,#12
-	add	x2,x2,#0x20
-	b	Ldone
-
-.align	4
-L256:
-	ld1	{v4.16b},[x0]
-	mov	w1,#7
-	mov	w12,#14
-	st1	{v3.4s},[x2],#16
-
-Loop256:
-	tbl	v6.16b,{v4.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v4.4s},[x2],#16
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-	st1	{v3.4s},[x2],#16
-	b.eq	Ldone
-
-	dup	v6.4s,v3.s[3]		// just splat
-	ext	v5.16b,v0.16b,v4.16b,#12
-	aese	v6.16b,v0.16b
-
-	eor	v4.16b,v4.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v4.16b,v4.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v4.16b,v4.16b,v5.16b
-
-	eor	v4.16b,v4.16b,v6.16b
-	b	Loop256
-
-Ldone:
-	str	w12,[x2]
-	mov	x3,#0
-
-Lenc_key_abort:
-	mov	x0,x3			// return value
-	ldr	x29,[sp],#16
-	ret
-
-
-.globl	_aes_hw_set_decrypt_key
-.private_extern	_aes_hw_set_decrypt_key
-
-.align	5
-_aes_hw_set_decrypt_key:
-	AARCH64_SIGN_LINK_REGISTER
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	bl	Lenc_key
-
-	cmp	x0,#0
-	b.ne	Ldec_key_abort
-
-	sub	x2,x2,#240		// restore original x2
-	mov	x4,#-16
-	add	x0,x2,x12,lsl#4	// end of key schedule
-
-	ld1	{v0.4s},[x2]
-	ld1	{v1.4s},[x0]
-	st1	{v0.4s},[x0],x4
-	st1	{v1.4s},[x2],#16
-
-Loop_imc:
-	ld1	{v0.4s},[x2]
-	ld1	{v1.4s},[x0]
-	aesimc	v0.16b,v0.16b
-	aesimc	v1.16b,v1.16b
-	st1	{v0.4s},[x0],x4
-	st1	{v1.4s},[x2],#16
-	cmp	x0,x2
-	b.hi	Loop_imc
-
-	ld1	{v0.4s},[x2]
-	aesimc	v0.16b,v0.16b
-	st1	{v0.4s},[x0]
-
-	eor	x0,x0,x0		// return value
-Ldec_key_abort:
-	ldp	x29,x30,[sp],#16
-	AARCH64_VALIDATE_LINK_REGISTER
-	ret
-
-.globl	_aes_hw_encrypt
-.private_extern	_aes_hw_encrypt
-
-.align	5
-_aes_hw_encrypt:
-	AARCH64_VALID_CALL_TARGET
-	ldr	w3,[x2,#240]
-	ld1	{v0.4s},[x2],#16
-	ld1	{v2.16b},[x0]
-	sub	w3,w3,#2
-	ld1	{v1.4s},[x2],#16
-
-Loop_enc:
-	aese	v2.16b,v0.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2],#16
-	subs	w3,w3,#2
-	aese	v2.16b,v1.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v1.4s},[x2],#16
-	b.gt	Loop_enc
-
-	aese	v2.16b,v0.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2]
-	aese	v2.16b,v1.16b
-	eor	v2.16b,v2.16b,v0.16b
-
-	st1	{v2.16b},[x1]
-	ret
-
-.globl	_aes_hw_decrypt
-.private_extern	_aes_hw_decrypt
-
-.align	5
-_aes_hw_decrypt:
-	AARCH64_VALID_CALL_TARGET
-	ldr	w3,[x2,#240]
-	ld1	{v0.4s},[x2],#16
-	ld1	{v2.16b},[x0]
-	sub	w3,w3,#2
-	ld1	{v1.4s},[x2],#16
-
-Loop_dec:
-	aesd	v2.16b,v0.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2],#16
-	subs	w3,w3,#2
-	aesd	v2.16b,v1.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v1.4s},[x2],#16
-	b.gt	Loop_dec
-
-	aesd	v2.16b,v0.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2]
-	aesd	v2.16b,v1.16b
-	eor	v2.16b,v2.16b,v0.16b
-
-	st1	{v2.16b},[x1]
-	ret
-
-.globl	_aes_hw_cbc_encrypt
-.private_extern	_aes_hw_cbc_encrypt
-
-.align	5
-_aes_hw_cbc_encrypt:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	subs	x2,x2,#16
-	mov	x8,#16
-	b.lo	Lcbc_abort
-	csel	x8,xzr,x8,eq
-
-	cmp	w5,#0			// en- or decrypting?
-	ldr	w5,[x3,#240]
-	and	x2,x2,#-16
-	ld1	{v6.16b},[x4]
-	ld1	{v0.16b},[x0],x8
-
-	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
-	sub	w5,w5,#6
-	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
-	sub	w5,w5,#2
-	ld1	{v18.4s,v19.4s},[x7],#32
-	ld1	{v20.4s,v21.4s},[x7],#32
-	ld1	{v22.4s,v23.4s},[x7],#32
-	ld1	{v7.4s},[x7]
-
-	add	x7,x3,#32
-	mov	w6,w5
-	b.eq	Lcbc_dec
-
-	cmp	w5,#2
-	eor	v0.16b,v0.16b,v6.16b
-	eor	v5.16b,v16.16b,v7.16b
-	b.eq	Lcbc_enc128
-
-	ld1	{v2.4s,v3.4s},[x7]
-	add	x7,x3,#16
-	add	x6,x3,#16*4
-	add	x12,x3,#16*5
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	add	x14,x3,#16*6
-	add	x3,x3,#16*7
-	b	Lenter_cbc_enc
-
-.align	4
-Loop_cbc_enc:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	st1	{v6.16b},[x1],#16
-Lenter_cbc_enc:
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v2.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.4s},[x6]
-	cmp	w5,#4
-	aese	v0.16b,v3.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x12]
-	b.eq	Lcbc_enc192
-
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.4s},[x14]
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x3]
-	nop
-
-Lcbc_enc192:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	subs	x2,x2,#16
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	csel	x8,xzr,x8,eq
-	aese	v0.16b,v18.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v19.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.16b},[x0],x8
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	eor	v16.16b,v16.16b,v5.16b
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v23.16b
-	eor	v6.16b,v0.16b,v7.16b
-	b.hs	Loop_cbc_enc
-
-	st1	{v6.16b},[x1],#16
-	b	Lcbc_done
-
-.align	5
-Lcbc_enc128:
-	ld1	{v2.4s,v3.4s},[x7]
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	b	Lenter_cbc_enc128
-Loop_cbc_enc128:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	st1	{v6.16b},[x1],#16
-Lenter_cbc_enc128:
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	subs	x2,x2,#16
-	aese	v0.16b,v2.16b
-	aesmc	v0.16b,v0.16b
-	csel	x8,xzr,x8,eq
-	aese	v0.16b,v3.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v18.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v19.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.16b},[x0],x8
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	eor	v16.16b,v16.16b,v5.16b
-	aese	v0.16b,v23.16b
-	eor	v6.16b,v0.16b,v7.16b
-	b.hs	Loop_cbc_enc128
-
-	st1	{v6.16b},[x1],#16
-	b	Lcbc_done
-.align	5
-Lcbc_dec:
-	ld1	{v18.16b},[x0],#16
-	subs	x2,x2,#32		// bias
-	add	w6,w5,#2
-	orr	v3.16b,v0.16b,v0.16b
-	orr	v1.16b,v0.16b,v0.16b
-	orr	v19.16b,v18.16b,v18.16b
-	b.lo	Lcbc_dec_tail
-
-	orr	v1.16b,v18.16b,v18.16b
-	ld1	{v18.16b},[x0],#16
-	orr	v2.16b,v0.16b,v0.16b
-	orr	v3.16b,v1.16b,v1.16b
-	orr	v19.16b,v18.16b,v18.16b
-
-Loop3x_cbc_dec:
-	aesd	v0.16b,v16.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aesd	v0.16b,v17.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	Loop3x_cbc_dec
-
-	aesd	v0.16b,v16.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	eor	v4.16b,v6.16b,v7.16b
-	subs	x2,x2,#0x30
-	eor	v5.16b,v2.16b,v7.16b
-	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
-	aesd	v0.16b,v17.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	eor	v17.16b,v3.16b,v7.16b
-	add	x0,x0,x6		// x0 is adjusted in such way that
-					// at exit from the loop v1.16b-v18.16b
-					// are loaded with last "words"
-	orr	v6.16b,v19.16b,v19.16b
-	mov	x7,x3
-	aesd	v0.16b,v20.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v20.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v20.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v2.16b},[x0],#16
-	aesd	v0.16b,v21.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v21.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v21.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v3.16b},[x0],#16
-	aesd	v0.16b,v22.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v22.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v22.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v19.16b},[x0],#16
-	aesd	v0.16b,v23.16b
-	aesd	v1.16b,v23.16b
-	aesd	v18.16b,v23.16b
-	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
-	add	w6,w5,#2
-	eor	v4.16b,v4.16b,v0.16b
-	eor	v5.16b,v5.16b,v1.16b
-	eor	v18.16b,v18.16b,v17.16b
-	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
-	st1	{v4.16b},[x1],#16
-	orr	v0.16b,v2.16b,v2.16b
-	st1	{v5.16b},[x1],#16
-	orr	v1.16b,v3.16b,v3.16b
-	st1	{v18.16b},[x1],#16
-	orr	v18.16b,v19.16b,v19.16b
-	b.hs	Loop3x_cbc_dec
-
-	cmn	x2,#0x30
-	b.eq	Lcbc_done
-	nop
-
-Lcbc_dec_tail:
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	Lcbc_dec_tail
-
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	aesd	v1.16b,v20.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v20.16b
-	aesimc	v18.16b,v18.16b
-	cmn	x2,#0x20
-	aesd	v1.16b,v21.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v21.16b
-	aesimc	v18.16b,v18.16b
-	eor	v5.16b,v6.16b,v7.16b
-	aesd	v1.16b,v22.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v22.16b
-	aesimc	v18.16b,v18.16b
-	eor	v17.16b,v3.16b,v7.16b
-	aesd	v1.16b,v23.16b
-	aesd	v18.16b,v23.16b
-	b.eq	Lcbc_dec_one
-	eor	v5.16b,v5.16b,v1.16b
-	eor	v17.16b,v17.16b,v18.16b
-	orr	v6.16b,v19.16b,v19.16b
-	st1	{v5.16b},[x1],#16
-	st1	{v17.16b},[x1],#16
-	b	Lcbc_done
-
-Lcbc_dec_one:
-	eor	v5.16b,v5.16b,v18.16b
-	orr	v6.16b,v19.16b,v19.16b
-	st1	{v5.16b},[x1],#16
-
-Lcbc_done:
-	st1	{v6.16b},[x4]
-Lcbc_abort:
-	ldr	x29,[sp],#16
-	ret
-
-.globl	_aes_hw_ctr32_encrypt_blocks
-.private_extern	_aes_hw_ctr32_encrypt_blocks
-
-.align	5
-_aes_hw_ctr32_encrypt_blocks:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	ldr	w5,[x3,#240]
-
-	ldr	w8, [x4, #12]
-	ld1	{v0.4s},[x4]
-
-	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
-	sub	w5,w5,#4
-	mov	x12,#16
-	cmp	x2,#2
-	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
-	sub	w5,w5,#2
-	ld1	{v20.4s,v21.4s},[x7],#32
-	ld1	{v22.4s,v23.4s},[x7],#32
-	ld1	{v7.4s},[x7]
-	add	x7,x3,#32
-	mov	w6,w5
-	csel	x12,xzr,x12,lo
-#ifndef __ARMEB__
-	rev	w8, w8
-#endif
-	orr	v1.16b,v0.16b,v0.16b
-	add	w10, w8, #1
-	orr	v18.16b,v0.16b,v0.16b
-	add	w8, w8, #2
-	orr	v6.16b,v0.16b,v0.16b
-	rev	w10, w10
-	mov	v1.s[3],w10
-	b.ls	Lctr32_tail
-	rev	w12, w8
-	sub	x2,x2,#3		// bias
-	mov	v18.s[3],w12
-	b	Loop3x_ctr32
-
-.align	4
-Loop3x_ctr32:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	aese	v18.16b,v16.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	aese	v18.16b,v17.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	Loop3x_ctr32
-
-	aese	v0.16b,v16.16b
-	aesmc	v4.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v5.16b,v1.16b
-	ld1	{v2.16b},[x0],#16
-	orr	v0.16b,v6.16b,v6.16b
-	aese	v18.16b,v16.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v3.16b},[x0],#16
-	orr	v1.16b,v6.16b,v6.16b
-	aese	v4.16b,v17.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v17.16b
-	aesmc	v5.16b,v5.16b
-	ld1	{v19.16b},[x0],#16
-	mov	x7,x3
-	aese	v18.16b,v17.16b
-	aesmc	v17.16b,v18.16b
-	orr	v18.16b,v6.16b,v6.16b
-	add	w9,w8,#1
-	aese	v4.16b,v20.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v20.16b
-	aesmc	v5.16b,v5.16b
-	eor	v2.16b,v2.16b,v7.16b
-	add	w10,w8,#2
-	aese	v17.16b,v20.16b
-	aesmc	v17.16b,v17.16b
-	eor	v3.16b,v3.16b,v7.16b
-	add	w8,w8,#3
-	aese	v4.16b,v21.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v21.16b
-	aesmc	v5.16b,v5.16b
-	eor	v19.16b,v19.16b,v7.16b
-	rev	w9,w9
-	aese	v17.16b,v21.16b
-	aesmc	v17.16b,v17.16b
-	mov	v0.s[3], w9
-	rev	w10,w10
-	aese	v4.16b,v22.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v22.16b
-	aesmc	v5.16b,v5.16b
-	mov	v1.s[3], w10
-	rev	w12,w8
-	aese	v17.16b,v22.16b
-	aesmc	v17.16b,v17.16b
-	mov	v18.s[3], w12
-	subs	x2,x2,#3
-	aese	v4.16b,v23.16b
-	aese	v5.16b,v23.16b
-	aese	v17.16b,v23.16b
-
-	eor	v2.16b,v2.16b,v4.16b
-	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
-	st1	{v2.16b},[x1],#16
-	eor	v3.16b,v3.16b,v5.16b
-	mov	w6,w5
-	st1	{v3.16b},[x1],#16
-	eor	v19.16b,v19.16b,v17.16b
-	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
-	st1	{v19.16b},[x1],#16
-	b.hs	Loop3x_ctr32
-
-	adds	x2,x2,#3
-	b.eq	Lctr32_done
-	cmp	x2,#1
-	mov	x12,#16
-	csel	x12,xzr,x12,eq
-
-Lctr32_tail:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	Lctr32_tail
-
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v2.16b},[x0],x12
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v20.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v3.16b},[x0]
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v21.16b
-	aesmc	v1.16b,v1.16b
-	eor	v2.16b,v2.16b,v7.16b
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v22.16b
-	aesmc	v1.16b,v1.16b
-	eor	v3.16b,v3.16b,v7.16b
-	aese	v0.16b,v23.16b
-	aese	v1.16b,v23.16b
-
-	cmp	x2,#1
-	eor	v2.16b,v2.16b,v0.16b
-	eor	v3.16b,v3.16b,v1.16b
-	st1	{v2.16b},[x1],#16
-	b.eq	Lctr32_done
-	st1	{v3.16b},[x1]
-
-Lctr32_done:
-	ldr	x29,[sp],#16
-	ret
-
-#endif
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/armv8-mont.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/armv8-mont.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
@ -1,343 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-.globl	_gcm_init_neon
-.private_extern	_gcm_init_neon
-
-.align	4
-_gcm_init_neon:
-	AARCH64_VALID_CALL_TARGET
-	// This function is adapted from gcm_init_v8. xC2 is t3.
-	ld1	{v17.2d}, [x1]			// load H
-	movi	v19.16b, #0xe1
-	shl	v19.2d, v19.2d, #57		// 0xc2.0
-	ext	v3.16b, v17.16b, v17.16b, #8
-	ushr	v18.2d, v19.2d, #63
-	dup	v17.4s, v17.s[1]
-	ext	v16.16b, v18.16b, v19.16b, #8	// t0=0xc2....01
-	ushr	v18.2d, v3.2d, #63
-	sshr	v17.4s, v17.4s, #31		// broadcast carry bit
-	and	v18.16b, v18.16b, v16.16b
-	shl	v3.2d, v3.2d, #1
-	ext	v18.16b, v18.16b, v18.16b, #8
-	and	v16.16b, v16.16b, v17.16b
-	orr	v3.16b, v3.16b, v18.16b	// H<<<=1
-	eor	v5.16b, v3.16b, v16.16b	// twisted H
-	st1	{v5.2d}, [x0]			// store Htable[0]
-	ret
-
-
-.globl	_gcm_gmult_neon
-.private_extern	_gcm_gmult_neon
-
-.align	4
-_gcm_gmult_neon:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v3.16b}, [x0]		// load Xi
-	ld1	{v5.1d}, [x1], #8		// load twisted H
-	ld1	{v6.1d}, [x1]
-	adrp	x9, Lmasks@PAGE		// load constants
-	add	x9, x9, Lmasks@PAGEOFF
-	ld1	{v24.2d, v25.2d}, [x9]
-	rev64	v3.16b, v3.16b		// byteswap Xi
-	ext	v3.16b, v3.16b, v3.16b, #8
-	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing
-
-	mov	x3, #16
-	b	Lgmult_neon
-
-
-.globl	_gcm_ghash_neon
-.private_extern	_gcm_ghash_neon
-
-.align	4
-_gcm_ghash_neon:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v0.16b}, [x0]		// load Xi
-	ld1	{v5.1d}, [x1], #8		// load twisted H
-	ld1	{v6.1d}, [x1]
-	adrp	x9, Lmasks@PAGE		// load constants
-	add	x9, x9, Lmasks@PAGEOFF
-	ld1	{v24.2d, v25.2d}, [x9]
-	rev64	v0.16b, v0.16b		// byteswap Xi
-	ext	v0.16b, v0.16b, v0.16b, #8
-	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing
-
-Loop_neon:
-	ld1	{v3.16b}, [x2], #16	// load inp
-	rev64	v3.16b, v3.16b		// byteswap inp
-	ext	v3.16b, v3.16b, v3.16b, #8
-	eor	v3.16b, v3.16b, v0.16b	// inp ^= Xi
-
-Lgmult_neon:
-	// Split the input into v3 and v4. (The upper halves are unused,
-	// so it is okay to leave them alone.)
-	ins	v4.d[0], v3.d[1]
-	ext	v16.8b, v5.8b, v5.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v3.8b		// F = A1*B
-	ext	v0.8b, v3.8b, v3.8b, #1		// B1
-	pmull	v0.8h, v5.8b, v0.8b		// E = A*B1
-	ext	v17.8b, v5.8b, v5.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v3.8b		// H = A2*B
-	ext	v19.8b, v3.8b, v3.8b, #2	// B2
-	pmull	v19.8h, v5.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v5.8b, v5.8b, #3	// A3
-	eor	v16.16b, v16.16b, v0.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v3.8b		// J = A3*B
-	ext	v0.8b, v3.8b, v3.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v0.8h, v5.8b, v0.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v3.8b, v3.8b, #4	// B4
-	eor	v18.16b, v18.16b, v0.16b	// N = I + J
-	pmull	v19.8h, v5.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v0.8h, v5.8b, v3.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v0.16b, v0.16b, v16.16b
-	eor	v0.16b, v0.16b, v18.16b
-	eor	v3.8b, v3.8b, v4.8b	// Karatsuba pre-processing
-	ext	v16.8b, v7.8b, v7.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v3.8b		// F = A1*B
-	ext	v1.8b, v3.8b, v3.8b, #1		// B1
-	pmull	v1.8h, v7.8b, v1.8b		// E = A*B1
-	ext	v17.8b, v7.8b, v7.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v3.8b		// H = A2*B
-	ext	v19.8b, v3.8b, v3.8b, #2	// B2
-	pmull	v19.8h, v7.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v7.8b, v7.8b, #3	// A3
-	eor	v16.16b, v16.16b, v1.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v3.8b		// J = A3*B
-	ext	v1.8b, v3.8b, v3.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v1.8h, v7.8b, v1.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v3.8b, v3.8b, #4	// B4
-	eor	v18.16b, v18.16b, v1.16b	// N = I + J
-	pmull	v19.8h, v7.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v1.8h, v7.8b, v3.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v1.16b, v1.16b, v16.16b
-	eor	v1.16b, v1.16b, v18.16b
-	ext	v16.8b, v6.8b, v6.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v4.8b		// F = A1*B
-	ext	v2.8b, v4.8b, v4.8b, #1		// B1
-	pmull	v2.8h, v6.8b, v2.8b		// E = A*B1
-	ext	v17.8b, v6.8b, v6.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v4.8b		// H = A2*B
-	ext	v19.8b, v4.8b, v4.8b, #2	// B2
-	pmull	v19.8h, v6.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v6.8b, v6.8b, #3	// A3
-	eor	v16.16b, v16.16b, v2.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v4.8b		// J = A3*B
-	ext	v2.8b, v4.8b, v4.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v2.8h, v6.8b, v2.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v4.8b, v4.8b, #4	// B4
-	eor	v18.16b, v18.16b, v2.16b	// N = I + J
-	pmull	v19.8h, v6.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v2.8h, v6.8b, v4.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v2.16b, v2.16b, v16.16b
-	eor	v2.16b, v2.16b, v18.16b
-	ext	v16.16b, v0.16b, v2.16b, #8
-	eor	v1.16b, v1.16b, v0.16b	// Karatsuba post-processing
-	eor	v1.16b, v1.16b, v2.16b
-	eor	v1.16b, v1.16b, v16.16b	// Xm overlaps Xh.lo and Xl.hi
-	ins	v0.d[1], v1.d[0]		// Xh|Xl - 256-bit result
-	// This is a no-op due to the ins instruction below.
-	// ins	v2.d[0], v1.d[1]
-
-	// equivalent of reduction_avx from ghash-x86_64.pl
-	shl	v17.2d, v0.2d, #57		// 1st phase
-	shl	v18.2d, v0.2d, #62
-	eor	v18.16b, v18.16b, v17.16b	//
-	shl	v17.2d, v0.2d, #63
-	eor	v18.16b, v18.16b, v17.16b	//
-	// Note Xm contains {Xl.d[1], Xh.d[0]}.
-	eor	v18.16b, v18.16b, v1.16b
-	ins	v0.d[1], v18.d[0]		// Xl.d[1] ^= t2.d[0]
-	ins	v2.d[0], v18.d[1]		// Xh.d[0] ^= t2.d[1]
-
-	ushr	v18.2d, v0.2d, #1		// 2nd phase
-	eor	v2.16b, v2.16b,v0.16b
-	eor	v0.16b, v0.16b,v18.16b	//
-	ushr	v18.2d, v18.2d, #6
-	ushr	v0.2d, v0.2d, #1		//
-	eor	v0.16b, v0.16b, v2.16b	//
-	eor	v0.16b, v0.16b, v18.16b	//
-
-	subs	x3, x3, #16
-	bne	Loop_neon
-
-	rev64	v0.16b, v0.16b		// byteswap Xi and write
-	ext	v0.16b, v0.16b, v0.16b, #8
-	st1	{v0.16b}, [x0]
-
-	ret
-
-
-.section	__TEXT,__const
-.align	4
-Lmasks:
-.quad	0x0000ffffffffffff	// k48
-.quad	0x00000000ffffffff	// k32
-.quad	0x000000000000ffff	// k16
-.quad	0x0000000000000000	// k0
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
@ -1,249 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-.globl	_gcm_init_v8
-.private_extern	_gcm_init_v8
-
-.align	4
-_gcm_init_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v17.2d},[x1]		//load input H
-	movi	v19.16b,#0xe1
-	shl	v19.2d,v19.2d,#57		//0xc2.0
-	ext	v3.16b,v17.16b,v17.16b,#8
-	ushr	v18.2d,v19.2d,#63
-	dup	v17.4s,v17.s[1]
-	ext	v16.16b,v18.16b,v19.16b,#8		//t0=0xc2....01
-	ushr	v18.2d,v3.2d,#63
-	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
-	and	v18.16b,v18.16b,v16.16b
-	shl	v3.2d,v3.2d,#1
-	ext	v18.16b,v18.16b,v18.16b,#8
-	and	v16.16b,v16.16b,v17.16b
-	orr	v3.16b,v3.16b,v18.16b		//H<<<=1
-	eor	v20.16b,v3.16b,v16.16b		//twisted H
-	st1	{v20.2d},[x0],#16		//store Htable[0]
-
-	//calculate H^2
-	ext	v16.16b,v20.16b,v20.16b,#8		//Karatsuba pre-processing
-	pmull	v0.1q,v20.1d,v20.1d
-	eor	v16.16b,v16.16b,v20.16b
-	pmull2	v2.1q,v20.2d,v20.2d
-	pmull	v1.1q,v16.1d,v16.1d
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v22.16b,v0.16b,v18.16b
-
-	ext	v17.16b,v22.16b,v22.16b,#8		//Karatsuba pre-processing
-	eor	v17.16b,v17.16b,v22.16b
-	ext	v21.16b,v16.16b,v17.16b,#8		//pack Karatsuba pre-processed
-	st1	{v21.2d,v22.2d},[x0]		//store Htable[1..2]
-
-	ret
-
-.globl	_gcm_gmult_v8
-.private_extern	_gcm_gmult_v8
-
-.align	4
-_gcm_gmult_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v17.2d},[x0]		//load Xi
-	movi	v19.16b,#0xe1
-	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
-	shl	v19.2d,v19.2d,#57
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ext	v3.16b,v17.16b,v17.16b,#8
-
-	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
-	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
-	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v0.16b,v0.16b,v18.16b
-
-#ifndef __ARMEB__
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v0.16b,v0.16b,v0.16b,#8
-	st1	{v0.2d},[x0]		//write out Xi
-
-	ret
-
-.globl	_gcm_ghash_v8
-.private_extern	_gcm_ghash_v8
-
-.align	4
-_gcm_ghash_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v0.2d},[x0]		//load [rotated] Xi
-						//"[rotated]" means that
-						//loaded value would have
-						//to be rotated in order to
-						//make it appear as in
-						//algorithm specification
-	subs	x3,x3,#32		//see if x3 is 32 or larger
-	mov	x12,#16		//x12 is used as post-
-						//increment for input pointer;
-						//as loop is modulo-scheduled
-						//x12 is zeroed just in time
-						//to preclude overstepping
-						//inp[len], which means that
-						//last block[s] are actually
-						//loaded twice, but last
-						//copy is not processed
-	ld1	{v20.2d,v21.2d},[x1],#32	//load twisted H, ..., H^2
-	movi	v19.16b,#0xe1
-	ld1	{v22.2d},[x1]
-	csel	x12,xzr,x12,eq			//is it time to zero x12?
-	ext	v0.16b,v0.16b,v0.16b,#8		//rotate Xi
-	ld1	{v16.2d},[x2],#16	//load [rotated] I[0]
-	shl	v19.2d,v19.2d,#57		//compose 0xc2.0 constant
-#ifndef __ARMEB__
-	rev64	v16.16b,v16.16b
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v3.16b,v16.16b,v16.16b,#8		//rotate I[0]
-	b.lo	Lodd_tail_v8		//x3 was less than 32
-	ld1	{v17.2d},[x2],x12	//load [rotated] I[1]
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ext	v7.16b,v17.16b,v17.16b,#8
-	eor	v3.16b,v3.16b,v0.16b		//I[i]^=Xi
-	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
-	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
-	pmull2	v6.1q,v20.2d,v7.2d
-	b	Loop_mod2x_v8
-
-.align	4
-Loop_mod2x_v8:
-	ext	v18.16b,v3.16b,v3.16b,#8
-	subs	x3,x3,#32		//is there more data?
-	pmull	v0.1q,v22.1d,v3.1d		//H^2.lo·Xi.lo
-	csel	x12,xzr,x12,lo			//is it time to zero x12?
-
-	pmull	v5.1q,v21.1d,v17.1d
-	eor	v18.16b,v18.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v22.2d,v3.2d		//H^2.hi·Xi.hi
-	eor	v0.16b,v0.16b,v4.16b		//accumulate
-	pmull2	v1.1q,v21.2d,v18.2d		//(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
-	ld1	{v16.2d},[x2],x12	//load [rotated] I[i+2]
-
-	eor	v2.16b,v2.16b,v6.16b
-	csel	x12,xzr,x12,eq			//is it time to zero x12?
-	eor	v1.16b,v1.16b,v5.16b
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	ld1	{v17.2d},[x2],x12	//load [rotated] I[i+3]
-#ifndef __ARMEB__
-	rev64	v16.16b,v16.16b
-#endif
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	ext	v7.16b,v17.16b,v17.16b,#8
-	ext	v3.16b,v16.16b,v16.16b,#8
-	eor	v0.16b,v1.16b,v18.16b
-	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
-	eor	v3.16b,v3.16b,v2.16b		//accumulate v3.16b early
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v3.16b,v3.16b,v18.16b
-	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
-	eor	v3.16b,v3.16b,v0.16b
-	pmull2	v6.1q,v20.2d,v7.2d
-	b.hs	Loop_mod2x_v8		//there was at least 32 more bytes
-
-	eor	v2.16b,v2.16b,v18.16b
-	ext	v3.16b,v16.16b,v16.16b,#8		//re-construct v3.16b
-	adds	x3,x3,#32		//re-construct x3
-	eor	v0.16b,v0.16b,v2.16b		//re-construct v0.16b
-	b.eq	Ldone_v8		//is x3 zero?
-Lodd_tail_v8:
-	ext	v18.16b,v0.16b,v0.16b,#8
-	eor	v3.16b,v3.16b,v0.16b		//inp^=Xi
-	eor	v17.16b,v16.16b,v18.16b		//v17.16b is rotated inp^Xi
-
-	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
-	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
-	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v0.16b,v0.16b,v18.16b
-
-Ldone_v8:
-#ifndef __ARMEB__
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v0.16b,v0.16b,v0.16b,#8
-	st1	{v0.2d},[x0]		//write out Xi
-
-	ret
-
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
--- a/contrib/boringssl-cmake/ios-aarch64/crypto/test/trampoline-armv8.S
+++ b/contrib/boringssl-cmake/ios-aarch64/crypto/test/trampoline-armv8.S
@ -1,758 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
-// with |argv|, then saves the callee-saved registers into |state|. It returns
-// the result of |func|. The |unwind| argument is unused.
-// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
-//                              const uint64_t *argv, size_t argc,
-//                              uint64_t unwind);
-
-.globl	_abi_test_trampoline
-.private_extern	_abi_test_trampoline
-.align	4
-_abi_test_trampoline:
-Labi_test_trampoline_begin:
-	AARCH64_SIGN_LINK_REGISTER
-	// Stack layout (low to high addresses)
-	//   x29,x30 (16 bytes)
-	//    d8-d15 (64 bytes)
-	//   x19-x28 (80 bytes)
-	//    x1 (8 bytes)
-	//   padding (8 bytes)
-	stp	x29, x30, [sp, #-176]!
-	mov	x29, sp
-
-	// Saved callee-saved registers and |state|.
-	stp	d8, d9, [sp, #16]
-	stp	d10, d11, [sp, #32]
-	stp	d12, d13, [sp, #48]
-	stp	d14, d15, [sp, #64]
-	stp	x19, x20, [sp, #80]
-	stp	x21, x22, [sp, #96]
-	stp	x23, x24, [sp, #112]
-	stp	x25, x26, [sp, #128]
-	stp	x27, x28, [sp, #144]
-	str	x1, [sp, #160]
-
-	// Load registers from |state|, with the exception of x29. x29 is the
-	// frame pointer and also callee-saved, but AAPCS64 allows platforms to
-	// mandate that x29 always point to a frame. iOS64 does so, which means
-	// we cannot fill x29 with entropy without violating ABI rules
-	// ourselves. x29 is tested separately below.
-	ldp	d8, d9, [x1], #16
-	ldp	d10, d11, [x1], #16
-	ldp	d12, d13, [x1], #16
-	ldp	d14, d15, [x1], #16
-	ldp	x19, x20, [x1], #16
-	ldp	x21, x22, [x1], #16
-	ldp	x23, x24, [x1], #16
-	ldp	x25, x26, [x1], #16
-	ldp	x27, x28, [x1], #16
-
-	// Move parameters into temporary registers.
-	mov	x9, x0
-	mov	x10, x2
-	mov	x11, x3
-
-	// Load parameters into registers.
-	cbz	x11, Largs_done
-	ldr	x0, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x1, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x2, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x3, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x4, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x5, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x6, [x10], #8
-	subs	x11, x11, #1
-	b.eq	Largs_done
-	ldr	x7, [x10], #8
-
-Largs_done:
-	blr	x9
-
-	// Reload |state| and store registers.
-	ldr	x1, [sp, #160]
-	stp	d8, d9, [x1], #16
-	stp	d10, d11, [x1], #16
-	stp	d12, d13, [x1], #16
-	stp	d14, d15, [x1], #16
-	stp	x19, x20, [x1], #16
-	stp	x21, x22, [x1], #16
-	stp	x23, x24, [x1], #16
-	stp	x25, x26, [x1], #16
-	stp	x27, x28, [x1], #16
-
-	// |func| is required to preserve x29, the frame pointer. We cannot load
-	// random values into x29 (see comment above), so compare it against the
-	// expected value and zero the field of |state| if corrupted.
-	mov	x9, sp
-	cmp	x29, x9
-	b.eq	Lx29_ok
-	str	xzr, [x1]
-
-Lx29_ok:
-	// Restore callee-saved registers.
-	ldp	d8, d9, [sp, #16]
-	ldp	d10, d11, [sp, #32]
-	ldp	d12, d13, [sp, #48]
-	ldp	d14, d15, [sp, #64]
-	ldp	x19, x20, [sp, #80]
-	ldp	x21, x22, [sp, #96]
-	ldp	x23, x24, [sp, #112]
-	ldp	x25, x26, [sp, #128]
-	ldp	x27, x28, [sp, #144]
-
-	ldp	x29, x30, [sp], #176
-	AARCH64_VALIDATE_LINK_REGISTER
-	ret
-
-
-.globl	_abi_test_clobber_x0
-.private_extern	_abi_test_clobber_x0
-.align	4
-_abi_test_clobber_x0:
-	AARCH64_VALID_CALL_TARGET
-	mov	x0, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x1
-.private_extern	_abi_test_clobber_x1
-.align	4
-_abi_test_clobber_x1:
-	AARCH64_VALID_CALL_TARGET
-	mov	x1, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x2
-.private_extern	_abi_test_clobber_x2
-.align	4
-_abi_test_clobber_x2:
-	AARCH64_VALID_CALL_TARGET
-	mov	x2, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x3
-.private_extern	_abi_test_clobber_x3
-.align	4
-_abi_test_clobber_x3:
-	AARCH64_VALID_CALL_TARGET
-	mov	x3, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x4
-.private_extern	_abi_test_clobber_x4
-.align	4
-_abi_test_clobber_x4:
-	AARCH64_VALID_CALL_TARGET
-	mov	x4, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x5
-.private_extern	_abi_test_clobber_x5
-.align	4
-_abi_test_clobber_x5:
-	AARCH64_VALID_CALL_TARGET
-	mov	x5, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x6
-.private_extern	_abi_test_clobber_x6
-.align	4
-_abi_test_clobber_x6:
-	AARCH64_VALID_CALL_TARGET
-	mov	x6, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x7
-.private_extern	_abi_test_clobber_x7
-.align	4
-_abi_test_clobber_x7:
-	AARCH64_VALID_CALL_TARGET
-	mov	x7, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x8
-.private_extern	_abi_test_clobber_x8
-.align	4
-_abi_test_clobber_x8:
-	AARCH64_VALID_CALL_TARGET
-	mov	x8, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x9
-.private_extern	_abi_test_clobber_x9
-.align	4
-_abi_test_clobber_x9:
-	AARCH64_VALID_CALL_TARGET
-	mov	x9, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x10
-.private_extern	_abi_test_clobber_x10
-.align	4
-_abi_test_clobber_x10:
-	AARCH64_VALID_CALL_TARGET
-	mov	x10, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x11
-.private_extern	_abi_test_clobber_x11
-.align	4
-_abi_test_clobber_x11:
-	AARCH64_VALID_CALL_TARGET
-	mov	x11, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x12
-.private_extern	_abi_test_clobber_x12
-.align	4
-_abi_test_clobber_x12:
-	AARCH64_VALID_CALL_TARGET
-	mov	x12, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x13
-.private_extern	_abi_test_clobber_x13
-.align	4
-_abi_test_clobber_x13:
-	AARCH64_VALID_CALL_TARGET
-	mov	x13, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x14
-.private_extern	_abi_test_clobber_x14
-.align	4
-_abi_test_clobber_x14:
-	AARCH64_VALID_CALL_TARGET
-	mov	x14, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x15
-.private_extern	_abi_test_clobber_x15
-.align	4
-_abi_test_clobber_x15:
-	AARCH64_VALID_CALL_TARGET
-	mov	x15, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x16
-.private_extern	_abi_test_clobber_x16
-.align	4
-_abi_test_clobber_x16:
-	AARCH64_VALID_CALL_TARGET
-	mov	x16, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x17
-.private_extern	_abi_test_clobber_x17
-.align	4
-_abi_test_clobber_x17:
-	AARCH64_VALID_CALL_TARGET
-	mov	x17, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x19
-.private_extern	_abi_test_clobber_x19
-.align	4
-_abi_test_clobber_x19:
-	AARCH64_VALID_CALL_TARGET
-	mov	x19, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x20
-.private_extern	_abi_test_clobber_x20
-.align	4
-_abi_test_clobber_x20:
-	AARCH64_VALID_CALL_TARGET
-	mov	x20, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x21
-.private_extern	_abi_test_clobber_x21
-.align	4
-_abi_test_clobber_x21:
-	AARCH64_VALID_CALL_TARGET
-	mov	x21, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x22
-.private_extern	_abi_test_clobber_x22
-.align	4
-_abi_test_clobber_x22:
-	AARCH64_VALID_CALL_TARGET
-	mov	x22, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x23
-.private_extern	_abi_test_clobber_x23
-.align	4
-_abi_test_clobber_x23:
-	AARCH64_VALID_CALL_TARGET
-	mov	x23, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x24
-.private_extern	_abi_test_clobber_x24
-.align	4
-_abi_test_clobber_x24:
-	AARCH64_VALID_CALL_TARGET
-	mov	x24, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x25
-.private_extern	_abi_test_clobber_x25
-.align	4
-_abi_test_clobber_x25:
-	AARCH64_VALID_CALL_TARGET
-	mov	x25, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x26
-.private_extern	_abi_test_clobber_x26
-.align	4
-_abi_test_clobber_x26:
-	AARCH64_VALID_CALL_TARGET
-	mov	x26, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x27
-.private_extern	_abi_test_clobber_x27
-.align	4
-_abi_test_clobber_x27:
-	AARCH64_VALID_CALL_TARGET
-	mov	x27, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x28
-.private_extern	_abi_test_clobber_x28
-.align	4
-_abi_test_clobber_x28:
-	AARCH64_VALID_CALL_TARGET
-	mov	x28, xzr
-	ret
-
-
-.globl	_abi_test_clobber_x29
-.private_extern	_abi_test_clobber_x29
-.align	4
-_abi_test_clobber_x29:
-	AARCH64_VALID_CALL_TARGET
-	mov	x29, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d0
-.private_extern	_abi_test_clobber_d0
-.align	4
-_abi_test_clobber_d0:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d0, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d1
-.private_extern	_abi_test_clobber_d1
-.align	4
-_abi_test_clobber_d1:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d1, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d2
-.private_extern	_abi_test_clobber_d2
-.align	4
-_abi_test_clobber_d2:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d2, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d3
-.private_extern	_abi_test_clobber_d3
-.align	4
-_abi_test_clobber_d3:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d3, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d4
-.private_extern	_abi_test_clobber_d4
-.align	4
-_abi_test_clobber_d4:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d4, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d5
-.private_extern	_abi_test_clobber_d5
-.align	4
-_abi_test_clobber_d5:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d5, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d6
-.private_extern	_abi_test_clobber_d6
-.align	4
-_abi_test_clobber_d6:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d6, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d7
-.private_extern	_abi_test_clobber_d7
-.align	4
-_abi_test_clobber_d7:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d7, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d8
-.private_extern	_abi_test_clobber_d8
-.align	4
-_abi_test_clobber_d8:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d8, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d9
-.private_extern	_abi_test_clobber_d9
-.align	4
-_abi_test_clobber_d9:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d9, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d10
-.private_extern	_abi_test_clobber_d10
-.align	4
-_abi_test_clobber_d10:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d10, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d11
-.private_extern	_abi_test_clobber_d11
-.align	4
-_abi_test_clobber_d11:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d11, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d12
-.private_extern	_abi_test_clobber_d12
-.align	4
-_abi_test_clobber_d12:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d12, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d13
-.private_extern	_abi_test_clobber_d13
-.align	4
-_abi_test_clobber_d13:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d13, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d14
-.private_extern	_abi_test_clobber_d14
-.align	4
-_abi_test_clobber_d14:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d14, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d15
-.private_extern	_abi_test_clobber_d15
-.align	4
-_abi_test_clobber_d15:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d15, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d16
-.private_extern	_abi_test_clobber_d16
-.align	4
-_abi_test_clobber_d16:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d16, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d17
-.private_extern	_abi_test_clobber_d17
-.align	4
-_abi_test_clobber_d17:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d17, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d18
-.private_extern	_abi_test_clobber_d18
-.align	4
-_abi_test_clobber_d18:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d18, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d19
-.private_extern	_abi_test_clobber_d19
-.align	4
-_abi_test_clobber_d19:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d19, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d20
-.private_extern	_abi_test_clobber_d20
-.align	4
-_abi_test_clobber_d20:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d20, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d21
-.private_extern	_abi_test_clobber_d21
-.align	4
-_abi_test_clobber_d21:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d21, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d22
-.private_extern	_abi_test_clobber_d22
-.align	4
-_abi_test_clobber_d22:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d22, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d23
-.private_extern	_abi_test_clobber_d23
-.align	4
-_abi_test_clobber_d23:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d23, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d24
-.private_extern	_abi_test_clobber_d24
-.align	4
-_abi_test_clobber_d24:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d24, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d25
-.private_extern	_abi_test_clobber_d25
-.align	4
-_abi_test_clobber_d25:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d25, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d26
-.private_extern	_abi_test_clobber_d26
-.align	4
-_abi_test_clobber_d26:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d26, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d27
-.private_extern	_abi_test_clobber_d27
-.align	4
-_abi_test_clobber_d27:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d27, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d28
-.private_extern	_abi_test_clobber_d28
-.align	4
-_abi_test_clobber_d28:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d28, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d29
-.private_extern	_abi_test_clobber_d29
-.align	4
-_abi_test_clobber_d29:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d29, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d30
-.private_extern	_abi_test_clobber_d30
-.align	4
-_abi_test_clobber_d30:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d30, xzr
-	ret
-
-
-.globl	_abi_test_clobber_d31
-.private_extern	_abi_test_clobber_d31
-.align	4
-_abi_test_clobber_d31:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d31, xzr
-	ret
-
-
-.globl	_abi_test_clobber_v8_upper
-.private_extern	_abi_test_clobber_v8_upper
-.align	4
-_abi_test_clobber_v8_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v8.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v9_upper
-.private_extern	_abi_test_clobber_v9_upper
-.align	4
-_abi_test_clobber_v9_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v9.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v10_upper
-.private_extern	_abi_test_clobber_v10_upper
-.align	4
-_abi_test_clobber_v10_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v10.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v11_upper
-.private_extern	_abi_test_clobber_v11_upper
-.align	4
-_abi_test_clobber_v11_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v11.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v12_upper
-.private_extern	_abi_test_clobber_v12_upper
-.align	4
-_abi_test_clobber_v12_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v12.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v13_upper
-.private_extern	_abi_test_clobber_v13_upper
-.align	4
-_abi_test_clobber_v13_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v13.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v14_upper
-.private_extern	_abi_test_clobber_v14_upper
-.align	4
-_abi_test_clobber_v14_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v14.d[1], xzr
-	ret
-
-
-.globl	_abi_test_clobber_v15_upper
-.private_extern	_abi_test_clobber_v15_upper
-.align	4
-_abi_test_clobber_v15_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v15.d[1], xzr
-	ret
-
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-arm/crypto/chacha/chacha-armv4.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/chacha/chacha-armv4.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/aesv8-armx32.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/aesv8-armx32.S
@ -1,790 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-#if __ARM_MAX_ARCH__>=7
-.text
-
-
-.code	32
-#undef	__thumb2__
-.align	5
-Lrcon:
-.long	0x01,0x01,0x01,0x01
-.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
-.long	0x1b,0x1b,0x1b,0x1b
-
-.text
-
-.globl	_aes_hw_set_encrypt_key
-.private_extern	_aes_hw_set_encrypt_key
-#ifdef __thumb2__
-.thumb_func	_aes_hw_set_encrypt_key
-#endif
-.align	5
-_aes_hw_set_encrypt_key:
-Lenc_key:
-	mov	r3,#-1
-	cmp	r0,#0
-	beq	Lenc_key_abort
-	cmp	r2,#0
-	beq	Lenc_key_abort
-	mov	r3,#-2
-	cmp	r1,#128
-	blt	Lenc_key_abort
-	cmp	r1,#256
-	bgt	Lenc_key_abort
-	tst	r1,#0x3f
-	bne	Lenc_key_abort
-
-	adr	r3,Lrcon
-	cmp	r1,#192
-
-	veor	q0,q0,q0
-	vld1.8	{q3},[r0]!
-	mov	r1,#8		@ reuse r1
-	vld1.32	{q1,q2},[r3]!
-
-	blt	Loop128
-	beq	L192
-	b	L256
-
-.align	4
-Loop128:
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-	bne	Loop128
-
-	vld1.32	{q1},[r3]
-
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	veor	q3,q3,q10
-	vst1.32	{q3},[r2]
-	add	r2,r2,#0x50
-
-	mov	r12,#10
-	b	Ldone
-
-.align	4
-L192:
-	vld1.8	{d16},[r0]!
-	vmov.i8	q10,#8			@ borrow q10
-	vst1.32	{q3},[r2]!
-	vsub.i8	q2,q2,q10	@ adjust the mask
-
-Loop192:
-	vtbl.8	d20,{q8},d4
-	vtbl.8	d21,{q8},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{d16},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-
-	vdup.32	q9,d7[1]
-	veor	q9,q9,q8
-	veor	q10,q10,q1
-	vext.8	q8,q0,q8,#12
-	vshl.u8	q1,q1,#1
-	veor	q8,q8,q9
-	veor	q3,q3,q10
-	veor	q8,q8,q10
-	vst1.32	{q3},[r2]!
-	bne	Loop192
-
-	mov	r12,#12
-	add	r2,r2,#0x20
-	b	Ldone
-
-.align	4
-L256:
-	vld1.8	{q8},[r0]
-	mov	r1,#7
-	mov	r12,#14
-	vst1.32	{q3},[r2]!
-
-Loop256:
-	vtbl.8	d20,{q8},d4
-	vtbl.8	d21,{q8},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q8},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-	vst1.32	{q3},[r2]!
-	beq	Ldone
-
-	vdup.32	q10,d7[1]
-	vext.8	q9,q0,q8,#12
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q8,q8,q9
-	vext.8	q9,q0,q9,#12
-	veor	q8,q8,q9
-	vext.8	q9,q0,q9,#12
-	veor	q8,q8,q9
-
-	veor	q8,q8,q10
-	b	Loop256
-
-Ldone:
-	str	r12,[r2]
-	mov	r3,#0
-
-Lenc_key_abort:
-	mov	r0,r3			@ return value
-
-	bx	lr
-
-
-.globl	_aes_hw_set_decrypt_key
-.private_extern	_aes_hw_set_decrypt_key
-#ifdef __thumb2__
-.thumb_func	_aes_hw_set_decrypt_key
-#endif
-.align	5
-_aes_hw_set_decrypt_key:
-	stmdb	sp!,{r4,lr}
-	bl	Lenc_key
-
-	cmp	r0,#0
-	bne	Ldec_key_abort
-
-	sub	r2,r2,#240		@ restore original r2
-	mov	r4,#-16
-	add	r0,r2,r12,lsl#4	@ end of key schedule
-
-	vld1.32	{q0},[r2]
-	vld1.32	{q1},[r0]
-	vst1.32	{q0},[r0],r4
-	vst1.32	{q1},[r2]!
-
-Loop_imc:
-	vld1.32	{q0},[r2]
-	vld1.32	{q1},[r0]
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	vst1.32	{q0},[r0],r4
-	vst1.32	{q1},[r2]!
-	cmp	r0,r2
-	bhi	Loop_imc
-
-	vld1.32	{q0},[r2]
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	vst1.32	{q0},[r0]
-
-	eor	r0,r0,r0		@ return value
-Ldec_key_abort:
-	ldmia	sp!,{r4,pc}
-
-.globl	_aes_hw_encrypt
-.private_extern	_aes_hw_encrypt
-#ifdef __thumb2__
-.thumb_func	_aes_hw_encrypt
-#endif
-.align	5
-_aes_hw_encrypt:
-	ldr	r3,[r2,#240]
-	vld1.32	{q0},[r2]!
-	vld1.8	{q2},[r0]
-	sub	r3,r3,#2
-	vld1.32	{q1},[r2]!
-
-Loop_enc:
-.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q0},[r2]!
-	subs	r3,r3,#2
-.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q1},[r2]!
-	bgt	Loop_enc
-
-.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q0},[r2]
-.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
-	veor	q2,q2,q0
-
-	vst1.8	{q2},[r1]
-	bx	lr
-
-.globl	_aes_hw_decrypt
-.private_extern	_aes_hw_decrypt
-#ifdef __thumb2__
-.thumb_func	_aes_hw_decrypt
-#endif
-.align	5
-_aes_hw_decrypt:
-	ldr	r3,[r2,#240]
-	vld1.32	{q0},[r2]!
-	vld1.8	{q2},[r0]
-	sub	r3,r3,#2
-	vld1.32	{q1},[r2]!
-
-Loop_dec:
-.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q0},[r2]!
-	subs	r3,r3,#2
-.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q1},[r2]!
-	bgt	Loop_dec
-
-.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q0},[r2]
-.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
-	veor	q2,q2,q0
-
-	vst1.8	{q2},[r1]
-	bx	lr
-
-.globl	_aes_hw_cbc_encrypt
-.private_extern	_aes_hw_cbc_encrypt
-#ifdef __thumb2__
-.thumb_func	_aes_hw_cbc_encrypt
-#endif
-.align	5
-_aes_hw_cbc_encrypt:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
-	ldmia	ip,{r4,r5}		@ load remaining args
-	subs	r2,r2,#16
-	mov	r8,#16
-	blo	Lcbc_abort
-	moveq	r8,#0
-
-	cmp	r5,#0			@ en- or decrypting?
-	ldr	r5,[r3,#240]
-	and	r2,r2,#-16
-	vld1.8	{q6},[r4]
-	vld1.8	{q0},[r0],r8
-
-	vld1.32	{q8,q9},[r3]		@ load key schedule...
-	sub	r5,r5,#6
-	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
-	sub	r5,r5,#2
-	vld1.32	{q10,q11},[r7]!
-	vld1.32	{q12,q13},[r7]!
-	vld1.32	{q14,q15},[r7]!
-	vld1.32	{q7},[r7]
-
-	add	r7,r3,#32
-	mov	r6,r5
-	beq	Lcbc_dec
-
-	cmp	r5,#2
-	veor	q0,q0,q6
-	veor	q5,q8,q7
-	beq	Lcbc_enc128
-
-	vld1.32	{q2,q3},[r7]
-	add	r7,r3,#16
-	add	r6,r3,#16*4
-	add	r12,r3,#16*5
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	add	r14,r3,#16*6
-	add	r3,r3,#16*7
-	b	Lenter_cbc_enc
-
-.align	4
-Loop_cbc_enc:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vst1.8	{q6},[r1]!
-Lenter_cbc_enc:
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q8},[r6]
-	cmp	r5,#4
-.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r12]
-	beq	Lcbc_enc192
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q8},[r14]
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r3]
-	nop
-
-Lcbc_enc192:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	subs	r2,r2,#16
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	moveq	r8,#0
-.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.8	{q8},[r0],r8
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	veor	q8,q8,q5
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-	veor	q6,q0,q7
-	bhs	Loop_cbc_enc
-
-	vst1.8	{q6},[r1]!
-	b	Lcbc_done
-
-.align	5
-Lcbc_enc128:
-	vld1.32	{q2,q3},[r7]
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	b	Lenter_cbc_enc128
-Loop_cbc_enc128:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vst1.8	{q6},[r1]!
-Lenter_cbc_enc128:
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	subs	r2,r2,#16
-.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	moveq	r8,#0
-.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.8	{q8},[r0],r8
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	veor	q8,q8,q5
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-	veor	q6,q0,q7
-	bhs	Loop_cbc_enc128
-
-	vst1.8	{q6},[r1]!
-	b	Lcbc_done
-.align	5
-Lcbc_dec:
-	vld1.8	{q10},[r0]!
-	subs	r2,r2,#32		@ bias
-	add	r6,r5,#2
-	vorr	q3,q0,q0
-	vorr	q1,q0,q0
-	vorr	q11,q10,q10
-	blo	Lcbc_dec_tail
-
-	vorr	q1,q10,q10
-	vld1.8	{q10},[r0]!
-	vorr	q2,q0,q0
-	vorr	q3,q1,q1
-	vorr	q11,q10,q10
-
-Loop3x_cbc_dec:
-.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	Loop3x_cbc_dec
-
-.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q4,q6,q7
-	subs	r2,r2,#0x30
-	veor	q5,q2,q7
-	movlo	r6,r2			@ r6, r6, is zero at this point
-.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q9,q3,q7
-	add	r0,r0,r6		@ r0 is adjusted in such way that
-					@ at exit from the loop q1-q10
-					@ are loaded with last "words"
-	vorr	q6,q11,q11
-	mov	r7,r3
-.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q2},[r0]!
-.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q3},[r0]!
-.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q11},[r0]!
-.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
-.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
-.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
-	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
-	add	r6,r5,#2
-	veor	q4,q4,q0
-	veor	q5,q5,q1
-	veor	q10,q10,q9
-	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
-	vst1.8	{q4},[r1]!
-	vorr	q0,q2,q2
-	vst1.8	{q5},[r1]!
-	vorr	q1,q3,q3
-	vst1.8	{q10},[r1]!
-	vorr	q10,q11,q11
-	bhs	Loop3x_cbc_dec
-
-	cmn	r2,#0x30
-	beq	Lcbc_done
-	nop
-
-Lcbc_dec_tail:
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	Lcbc_dec_tail
-
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	cmn	r2,#0x20
-.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q5,q6,q7
-.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q9,q3,q7
-.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
-.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
-	beq	Lcbc_dec_one
-	veor	q5,q5,q1
-	veor	q9,q9,q10
-	vorr	q6,q11,q11
-	vst1.8	{q5},[r1]!
-	vst1.8	{q9},[r1]!
-	b	Lcbc_done
-
-Lcbc_dec_one:
-	veor	q5,q5,q10
-	vorr	q6,q11,q11
-	vst1.8	{q5},[r1]!
-
-Lcbc_done:
-	vst1.8	{q6},[r4]
-Lcbc_abort:
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
-
-.globl	_aes_hw_ctr32_encrypt_blocks
-.private_extern	_aes_hw_ctr32_encrypt_blocks
-#ifdef __thumb2__
-.thumb_func	_aes_hw_ctr32_encrypt_blocks
-#endif
-.align	5
-_aes_hw_ctr32_encrypt_blocks:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
-	ldr	r4, [ip]		@ load remaining arg
-	ldr	r5,[r3,#240]
-
-	ldr	r8, [r4, #12]
-	vld1.32	{q0},[r4]
-
-	vld1.32	{q8,q9},[r3]		@ load key schedule...
-	sub	r5,r5,#4
-	mov	r12,#16
-	cmp	r2,#2
-	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
-	sub	r5,r5,#2
-	vld1.32	{q12,q13},[r7]!
-	vld1.32	{q14,q15},[r7]!
-	vld1.32	{q7},[r7]
-	add	r7,r3,#32
-	mov	r6,r5
-	movlo	r12,#0
-#ifndef __ARMEB__
-	rev	r8, r8
-#endif
-	vorr	q1,q0,q0
-	add	r10, r8, #1
-	vorr	q10,q0,q0
-	add	r8, r8, #2
-	vorr	q6,q0,q0
-	rev	r10, r10
-	vmov.32	d3[1],r10
-	bls	Lctr32_tail
-	rev	r12, r8
-	sub	r2,r2,#3		@ bias
-	vmov.32	d21[1],r12
-	b	Loop3x_ctr32
-
-.align	4
-Loop3x_ctr32:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	Loop3x_ctr32
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
-	vld1.8	{q2},[r0]!
-	vorr	q0,q6,q6
-.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.8	{q3},[r0]!
-	vorr	q1,q6,q6
-.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vld1.8	{q11},[r0]!
-	mov	r7,r3
-.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
-.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
-	vorr	q10,q6,q6
-	add	r9,r8,#1
-.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	veor	q2,q2,q7
-	add	r10,r8,#2
-.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	veor	q3,q3,q7
-	add	r8,r8,#3
-.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	veor	q11,q11,q7
-	rev	r9,r9
-.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d1[1], r9
-	rev	r10,r10
-.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vmov.32	d3[1], r10
-	rev	r12,r8
-.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d21[1], r12
-	subs	r2,r2,#3
-.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
-.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
-.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
-
-	veor	q2,q2,q4
-	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
-	vst1.8	{q2},[r1]!
-	veor	q3,q3,q5
-	mov	r6,r5
-	vst1.8	{q3},[r1]!
-	veor	q11,q11,q9
-	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
-	vst1.8	{q11},[r1]!
-	bhs	Loop3x_ctr32
-
-	adds	r2,r2,#3
-	beq	Lctr32_done
-	cmp	r2,#1
-	mov	r12,#16
-	moveq	r12,#0
-
-Lctr32_tail:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.32	{q9},[r7]!
-	bgt	Lctr32_tail
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.8	{q2},[r0],r12
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.8	{q3},[r0]
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	veor	q2,q2,q7
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	veor	q3,q3,q7
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
-
-	cmp	r2,#1
-	veor	q2,q2,q0
-	veor	q3,q3,q1
-	vst1.8	{q2},[r1]!
-	beq	Lctr32_done
-	vst1.8	{q3},[r1]
-
-Lctr32_done:
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
-
-#endif
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/armv4-mont.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/armv4-mont.S
@ -1,982 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
-@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
-
-
-.text
-#if defined(__thumb2__)
-.syntax	unified
-.thumb
-#else
-.code	32
-#endif
-
-#if __ARM_MAX_ARCH__>=7
-.align	5
-LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-Lbn_mul_mont
-#endif
-
-.globl	_bn_mul_mont
-.private_extern	_bn_mul_mont
-#ifdef __thumb2__
-.thumb_func	_bn_mul_mont
-#endif
-
-.align	5
-_bn_mul_mont:
-Lbn_mul_mont:
-	ldr	ip,[sp,#4]		@ load num
-	stmdb	sp!,{r0,r2}		@ sp points at argument block
-#if __ARM_MAX_ARCH__>=7
-	tst	ip,#7
-	bne	Lialu
-	adr	r0,Lbn_mul_mont
-	ldr	r2,LOPENSSL_armcap
-	ldr	r0,[r0,r2]
-#ifdef	__APPLE__
-	ldr	r0,[r0]
-#endif
-	tst	r0,#ARMV7_NEON		@ NEON available?
-	ldmia	sp, {r0,r2}
-	beq	Lialu
-	add	sp,sp,#8
-	b	bn_mul8x_mont_neon
-.align	4
-Lialu:
-#endif
-	cmp	ip,#2
-	mov	r0,ip			@ load num
-#ifdef	__thumb2__
-	ittt	lt
-#endif
-	movlt	r0,#0
-	addlt	sp,sp,#2*4
-	blt	Labrt
-
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ save 10 registers
-
-	mov	r0,r0,lsl#2		@ rescale r0 for byte count
-	sub	sp,sp,r0		@ alloca(4*num)
-	sub	sp,sp,#4		@ +extra dword
-	sub	r0,r0,#4		@ "num=num-1"
-	add	r4,r2,r0		@ &bp[num-1]
-
-	add	r0,sp,r0		@ r0 to point at &tp[num-1]
-	ldr	r8,[r0,#14*4]		@ &n0
-	ldr	r2,[r2]		@ bp[0]
-	ldr	r5,[r1],#4		@ ap[0],ap++
-	ldr	r6,[r3],#4		@ np[0],np++
-	ldr	r8,[r8]		@ *n0
-	str	r4,[r0,#15*4]		@ save &bp[num]
-
-	umull	r10,r11,r5,r2	@ ap[0]*bp[0]
-	str	r8,[r0,#14*4]		@ save n0 value
-	mul	r8,r10,r8		@ "tp[0]"*n0
-	mov	r12,#0
-	umlal	r10,r12,r6,r8	@ np[0]*n0+"t[0]"
-	mov	r4,sp
-
-L1st:
-	ldr	r5,[r1],#4		@ ap[j],ap++
-	mov	r10,r11
-	ldr	r6,[r3],#4		@ np[j],np++
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[j]*bp[0]
-	mov	r14,#0
-	umlal	r12,r14,r6,r8	@ np[j]*n0
-	adds	r12,r12,r10
-	str	r12,[r4],#4		@ tp[j-1]=,tp++
-	adc	r12,r14,#0
-	cmp	r4,r0
-	bne	L1st
-
-	adds	r12,r12,r11
-	ldr	r4,[r0,#13*4]		@ restore bp
-	mov	r14,#0
-	ldr	r8,[r0,#14*4]		@ restore n0
-	adc	r14,r14,#0
-	str	r12,[r0]		@ tp[num-1]=
-	mov	r7,sp
-	str	r14,[r0,#4]		@ tp[num]=
-
-Louter:
-	sub	r7,r0,r7		@ "original" r0-1 value
-	sub	r1,r1,r7		@ "rewind" ap to &ap[1]
-	ldr	r2,[r4,#4]!		@ *(++bp)
-	sub	r3,r3,r7		@ "rewind" np to &np[1]
-	ldr	r5,[r1,#-4]		@ ap[0]
-	ldr	r10,[sp]		@ tp[0]
-	ldr	r6,[r3,#-4]		@ np[0]
-	ldr	r7,[sp,#4]		@ tp[1]
-
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[0]*bp[i]+tp[0]
-	str	r4,[r0,#13*4]		@ save bp
-	mul	r8,r10,r8
-	mov	r12,#0
-	umlal	r10,r12,r6,r8	@ np[0]*n0+"tp[0]"
-	mov	r4,sp
-
-Linner:
-	ldr	r5,[r1],#4		@ ap[j],ap++
-	adds	r10,r11,r7		@ +=tp[j]
-	ldr	r6,[r3],#4		@ np[j],np++
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[j]*bp[i]
-	mov	r14,#0
-	umlal	r12,r14,r6,r8	@ np[j]*n0
-	adc	r11,r11,#0
-	ldr	r7,[r4,#8]		@ tp[j+1]
-	adds	r12,r12,r10
-	str	r12,[r4],#4		@ tp[j-1]=,tp++
-	adc	r12,r14,#0
-	cmp	r4,r0
-	bne	Linner
-
-	adds	r12,r12,r11
-	mov	r14,#0
-	ldr	r4,[r0,#13*4]		@ restore bp
-	adc	r14,r14,#0
-	ldr	r8,[r0,#14*4]		@ restore n0
-	adds	r12,r12,r7
-	ldr	r7,[r0,#15*4]		@ restore &bp[num]
-	adc	r14,r14,#0
-	str	r12,[r0]		@ tp[num-1]=
-	str	r14,[r0,#4]		@ tp[num]=
-
-	cmp	r4,r7
-#ifdef	__thumb2__
-	itt	ne
-#endif
-	movne	r7,sp
-	bne	Louter
-
-	ldr	r2,[r0,#12*4]		@ pull rp
-	mov	r5,sp
-	add	r0,r0,#4		@ r0 to point at &tp[num]
-	sub	r5,r0,r5		@ "original" num value
-	mov	r4,sp			@ "rewind" r4
-	mov	r1,r4			@ "borrow" r1
-	sub	r3,r3,r5		@ "rewind" r3 to &np[0]
-
-	subs	r7,r7,r7		@ "clear" carry flag
-Lsub:	ldr	r7,[r4],#4
-	ldr	r6,[r3],#4
-	sbcs	r7,r7,r6		@ tp[j]-np[j]
-	str	r7,[r2],#4		@ rp[j]=
-	teq	r4,r0		@ preserve carry
-	bne	Lsub
-	sbcs	r14,r14,#0		@ upmost carry
-	mov	r4,sp			@ "rewind" r4
-	sub	r2,r2,r5		@ "rewind" r2
-
-Lcopy:	ldr	r7,[r4]		@ conditional copy
-	ldr	r5,[r2]
-	str	sp,[r4],#4		@ zap tp
-#ifdef	__thumb2__
-	it	cc
-#endif
-	movcc	r5,r7
-	str	r5,[r2],#4
-	teq	r4,r0		@ preserve carry
-	bne	Lcopy
-
-	mov	sp,r0
-	add	sp,sp,#4		@ skip over tp[num+1]
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ restore registers
-	add	sp,sp,#2*4		@ skip over {r0,r2}
-	mov	r0,#1
-Labrt:
-#if __ARM_ARCH__>=5
-	bx	lr				@ bx lr
-#else
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-
-#if __ARM_MAX_ARCH__>=7
-
-
-
-#ifdef __thumb2__
-.thumb_func	bn_mul8x_mont_neon
-#endif
-.align	5
-bn_mul8x_mont_neon:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ ABI specification says so
-	ldmia	ip,{r4,r5}		@ load rest of parameter block
-	mov	ip,sp
-
-	cmp	r5,#8
-	bhi	LNEON_8n
-
-	@ special case for r5==8, everything is in register bank...
-
-	vld1.32	{d28[0]}, [r2,:32]!
-	veor	d8,d8,d8
-	sub	r7,sp,r5,lsl#4
-	vld1.32	{d0,d1,d2,d3},  [r1]!		@ can't specify :32 :-(
-	and	r7,r7,#-64
-	vld1.32	{d30[0]}, [r4,:32]
-	mov	sp,r7			@ alloca
-	vzip.16	d28,d8
-
-	vmull.u32	q6,d28,d0[0]
-	vmull.u32	q7,d28,d0[1]
-	vmull.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmull.u32	q9,d28,d1[1]
-
-	vadd.u64	d29,d29,d12
-	veor	d8,d8,d8
-	vmul.u32	d29,d29,d30
-
-	vmull.u32	q10,d28,d2[0]
-	vld1.32	{d4,d5,d6,d7}, [r3]!
-	vmull.u32	q11,d28,d2[1]
-	vmull.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmull.u32	q13,d28,d3[1]
-
-	vmlal.u32	q6,d29,d4[0]
-	sub	r9,r5,#1
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-
-	vmlal.u32	q10,d29,d6[0]
-	vmov	q5,q6
-	vmlal.u32	q11,d29,d6[1]
-	vmov	q6,q7
-	vmlal.u32	q12,d29,d7[0]
-	vmov	q7,q8
-	vmlal.u32	q13,d29,d7[1]
-	vmov	q8,q9
-	vmov	q9,q10
-	vshr.u64	d10,d10,#16
-	vmov	q10,q11
-	vmov	q11,q12
-	vadd.u64	d10,d10,d11
-	vmov	q12,q13
-	veor	q13,q13
-	vshr.u64	d10,d10,#16
-
-	b	LNEON_outer8
-
-.align	4
-LNEON_outer8:
-	vld1.32	{d28[0]}, [r2,:32]!
-	veor	d8,d8,d8
-	vzip.16	d28,d8
-	vadd.u64	d12,d12,d10
-
-	vmlal.u32	q6,d28,d0[0]
-	vmlal.u32	q7,d28,d0[1]
-	vmlal.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmlal.u32	q9,d28,d1[1]
-
-	vadd.u64	d29,d29,d12
-	veor	d8,d8,d8
-	subs	r9,r9,#1
-	vmul.u32	d29,d29,d30
-
-	vmlal.u32	q10,d28,d2[0]
-	vmlal.u32	q11,d28,d2[1]
-	vmlal.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q13,d28,d3[1]
-
-	vmlal.u32	q6,d29,d4[0]
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-
-	vmlal.u32	q10,d29,d6[0]
-	vmov	q5,q6
-	vmlal.u32	q11,d29,d6[1]
-	vmov	q6,q7
-	vmlal.u32	q12,d29,d7[0]
-	vmov	q7,q8
-	vmlal.u32	q13,d29,d7[1]
-	vmov	q8,q9
-	vmov	q9,q10
-	vshr.u64	d10,d10,#16
-	vmov	q10,q11
-	vmov	q11,q12
-	vadd.u64	d10,d10,d11
-	vmov	q12,q13
-	veor	q13,q13
-	vshr.u64	d10,d10,#16
-
-	bne	LNEON_outer8
-
-	vadd.u64	d12,d12,d10
-	mov	r7,sp
-	vshr.u64	d10,d12,#16
-	mov	r8,r5
-	vadd.u64	d13,d13,d10
-	add	r6,sp,#96
-	vshr.u64	d10,d13,#16
-	vzip.16	d12,d13
-
-	b	LNEON_tail_entry
-
-.align	4
-LNEON_8n:
-	veor	q6,q6,q6
-	sub	r7,sp,#128
-	veor	q7,q7,q7
-	sub	r7,r7,r5,lsl#4
-	veor	q8,q8,q8
-	and	r7,r7,#-64
-	veor	q9,q9,q9
-	mov	sp,r7			@ alloca
-	veor	q10,q10,q10
-	add	r7,r7,#256
-	veor	q11,q11,q11
-	sub	r8,r5,#8
-	veor	q12,q12,q12
-	veor	q13,q13,q13
-
-LNEON_8n_init:
-	vst1.64	{q6,q7},[r7,:256]!
-	subs	r8,r8,#8
-	vst1.64	{q8,q9},[r7,:256]!
-	vst1.64	{q10,q11},[r7,:256]!
-	vst1.64	{q12,q13},[r7,:256]!
-	bne	LNEON_8n_init
-
-	add	r6,sp,#256
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	add	r10,sp,#8
-	vld1.32	{d30[0]},[r4,:32]
-	mov	r9,r5
-	b	LNEON_8n_outer
-
-.align	4
-LNEON_8n_outer:
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	veor	d8,d8,d8
-	vzip.16	d28,d8
-	add	r7,sp,#128
-	vld1.32	{d4,d5,d6,d7},[r3]!
-
-	vmlal.u32	q6,d28,d0[0]
-	vmlal.u32	q7,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmlal.u32	q9,d28,d1[1]
-	vadd.u64	d29,d29,d12
-	vmlal.u32	q10,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q11,d28,d2[1]
-	vst1.32	{d28},[sp,:64]		@ put aside smashed b[8*i+0]
-	vmlal.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q13,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q6,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q7,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q8,d29,d5[0]
-	vshr.u64	d12,d12,#16
-	vmlal.u32	q9,d29,d5[1]
-	vmlal.u32	q10,d29,d6[0]
-	vadd.u64	d12,d12,d13
-	vmlal.u32	q11,d29,d6[1]
-	vshr.u64	d12,d12,#16
-	vmlal.u32	q12,d29,d7[0]
-	vmlal.u32	q13,d29,d7[1]
-	vadd.u64	d14,d14,d12
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+0]
-	vmlal.u32	q7,d28,d0[0]
-	vld1.64	{q6},[r6,:128]!
-	vmlal.u32	q8,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q9,d28,d1[0]
-	vshl.i64	d29,d15,#16
-	vmlal.u32	q10,d28,d1[1]
-	vadd.u64	d29,d29,d14
-	vmlal.u32	q11,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q12,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+1]
-	vmlal.u32	q13,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q6,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q7,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q8,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q9,d29,d5[0]
-	vshr.u64	d14,d14,#16
-	vmlal.u32	q10,d29,d5[1]
-	vmlal.u32	q11,d29,d6[0]
-	vadd.u64	d14,d14,d15
-	vmlal.u32	q12,d29,d6[1]
-	vshr.u64	d14,d14,#16
-	vmlal.u32	q13,d29,d7[0]
-	vmlal.u32	q6,d29,d7[1]
-	vadd.u64	d16,d16,d14
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+1]
-	vmlal.u32	q8,d28,d0[0]
-	vld1.64	{q7},[r6,:128]!
-	vmlal.u32	q9,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q10,d28,d1[0]
-	vshl.i64	d29,d17,#16
-	vmlal.u32	q11,d28,d1[1]
-	vadd.u64	d29,d29,d16
-	vmlal.u32	q12,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q13,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+2]
-	vmlal.u32	q6,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q7,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q8,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q9,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q10,d29,d5[0]
-	vshr.u64	d16,d16,#16
-	vmlal.u32	q11,d29,d5[1]
-	vmlal.u32	q12,d29,d6[0]
-	vadd.u64	d16,d16,d17
-	vmlal.u32	q13,d29,d6[1]
-	vshr.u64	d16,d16,#16
-	vmlal.u32	q6,d29,d7[0]
-	vmlal.u32	q7,d29,d7[1]
-	vadd.u64	d18,d18,d16
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+2]
-	vmlal.u32	q9,d28,d0[0]
-	vld1.64	{q8},[r6,:128]!
-	vmlal.u32	q10,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q11,d28,d1[0]
-	vshl.i64	d29,d19,#16
-	vmlal.u32	q12,d28,d1[1]
-	vadd.u64	d29,d29,d18
-	vmlal.u32	q13,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q6,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+3]
-	vmlal.u32	q7,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q8,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q9,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q10,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q11,d29,d5[0]
-	vshr.u64	d18,d18,#16
-	vmlal.u32	q12,d29,d5[1]
-	vmlal.u32	q13,d29,d6[0]
-	vadd.u64	d18,d18,d19
-	vmlal.u32	q6,d29,d6[1]
-	vshr.u64	d18,d18,#16
-	vmlal.u32	q7,d29,d7[0]
-	vmlal.u32	q8,d29,d7[1]
-	vadd.u64	d20,d20,d18
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+3]
-	vmlal.u32	q10,d28,d0[0]
-	vld1.64	{q9},[r6,:128]!
-	vmlal.u32	q11,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q12,d28,d1[0]
-	vshl.i64	d29,d21,#16
-	vmlal.u32	q13,d28,d1[1]
-	vadd.u64	d29,d29,d20
-	vmlal.u32	q6,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q7,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+4]
-	vmlal.u32	q8,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q9,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q10,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q11,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q12,d29,d5[0]
-	vshr.u64	d20,d20,#16
-	vmlal.u32	q13,d29,d5[1]
-	vmlal.u32	q6,d29,d6[0]
-	vadd.u64	d20,d20,d21
-	vmlal.u32	q7,d29,d6[1]
-	vshr.u64	d20,d20,#16
-	vmlal.u32	q8,d29,d7[0]
-	vmlal.u32	q9,d29,d7[1]
-	vadd.u64	d22,d22,d20
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+4]
-	vmlal.u32	q11,d28,d0[0]
-	vld1.64	{q10},[r6,:128]!
-	vmlal.u32	q12,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q13,d28,d1[0]
-	vshl.i64	d29,d23,#16
-	vmlal.u32	q6,d28,d1[1]
-	vadd.u64	d29,d29,d22
-	vmlal.u32	q7,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q8,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+5]
-	vmlal.u32	q9,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q10,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q11,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q12,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q13,d29,d5[0]
-	vshr.u64	d22,d22,#16
-	vmlal.u32	q6,d29,d5[1]
-	vmlal.u32	q7,d29,d6[0]
-	vadd.u64	d22,d22,d23
-	vmlal.u32	q8,d29,d6[1]
-	vshr.u64	d22,d22,#16
-	vmlal.u32	q9,d29,d7[0]
-	vmlal.u32	q10,d29,d7[1]
-	vadd.u64	d24,d24,d22
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+5]
-	vmlal.u32	q12,d28,d0[0]
-	vld1.64	{q11},[r6,:128]!
-	vmlal.u32	q13,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q6,d28,d1[0]
-	vshl.i64	d29,d25,#16
-	vmlal.u32	q7,d28,d1[1]
-	vadd.u64	d29,d29,d24
-	vmlal.u32	q8,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q9,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+6]
-	vmlal.u32	q10,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q11,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q12,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q13,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q6,d29,d5[0]
-	vshr.u64	d24,d24,#16
-	vmlal.u32	q7,d29,d5[1]
-	vmlal.u32	q8,d29,d6[0]
-	vadd.u64	d24,d24,d25
-	vmlal.u32	q9,d29,d6[1]
-	vshr.u64	d24,d24,#16
-	vmlal.u32	q10,d29,d7[0]
-	vmlal.u32	q11,d29,d7[1]
-	vadd.u64	d26,d26,d24
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+6]
-	vmlal.u32	q13,d28,d0[0]
-	vld1.64	{q12},[r6,:128]!
-	vmlal.u32	q6,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q7,d28,d1[0]
-	vshl.i64	d29,d27,#16
-	vmlal.u32	q8,d28,d1[1]
-	vadd.u64	d29,d29,d26
-	vmlal.u32	q9,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q10,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+7]
-	vmlal.u32	q11,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q12,d28,d3[1]
-	vld1.32	{d28},[sp,:64]		@ pull smashed b[8*i+0]
-	vmlal.u32	q13,d29,d4[0]
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	vmlal.u32	q6,d29,d4[1]
-	vmlal.u32	q7,d29,d5[0]
-	vshr.u64	d26,d26,#16
-	vmlal.u32	q8,d29,d5[1]
-	vmlal.u32	q9,d29,d6[0]
-	vadd.u64	d26,d26,d27
-	vmlal.u32	q10,d29,d6[1]
-	vshr.u64	d26,d26,#16
-	vmlal.u32	q11,d29,d7[0]
-	vmlal.u32	q12,d29,d7[1]
-	vadd.u64	d12,d12,d26
-	vst1.32	{d29},[r10,:64]	@ put aside smashed m[8*i+7]
-	add	r10,sp,#8		@ rewind
-	sub	r8,r5,#8
-	b	LNEON_8n_inner
-
-.align	4
-LNEON_8n_inner:
-	subs	r8,r8,#8
-	vmlal.u32	q6,d28,d0[0]
-	vld1.64	{q13},[r6,:128]
-	vmlal.u32	q7,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+0]
-	vmlal.u32	q8,d28,d1[0]
-	vld1.32	{d4,d5,d6,d7},[r3]!
-	vmlal.u32	q9,d28,d1[1]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q10,d28,d2[0]
-	vmlal.u32	q11,d28,d2[1]
-	vmlal.u32	q12,d28,d3[0]
-	vmlal.u32	q13,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+1]
-	vmlal.u32	q6,d29,d4[0]
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-	vmlal.u32	q10,d29,d6[0]
-	vmlal.u32	q11,d29,d6[1]
-	vmlal.u32	q12,d29,d7[0]
-	vmlal.u32	q13,d29,d7[1]
-	vst1.64	{q6},[r7,:128]!
-	vmlal.u32	q7,d28,d0[0]
-	vld1.64	{q6},[r6,:128]
-	vmlal.u32	q8,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+1]
-	vmlal.u32	q9,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q10,d28,d1[1]
-	vmlal.u32	q11,d28,d2[0]
-	vmlal.u32	q12,d28,d2[1]
-	vmlal.u32	q13,d28,d3[0]
-	vmlal.u32	q6,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+2]
-	vmlal.u32	q7,d29,d4[0]
-	vmlal.u32	q8,d29,d4[1]
-	vmlal.u32	q9,d29,d5[0]
-	vmlal.u32	q10,d29,d5[1]
-	vmlal.u32	q11,d29,d6[0]
-	vmlal.u32	q12,d29,d6[1]
-	vmlal.u32	q13,d29,d7[0]
-	vmlal.u32	q6,d29,d7[1]
-	vst1.64	{q7},[r7,:128]!
-	vmlal.u32	q8,d28,d0[0]
-	vld1.64	{q7},[r6,:128]
-	vmlal.u32	q9,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+2]
-	vmlal.u32	q10,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q11,d28,d1[1]
-	vmlal.u32	q12,d28,d2[0]
-	vmlal.u32	q13,d28,d2[1]
-	vmlal.u32	q6,d28,d3[0]
-	vmlal.u32	q7,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+3]
-	vmlal.u32	q8,d29,d4[0]
-	vmlal.u32	q9,d29,d4[1]
-	vmlal.u32	q10,d29,d5[0]
-	vmlal.u32	q11,d29,d5[1]
-	vmlal.u32	q12,d29,d6[0]
-	vmlal.u32	q13,d29,d6[1]
-	vmlal.u32	q6,d29,d7[0]
-	vmlal.u32	q7,d29,d7[1]
-	vst1.64	{q8},[r7,:128]!
-	vmlal.u32	q9,d28,d0[0]
-	vld1.64	{q8},[r6,:128]
-	vmlal.u32	q10,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+3]
-	vmlal.u32	q11,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q12,d28,d1[1]
-	vmlal.u32	q13,d28,d2[0]
-	vmlal.u32	q6,d28,d2[1]
-	vmlal.u32	q7,d28,d3[0]
-	vmlal.u32	q8,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+4]
-	vmlal.u32	q9,d29,d4[0]
-	vmlal.u32	q10,d29,d4[1]
-	vmlal.u32	q11,d29,d5[0]
-	vmlal.u32	q12,d29,d5[1]
-	vmlal.u32	q13,d29,d6[0]
-	vmlal.u32	q6,d29,d6[1]
-	vmlal.u32	q7,d29,d7[0]
-	vmlal.u32	q8,d29,d7[1]
-	vst1.64	{q9},[r7,:128]!
-	vmlal.u32	q10,d28,d0[0]
-	vld1.64	{q9},[r6,:128]
-	vmlal.u32	q11,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+4]
-	vmlal.u32	q12,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q13,d28,d1[1]
-	vmlal.u32	q6,d28,d2[0]
-	vmlal.u32	q7,d28,d2[1]
-	vmlal.u32	q8,d28,d3[0]
-	vmlal.u32	q9,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+5]
-	vmlal.u32	q10,d29,d4[0]
-	vmlal.u32	q11,d29,d4[1]
-	vmlal.u32	q12,d29,d5[0]
-	vmlal.u32	q13,d29,d5[1]
-	vmlal.u32	q6,d29,d6[0]
-	vmlal.u32	q7,d29,d6[1]
-	vmlal.u32	q8,d29,d7[0]
-	vmlal.u32	q9,d29,d7[1]
-	vst1.64	{q10},[r7,:128]!
-	vmlal.u32	q11,d28,d0[0]
-	vld1.64	{q10},[r6,:128]
-	vmlal.u32	q12,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+5]
-	vmlal.u32	q13,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q6,d28,d1[1]
-	vmlal.u32	q7,d28,d2[0]
-	vmlal.u32	q8,d28,d2[1]
-	vmlal.u32	q9,d28,d3[0]
-	vmlal.u32	q10,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+6]
-	vmlal.u32	q11,d29,d4[0]
-	vmlal.u32	q12,d29,d4[1]
-	vmlal.u32	q13,d29,d5[0]
-	vmlal.u32	q6,d29,d5[1]
-	vmlal.u32	q7,d29,d6[0]
-	vmlal.u32	q8,d29,d6[1]
-	vmlal.u32	q9,d29,d7[0]
-	vmlal.u32	q10,d29,d7[1]
-	vst1.64	{q11},[r7,:128]!
-	vmlal.u32	q12,d28,d0[0]
-	vld1.64	{q11},[r6,:128]
-	vmlal.u32	q13,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+6]
-	vmlal.u32	q6,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q7,d28,d1[1]
-	vmlal.u32	q8,d28,d2[0]
-	vmlal.u32	q9,d28,d2[1]
-	vmlal.u32	q10,d28,d3[0]
-	vmlal.u32	q11,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+7]
-	vmlal.u32	q12,d29,d4[0]
-	vmlal.u32	q13,d29,d4[1]
-	vmlal.u32	q6,d29,d5[0]
-	vmlal.u32	q7,d29,d5[1]
-	vmlal.u32	q8,d29,d6[0]
-	vmlal.u32	q9,d29,d6[1]
-	vmlal.u32	q10,d29,d7[0]
-	vmlal.u32	q11,d29,d7[1]
-	vst1.64	{q12},[r7,:128]!
-	vmlal.u32	q13,d28,d0[0]
-	vld1.64	{q12},[r6,:128]
-	vmlal.u32	q6,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+7]
-	vmlal.u32	q7,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q8,d28,d1[1]
-	vmlal.u32	q9,d28,d2[0]
-	vmlal.u32	q10,d28,d2[1]
-	vmlal.u32	q11,d28,d3[0]
-	vmlal.u32	q12,d28,d3[1]
-	it	eq
-	subeq	r1,r1,r5,lsl#2	@ rewind
-	vmlal.u32	q13,d29,d4[0]
-	vld1.32	{d28},[sp,:64]		@ pull smashed b[8*i+0]
-	vmlal.u32	q6,d29,d4[1]
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	vmlal.u32	q7,d29,d5[0]
-	add	r10,sp,#8		@ rewind
-	vmlal.u32	q8,d29,d5[1]
-	vmlal.u32	q9,d29,d6[0]
-	vmlal.u32	q10,d29,d6[1]
-	vmlal.u32	q11,d29,d7[0]
-	vst1.64	{q13},[r7,:128]!
-	vmlal.u32	q12,d29,d7[1]
-
-	bne	LNEON_8n_inner
-	add	r6,sp,#128
-	vst1.64	{q6,q7},[r7,:256]!
-	veor	q2,q2,q2		@ d4-d5
-	vst1.64	{q8,q9},[r7,:256]!
-	veor	q3,q3,q3		@ d6-d7
-	vst1.64	{q10,q11},[r7,:256]!
-	vst1.64	{q12},[r7,:128]
-
-	subs	r9,r9,#8
-	vld1.64	{q6,q7},[r6,:256]!
-	vld1.64	{q8,q9},[r6,:256]!
-	vld1.64	{q10,q11},[r6,:256]!
-	vld1.64	{q12,q13},[r6,:256]!
-
-	itt	ne
-	subne	r3,r3,r5,lsl#2	@ rewind
-	bne	LNEON_8n_outer
-
-	add	r7,sp,#128
-	vst1.64	{q2,q3}, [sp,:256]!	@ start wiping stack frame
-	vshr.u64	d10,d12,#16
-	vst1.64	{q2,q3},[sp,:256]!
-	vadd.u64	d13,d13,d10
-	vst1.64	{q2,q3}, [sp,:256]!
-	vshr.u64	d10,d13,#16
-	vst1.64	{q2,q3}, [sp,:256]!
-	vzip.16	d12,d13
-
-	mov	r8,r5
-	b	LNEON_tail_entry
-
-.align	4
-LNEON_tail:
-	vadd.u64	d12,d12,d10
-	vshr.u64	d10,d12,#16
-	vld1.64	{q8,q9}, [r6, :256]!
-	vadd.u64	d13,d13,d10
-	vld1.64	{q10,q11}, [r6, :256]!
-	vshr.u64	d10,d13,#16
-	vld1.64	{q12,q13}, [r6, :256]!
-	vzip.16	d12,d13
-
-LNEON_tail_entry:
-	vadd.u64	d14,d14,d10
-	vst1.32	{d12[0]}, [r7, :32]!
-	vshr.u64	d10,d14,#16
-	vadd.u64	d15,d15,d10
-	vshr.u64	d10,d15,#16
-	vzip.16	d14,d15
-	vadd.u64	d16,d16,d10
-	vst1.32	{d14[0]}, [r7, :32]!
-	vshr.u64	d10,d16,#16
-	vadd.u64	d17,d17,d10
-	vshr.u64	d10,d17,#16
-	vzip.16	d16,d17
-	vadd.u64	d18,d18,d10
-	vst1.32	{d16[0]}, [r7, :32]!
-	vshr.u64	d10,d18,#16
-	vadd.u64	d19,d19,d10
-	vshr.u64	d10,d19,#16
-	vzip.16	d18,d19
-	vadd.u64	d20,d20,d10
-	vst1.32	{d18[0]}, [r7, :32]!
-	vshr.u64	d10,d20,#16
-	vadd.u64	d21,d21,d10
-	vshr.u64	d10,d21,#16
-	vzip.16	d20,d21
-	vadd.u64	d22,d22,d10
-	vst1.32	{d20[0]}, [r7, :32]!
-	vshr.u64	d10,d22,#16
-	vadd.u64	d23,d23,d10
-	vshr.u64	d10,d23,#16
-	vzip.16	d22,d23
-	vadd.u64	d24,d24,d10
-	vst1.32	{d22[0]}, [r7, :32]!
-	vshr.u64	d10,d24,#16
-	vadd.u64	d25,d25,d10
-	vshr.u64	d10,d25,#16
-	vzip.16	d24,d25
-	vadd.u64	d26,d26,d10
-	vst1.32	{d24[0]}, [r7, :32]!
-	vshr.u64	d10,d26,#16
-	vadd.u64	d27,d27,d10
-	vshr.u64	d10,d27,#16
-	vzip.16	d26,d27
-	vld1.64	{q6,q7}, [r6, :256]!
-	subs	r8,r8,#8
-	vst1.32	{d26[0]},   [r7, :32]!
-	bne	LNEON_tail
-
-	vst1.32	{d10[0]}, [r7, :32]		@ top-most bit
-	sub	r3,r3,r5,lsl#2			@ rewind r3
-	subs	r1,sp,#0				@ clear carry flag
-	add	r2,sp,r5,lsl#2
-
-LNEON_sub:
-	ldmia	r1!, {r4,r5,r6,r7}
-	ldmia	r3!, {r8,r9,r10,r11}
-	sbcs	r8, r4,r8
-	sbcs	r9, r5,r9
-	sbcs	r10,r6,r10
-	sbcs	r11,r7,r11
-	teq	r1,r2				@ preserves carry
-	stmia	r0!, {r8,r9,r10,r11}
-	bne	LNEON_sub
-
-	ldr	r10, [r1]				@ load top-most bit
-	mov	r11,sp
-	veor	q0,q0,q0
-	sub	r11,r2,r11				@ this is num*4
-	veor	q1,q1,q1
-	mov	r1,sp
-	sub	r0,r0,r11				@ rewind r0
-	mov	r3,r2				@ second 3/4th of frame
-	sbcs	r10,r10,#0				@ result is carry flag
-
-LNEON_copy_n_zap:
-	ldmia	r1!, {r4,r5,r6,r7}
-	ldmia	r0,  {r8,r9,r10,r11}
-	it	cc
-	movcc	r8, r4
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	itt	cc
-	movcc	r9, r5
-	movcc	r10,r6
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	it	cc
-	movcc	r11,r7
-	ldmia	r1, {r4,r5,r6,r7}
-	stmia	r0!, {r8,r9,r10,r11}
-	sub	r1,r1,#16
-	ldmia	r0, {r8,r9,r10,r11}
-	it	cc
-	movcc	r8, r4
-	vst1.64	{q0,q1}, [r1,:256]!			@ wipe
-	itt	cc
-	movcc	r9, r5
-	movcc	r10,r6
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	it	cc
-	movcc	r11,r7
-	teq	r1,r2				@ preserves carry
-	stmia	r0!, {r8,r9,r10,r11}
-	bne	LNEON_copy_n_zap
-
-	mov	sp,ip
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
-	bx	lr						@ bx lr
-
-#endif
-.byte	77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#if __ARM_MAX_ARCH__>=7
-.comm	_OPENSSL_armcap_P,4
-.non_lazy_symbol_pointer
-OPENSSL_armcap_P:
-.indirect_symbol	_OPENSSL_armcap_P
-.long	0
-.private_extern	_OPENSSL_armcap_P
-#endif
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/bsaes-armv7.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/bsaes-armv7.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghash-armv4.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghash-armv4.S
@ -1,258 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
-@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
-@ instructions are in aesv8-armx.pl.)
-
-
-.text
-#if defined(__thumb2__) || defined(__clang__)
-.syntax	unified
-#define ldrplb  ldrbpl
-#define ldrneb  ldrbne
-#endif
-#if defined(__thumb2__)
-.thumb
-#else
-.code	32
-#endif
-#if __ARM_MAX_ARCH__>=7
-
-
-
-.globl	_gcm_init_neon
-.private_extern	_gcm_init_neon
-#ifdef __thumb2__
-.thumb_func	_gcm_init_neon
-#endif
-.align	4
-_gcm_init_neon:
-	vld1.64	d7,[r1]!		@ load H
-	vmov.i8	q8,#0xe1
-	vld1.64	d6,[r1]
-	vshl.i64	d17,#57
-	vshr.u64	d16,#63		@ t0=0xc2....01
-	vdup.8	q9,d7[7]
-	vshr.u64	d26,d6,#63
-	vshr.s8	q9,#7			@ broadcast carry bit
-	vshl.i64	q3,q3,#1
-	vand	q8,q8,q9
-	vorr	d7,d26		@ H<<<=1
-	veor	q3,q3,q8		@ twisted H
-	vstmia	r0,{q3}
-
-	bx	lr					@ bx lr
-
-
-.globl	_gcm_gmult_neon
-.private_extern	_gcm_gmult_neon
-#ifdef __thumb2__
-.thumb_func	_gcm_gmult_neon
-#endif
-.align	4
-_gcm_gmult_neon:
-	vld1.64	d7,[r0]!		@ load Xi
-	vld1.64	d6,[r0]!
-	vmov.i64	d29,#0x0000ffffffffffff
-	vldmia	r1,{d26,d27}	@ load twisted H
-	vmov.i64	d30,#0x00000000ffffffff
-#ifdef __ARMEL__
-	vrev64.8	q3,q3
-#endif
-	vmov.i64	d31,#0x000000000000ffff
-	veor	d28,d26,d27		@ Karatsuba pre-processing
-	mov	r3,#16
-	b	Lgmult_neon
-
-
-.globl	_gcm_ghash_neon
-.private_extern	_gcm_ghash_neon
-#ifdef __thumb2__
-.thumb_func	_gcm_ghash_neon
-#endif
-.align	4
-_gcm_ghash_neon:
-	vld1.64	d1,[r0]!		@ load Xi
-	vld1.64	d0,[r0]!
-	vmov.i64	d29,#0x0000ffffffffffff
-	vldmia	r1,{d26,d27}	@ load twisted H
-	vmov.i64	d30,#0x00000000ffffffff
-#ifdef __ARMEL__
-	vrev64.8	q0,q0
-#endif
-	vmov.i64	d31,#0x000000000000ffff
-	veor	d28,d26,d27		@ Karatsuba pre-processing
-
-Loop_neon:
-	vld1.64	d7,[r2]!		@ load inp
-	vld1.64	d6,[r2]!
-#ifdef __ARMEL__
-	vrev64.8	q3,q3
-#endif
-	veor	q3,q0			@ inp^=Xi
-Lgmult_neon:
-	vext.8	d16, d26, d26, #1	@ A1
-	vmull.p8	q8, d16, d6		@ F = A1*B
-	vext.8	d0, d6, d6, #1	@ B1
-	vmull.p8	q0, d26, d0		@ E = A*B1
-	vext.8	d18, d26, d26, #2	@ A2
-	vmull.p8	q9, d18, d6		@ H = A2*B
-	vext.8	d22, d6, d6, #2	@ B2
-	vmull.p8	q11, d26, d22		@ G = A*B2
-	vext.8	d20, d26, d26, #3	@ A3
-	veor	q8, q8, q0		@ L = E + F
-	vmull.p8	q10, d20, d6		@ J = A3*B
-	vext.8	d0, d6, d6, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q0, d26, d0		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d6, d6, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d26, d22		@ K = A*B4
-	veor	q10, q10, q0		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q0, d26, d6		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q0, q0, q8
-	veor	q0, q0, q10
-	veor	d6,d6,d7	@ Karatsuba pre-processing
-	vext.8	d16, d28, d28, #1	@ A1
-	vmull.p8	q8, d16, d6		@ F = A1*B
-	vext.8	d2, d6, d6, #1	@ B1
-	vmull.p8	q1, d28, d2		@ E = A*B1
-	vext.8	d18, d28, d28, #2	@ A2
-	vmull.p8	q9, d18, d6		@ H = A2*B
-	vext.8	d22, d6, d6, #2	@ B2
-	vmull.p8	q11, d28, d22		@ G = A*B2
-	vext.8	d20, d28, d28, #3	@ A3
-	veor	q8, q8, q1		@ L = E + F
-	vmull.p8	q10, d20, d6		@ J = A3*B
-	vext.8	d2, d6, d6, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q1, d28, d2		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d6, d6, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d28, d22		@ K = A*B4
-	veor	q10, q10, q1		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q1, d28, d6		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q1, q1, q8
-	veor	q1, q1, q10
-	vext.8	d16, d27, d27, #1	@ A1
-	vmull.p8	q8, d16, d7		@ F = A1*B
-	vext.8	d4, d7, d7, #1	@ B1
-	vmull.p8	q2, d27, d4		@ E = A*B1
-	vext.8	d18, d27, d27, #2	@ A2
-	vmull.p8	q9, d18, d7		@ H = A2*B
-	vext.8	d22, d7, d7, #2	@ B2
-	vmull.p8	q11, d27, d22		@ G = A*B2
-	vext.8	d20, d27, d27, #3	@ A3
-	veor	q8, q8, q2		@ L = E + F
-	vmull.p8	q10, d20, d7		@ J = A3*B
-	vext.8	d4, d7, d7, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q2, d27, d4		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d7, d7, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d27, d22		@ K = A*B4
-	veor	q10, q10, q2		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q2, d27, d7		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q2, q2, q8
-	veor	q2, q2, q10
-	veor	q1,q1,q0		@ Karatsuba post-processing
-	veor	q1,q1,q2
-	veor	d1,d1,d2
-	veor	d4,d4,d3	@ Xh|Xl - 256-bit result
-
-	@ equivalent of reduction_avx from ghash-x86_64.pl
-	vshl.i64	q9,q0,#57		@ 1st phase
-	vshl.i64	q10,q0,#62
-	veor	q10,q10,q9		@
-	vshl.i64	q9,q0,#63
-	veor	q10, q10, q9		@
-	veor	d1,d1,d20	@
-	veor	d4,d4,d21
-
-	vshr.u64	q10,q0,#1		@ 2nd phase
-	veor	q2,q2,q0
-	veor	q0,q0,q10		@
-	vshr.u64	q10,q10,#6
-	vshr.u64	q0,q0,#1		@
-	veor	q0,q0,q2		@
-	veor	q0,q0,q10		@
-
-	subs	r3,#16
-	bne	Loop_neon
-
-#ifdef __ARMEL__
-	vrev64.8	q0,q0
-#endif
-	sub	r0,#16
-	vst1.64	d1,[r0]!		@ write out Xi
-	vst1.64	d0,[r0]
-
-	bx	lr					@ bx lr
-
-#endif
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghashv8-armx32.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/ghashv8-armx32.S
@ -1,256 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-.code	32
-#undef	__thumb2__
-.globl	_gcm_init_v8
-.private_extern	_gcm_init_v8
-#ifdef __thumb2__
-.thumb_func	_gcm_init_v8
-#endif
-.align	4
-_gcm_init_v8:
-	vld1.64	{q9},[r1]		@ load input H
-	vmov.i8	q11,#0xe1
-	vshl.i64	q11,q11,#57		@ 0xc2.0
-	vext.8	q3,q9,q9,#8
-	vshr.u64	q10,q11,#63
-	vdup.32	q9,d18[1]
-	vext.8	q8,q10,q11,#8		@ t0=0xc2....01
-	vshr.u64	q10,q3,#63
-	vshr.s32	q9,q9,#31		@ broadcast carry bit
-	vand	q10,q10,q8
-	vshl.i64	q3,q3,#1
-	vext.8	q10,q10,q10,#8
-	vand	q8,q8,q9
-	vorr	q3,q3,q10		@ H<<<=1
-	veor	q12,q3,q8		@ twisted H
-	vst1.64	{q12},[r0]!		@ store Htable[0]
-
-	@ calculate H^2
-	vext.8	q8,q12,q12,#8		@ Karatsuba pre-processing
-.byte	0xa8,0x0e,0xa8,0xf2	@ pmull q0,q12,q12
-	veor	q8,q8,q12
-.byte	0xa9,0x4e,0xa9,0xf2	@ pmull2 q2,q12,q12
-.byte	0xa0,0x2e,0xa0,0xf2	@ pmull q1,q8,q8
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase
-
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	veor	q0,q1,q10
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q10,q10,q2
-	veor	q14,q0,q10
-
-	vext.8	q9,q14,q14,#8		@ Karatsuba pre-processing
-	veor	q9,q9,q14
-	vext.8	q13,q8,q9,#8		@ pack Karatsuba pre-processed
-	vst1.64	{q13,q14},[r0]		@ store Htable[1..2]
-
-	bx	lr
-
-.globl	_gcm_gmult_v8
-.private_extern	_gcm_gmult_v8
-#ifdef __thumb2__
-.thumb_func	_gcm_gmult_v8
-#endif
-.align	4
-_gcm_gmult_v8:
-	vld1.64	{q9},[r0]		@ load Xi
-	vmov.i8	q11,#0xe1
-	vld1.64	{q12,q13},[r1]	@ load twisted H, ...
-	vshl.u64	q11,q11,#57
-#ifndef __ARMEB__
-	vrev64.8	q9,q9
-#endif
-	vext.8	q3,q9,q9,#8
-
-.byte	0x86,0x0e,0xa8,0xf2	@ pmull q0,q12,q3		@ H.lo·Xi.lo
-	veor	q9,q9,q3		@ Karatsuba pre-processing
-.byte	0x87,0x4e,0xa9,0xf2	@ pmull2 q2,q12,q3		@ H.hi·Xi.hi
-.byte	0xa2,0x2e,0xaa,0xf2	@ pmull q1,q13,q9		@ (H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase of reduction
-
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	veor	q0,q1,q10
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase of reduction
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q10,q10,q2
-	veor	q0,q0,q10
-
-#ifndef __ARMEB__
-	vrev64.8	q0,q0
-#endif
-	vext.8	q0,q0,q0,#8
-	vst1.64	{q0},[r0]		@ write out Xi
-
-	bx	lr
-
-.globl	_gcm_ghash_v8
-.private_extern	_gcm_ghash_v8
-#ifdef __thumb2__
-.thumb_func	_gcm_ghash_v8
-#endif
-.align	4
-_gcm_ghash_v8:
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ 32-bit ABI says so
-	vld1.64	{q0},[r0]		@ load [rotated] Xi
-						@ "[rotated]" means that
-						@ loaded value would have
-						@ to be rotated in order to
-						@ make it appear as in
-						@ algorithm specification
-	subs	r3,r3,#32		@ see if r3 is 32 or larger
-	mov	r12,#16		@ r12 is used as post-
-						@ increment for input pointer;
-						@ as loop is modulo-scheduled
-						@ r12 is zeroed just in time
-						@ to preclude overstepping
-						@ inp[len], which means that
-						@ last block[s] are actually
-						@ loaded twice, but last
-						@ copy is not processed
-	vld1.64	{q12,q13},[r1]!	@ load twisted H, ..., H^2
-	vmov.i8	q11,#0xe1
-	vld1.64	{q14},[r1]
-	moveq	r12,#0			@ is it time to zero r12?
-	vext.8	q0,q0,q0,#8		@ rotate Xi
-	vld1.64	{q8},[r2]!	@ load [rotated] I[0]
-	vshl.u64	q11,q11,#57		@ compose 0xc2.0 constant
-#ifndef __ARMEB__
-	vrev64.8	q8,q8
-	vrev64.8	q0,q0
-#endif
-	vext.8	q3,q8,q8,#8		@ rotate I[0]
-	blo	Lodd_tail_v8		@ r3 was less than 32
-	vld1.64	{q9},[r2],r12	@ load [rotated] I[1]
-#ifndef __ARMEB__
-	vrev64.8	q9,q9
-#endif
-	vext.8	q7,q9,q9,#8
-	veor	q3,q3,q0		@ I[i]^=Xi
-.byte	0x8e,0x8e,0xa8,0xf2	@ pmull q4,q12,q7		@ H·Ii+1
-	veor	q9,q9,q7		@ Karatsuba pre-processing
-.byte	0x8f,0xce,0xa9,0xf2	@ pmull2 q6,q12,q7
-	b	Loop_mod2x_v8
-
-.align	4
-Loop_mod2x_v8:
-	vext.8	q10,q3,q3,#8
-	subs	r3,r3,#32		@ is there more data?
-.byte	0x86,0x0e,0xac,0xf2	@ pmull q0,q14,q3		@ H^2.lo·Xi.lo
-	movlo	r12,#0			@ is it time to zero r12?
-
-.byte	0xa2,0xae,0xaa,0xf2	@ pmull q5,q13,q9
-	veor	q10,q10,q3		@ Karatsuba pre-processing
-.byte	0x87,0x4e,0xad,0xf2	@ pmull2 q2,q14,q3		@ H^2.hi·Xi.hi
-	veor	q0,q0,q4		@ accumulate
-.byte	0xa5,0x2e,0xab,0xf2	@ pmull2 q1,q13,q10		@ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
-	vld1.64	{q8},[r2],r12	@ load [rotated] I[i+2]
-
-	veor	q2,q2,q6
-	moveq	r12,#0			@ is it time to zero r12?
-	veor	q1,q1,q5
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	vld1.64	{q9},[r2],r12	@ load [rotated] I[i+3]
-#ifndef __ARMEB__
-	vrev64.8	q8,q8
-#endif
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase of reduction
-
-#ifndef __ARMEB__
-	vrev64.8	q9,q9
-#endif
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	vext.8	q7,q9,q9,#8
-	vext.8	q3,q8,q8,#8
-	veor	q0,q1,q10
-.byte	0x8e,0x8e,0xa8,0xf2	@ pmull q4,q12,q7		@ H·Ii+1
-	veor	q3,q3,q2		@ accumulate q3 early
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase of reduction
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q3,q3,q10
-	veor	q9,q9,q7		@ Karatsuba pre-processing
-	veor	q3,q3,q0
-.byte	0x8f,0xce,0xa9,0xf2	@ pmull2 q6,q12,q7
-	bhs	Loop_mod2x_v8		@ there was at least 32 more bytes
-
-	veor	q2,q2,q10
-	vext.8	q3,q8,q8,#8		@ re-construct q3
-	adds	r3,r3,#32		@ re-construct r3
-	veor	q0,q0,q2		@ re-construct q0
-	beq	Ldone_v8		@ is r3 zero?
-Lodd_tail_v8:
-	vext.8	q10,q0,q0,#8
-	veor	q3,q3,q0		@ inp^=Xi
-	veor	q9,q8,q10		@ q9 is rotated inp^Xi
-
-.byte	0x86,0x0e,0xa8,0xf2	@ pmull q0,q12,q3		@ H.lo·Xi.lo
-	veor	q9,q9,q3		@ Karatsuba pre-processing
-.byte	0x87,0x4e,0xa9,0xf2	@ pmull2 q2,q12,q3		@ H.hi·Xi.hi
-.byte	0xa2,0x2e,0xaa,0xf2	@ pmull q1,q13,q9		@ (H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
-	veor	q10,q0,q2
-	veor	q1,q1,q9
-	veor	q1,q1,q10
-.byte	0x26,0x4e,0xe0,0xf2	@ pmull q10,q0,q11		@ 1st phase of reduction
-
-	vmov	d4,d3		@ Xh|Xm - 256-bit result
-	vmov	d3,d0		@ Xm is rotated Xl
-	veor	q0,q1,q10
-
-	vext.8	q10,q0,q0,#8		@ 2nd phase of reduction
-.byte	0x26,0x0e,0xa0,0xf2	@ pmull q0,q0,q11
-	veor	q10,q10,q2
-	veor	q0,q0,q10
-
-Ldone_v8:
-#ifndef __ARMEB__
-	vrev64.8	q0,q0
-#endif
-	vext.8	q0,q0,q0,#8
-	vst1.64	{q0},[r0]		@ write out Xi
-
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ 32-bit ABI says so
-	bx	lr
-
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha1-armv4-large.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha1-armv4-large.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha256-armv4.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha256-armv4.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha512-armv4.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/sha512-armv4.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/fipsmodule/vpaes-armv7.S
--- a/contrib/boringssl-cmake/ios-arm/crypto/test/trampoline-armv4.S
+++ b/contrib/boringssl-cmake/ios-arm/crypto/test/trampoline-armv4.S
@ -1,376 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-.syntax	unified
-
-
-
-
-.text
-
-@ abi_test_trampoline loads callee-saved registers from |state|, calls |func|
-@ with |argv|, then saves the callee-saved registers into |state|. It returns
-@ the result of |func|. The |unwind| argument is unused.
-@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state,
-@                              const uint32_t *argv, size_t argc,
-@                              int unwind);
-
-.globl	_abi_test_trampoline
-.private_extern	_abi_test_trampoline
-.align	4
-_abi_test_trampoline:
-	@ Save parameters and all callee-saved registers. For convenience, we
-	@ save r9 on iOS even though it's volatile.
-	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
-	stmdb	sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr}
-
-	@ Reserve stack space for six (10-4) stack parameters, plus an extra 4
-	@ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3).
-	sub	sp, sp, #28
-
-	@ Every register in AAPCS is either non-volatile or a parameter (except
-	@ r9 on iOS), so this code, by the actual call, loses all its scratch
-	@ registers. First fill in stack parameters while there are registers
-	@ to spare.
-	cmp	r3, #4
-	bls	Lstack_args_done
-	mov	r4, sp				@ r4 is the output pointer.
-	add	r5, r2, r3, lsl #2	@ Set r5 to the end of argv.
-	add	r2, r2, #16		@ Skip four arguments.
-Lstack_args_loop:
-	ldr	r6, [r2], #4
-	cmp	r2, r5
-	str	r6, [r4], #4
-	bne	Lstack_args_loop
-
-Lstack_args_done:
-	@ Load registers from |r1|.
-	vldmia	r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
-#if defined(__APPLE__)
-	@ r9 is not volatile on iOS.
-	ldmia	r1!, {r4,r5,r6,r7,r8,r10-r11}
-#else
-	ldmia	r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
-#endif
-
-	@ Load register parameters. This uses up our remaining registers, so we
-	@ repurpose lr as scratch space.
-	ldr	r3, [sp, #40]	@ Reload argc.
-	ldr	lr, [sp, #36]		@ Load argv into lr.
-	cmp	r3, #3
-	bhi	Larg_r3
-	beq	Larg_r2
-	cmp	r3, #1
-	bhi	Larg_r1
-	beq	Larg_r0
-	b	Largs_done
-
-Larg_r3:
-	ldr	r3, [lr, #12]	@ argv[3]
-Larg_r2:
-	ldr	r2, [lr, #8]	@ argv[2]
-Larg_r1:
-	ldr	r1, [lr, #4]	@ argv[1]
-Larg_r0:
-	ldr	r0, [lr]	@ argv[0]
-Largs_done:
-
-	@ With every other register in use, load the function pointer into lr
-	@ and call the function.
-	ldr	lr, [sp, #28]
-	blx	lr
-
-	@ r1-r3 are free for use again. The trampoline only supports
-	@ single-return functions. Pass r4-r11 to the caller.
-	ldr	r1, [sp, #32]
-	vstmia	r1!, {d8,d9,d10,d11,d12,d13,d14,d15}
-#if defined(__APPLE__)
-	@ r9 is not volatile on iOS.
-	stmia	r1!, {r4,r5,r6,r7,r8,r10-r11}
-#else
-	stmia	r1!, {r4,r5,r6,r7,r8,r9,r10,r11}
-#endif
-
-	@ Unwind the stack and restore registers.
-	add	sp, sp, #44		@ 44 = 28+16
-	ldmia	sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}	@ Skip r0-r3 (see +16 above).
-	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
-
-	bx	lr
-
-
-.globl	_abi_test_clobber_r0
-.private_extern	_abi_test_clobber_r0
-.align	4
-_abi_test_clobber_r0:
-	mov	r0, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r1
-.private_extern	_abi_test_clobber_r1
-.align	4
-_abi_test_clobber_r1:
-	mov	r1, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r2
-.private_extern	_abi_test_clobber_r2
-.align	4
-_abi_test_clobber_r2:
-	mov	r2, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r3
-.private_extern	_abi_test_clobber_r3
-.align	4
-_abi_test_clobber_r3:
-	mov	r3, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r4
-.private_extern	_abi_test_clobber_r4
-.align	4
-_abi_test_clobber_r4:
-	mov	r4, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r5
-.private_extern	_abi_test_clobber_r5
-.align	4
-_abi_test_clobber_r5:
-	mov	r5, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r6
-.private_extern	_abi_test_clobber_r6
-.align	4
-_abi_test_clobber_r6:
-	mov	r6, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r7
-.private_extern	_abi_test_clobber_r7
-.align	4
-_abi_test_clobber_r7:
-	mov	r7, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r8
-.private_extern	_abi_test_clobber_r8
-.align	4
-_abi_test_clobber_r8:
-	mov	r8, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r9
-.private_extern	_abi_test_clobber_r9
-.align	4
-_abi_test_clobber_r9:
-	mov	r9, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r10
-.private_extern	_abi_test_clobber_r10
-.align	4
-_abi_test_clobber_r10:
-	mov	r10, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r11
-.private_extern	_abi_test_clobber_r11
-.align	4
-_abi_test_clobber_r11:
-	mov	r11, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_r12
-.private_extern	_abi_test_clobber_r12
-.align	4
-_abi_test_clobber_r12:
-	mov	r12, #0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d0
-.private_extern	_abi_test_clobber_d0
-.align	4
-_abi_test_clobber_d0:
-	mov	r0, #0
-	vmov	s0, r0
-	vmov	s1, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d1
-.private_extern	_abi_test_clobber_d1
-.align	4
-_abi_test_clobber_d1:
-	mov	r0, #0
-	vmov	s2, r0
-	vmov	s3, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d2
-.private_extern	_abi_test_clobber_d2
-.align	4
-_abi_test_clobber_d2:
-	mov	r0, #0
-	vmov	s4, r0
-	vmov	s5, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d3
-.private_extern	_abi_test_clobber_d3
-.align	4
-_abi_test_clobber_d3:
-	mov	r0, #0
-	vmov	s6, r0
-	vmov	s7, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d4
-.private_extern	_abi_test_clobber_d4
-.align	4
-_abi_test_clobber_d4:
-	mov	r0, #0
-	vmov	s8, r0
-	vmov	s9, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d5
-.private_extern	_abi_test_clobber_d5
-.align	4
-_abi_test_clobber_d5:
-	mov	r0, #0
-	vmov	s10, r0
-	vmov	s11, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d6
-.private_extern	_abi_test_clobber_d6
-.align	4
-_abi_test_clobber_d6:
-	mov	r0, #0
-	vmov	s12, r0
-	vmov	s13, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d7
-.private_extern	_abi_test_clobber_d7
-.align	4
-_abi_test_clobber_d7:
-	mov	r0, #0
-	vmov	s14, r0
-	vmov	s15, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d8
-.private_extern	_abi_test_clobber_d8
-.align	4
-_abi_test_clobber_d8:
-	mov	r0, #0
-	vmov	s16, r0
-	vmov	s17, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d9
-.private_extern	_abi_test_clobber_d9
-.align	4
-_abi_test_clobber_d9:
-	mov	r0, #0
-	vmov	s18, r0
-	vmov	s19, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d10
-.private_extern	_abi_test_clobber_d10
-.align	4
-_abi_test_clobber_d10:
-	mov	r0, #0
-	vmov	s20, r0
-	vmov	s21, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d11
-.private_extern	_abi_test_clobber_d11
-.align	4
-_abi_test_clobber_d11:
-	mov	r0, #0
-	vmov	s22, r0
-	vmov	s23, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d12
-.private_extern	_abi_test_clobber_d12
-.align	4
-_abi_test_clobber_d12:
-	mov	r0, #0
-	vmov	s24, r0
-	vmov	s25, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d13
-.private_extern	_abi_test_clobber_d13
-.align	4
-_abi_test_clobber_d13:
-	mov	r0, #0
-	vmov	s26, r0
-	vmov	s27, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d14
-.private_extern	_abi_test_clobber_d14
-.align	4
-_abi_test_clobber_d14:
-	mov	r0, #0
-	vmov	s28, r0
-	vmov	s29, r0
-	bx	lr
-
-
-.globl	_abi_test_clobber_d15
-.private_extern	_abi_test_clobber_d15
-.align	4
-_abi_test_clobber_d15:
-	mov	r0, #0
-	vmov	s30, r0
-	vmov	s31, r0
-	bx	lr
-
-#endif  // !OPENSSL_NO_ASM
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/chacha/chacha-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/chacha/chacha-armv8.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
@ -1,785 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__aarch64__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-#if __ARM_MAX_ARCH__>=7
-.text
-.arch	armv8-a+crypto
-.section	.rodata
-.align	5
-.Lrcon:
-.long	0x01,0x01,0x01,0x01
-.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
-.long	0x1b,0x1b,0x1b,0x1b
-
-.text
-
-.globl	aes_hw_set_encrypt_key
-.hidden	aes_hw_set_encrypt_key
-.type	aes_hw_set_encrypt_key,%function
-.align	5
-aes_hw_set_encrypt_key:
-.Lenc_key:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	mov	x3,#-1
-	cmp	x0,#0
-	b.eq	.Lenc_key_abort
-	cmp	x2,#0
-	b.eq	.Lenc_key_abort
-	mov	x3,#-2
-	cmp	w1,#128
-	b.lt	.Lenc_key_abort
-	cmp	w1,#256
-	b.gt	.Lenc_key_abort
-	tst	w1,#0x3f
-	b.ne	.Lenc_key_abort
-
-	adrp	x3,.Lrcon
-	add	x3,x3,:lo12:.Lrcon
-	cmp	w1,#192
-
-	eor	v0.16b,v0.16b,v0.16b
-	ld1	{v3.16b},[x0],#16
-	mov	w1,#8		// reuse w1
-	ld1	{v1.4s,v2.4s},[x3],#32
-
-	b.lt	.Loop128
-	b.eq	.L192
-	b	.L256
-
-.align	4
-.Loop128:
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-	b.ne	.Loop128
-
-	ld1	{v1.4s},[x3]
-
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-
-	tbl	v6.16b,{v3.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v3.4s},[x2],#16
-	aese	v6.16b,v0.16b
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	eor	v3.16b,v3.16b,v6.16b
-	st1	{v3.4s},[x2]
-	add	x2,x2,#0x50
-
-	mov	w12,#10
-	b	.Ldone
-
-.align	4
-.L192:
-	ld1	{v4.8b},[x0],#8
-	movi	v6.16b,#8			// borrow v6.16b
-	st1	{v3.4s},[x2],#16
-	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
-
-.Loop192:
-	tbl	v6.16b,{v4.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v4.8b},[x2],#8
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-
-	dup	v5.4s,v3.s[3]
-	eor	v5.16b,v5.16b,v4.16b
-	eor	v6.16b,v6.16b,v1.16b
-	ext	v4.16b,v0.16b,v4.16b,#12
-	shl	v1.16b,v1.16b,#1
-	eor	v4.16b,v4.16b,v5.16b
-	eor	v3.16b,v3.16b,v6.16b
-	eor	v4.16b,v4.16b,v6.16b
-	st1	{v3.4s},[x2],#16
-	b.ne	.Loop192
-
-	mov	w12,#12
-	add	x2,x2,#0x20
-	b	.Ldone
-
-.align	4
-.L256:
-	ld1	{v4.16b},[x0]
-	mov	w1,#7
-	mov	w12,#14
-	st1	{v3.4s},[x2],#16
-
-.Loop256:
-	tbl	v6.16b,{v4.16b},v2.16b
-	ext	v5.16b,v0.16b,v3.16b,#12
-	st1	{v4.4s},[x2],#16
-	aese	v6.16b,v0.16b
-	subs	w1,w1,#1
-
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v3.16b,v3.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v6.16b,v6.16b,v1.16b
-	eor	v3.16b,v3.16b,v5.16b
-	shl	v1.16b,v1.16b,#1
-	eor	v3.16b,v3.16b,v6.16b
-	st1	{v3.4s},[x2],#16
-	b.eq	.Ldone
-
-	dup	v6.4s,v3.s[3]		// just splat
-	ext	v5.16b,v0.16b,v4.16b,#12
-	aese	v6.16b,v0.16b
-
-	eor	v4.16b,v4.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v4.16b,v4.16b,v5.16b
-	ext	v5.16b,v0.16b,v5.16b,#12
-	eor	v4.16b,v4.16b,v5.16b
-
-	eor	v4.16b,v4.16b,v6.16b
-	b	.Loop256
-
-.Ldone:
-	str	w12,[x2]
-	mov	x3,#0
-
-.Lenc_key_abort:
-	mov	x0,x3			// return value
-	ldr	x29,[sp],#16
-	ret
-.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
-
-.globl	aes_hw_set_decrypt_key
-.hidden	aes_hw_set_decrypt_key
-.type	aes_hw_set_decrypt_key,%function
-.align	5
-aes_hw_set_decrypt_key:
-	AARCH64_SIGN_LINK_REGISTER
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	bl	.Lenc_key
-
-	cmp	x0,#0
-	b.ne	.Ldec_key_abort
-
-	sub	x2,x2,#240		// restore original x2
-	mov	x4,#-16
-	add	x0,x2,x12,lsl#4	// end of key schedule
-
-	ld1	{v0.4s},[x2]
-	ld1	{v1.4s},[x0]
-	st1	{v0.4s},[x0],x4
-	st1	{v1.4s},[x2],#16
-
-.Loop_imc:
-	ld1	{v0.4s},[x2]
-	ld1	{v1.4s},[x0]
-	aesimc	v0.16b,v0.16b
-	aesimc	v1.16b,v1.16b
-	st1	{v0.4s},[x0],x4
-	st1	{v1.4s},[x2],#16
-	cmp	x0,x2
-	b.hi	.Loop_imc
-
-	ld1	{v0.4s},[x2]
-	aesimc	v0.16b,v0.16b
-	st1	{v0.4s},[x0]
-
-	eor	x0,x0,x0		// return value
-.Ldec_key_abort:
-	ldp	x29,x30,[sp],#16
-	AARCH64_VALIDATE_LINK_REGISTER
-	ret
-.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
-.globl	aes_hw_encrypt
-.hidden	aes_hw_encrypt
-.type	aes_hw_encrypt,%function
-.align	5
-aes_hw_encrypt:
-	AARCH64_VALID_CALL_TARGET
-	ldr	w3,[x2,#240]
-	ld1	{v0.4s},[x2],#16
-	ld1	{v2.16b},[x0]
-	sub	w3,w3,#2
-	ld1	{v1.4s},[x2],#16
-
-.Loop_enc:
-	aese	v2.16b,v0.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2],#16
-	subs	w3,w3,#2
-	aese	v2.16b,v1.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v1.4s},[x2],#16
-	b.gt	.Loop_enc
-
-	aese	v2.16b,v0.16b
-	aesmc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2]
-	aese	v2.16b,v1.16b
-	eor	v2.16b,v2.16b,v0.16b
-
-	st1	{v2.16b},[x1]
-	ret
-.size	aes_hw_encrypt,.-aes_hw_encrypt
-.globl	aes_hw_decrypt
-.hidden	aes_hw_decrypt
-.type	aes_hw_decrypt,%function
-.align	5
-aes_hw_decrypt:
-	AARCH64_VALID_CALL_TARGET
-	ldr	w3,[x2,#240]
-	ld1	{v0.4s},[x2],#16
-	ld1	{v2.16b},[x0]
-	sub	w3,w3,#2
-	ld1	{v1.4s},[x2],#16
-
-.Loop_dec:
-	aesd	v2.16b,v0.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2],#16
-	subs	w3,w3,#2
-	aesd	v2.16b,v1.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v1.4s},[x2],#16
-	b.gt	.Loop_dec
-
-	aesd	v2.16b,v0.16b
-	aesimc	v2.16b,v2.16b
-	ld1	{v0.4s},[x2]
-	aesd	v2.16b,v1.16b
-	eor	v2.16b,v2.16b,v0.16b
-
-	st1	{v2.16b},[x1]
-	ret
-.size	aes_hw_decrypt,.-aes_hw_decrypt
-.globl	aes_hw_cbc_encrypt
-.hidden	aes_hw_cbc_encrypt
-.type	aes_hw_cbc_encrypt,%function
-.align	5
-aes_hw_cbc_encrypt:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	subs	x2,x2,#16
-	mov	x8,#16
-	b.lo	.Lcbc_abort
-	csel	x8,xzr,x8,eq
-
-	cmp	w5,#0			// en- or decrypting?
-	ldr	w5,[x3,#240]
-	and	x2,x2,#-16
-	ld1	{v6.16b},[x4]
-	ld1	{v0.16b},[x0],x8
-
-	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
-	sub	w5,w5,#6
-	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
-	sub	w5,w5,#2
-	ld1	{v18.4s,v19.4s},[x7],#32
-	ld1	{v20.4s,v21.4s},[x7],#32
-	ld1	{v22.4s,v23.4s},[x7],#32
-	ld1	{v7.4s},[x7]
-
-	add	x7,x3,#32
-	mov	w6,w5
-	b.eq	.Lcbc_dec
-
-	cmp	w5,#2
-	eor	v0.16b,v0.16b,v6.16b
-	eor	v5.16b,v16.16b,v7.16b
-	b.eq	.Lcbc_enc128
-
-	ld1	{v2.4s,v3.4s},[x7]
-	add	x7,x3,#16
-	add	x6,x3,#16*4
-	add	x12,x3,#16*5
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	add	x14,x3,#16*6
-	add	x3,x3,#16*7
-	b	.Lenter_cbc_enc
-
-.align	4
-.Loop_cbc_enc:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	st1	{v6.16b},[x1],#16
-.Lenter_cbc_enc:
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v2.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.4s},[x6]
-	cmp	w5,#4
-	aese	v0.16b,v3.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x12]
-	b.eq	.Lcbc_enc192
-
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.4s},[x14]
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x3]
-	nop
-
-.Lcbc_enc192:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	subs	x2,x2,#16
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	csel	x8,xzr,x8,eq
-	aese	v0.16b,v18.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v19.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.16b},[x0],x8
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	eor	v16.16b,v16.16b,v5.16b
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v23.16b
-	eor	v6.16b,v0.16b,v7.16b
-	b.hs	.Loop_cbc_enc
-
-	st1	{v6.16b},[x1],#16
-	b	.Lcbc_done
-
-.align	5
-.Lcbc_enc128:
-	ld1	{v2.4s,v3.4s},[x7]
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	b	.Lenter_cbc_enc128
-.Loop_cbc_enc128:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	st1	{v6.16b},[x1],#16
-.Lenter_cbc_enc128:
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	subs	x2,x2,#16
-	aese	v0.16b,v2.16b
-	aesmc	v0.16b,v0.16b
-	csel	x8,xzr,x8,eq
-	aese	v0.16b,v3.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v18.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v19.16b
-	aesmc	v0.16b,v0.16b
-	ld1	{v16.16b},[x0],x8
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	eor	v16.16b,v16.16b,v5.16b
-	aese	v0.16b,v23.16b
-	eor	v6.16b,v0.16b,v7.16b
-	b.hs	.Loop_cbc_enc128
-
-	st1	{v6.16b},[x1],#16
-	b	.Lcbc_done
-.align	5
-.Lcbc_dec:
-	ld1	{v18.16b},[x0],#16
-	subs	x2,x2,#32		// bias
-	add	w6,w5,#2
-	orr	v3.16b,v0.16b,v0.16b
-	orr	v1.16b,v0.16b,v0.16b
-	orr	v19.16b,v18.16b,v18.16b
-	b.lo	.Lcbc_dec_tail
-
-	orr	v1.16b,v18.16b,v18.16b
-	ld1	{v18.16b},[x0],#16
-	orr	v2.16b,v0.16b,v0.16b
-	orr	v3.16b,v1.16b,v1.16b
-	orr	v19.16b,v18.16b,v18.16b
-
-.Loop3x_cbc_dec:
-	aesd	v0.16b,v16.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aesd	v0.16b,v17.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	.Loop3x_cbc_dec
-
-	aesd	v0.16b,v16.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	eor	v4.16b,v6.16b,v7.16b
-	subs	x2,x2,#0x30
-	eor	v5.16b,v2.16b,v7.16b
-	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
-	aesd	v0.16b,v17.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	eor	v17.16b,v3.16b,v7.16b
-	add	x0,x0,x6		// x0 is adjusted in such way that
-					// at exit from the loop v1.16b-v18.16b
-					// are loaded with last "words"
-	orr	v6.16b,v19.16b,v19.16b
-	mov	x7,x3
-	aesd	v0.16b,v20.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v20.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v20.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v2.16b},[x0],#16
-	aesd	v0.16b,v21.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v21.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v21.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v3.16b},[x0],#16
-	aesd	v0.16b,v22.16b
-	aesimc	v0.16b,v0.16b
-	aesd	v1.16b,v22.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v22.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v19.16b},[x0],#16
-	aesd	v0.16b,v23.16b
-	aesd	v1.16b,v23.16b
-	aesd	v18.16b,v23.16b
-	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
-	add	w6,w5,#2
-	eor	v4.16b,v4.16b,v0.16b
-	eor	v5.16b,v5.16b,v1.16b
-	eor	v18.16b,v18.16b,v17.16b
-	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
-	st1	{v4.16b},[x1],#16
-	orr	v0.16b,v2.16b,v2.16b
-	st1	{v5.16b},[x1],#16
-	orr	v1.16b,v3.16b,v3.16b
-	st1	{v18.16b},[x1],#16
-	orr	v18.16b,v19.16b,v19.16b
-	b.hs	.Loop3x_cbc_dec
-
-	cmn	x2,#0x30
-	b.eq	.Lcbc_done
-	nop
-
-.Lcbc_dec_tail:
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	.Lcbc_dec_tail
-
-	aesd	v1.16b,v16.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v16.16b
-	aesimc	v18.16b,v18.16b
-	aesd	v1.16b,v17.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v17.16b
-	aesimc	v18.16b,v18.16b
-	aesd	v1.16b,v20.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v20.16b
-	aesimc	v18.16b,v18.16b
-	cmn	x2,#0x20
-	aesd	v1.16b,v21.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v21.16b
-	aesimc	v18.16b,v18.16b
-	eor	v5.16b,v6.16b,v7.16b
-	aesd	v1.16b,v22.16b
-	aesimc	v1.16b,v1.16b
-	aesd	v18.16b,v22.16b
-	aesimc	v18.16b,v18.16b
-	eor	v17.16b,v3.16b,v7.16b
-	aesd	v1.16b,v23.16b
-	aesd	v18.16b,v23.16b
-	b.eq	.Lcbc_dec_one
-	eor	v5.16b,v5.16b,v1.16b
-	eor	v17.16b,v17.16b,v18.16b
-	orr	v6.16b,v19.16b,v19.16b
-	st1	{v5.16b},[x1],#16
-	st1	{v17.16b},[x1],#16
-	b	.Lcbc_done
-
-.Lcbc_dec_one:
-	eor	v5.16b,v5.16b,v18.16b
-	orr	v6.16b,v19.16b,v19.16b
-	st1	{v5.16b},[x1],#16
-
-.Lcbc_done:
-	st1	{v6.16b},[x4]
-.Lcbc_abort:
-	ldr	x29,[sp],#16
-	ret
-.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
-.globl	aes_hw_ctr32_encrypt_blocks
-.hidden	aes_hw_ctr32_encrypt_blocks
-.type	aes_hw_ctr32_encrypt_blocks,%function
-.align	5
-aes_hw_ctr32_encrypt_blocks:
-	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
-	AARCH64_VALID_CALL_TARGET
-	stp	x29,x30,[sp,#-16]!
-	add	x29,sp,#0
-	ldr	w5,[x3,#240]
-
-	ldr	w8, [x4, #12]
-	ld1	{v0.4s},[x4]
-
-	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
-	sub	w5,w5,#4
-	mov	x12,#16
-	cmp	x2,#2
-	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
-	sub	w5,w5,#2
-	ld1	{v20.4s,v21.4s},[x7],#32
-	ld1	{v22.4s,v23.4s},[x7],#32
-	ld1	{v7.4s},[x7]
-	add	x7,x3,#32
-	mov	w6,w5
-	csel	x12,xzr,x12,lo
-#ifndef __ARMEB__
-	rev	w8, w8
-#endif
-	orr	v1.16b,v0.16b,v0.16b
-	add	w10, w8, #1
-	orr	v18.16b,v0.16b,v0.16b
-	add	w8, w8, #2
-	orr	v6.16b,v0.16b,v0.16b
-	rev	w10, w10
-	mov	v1.s[3],w10
-	b.ls	.Lctr32_tail
-	rev	w12, w8
-	sub	x2,x2,#3		// bias
-	mov	v18.s[3],w12
-	b	.Loop3x_ctr32
-
-.align	4
-.Loop3x_ctr32:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	aese	v18.16b,v16.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	aese	v18.16b,v17.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	.Loop3x_ctr32
-
-	aese	v0.16b,v16.16b
-	aesmc	v4.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v5.16b,v1.16b
-	ld1	{v2.16b},[x0],#16
-	orr	v0.16b,v6.16b,v6.16b
-	aese	v18.16b,v16.16b
-	aesmc	v18.16b,v18.16b
-	ld1	{v3.16b},[x0],#16
-	orr	v1.16b,v6.16b,v6.16b
-	aese	v4.16b,v17.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v17.16b
-	aesmc	v5.16b,v5.16b
-	ld1	{v19.16b},[x0],#16
-	mov	x7,x3
-	aese	v18.16b,v17.16b
-	aesmc	v17.16b,v18.16b
-	orr	v18.16b,v6.16b,v6.16b
-	add	w9,w8,#1
-	aese	v4.16b,v20.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v20.16b
-	aesmc	v5.16b,v5.16b
-	eor	v2.16b,v2.16b,v7.16b
-	add	w10,w8,#2
-	aese	v17.16b,v20.16b
-	aesmc	v17.16b,v17.16b
-	eor	v3.16b,v3.16b,v7.16b
-	add	w8,w8,#3
-	aese	v4.16b,v21.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v21.16b
-	aesmc	v5.16b,v5.16b
-	eor	v19.16b,v19.16b,v7.16b
-	rev	w9,w9
-	aese	v17.16b,v21.16b
-	aesmc	v17.16b,v17.16b
-	mov	v0.s[3], w9
-	rev	w10,w10
-	aese	v4.16b,v22.16b
-	aesmc	v4.16b,v4.16b
-	aese	v5.16b,v22.16b
-	aesmc	v5.16b,v5.16b
-	mov	v1.s[3], w10
-	rev	w12,w8
-	aese	v17.16b,v22.16b
-	aesmc	v17.16b,v17.16b
-	mov	v18.s[3], w12
-	subs	x2,x2,#3
-	aese	v4.16b,v23.16b
-	aese	v5.16b,v23.16b
-	aese	v17.16b,v23.16b
-
-	eor	v2.16b,v2.16b,v4.16b
-	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
-	st1	{v2.16b},[x1],#16
-	eor	v3.16b,v3.16b,v5.16b
-	mov	w6,w5
-	st1	{v3.16b},[x1],#16
-	eor	v19.16b,v19.16b,v17.16b
-	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
-	st1	{v19.16b},[x1],#16
-	b.hs	.Loop3x_ctr32
-
-	adds	x2,x2,#3
-	b.eq	.Lctr32_done
-	cmp	x2,#1
-	mov	x12,#16
-	csel	x12,xzr,x12,eq
-
-.Lctr32_tail:
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v16.4s},[x7],#16
-	subs	w6,w6,#2
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v17.4s},[x7],#16
-	b.gt	.Lctr32_tail
-
-	aese	v0.16b,v16.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v16.16b
-	aesmc	v1.16b,v1.16b
-	aese	v0.16b,v17.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v17.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v2.16b},[x0],x12
-	aese	v0.16b,v20.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v20.16b
-	aesmc	v1.16b,v1.16b
-	ld1	{v3.16b},[x0]
-	aese	v0.16b,v21.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v21.16b
-	aesmc	v1.16b,v1.16b
-	eor	v2.16b,v2.16b,v7.16b
-	aese	v0.16b,v22.16b
-	aesmc	v0.16b,v0.16b
-	aese	v1.16b,v22.16b
-	aesmc	v1.16b,v1.16b
-	eor	v3.16b,v3.16b,v7.16b
-	aese	v0.16b,v23.16b
-	aese	v1.16b,v23.16b
-
-	cmp	x2,#1
-	eor	v2.16b,v2.16b,v0.16b
-	eor	v3.16b,v3.16b,v1.16b
-	st1	{v2.16b},[x1],#16
-	b.eq	.Lctr32_done
-	st1	{v3.16b},[x1]
-
-.Lctr32_done:
-	ldr	x29,[sp],#16
-	ret
-.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
-#endif
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/armv8-mont.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/armv8-mont.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
@ -1,346 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__aarch64__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-.globl	gcm_init_neon
-.hidden	gcm_init_neon
-.type	gcm_init_neon,%function
-.align	4
-gcm_init_neon:
-	AARCH64_VALID_CALL_TARGET
-	// This function is adapted from gcm_init_v8. xC2 is t3.
-	ld1	{v17.2d}, [x1]			// load H
-	movi	v19.16b, #0xe1
-	shl	v19.2d, v19.2d, #57		// 0xc2.0
-	ext	v3.16b, v17.16b, v17.16b, #8
-	ushr	v18.2d, v19.2d, #63
-	dup	v17.4s, v17.s[1]
-	ext	v16.16b, v18.16b, v19.16b, #8	// t0=0xc2....01
-	ushr	v18.2d, v3.2d, #63
-	sshr	v17.4s, v17.4s, #31		// broadcast carry bit
-	and	v18.16b, v18.16b, v16.16b
-	shl	v3.2d, v3.2d, #1
-	ext	v18.16b, v18.16b, v18.16b, #8
-	and	v16.16b, v16.16b, v17.16b
-	orr	v3.16b, v3.16b, v18.16b	// H<<<=1
-	eor	v5.16b, v3.16b, v16.16b	// twisted H
-	st1	{v5.2d}, [x0]			// store Htable[0]
-	ret
-.size	gcm_init_neon,.-gcm_init_neon
-
-.globl	gcm_gmult_neon
-.hidden	gcm_gmult_neon
-.type	gcm_gmult_neon,%function
-.align	4
-gcm_gmult_neon:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v3.16b}, [x0]		// load Xi
-	ld1	{v5.1d}, [x1], #8		// load twisted H
-	ld1	{v6.1d}, [x1]
-	adrp	x9, .Lmasks		// load constants
-	add	x9, x9, :lo12:.Lmasks
-	ld1	{v24.2d, v25.2d}, [x9]
-	rev64	v3.16b, v3.16b		// byteswap Xi
-	ext	v3.16b, v3.16b, v3.16b, #8
-	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing
-
-	mov	x3, #16
-	b	.Lgmult_neon
-.size	gcm_gmult_neon,.-gcm_gmult_neon
-
-.globl	gcm_ghash_neon
-.hidden	gcm_ghash_neon
-.type	gcm_ghash_neon,%function
-.align	4
-gcm_ghash_neon:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v0.16b}, [x0]		// load Xi
-	ld1	{v5.1d}, [x1], #8		// load twisted H
-	ld1	{v6.1d}, [x1]
-	adrp	x9, .Lmasks		// load constants
-	add	x9, x9, :lo12:.Lmasks
-	ld1	{v24.2d, v25.2d}, [x9]
-	rev64	v0.16b, v0.16b		// byteswap Xi
-	ext	v0.16b, v0.16b, v0.16b, #8
-	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing
-
-.Loop_neon:
-	ld1	{v3.16b}, [x2], #16	// load inp
-	rev64	v3.16b, v3.16b		// byteswap inp
-	ext	v3.16b, v3.16b, v3.16b, #8
-	eor	v3.16b, v3.16b, v0.16b	// inp ^= Xi
-
-.Lgmult_neon:
-	// Split the input into v3 and v4. (The upper halves are unused,
-	// so it is okay to leave them alone.)
-	ins	v4.d[0], v3.d[1]
-	ext	v16.8b, v5.8b, v5.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v3.8b		// F = A1*B
-	ext	v0.8b, v3.8b, v3.8b, #1		// B1
-	pmull	v0.8h, v5.8b, v0.8b		// E = A*B1
-	ext	v17.8b, v5.8b, v5.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v3.8b		// H = A2*B
-	ext	v19.8b, v3.8b, v3.8b, #2	// B2
-	pmull	v19.8h, v5.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v5.8b, v5.8b, #3	// A3
-	eor	v16.16b, v16.16b, v0.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v3.8b		// J = A3*B
-	ext	v0.8b, v3.8b, v3.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v0.8h, v5.8b, v0.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v3.8b, v3.8b, #4	// B4
-	eor	v18.16b, v18.16b, v0.16b	// N = I + J
-	pmull	v19.8h, v5.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v0.8h, v5.8b, v3.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v0.16b, v0.16b, v16.16b
-	eor	v0.16b, v0.16b, v18.16b
-	eor	v3.8b, v3.8b, v4.8b	// Karatsuba pre-processing
-	ext	v16.8b, v7.8b, v7.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v3.8b		// F = A1*B
-	ext	v1.8b, v3.8b, v3.8b, #1		// B1
-	pmull	v1.8h, v7.8b, v1.8b		// E = A*B1
-	ext	v17.8b, v7.8b, v7.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v3.8b		// H = A2*B
-	ext	v19.8b, v3.8b, v3.8b, #2	// B2
-	pmull	v19.8h, v7.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v7.8b, v7.8b, #3	// A3
-	eor	v16.16b, v16.16b, v1.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v3.8b		// J = A3*B
-	ext	v1.8b, v3.8b, v3.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v1.8h, v7.8b, v1.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v3.8b, v3.8b, #4	// B4
-	eor	v18.16b, v18.16b, v1.16b	// N = I + J
-	pmull	v19.8h, v7.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v1.8h, v7.8b, v3.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v1.16b, v1.16b, v16.16b
-	eor	v1.16b, v1.16b, v18.16b
-	ext	v16.8b, v6.8b, v6.8b, #1	// A1
-	pmull	v16.8h, v16.8b, v4.8b		// F = A1*B
-	ext	v2.8b, v4.8b, v4.8b, #1		// B1
-	pmull	v2.8h, v6.8b, v2.8b		// E = A*B1
-	ext	v17.8b, v6.8b, v6.8b, #2	// A2
-	pmull	v17.8h, v17.8b, v4.8b		// H = A2*B
-	ext	v19.8b, v4.8b, v4.8b, #2	// B2
-	pmull	v19.8h, v6.8b, v19.8b		// G = A*B2
-	ext	v18.8b, v6.8b, v6.8b, #3	// A3
-	eor	v16.16b, v16.16b, v2.16b	// L = E + F
-	pmull	v18.8h, v18.8b, v4.8b		// J = A3*B
-	ext	v2.8b, v4.8b, v4.8b, #3		// B3
-	eor	v17.16b, v17.16b, v19.16b	// M = G + H
-	pmull	v2.8h, v6.8b, v2.8b		// I = A*B3
-
-	// Here we diverge from the 32-bit version. It computes the following
-	// (instructions reordered for clarity):
-	//
-	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
-	//     vand	$t0#hi, $t0#hi, $k48
-	//     veor	$t0#lo, $t0#lo, $t0#hi
-	//
-	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
-	//     vand	$t1#hi, $t1#hi, $k32
-	//     veor	$t1#lo, $t1#lo, $t1#hi
-	//
-	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
-	//     vand	$t2#hi, $t2#hi, $k16
-	//     veor	$t2#lo, $t2#lo, $t2#hi
-	//
-	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
-	//     vmov.i64	$t3#hi, #0
-	//
-	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
-	// upper halves of SIMD registers, so we must split each half into
-	// separate registers. To compensate, we pair computations up and
-	// parallelize.
-
-	ext	v19.8b, v4.8b, v4.8b, #4	// B4
-	eor	v18.16b, v18.16b, v2.16b	// N = I + J
-	pmull	v19.8h, v6.8b, v19.8b		// K = A*B4
-
-	// This can probably be scheduled more efficiently. For now, we just
-	// pair up independent instructions.
-	zip1	v20.2d, v16.2d, v17.2d
-	zip1	v22.2d, v18.2d, v19.2d
-	zip2	v21.2d, v16.2d, v17.2d
-	zip2	v23.2d, v18.2d, v19.2d
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	and	v21.16b, v21.16b, v24.16b
-	and	v23.16b, v23.16b, v25.16b
-	eor	v20.16b, v20.16b, v21.16b
-	eor	v22.16b, v22.16b, v23.16b
-	zip1	v16.2d, v20.2d, v21.2d
-	zip1	v18.2d, v22.2d, v23.2d
-	zip2	v17.2d, v20.2d, v21.2d
-	zip2	v19.2d, v22.2d, v23.2d
-
-	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
-	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
-	pmull	v2.8h, v6.8b, v4.8b		// D = A*B
-	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
-	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
-	eor	v16.16b, v16.16b, v17.16b
-	eor	v18.16b, v18.16b, v19.16b
-	eor	v2.16b, v2.16b, v16.16b
-	eor	v2.16b, v2.16b, v18.16b
-	ext	v16.16b, v0.16b, v2.16b, #8
-	eor	v1.16b, v1.16b, v0.16b	// Karatsuba post-processing
-	eor	v1.16b, v1.16b, v2.16b
-	eor	v1.16b, v1.16b, v16.16b	// Xm overlaps Xh.lo and Xl.hi
-	ins	v0.d[1], v1.d[0]		// Xh|Xl - 256-bit result
-	// This is a no-op due to the ins instruction below.
-	// ins	v2.d[0], v1.d[1]
-
-	// equivalent of reduction_avx from ghash-x86_64.pl
-	shl	v17.2d, v0.2d, #57		// 1st phase
-	shl	v18.2d, v0.2d, #62
-	eor	v18.16b, v18.16b, v17.16b	//
-	shl	v17.2d, v0.2d, #63
-	eor	v18.16b, v18.16b, v17.16b	//
-	// Note Xm contains {Xl.d[1], Xh.d[0]}.
-	eor	v18.16b, v18.16b, v1.16b
-	ins	v0.d[1], v18.d[0]		// Xl.d[1] ^= t2.d[0]
-	ins	v2.d[0], v18.d[1]		// Xh.d[0] ^= t2.d[1]
-
-	ushr	v18.2d, v0.2d, #1		// 2nd phase
-	eor	v2.16b, v2.16b,v0.16b
-	eor	v0.16b, v0.16b,v18.16b	//
-	ushr	v18.2d, v18.2d, #6
-	ushr	v0.2d, v0.2d, #1		//
-	eor	v0.16b, v0.16b, v2.16b	//
-	eor	v0.16b, v0.16b, v18.16b	//
-
-	subs	x3, x3, #16
-	bne	.Loop_neon
-
-	rev64	v0.16b, v0.16b		// byteswap Xi and write
-	ext	v0.16b, v0.16b, v0.16b, #8
-	st1	{v0.16b}, [x0]
-
-	ret
-.size	gcm_ghash_neon,.-gcm_ghash_neon
-
-.section	.rodata
-.align	4
-.Lmasks:
-.quad	0x0000ffffffffffff	// k48
-.quad	0x00000000ffffffff	// k32
-.quad	0x000000000000ffff	// k16
-.quad	0x0000000000000000	// k0
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
@ -1,252 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__aarch64__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-.arch	armv8-a+crypto
-.globl	gcm_init_v8
-.hidden	gcm_init_v8
-.type	gcm_init_v8,%function
-.align	4
-gcm_init_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v17.2d},[x1]		//load input H
-	movi	v19.16b,#0xe1
-	shl	v19.2d,v19.2d,#57		//0xc2.0
-	ext	v3.16b,v17.16b,v17.16b,#8
-	ushr	v18.2d,v19.2d,#63
-	dup	v17.4s,v17.s[1]
-	ext	v16.16b,v18.16b,v19.16b,#8		//t0=0xc2....01
-	ushr	v18.2d,v3.2d,#63
-	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
-	and	v18.16b,v18.16b,v16.16b
-	shl	v3.2d,v3.2d,#1
-	ext	v18.16b,v18.16b,v18.16b,#8
-	and	v16.16b,v16.16b,v17.16b
-	orr	v3.16b,v3.16b,v18.16b		//H<<<=1
-	eor	v20.16b,v3.16b,v16.16b		//twisted H
-	st1	{v20.2d},[x0],#16		//store Htable[0]
-
-	//calculate H^2
-	ext	v16.16b,v20.16b,v20.16b,#8		//Karatsuba pre-processing
-	pmull	v0.1q,v20.1d,v20.1d
-	eor	v16.16b,v16.16b,v20.16b
-	pmull2	v2.1q,v20.2d,v20.2d
-	pmull	v1.1q,v16.1d,v16.1d
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v22.16b,v0.16b,v18.16b
-
-	ext	v17.16b,v22.16b,v22.16b,#8		//Karatsuba pre-processing
-	eor	v17.16b,v17.16b,v22.16b
-	ext	v21.16b,v16.16b,v17.16b,#8		//pack Karatsuba pre-processed
-	st1	{v21.2d,v22.2d},[x0]		//store Htable[1..2]
-
-	ret
-.size	gcm_init_v8,.-gcm_init_v8
-.globl	gcm_gmult_v8
-.hidden	gcm_gmult_v8
-.type	gcm_gmult_v8,%function
-.align	4
-gcm_gmult_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v17.2d},[x0]		//load Xi
-	movi	v19.16b,#0xe1
-	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
-	shl	v19.2d,v19.2d,#57
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ext	v3.16b,v17.16b,v17.16b,#8
-
-	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
-	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
-	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v0.16b,v0.16b,v18.16b
-
-#ifndef __ARMEB__
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v0.16b,v0.16b,v0.16b,#8
-	st1	{v0.2d},[x0]		//write out Xi
-
-	ret
-.size	gcm_gmult_v8,.-gcm_gmult_v8
-.globl	gcm_ghash_v8
-.hidden	gcm_ghash_v8
-.type	gcm_ghash_v8,%function
-.align	4
-gcm_ghash_v8:
-	AARCH64_VALID_CALL_TARGET
-	ld1	{v0.2d},[x0]		//load [rotated] Xi
-						//"[rotated]" means that
-						//loaded value would have
-						//to be rotated in order to
-						//make it appear as in
-						//algorithm specification
-	subs	x3,x3,#32		//see if x3 is 32 or larger
-	mov	x12,#16		//x12 is used as post-
-						//increment for input pointer;
-						//as loop is modulo-scheduled
-						//x12 is zeroed just in time
-						//to preclude overstepping
-						//inp[len], which means that
-						//last block[s] are actually
-						//loaded twice, but last
-						//copy is not processed
-	ld1	{v20.2d,v21.2d},[x1],#32	//load twisted H, ..., H^2
-	movi	v19.16b,#0xe1
-	ld1	{v22.2d},[x1]
-	csel	x12,xzr,x12,eq			//is it time to zero x12?
-	ext	v0.16b,v0.16b,v0.16b,#8		//rotate Xi
-	ld1	{v16.2d},[x2],#16	//load [rotated] I[0]
-	shl	v19.2d,v19.2d,#57		//compose 0xc2.0 constant
-#ifndef __ARMEB__
-	rev64	v16.16b,v16.16b
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v3.16b,v16.16b,v16.16b,#8		//rotate I[0]
-	b.lo	.Lodd_tail_v8		//x3 was less than 32
-	ld1	{v17.2d},[x2],x12	//load [rotated] I[1]
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ext	v7.16b,v17.16b,v17.16b,#8
-	eor	v3.16b,v3.16b,v0.16b		//I[i]^=Xi
-	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
-	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
-	pmull2	v6.1q,v20.2d,v7.2d
-	b	.Loop_mod2x_v8
-
-.align	4
-.Loop_mod2x_v8:
-	ext	v18.16b,v3.16b,v3.16b,#8
-	subs	x3,x3,#32		//is there more data?
-	pmull	v0.1q,v22.1d,v3.1d		//H^2.lo·Xi.lo
-	csel	x12,xzr,x12,lo			//is it time to zero x12?
-
-	pmull	v5.1q,v21.1d,v17.1d
-	eor	v18.16b,v18.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v22.2d,v3.2d		//H^2.hi·Xi.hi
-	eor	v0.16b,v0.16b,v4.16b		//accumulate
-	pmull2	v1.1q,v21.2d,v18.2d		//(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
-	ld1	{v16.2d},[x2],x12	//load [rotated] I[i+2]
-
-	eor	v2.16b,v2.16b,v6.16b
-	csel	x12,xzr,x12,eq			//is it time to zero x12?
-	eor	v1.16b,v1.16b,v5.16b
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	ld1	{v17.2d},[x2],x12	//load [rotated] I[i+3]
-#ifndef __ARMEB__
-	rev64	v16.16b,v16.16b
-#endif
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-#ifndef __ARMEB__
-	rev64	v17.16b,v17.16b
-#endif
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	ext	v7.16b,v17.16b,v17.16b,#8
-	ext	v3.16b,v16.16b,v16.16b,#8
-	eor	v0.16b,v1.16b,v18.16b
-	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
-	eor	v3.16b,v3.16b,v2.16b		//accumulate v3.16b early
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v3.16b,v3.16b,v18.16b
-	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
-	eor	v3.16b,v3.16b,v0.16b
-	pmull2	v6.1q,v20.2d,v7.2d
-	b.hs	.Loop_mod2x_v8		//there was at least 32 more bytes
-
-	eor	v2.16b,v2.16b,v18.16b
-	ext	v3.16b,v16.16b,v16.16b,#8		//re-construct v3.16b
-	adds	x3,x3,#32		//re-construct x3
-	eor	v0.16b,v0.16b,v2.16b		//re-construct v0.16b
-	b.eq	.Ldone_v8		//is x3 zero?
-.Lodd_tail_v8:
-	ext	v18.16b,v0.16b,v0.16b,#8
-	eor	v3.16b,v3.16b,v0.16b		//inp^=Xi
-	eor	v17.16b,v16.16b,v18.16b		//v17.16b is rotated inp^Xi
-
-	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
-	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
-	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
-	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
-
-	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
-	eor	v18.16b,v0.16b,v2.16b
-	eor	v1.16b,v1.16b,v17.16b
-	eor	v1.16b,v1.16b,v18.16b
-	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
-
-	ins	v2.d[0],v1.d[1]
-	ins	v1.d[1],v0.d[0]
-	eor	v0.16b,v1.16b,v18.16b
-
-	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
-	pmull	v0.1q,v0.1d,v19.1d
-	eor	v18.16b,v18.16b,v2.16b
-	eor	v0.16b,v0.16b,v18.16b
-
-.Ldone_v8:
-#ifndef __ARMEB__
-	rev64	v0.16b,v0.16b
-#endif
-	ext	v0.16b,v0.16b,v0.16b,#8
-	st1	{v0.2d},[x0]		//write out Xi
-
-	ret
-.size	gcm_ghash_v8,.-gcm_ghash_v8
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha256-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha256-armv8.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha512-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/sha512-armv8.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
--- a/contrib/boringssl-cmake/linux-aarch64/crypto/test/trampoline-armv8.S
+++ b/contrib/boringssl-cmake/linux-aarch64/crypto/test/trampoline-armv8.S
@ -1,761 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__aarch64__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-.text
-
-// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
-// with |argv|, then saves the callee-saved registers into |state|. It returns
-// the result of |func|. The |unwind| argument is unused.
-// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
-//                              const uint64_t *argv, size_t argc,
-//                              uint64_t unwind);
-.type	abi_test_trampoline, %function
-.globl	abi_test_trampoline
-.hidden	abi_test_trampoline
-.align	4
-abi_test_trampoline:
-.Labi_test_trampoline_begin:
-	AARCH64_SIGN_LINK_REGISTER
-	// Stack layout (low to high addresses)
-	//   x29,x30 (16 bytes)
-	//    d8-d15 (64 bytes)
-	//   x19-x28 (80 bytes)
-	//    x1 (8 bytes)
-	//   padding (8 bytes)
-	stp	x29, x30, [sp, #-176]!
-	mov	x29, sp
-
-	// Saved callee-saved registers and |state|.
-	stp	d8, d9, [sp, #16]
-	stp	d10, d11, [sp, #32]
-	stp	d12, d13, [sp, #48]
-	stp	d14, d15, [sp, #64]
-	stp	x19, x20, [sp, #80]
-	stp	x21, x22, [sp, #96]
-	stp	x23, x24, [sp, #112]
-	stp	x25, x26, [sp, #128]
-	stp	x27, x28, [sp, #144]
-	str	x1, [sp, #160]
-
-	// Load registers from |state|, with the exception of x29. x29 is the
-	// frame pointer and also callee-saved, but AAPCS64 allows platforms to
-	// mandate that x29 always point to a frame. iOS64 does so, which means
-	// we cannot fill x29 with entropy without violating ABI rules
-	// ourselves. x29 is tested separately below.
-	ldp	d8, d9, [x1], #16
-	ldp	d10, d11, [x1], #16
-	ldp	d12, d13, [x1], #16
-	ldp	d14, d15, [x1], #16
-	ldp	x19, x20, [x1], #16
-	ldp	x21, x22, [x1], #16
-	ldp	x23, x24, [x1], #16
-	ldp	x25, x26, [x1], #16
-	ldp	x27, x28, [x1], #16
-
-	// Move parameters into temporary registers.
-	mov	x9, x0
-	mov	x10, x2
-	mov	x11, x3
-
-	// Load parameters into registers.
-	cbz	x11, .Largs_done
-	ldr	x0, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x1, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x2, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x3, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x4, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x5, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x6, [x10], #8
-	subs	x11, x11, #1
-	b.eq	.Largs_done
-	ldr	x7, [x10], #8
-
-.Largs_done:
-	blr	x9
-
-	// Reload |state| and store registers.
-	ldr	x1, [sp, #160]
-	stp	d8, d9, [x1], #16
-	stp	d10, d11, [x1], #16
-	stp	d12, d13, [x1], #16
-	stp	d14, d15, [x1], #16
-	stp	x19, x20, [x1], #16
-	stp	x21, x22, [x1], #16
-	stp	x23, x24, [x1], #16
-	stp	x25, x26, [x1], #16
-	stp	x27, x28, [x1], #16
-
-	// |func| is required to preserve x29, the frame pointer. We cannot load
-	// random values into x29 (see comment above), so compare it against the
-	// expected value and zero the field of |state| if corrupted.
-	mov	x9, sp
-	cmp	x29, x9
-	b.eq	.Lx29_ok
-	str	xzr, [x1]
-
-.Lx29_ok:
-	// Restore callee-saved registers.
-	ldp	d8, d9, [sp, #16]
-	ldp	d10, d11, [sp, #32]
-	ldp	d12, d13, [sp, #48]
-	ldp	d14, d15, [sp, #64]
-	ldp	x19, x20, [sp, #80]
-	ldp	x21, x22, [sp, #96]
-	ldp	x23, x24, [sp, #112]
-	ldp	x25, x26, [sp, #128]
-	ldp	x27, x28, [sp, #144]
-
-	ldp	x29, x30, [sp], #176
-	AARCH64_VALIDATE_LINK_REGISTER
-	ret
-.size	abi_test_trampoline,.-abi_test_trampoline
-.type	abi_test_clobber_x0, %function
-.globl	abi_test_clobber_x0
-.hidden	abi_test_clobber_x0
-.align	4
-abi_test_clobber_x0:
-	AARCH64_VALID_CALL_TARGET
-	mov	x0, xzr
-	ret
-.size	abi_test_clobber_x0,.-abi_test_clobber_x0
-.type	abi_test_clobber_x1, %function
-.globl	abi_test_clobber_x1
-.hidden	abi_test_clobber_x1
-.align	4
-abi_test_clobber_x1:
-	AARCH64_VALID_CALL_TARGET
-	mov	x1, xzr
-	ret
-.size	abi_test_clobber_x1,.-abi_test_clobber_x1
-.type	abi_test_clobber_x2, %function
-.globl	abi_test_clobber_x2
-.hidden	abi_test_clobber_x2
-.align	4
-abi_test_clobber_x2:
-	AARCH64_VALID_CALL_TARGET
-	mov	x2, xzr
-	ret
-.size	abi_test_clobber_x2,.-abi_test_clobber_x2
-.type	abi_test_clobber_x3, %function
-.globl	abi_test_clobber_x3
-.hidden	abi_test_clobber_x3
-.align	4
-abi_test_clobber_x3:
-	AARCH64_VALID_CALL_TARGET
-	mov	x3, xzr
-	ret
-.size	abi_test_clobber_x3,.-abi_test_clobber_x3
-.type	abi_test_clobber_x4, %function
-.globl	abi_test_clobber_x4
-.hidden	abi_test_clobber_x4
-.align	4
-abi_test_clobber_x4:
-	AARCH64_VALID_CALL_TARGET
-	mov	x4, xzr
-	ret
-.size	abi_test_clobber_x4,.-abi_test_clobber_x4
-.type	abi_test_clobber_x5, %function
-.globl	abi_test_clobber_x5
-.hidden	abi_test_clobber_x5
-.align	4
-abi_test_clobber_x5:
-	AARCH64_VALID_CALL_TARGET
-	mov	x5, xzr
-	ret
-.size	abi_test_clobber_x5,.-abi_test_clobber_x5
-.type	abi_test_clobber_x6, %function
-.globl	abi_test_clobber_x6
-.hidden	abi_test_clobber_x6
-.align	4
-abi_test_clobber_x6:
-	AARCH64_VALID_CALL_TARGET
-	mov	x6, xzr
-	ret
-.size	abi_test_clobber_x6,.-abi_test_clobber_x6
-.type	abi_test_clobber_x7, %function
-.globl	abi_test_clobber_x7
-.hidden	abi_test_clobber_x7
-.align	4
-abi_test_clobber_x7:
-	AARCH64_VALID_CALL_TARGET
-	mov	x7, xzr
-	ret
-.size	abi_test_clobber_x7,.-abi_test_clobber_x7
-.type	abi_test_clobber_x8, %function
-.globl	abi_test_clobber_x8
-.hidden	abi_test_clobber_x8
-.align	4
-abi_test_clobber_x8:
-	AARCH64_VALID_CALL_TARGET
-	mov	x8, xzr
-	ret
-.size	abi_test_clobber_x8,.-abi_test_clobber_x8
-.type	abi_test_clobber_x9, %function
-.globl	abi_test_clobber_x9
-.hidden	abi_test_clobber_x9
-.align	4
-abi_test_clobber_x9:
-	AARCH64_VALID_CALL_TARGET
-	mov	x9, xzr
-	ret
-.size	abi_test_clobber_x9,.-abi_test_clobber_x9
-.type	abi_test_clobber_x10, %function
-.globl	abi_test_clobber_x10
-.hidden	abi_test_clobber_x10
-.align	4
-abi_test_clobber_x10:
-	AARCH64_VALID_CALL_TARGET
-	mov	x10, xzr
-	ret
-.size	abi_test_clobber_x10,.-abi_test_clobber_x10
-.type	abi_test_clobber_x11, %function
-.globl	abi_test_clobber_x11
-.hidden	abi_test_clobber_x11
-.align	4
-abi_test_clobber_x11:
-	AARCH64_VALID_CALL_TARGET
-	mov	x11, xzr
-	ret
-.size	abi_test_clobber_x11,.-abi_test_clobber_x11
-.type	abi_test_clobber_x12, %function
-.globl	abi_test_clobber_x12
-.hidden	abi_test_clobber_x12
-.align	4
-abi_test_clobber_x12:
-	AARCH64_VALID_CALL_TARGET
-	mov	x12, xzr
-	ret
-.size	abi_test_clobber_x12,.-abi_test_clobber_x12
-.type	abi_test_clobber_x13, %function
-.globl	abi_test_clobber_x13
-.hidden	abi_test_clobber_x13
-.align	4
-abi_test_clobber_x13:
-	AARCH64_VALID_CALL_TARGET
-	mov	x13, xzr
-	ret
-.size	abi_test_clobber_x13,.-abi_test_clobber_x13
-.type	abi_test_clobber_x14, %function
-.globl	abi_test_clobber_x14
-.hidden	abi_test_clobber_x14
-.align	4
-abi_test_clobber_x14:
-	AARCH64_VALID_CALL_TARGET
-	mov	x14, xzr
-	ret
-.size	abi_test_clobber_x14,.-abi_test_clobber_x14
-.type	abi_test_clobber_x15, %function
-.globl	abi_test_clobber_x15
-.hidden	abi_test_clobber_x15
-.align	4
-abi_test_clobber_x15:
-	AARCH64_VALID_CALL_TARGET
-	mov	x15, xzr
-	ret
-.size	abi_test_clobber_x15,.-abi_test_clobber_x15
-.type	abi_test_clobber_x16, %function
-.globl	abi_test_clobber_x16
-.hidden	abi_test_clobber_x16
-.align	4
-abi_test_clobber_x16:
-	AARCH64_VALID_CALL_TARGET
-	mov	x16, xzr
-	ret
-.size	abi_test_clobber_x16,.-abi_test_clobber_x16
-.type	abi_test_clobber_x17, %function
-.globl	abi_test_clobber_x17
-.hidden	abi_test_clobber_x17
-.align	4
-abi_test_clobber_x17:
-	AARCH64_VALID_CALL_TARGET
-	mov	x17, xzr
-	ret
-.size	abi_test_clobber_x17,.-abi_test_clobber_x17
-.type	abi_test_clobber_x19, %function
-.globl	abi_test_clobber_x19
-.hidden	abi_test_clobber_x19
-.align	4
-abi_test_clobber_x19:
-	AARCH64_VALID_CALL_TARGET
-	mov	x19, xzr
-	ret
-.size	abi_test_clobber_x19,.-abi_test_clobber_x19
-.type	abi_test_clobber_x20, %function
-.globl	abi_test_clobber_x20
-.hidden	abi_test_clobber_x20
-.align	4
-abi_test_clobber_x20:
-	AARCH64_VALID_CALL_TARGET
-	mov	x20, xzr
-	ret
-.size	abi_test_clobber_x20,.-abi_test_clobber_x20
-.type	abi_test_clobber_x21, %function
-.globl	abi_test_clobber_x21
-.hidden	abi_test_clobber_x21
-.align	4
-abi_test_clobber_x21:
-	AARCH64_VALID_CALL_TARGET
-	mov	x21, xzr
-	ret
-.size	abi_test_clobber_x21,.-abi_test_clobber_x21
-.type	abi_test_clobber_x22, %function
-.globl	abi_test_clobber_x22
-.hidden	abi_test_clobber_x22
-.align	4
-abi_test_clobber_x22:
-	AARCH64_VALID_CALL_TARGET
-	mov	x22, xzr
-	ret
-.size	abi_test_clobber_x22,.-abi_test_clobber_x22
-.type	abi_test_clobber_x23, %function
-.globl	abi_test_clobber_x23
-.hidden	abi_test_clobber_x23
-.align	4
-abi_test_clobber_x23:
-	AARCH64_VALID_CALL_TARGET
-	mov	x23, xzr
-	ret
-.size	abi_test_clobber_x23,.-abi_test_clobber_x23
-.type	abi_test_clobber_x24, %function
-.globl	abi_test_clobber_x24
-.hidden	abi_test_clobber_x24
-.align	4
-abi_test_clobber_x24:
-	AARCH64_VALID_CALL_TARGET
-	mov	x24, xzr
-	ret
-.size	abi_test_clobber_x24,.-abi_test_clobber_x24
-.type	abi_test_clobber_x25, %function
-.globl	abi_test_clobber_x25
-.hidden	abi_test_clobber_x25
-.align	4
-abi_test_clobber_x25:
-	AARCH64_VALID_CALL_TARGET
-	mov	x25, xzr
-	ret
-.size	abi_test_clobber_x25,.-abi_test_clobber_x25
-.type	abi_test_clobber_x26, %function
-.globl	abi_test_clobber_x26
-.hidden	abi_test_clobber_x26
-.align	4
-abi_test_clobber_x26:
-	AARCH64_VALID_CALL_TARGET
-	mov	x26, xzr
-	ret
-.size	abi_test_clobber_x26,.-abi_test_clobber_x26
-.type	abi_test_clobber_x27, %function
-.globl	abi_test_clobber_x27
-.hidden	abi_test_clobber_x27
-.align	4
-abi_test_clobber_x27:
-	AARCH64_VALID_CALL_TARGET
-	mov	x27, xzr
-	ret
-.size	abi_test_clobber_x27,.-abi_test_clobber_x27
-.type	abi_test_clobber_x28, %function
-.globl	abi_test_clobber_x28
-.hidden	abi_test_clobber_x28
-.align	4
-abi_test_clobber_x28:
-	AARCH64_VALID_CALL_TARGET
-	mov	x28, xzr
-	ret
-.size	abi_test_clobber_x28,.-abi_test_clobber_x28
-.type	abi_test_clobber_x29, %function
-.globl	abi_test_clobber_x29
-.hidden	abi_test_clobber_x29
-.align	4
-abi_test_clobber_x29:
-	AARCH64_VALID_CALL_TARGET
-	mov	x29, xzr
-	ret
-.size	abi_test_clobber_x29,.-abi_test_clobber_x29
-.type	abi_test_clobber_d0, %function
-.globl	abi_test_clobber_d0
-.hidden	abi_test_clobber_d0
-.align	4
-abi_test_clobber_d0:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d0, xzr
-	ret
-.size	abi_test_clobber_d0,.-abi_test_clobber_d0
-.type	abi_test_clobber_d1, %function
-.globl	abi_test_clobber_d1
-.hidden	abi_test_clobber_d1
-.align	4
-abi_test_clobber_d1:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d1, xzr
-	ret
-.size	abi_test_clobber_d1,.-abi_test_clobber_d1
-.type	abi_test_clobber_d2, %function
-.globl	abi_test_clobber_d2
-.hidden	abi_test_clobber_d2
-.align	4
-abi_test_clobber_d2:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d2, xzr
-	ret
-.size	abi_test_clobber_d2,.-abi_test_clobber_d2
-.type	abi_test_clobber_d3, %function
-.globl	abi_test_clobber_d3
-.hidden	abi_test_clobber_d3
-.align	4
-abi_test_clobber_d3:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d3, xzr
-	ret
-.size	abi_test_clobber_d3,.-abi_test_clobber_d3
-.type	abi_test_clobber_d4, %function
-.globl	abi_test_clobber_d4
-.hidden	abi_test_clobber_d4
-.align	4
-abi_test_clobber_d4:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d4, xzr
-	ret
-.size	abi_test_clobber_d4,.-abi_test_clobber_d4
-.type	abi_test_clobber_d5, %function
-.globl	abi_test_clobber_d5
-.hidden	abi_test_clobber_d5
-.align	4
-abi_test_clobber_d5:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d5, xzr
-	ret
-.size	abi_test_clobber_d5,.-abi_test_clobber_d5
-.type	abi_test_clobber_d6, %function
-.globl	abi_test_clobber_d6
-.hidden	abi_test_clobber_d6
-.align	4
-abi_test_clobber_d6:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d6, xzr
-	ret
-.size	abi_test_clobber_d6,.-abi_test_clobber_d6
-.type	abi_test_clobber_d7, %function
-.globl	abi_test_clobber_d7
-.hidden	abi_test_clobber_d7
-.align	4
-abi_test_clobber_d7:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d7, xzr
-	ret
-.size	abi_test_clobber_d7,.-abi_test_clobber_d7
-.type	abi_test_clobber_d8, %function
-.globl	abi_test_clobber_d8
-.hidden	abi_test_clobber_d8
-.align	4
-abi_test_clobber_d8:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d8, xzr
-	ret
-.size	abi_test_clobber_d8,.-abi_test_clobber_d8
-.type	abi_test_clobber_d9, %function
-.globl	abi_test_clobber_d9
-.hidden	abi_test_clobber_d9
-.align	4
-abi_test_clobber_d9:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d9, xzr
-	ret
-.size	abi_test_clobber_d9,.-abi_test_clobber_d9
-.type	abi_test_clobber_d10, %function
-.globl	abi_test_clobber_d10
-.hidden	abi_test_clobber_d10
-.align	4
-abi_test_clobber_d10:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d10, xzr
-	ret
-.size	abi_test_clobber_d10,.-abi_test_clobber_d10
-.type	abi_test_clobber_d11, %function
-.globl	abi_test_clobber_d11
-.hidden	abi_test_clobber_d11
-.align	4
-abi_test_clobber_d11:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d11, xzr
-	ret
-.size	abi_test_clobber_d11,.-abi_test_clobber_d11
-.type	abi_test_clobber_d12, %function
-.globl	abi_test_clobber_d12
-.hidden	abi_test_clobber_d12
-.align	4
-abi_test_clobber_d12:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d12, xzr
-	ret
-.size	abi_test_clobber_d12,.-abi_test_clobber_d12
-.type	abi_test_clobber_d13, %function
-.globl	abi_test_clobber_d13
-.hidden	abi_test_clobber_d13
-.align	4
-abi_test_clobber_d13:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d13, xzr
-	ret
-.size	abi_test_clobber_d13,.-abi_test_clobber_d13
-.type	abi_test_clobber_d14, %function
-.globl	abi_test_clobber_d14
-.hidden	abi_test_clobber_d14
-.align	4
-abi_test_clobber_d14:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d14, xzr
-	ret
-.size	abi_test_clobber_d14,.-abi_test_clobber_d14
-.type	abi_test_clobber_d15, %function
-.globl	abi_test_clobber_d15
-.hidden	abi_test_clobber_d15
-.align	4
-abi_test_clobber_d15:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d15, xzr
-	ret
-.size	abi_test_clobber_d15,.-abi_test_clobber_d15
-.type	abi_test_clobber_d16, %function
-.globl	abi_test_clobber_d16
-.hidden	abi_test_clobber_d16
-.align	4
-abi_test_clobber_d16:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d16, xzr
-	ret
-.size	abi_test_clobber_d16,.-abi_test_clobber_d16
-.type	abi_test_clobber_d17, %function
-.globl	abi_test_clobber_d17
-.hidden	abi_test_clobber_d17
-.align	4
-abi_test_clobber_d17:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d17, xzr
-	ret
-.size	abi_test_clobber_d17,.-abi_test_clobber_d17
-.type	abi_test_clobber_d18, %function
-.globl	abi_test_clobber_d18
-.hidden	abi_test_clobber_d18
-.align	4
-abi_test_clobber_d18:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d18, xzr
-	ret
-.size	abi_test_clobber_d18,.-abi_test_clobber_d18
-.type	abi_test_clobber_d19, %function
-.globl	abi_test_clobber_d19
-.hidden	abi_test_clobber_d19
-.align	4
-abi_test_clobber_d19:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d19, xzr
-	ret
-.size	abi_test_clobber_d19,.-abi_test_clobber_d19
-.type	abi_test_clobber_d20, %function
-.globl	abi_test_clobber_d20
-.hidden	abi_test_clobber_d20
-.align	4
-abi_test_clobber_d20:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d20, xzr
-	ret
-.size	abi_test_clobber_d20,.-abi_test_clobber_d20
-.type	abi_test_clobber_d21, %function
-.globl	abi_test_clobber_d21
-.hidden	abi_test_clobber_d21
-.align	4
-abi_test_clobber_d21:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d21, xzr
-	ret
-.size	abi_test_clobber_d21,.-abi_test_clobber_d21
-.type	abi_test_clobber_d22, %function
-.globl	abi_test_clobber_d22
-.hidden	abi_test_clobber_d22
-.align	4
-abi_test_clobber_d22:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d22, xzr
-	ret
-.size	abi_test_clobber_d22,.-abi_test_clobber_d22
-.type	abi_test_clobber_d23, %function
-.globl	abi_test_clobber_d23
-.hidden	abi_test_clobber_d23
-.align	4
-abi_test_clobber_d23:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d23, xzr
-	ret
-.size	abi_test_clobber_d23,.-abi_test_clobber_d23
-.type	abi_test_clobber_d24, %function
-.globl	abi_test_clobber_d24
-.hidden	abi_test_clobber_d24
-.align	4
-abi_test_clobber_d24:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d24, xzr
-	ret
-.size	abi_test_clobber_d24,.-abi_test_clobber_d24
-.type	abi_test_clobber_d25, %function
-.globl	abi_test_clobber_d25
-.hidden	abi_test_clobber_d25
-.align	4
-abi_test_clobber_d25:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d25, xzr
-	ret
-.size	abi_test_clobber_d25,.-abi_test_clobber_d25
-.type	abi_test_clobber_d26, %function
-.globl	abi_test_clobber_d26
-.hidden	abi_test_clobber_d26
-.align	4
-abi_test_clobber_d26:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d26, xzr
-	ret
-.size	abi_test_clobber_d26,.-abi_test_clobber_d26
-.type	abi_test_clobber_d27, %function
-.globl	abi_test_clobber_d27
-.hidden	abi_test_clobber_d27
-.align	4
-abi_test_clobber_d27:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d27, xzr
-	ret
-.size	abi_test_clobber_d27,.-abi_test_clobber_d27
-.type	abi_test_clobber_d28, %function
-.globl	abi_test_clobber_d28
-.hidden	abi_test_clobber_d28
-.align	4
-abi_test_clobber_d28:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d28, xzr
-	ret
-.size	abi_test_clobber_d28,.-abi_test_clobber_d28
-.type	abi_test_clobber_d29, %function
-.globl	abi_test_clobber_d29
-.hidden	abi_test_clobber_d29
-.align	4
-abi_test_clobber_d29:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d29, xzr
-	ret
-.size	abi_test_clobber_d29,.-abi_test_clobber_d29
-.type	abi_test_clobber_d30, %function
-.globl	abi_test_clobber_d30
-.hidden	abi_test_clobber_d30
-.align	4
-abi_test_clobber_d30:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d30, xzr
-	ret
-.size	abi_test_clobber_d30,.-abi_test_clobber_d30
-.type	abi_test_clobber_d31, %function
-.globl	abi_test_clobber_d31
-.hidden	abi_test_clobber_d31
-.align	4
-abi_test_clobber_d31:
-	AARCH64_VALID_CALL_TARGET
-	fmov	d31, xzr
-	ret
-.size	abi_test_clobber_d31,.-abi_test_clobber_d31
-.type	abi_test_clobber_v8_upper, %function
-.globl	abi_test_clobber_v8_upper
-.hidden	abi_test_clobber_v8_upper
-.align	4
-abi_test_clobber_v8_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v8.d[1], xzr
-	ret
-.size	abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
-.type	abi_test_clobber_v9_upper, %function
-.globl	abi_test_clobber_v9_upper
-.hidden	abi_test_clobber_v9_upper
-.align	4
-abi_test_clobber_v9_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v9.d[1], xzr
-	ret
-.size	abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
-.type	abi_test_clobber_v10_upper, %function
-.globl	abi_test_clobber_v10_upper
-.hidden	abi_test_clobber_v10_upper
-.align	4
-abi_test_clobber_v10_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v10.d[1], xzr
-	ret
-.size	abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
-.type	abi_test_clobber_v11_upper, %function
-.globl	abi_test_clobber_v11_upper
-.hidden	abi_test_clobber_v11_upper
-.align	4
-abi_test_clobber_v11_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v11.d[1], xzr
-	ret
-.size	abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
-.type	abi_test_clobber_v12_upper, %function
-.globl	abi_test_clobber_v12_upper
-.hidden	abi_test_clobber_v12_upper
-.align	4
-abi_test_clobber_v12_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v12.d[1], xzr
-	ret
-.size	abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
-.type	abi_test_clobber_v13_upper, %function
-.globl	abi_test_clobber_v13_upper
-.hidden	abi_test_clobber_v13_upper
-.align	4
-abi_test_clobber_v13_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v13.d[1], xzr
-	ret
-.size	abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
-.type	abi_test_clobber_v14_upper, %function
-.globl	abi_test_clobber_v14_upper
-.hidden	abi_test_clobber_v14_upper
-.align	4
-abi_test_clobber_v14_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v14.d[1], xzr
-	ret
-.size	abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
-.type	abi_test_clobber_v15_upper, %function
-.globl	abi_test_clobber_v15_upper
-.hidden	abi_test_clobber_v15_upper
-.align	4
-abi_test_clobber_v15_upper:
-	AARCH64_VALID_CALL_TARGET
-	fmov	v15.d[1], xzr
-	ret
-.size	abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-arm/crypto/chacha/chacha-armv4.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/chacha/chacha-armv4.S
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/aesv8-armx32.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/aesv8-armx32.S
@ -1,781 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__arm__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-#if __ARM_MAX_ARCH__>=7
-.text
-.arch	armv7-a	@ don't confuse not-so-latest binutils with argv8 :-)
-.fpu	neon
-.code	32
-#undef	__thumb2__
-.align	5
-.Lrcon:
-.long	0x01,0x01,0x01,0x01
-.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
-.long	0x1b,0x1b,0x1b,0x1b
-
-.text
-
-.globl	aes_hw_set_encrypt_key
-.hidden	aes_hw_set_encrypt_key
-.type	aes_hw_set_encrypt_key,%function
-.align	5
-aes_hw_set_encrypt_key:
-.Lenc_key:
-	mov	r3,#-1
-	cmp	r0,#0
-	beq	.Lenc_key_abort
-	cmp	r2,#0
-	beq	.Lenc_key_abort
-	mov	r3,#-2
-	cmp	r1,#128
-	blt	.Lenc_key_abort
-	cmp	r1,#256
-	bgt	.Lenc_key_abort
-	tst	r1,#0x3f
-	bne	.Lenc_key_abort
-
-	adr	r3,.Lrcon
-	cmp	r1,#192
-
-	veor	q0,q0,q0
-	vld1.8	{q3},[r0]!
-	mov	r1,#8		@ reuse r1
-	vld1.32	{q1,q2},[r3]!
-
-	blt	.Loop128
-	beq	.L192
-	b	.L256
-
-.align	4
-.Loop128:
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-	bne	.Loop128
-
-	vld1.32	{q1},[r3]
-
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-
-	vtbl.8	d20,{q3},d4
-	vtbl.8	d21,{q3},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q3},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	veor	q3,q3,q10
-	vst1.32	{q3},[r2]
-	add	r2,r2,#0x50
-
-	mov	r12,#10
-	b	.Ldone
-
-.align	4
-.L192:
-	vld1.8	{d16},[r0]!
-	vmov.i8	q10,#8			@ borrow q10
-	vst1.32	{q3},[r2]!
-	vsub.i8	q2,q2,q10	@ adjust the mask
-
-.Loop192:
-	vtbl.8	d20,{q8},d4
-	vtbl.8	d21,{q8},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{d16},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-
-	vdup.32	q9,d7[1]
-	veor	q9,q9,q8
-	veor	q10,q10,q1
-	vext.8	q8,q0,q8,#12
-	vshl.u8	q1,q1,#1
-	veor	q8,q8,q9
-	veor	q3,q3,q10
-	veor	q8,q8,q10
-	vst1.32	{q3},[r2]!
-	bne	.Loop192
-
-	mov	r12,#12
-	add	r2,r2,#0x20
-	b	.Ldone
-
-.align	4
-.L256:
-	vld1.8	{q8},[r0]
-	mov	r1,#7
-	mov	r12,#14
-	vst1.32	{q3},[r2]!
-
-.Loop256:
-	vtbl.8	d20,{q8},d4
-	vtbl.8	d21,{q8},d5
-	vext.8	q9,q0,q3,#12
-	vst1.32	{q8},[r2]!
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-	subs	r1,r1,#1
-
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q3,q3,q9
-	vext.8	q9,q0,q9,#12
-	veor	q10,q10,q1
-	veor	q3,q3,q9
-	vshl.u8	q1,q1,#1
-	veor	q3,q3,q10
-	vst1.32	{q3},[r2]!
-	beq	.Ldone
-
-	vdup.32	q10,d7[1]
-	vext.8	q9,q0,q8,#12
-.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
-
-	veor	q8,q8,q9
-	vext.8	q9,q0,q9,#12
-	veor	q8,q8,q9
-	vext.8	q9,q0,q9,#12
-	veor	q8,q8,q9
-
-	veor	q8,q8,q10
-	b	.Loop256
-
-.Ldone:
-	str	r12,[r2]
-	mov	r3,#0
-
-.Lenc_key_abort:
-	mov	r0,r3			@ return value
-
-	bx	lr
-.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
-
-.globl	aes_hw_set_decrypt_key
-.hidden	aes_hw_set_decrypt_key
-.type	aes_hw_set_decrypt_key,%function
-.align	5
-aes_hw_set_decrypt_key:
-	stmdb	sp!,{r4,lr}
-	bl	.Lenc_key
-
-	cmp	r0,#0
-	bne	.Ldec_key_abort
-
-	sub	r2,r2,#240		@ restore original r2
-	mov	r4,#-16
-	add	r0,r2,r12,lsl#4	@ end of key schedule
-
-	vld1.32	{q0},[r2]
-	vld1.32	{q1},[r0]
-	vst1.32	{q0},[r0],r4
-	vst1.32	{q1},[r2]!
-
-.Loop_imc:
-	vld1.32	{q0},[r2]
-	vld1.32	{q1},[r0]
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-	vst1.32	{q0},[r0],r4
-	vst1.32	{q1},[r2]!
-	cmp	r0,r2
-	bhi	.Loop_imc
-
-	vld1.32	{q0},[r2]
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-	vst1.32	{q0},[r0]
-
-	eor	r0,r0,r0		@ return value
-.Ldec_key_abort:
-	ldmia	sp!,{r4,pc}
-.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
-.globl	aes_hw_encrypt
-.hidden	aes_hw_encrypt
-.type	aes_hw_encrypt,%function
-.align	5
-aes_hw_encrypt:
-	ldr	r3,[r2,#240]
-	vld1.32	{q0},[r2]!
-	vld1.8	{q2},[r0]
-	sub	r3,r3,#2
-	vld1.32	{q1},[r2]!
-
-.Loop_enc:
-.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q0},[r2]!
-	subs	r3,r3,#2
-.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q1},[r2]!
-	bgt	.Loop_enc
-
-.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
-.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
-	vld1.32	{q0},[r2]
-.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
-	veor	q2,q2,q0
-
-	vst1.8	{q2},[r1]
-	bx	lr
-.size	aes_hw_encrypt,.-aes_hw_encrypt
-.globl	aes_hw_decrypt
-.hidden	aes_hw_decrypt
-.type	aes_hw_decrypt,%function
-.align	5
-aes_hw_decrypt:
-	ldr	r3,[r2,#240]
-	vld1.32	{q0},[r2]!
-	vld1.8	{q2},[r0]
-	sub	r3,r3,#2
-	vld1.32	{q1},[r2]!
-
-.Loop_dec:
-.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q0},[r2]!
-	subs	r3,r3,#2
-.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q1},[r2]!
-	bgt	.Loop_dec
-
-.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
-.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
-	vld1.32	{q0},[r2]
-.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
-	veor	q2,q2,q0
-
-	vst1.8	{q2},[r1]
-	bx	lr
-.size	aes_hw_decrypt,.-aes_hw_decrypt
-.globl	aes_hw_cbc_encrypt
-.hidden	aes_hw_cbc_encrypt
-.type	aes_hw_cbc_encrypt,%function
-.align	5
-aes_hw_cbc_encrypt:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
-	ldmia	ip,{r4,r5}		@ load remaining args
-	subs	r2,r2,#16
-	mov	r8,#16
-	blo	.Lcbc_abort
-	moveq	r8,#0
-
-	cmp	r5,#0			@ en- or decrypting?
-	ldr	r5,[r3,#240]
-	and	r2,r2,#-16
-	vld1.8	{q6},[r4]
-	vld1.8	{q0},[r0],r8
-
-	vld1.32	{q8,q9},[r3]		@ load key schedule...
-	sub	r5,r5,#6
-	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
-	sub	r5,r5,#2
-	vld1.32	{q10,q11},[r7]!
-	vld1.32	{q12,q13},[r7]!
-	vld1.32	{q14,q15},[r7]!
-	vld1.32	{q7},[r7]
-
-	add	r7,r3,#32
-	mov	r6,r5
-	beq	.Lcbc_dec
-
-	cmp	r5,#2
-	veor	q0,q0,q6
-	veor	q5,q8,q7
-	beq	.Lcbc_enc128
-
-	vld1.32	{q2,q3},[r7]
-	add	r7,r3,#16
-	add	r6,r3,#16*4
-	add	r12,r3,#16*5
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	add	r14,r3,#16*6
-	add	r3,r3,#16*7
-	b	.Lenter_cbc_enc
-
-.align	4
-.Loop_cbc_enc:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vst1.8	{q6},[r1]!
-.Lenter_cbc_enc:
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q8},[r6]
-	cmp	r5,#4
-.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r12]
-	beq	.Lcbc_enc192
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q8},[r14]
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r3]
-	nop
-
-.Lcbc_enc192:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	subs	r2,r2,#16
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	moveq	r8,#0
-.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.8	{q8},[r0],r8
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	veor	q8,q8,q5
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-	veor	q6,q0,q7
-	bhs	.Loop_cbc_enc
-
-	vst1.8	{q6},[r1]!
-	b	.Lcbc_done
-
-.align	5
-.Lcbc_enc128:
-	vld1.32	{q2,q3},[r7]
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	b	.Lenter_cbc_enc128
-.Loop_cbc_enc128:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vst1.8	{q6},[r1]!
-.Lenter_cbc_enc128:
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	subs	r2,r2,#16
-.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	moveq	r8,#0
-.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	vld1.8	{q8},[r0],r8
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-	veor	q8,q8,q5
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-	veor	q6,q0,q7
-	bhs	.Loop_cbc_enc128
-
-	vst1.8	{q6},[r1]!
-	b	.Lcbc_done
-.align	5
-.Lcbc_dec:
-	vld1.8	{q10},[r0]!
-	subs	r2,r2,#32		@ bias
-	add	r6,r5,#2
-	vorr	q3,q0,q0
-	vorr	q1,q0,q0
-	vorr	q11,q10,q10
-	blo	.Lcbc_dec_tail
-
-	vorr	q1,q10,q10
-	vld1.8	{q10},[r0]!
-	vorr	q2,q0,q0
-	vorr	q3,q1,q1
-	vorr	q11,q10,q10
-
-.Loop3x_cbc_dec:
-.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	.Loop3x_cbc_dec
-
-.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q4,q6,q7
-	subs	r2,r2,#0x30
-	veor	q5,q2,q7
-	movlo	r6,r2			@ r6, r6, is zero at this point
-.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q9,q3,q7
-	add	r0,r0,r6		@ r0 is adjusted in such way that
-					@ at exit from the loop q1-q10
-					@ are loaded with last "words"
-	vorr	q6,q11,q11
-	mov	r7,r3
-.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q2},[r0]!
-.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q3},[r0]!
-.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
-.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
-.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.8	{q11},[r0]!
-.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
-.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
-.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
-	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
-	add	r6,r5,#2
-	veor	q4,q4,q0
-	veor	q5,q5,q1
-	veor	q10,q10,q9
-	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
-	vst1.8	{q4},[r1]!
-	vorr	q0,q2,q2
-	vst1.8	{q5},[r1]!
-	vorr	q1,q3,q3
-	vst1.8	{q10},[r1]!
-	vorr	q10,q11,q11
-	bhs	.Loop3x_cbc_dec
-
-	cmn	r2,#0x30
-	beq	.Lcbc_done
-	nop
-
-.Lcbc_dec_tail:
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	.Lcbc_dec_tail
-
-.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	cmn	r2,#0x20
-.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q5,q6,q7
-.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
-.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
-.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
-.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
-	veor	q9,q3,q7
-.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
-.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
-	beq	.Lcbc_dec_one
-	veor	q5,q5,q1
-	veor	q9,q9,q10
-	vorr	q6,q11,q11
-	vst1.8	{q5},[r1]!
-	vst1.8	{q9},[r1]!
-	b	.Lcbc_done
-
-.Lcbc_dec_one:
-	veor	q5,q5,q10
-	vorr	q6,q11,q11
-	vst1.8	{q5},[r1]!
-
-.Lcbc_done:
-	vst1.8	{q6},[r4]
-.Lcbc_abort:
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
-.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
-.globl	aes_hw_ctr32_encrypt_blocks
-.hidden	aes_hw_ctr32_encrypt_blocks
-.type	aes_hw_ctr32_encrypt_blocks,%function
-.align	5
-aes_hw_ctr32_encrypt_blocks:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
-	ldr	r4, [ip]		@ load remaining arg
-	ldr	r5,[r3,#240]
-
-	ldr	r8, [r4, #12]
-	vld1.32	{q0},[r4]
-
-	vld1.32	{q8,q9},[r3]		@ load key schedule...
-	sub	r5,r5,#4
-	mov	r12,#16
-	cmp	r2,#2
-	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
-	sub	r5,r5,#2
-	vld1.32	{q12,q13},[r7]!
-	vld1.32	{q14,q15},[r7]!
-	vld1.32	{q7},[r7]
-	add	r7,r3,#32
-	mov	r6,r5
-	movlo	r12,#0
-#ifndef __ARMEB__
-	rev	r8, r8
-#endif
-	vorr	q1,q0,q0
-	add	r10, r8, #1
-	vorr	q10,q0,q0
-	add	r8, r8, #2
-	vorr	q6,q0,q0
-	rev	r10, r10
-	vmov.32	d3[1],r10
-	bls	.Lctr32_tail
-	rev	r12, r8
-	sub	r2,r2,#3		@ bias
-	vmov.32	d21[1],r12
-	b	.Loop3x_ctr32
-
-.align	4
-.Loop3x_ctr32:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.32	{q9},[r7]!
-	bgt	.Loop3x_ctr32
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
-	vld1.8	{q2},[r0]!
-	vorr	q0,q6,q6
-.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
-.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
-	vld1.8	{q3},[r0]!
-	vorr	q1,q6,q6
-.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vld1.8	{q11},[r0]!
-	mov	r7,r3
-.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
-.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
-	vorr	q10,q6,q6
-	add	r9,r8,#1
-.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	veor	q2,q2,q7
-	add	r10,r8,#2
-.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	veor	q3,q3,q7
-	add	r8,r8,#3
-.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	veor	q11,q11,q7
-	rev	r9,r9
-.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d1[1], r9
-	rev	r10,r10
-.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
-.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
-.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
-.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vmov.32	d3[1], r10
-	rev	r12,r8
-.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
-.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d21[1], r12
-	subs	r2,r2,#3
-.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
-.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
-.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
-
-	veor	q2,q2,q4
-	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
-	vst1.8	{q2},[r1]!
-	veor	q3,q3,q5
-	mov	r6,r5
-	vst1.8	{q3},[r1]!
-	veor	q11,q11,q9
-	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
-	vst1.8	{q11},[r1]!
-	bhs	.Loop3x_ctr32
-
-	adds	r2,r2,#3
-	beq	.Lctr32_done
-	cmp	r2,#1
-	mov	r12,#16
-	moveq	r12,#0
-
-.Lctr32_tail:
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.32	{q8},[r7]!
-	subs	r6,r6,#2
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.32	{q9},[r7]!
-	bgt	.Lctr32_tail
-
-.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.8	{q2},[r0],r12
-.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	vld1.8	{q3},[r0]
-.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	veor	q2,q2,q7
-.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
-.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
-.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
-.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
-	veor	q3,q3,q7
-.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
-.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
-
-	cmp	r2,#1
-	veor	q2,q2,q0
-	veor	q3,q3,q1
-	vst1.8	{q2},[r1]!
-	beq	.Lctr32_done
-	vst1.8	{q3},[r1]
-
-.Lctr32_done:
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
-.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
-#endif
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/armv4-mont.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/armv4-mont.S
@ -1,977 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__arm__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
-@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
-.arch	armv7-a
-
-.text
-#if defined(__thumb2__)
-.syntax	unified
-.thumb
-#else
-.code	32
-#endif
-
-#if __ARM_MAX_ARCH__>=7
-.align	5
-.LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-.Lbn_mul_mont
-#endif
-
-.globl	bn_mul_mont
-.hidden	bn_mul_mont
-.type	bn_mul_mont,%function
-
-.align	5
-bn_mul_mont:
-.Lbn_mul_mont:
-	ldr	ip,[sp,#4]		@ load num
-	stmdb	sp!,{r0,r2}		@ sp points at argument block
-#if __ARM_MAX_ARCH__>=7
-	tst	ip,#7
-	bne	.Lialu
-	adr	r0,.Lbn_mul_mont
-	ldr	r2,.LOPENSSL_armcap
-	ldr	r0,[r0,r2]
-#ifdef	__APPLE__
-	ldr	r0,[r0]
-#endif
-	tst	r0,#ARMV7_NEON		@ NEON available?
-	ldmia	sp, {r0,r2}
-	beq	.Lialu
-	add	sp,sp,#8
-	b	bn_mul8x_mont_neon
-.align	4
-.Lialu:
-#endif
-	cmp	ip,#2
-	mov	r0,ip			@ load num
-#ifdef	__thumb2__
-	ittt	lt
-#endif
-	movlt	r0,#0
-	addlt	sp,sp,#2*4
-	blt	.Labrt
-
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ save 10 registers
-
-	mov	r0,r0,lsl#2		@ rescale r0 for byte count
-	sub	sp,sp,r0		@ alloca(4*num)
-	sub	sp,sp,#4		@ +extra dword
-	sub	r0,r0,#4		@ "num=num-1"
-	add	r4,r2,r0		@ &bp[num-1]
-
-	add	r0,sp,r0		@ r0 to point at &tp[num-1]
-	ldr	r8,[r0,#14*4]		@ &n0
-	ldr	r2,[r2]		@ bp[0]
-	ldr	r5,[r1],#4		@ ap[0],ap++
-	ldr	r6,[r3],#4		@ np[0],np++
-	ldr	r8,[r8]		@ *n0
-	str	r4,[r0,#15*4]		@ save &bp[num]
-
-	umull	r10,r11,r5,r2	@ ap[0]*bp[0]
-	str	r8,[r0,#14*4]		@ save n0 value
-	mul	r8,r10,r8		@ "tp[0]"*n0
-	mov	r12,#0
-	umlal	r10,r12,r6,r8	@ np[0]*n0+"t[0]"
-	mov	r4,sp
-
-.L1st:
-	ldr	r5,[r1],#4		@ ap[j],ap++
-	mov	r10,r11
-	ldr	r6,[r3],#4		@ np[j],np++
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[j]*bp[0]
-	mov	r14,#0
-	umlal	r12,r14,r6,r8	@ np[j]*n0
-	adds	r12,r12,r10
-	str	r12,[r4],#4		@ tp[j-1]=,tp++
-	adc	r12,r14,#0
-	cmp	r4,r0
-	bne	.L1st
-
-	adds	r12,r12,r11
-	ldr	r4,[r0,#13*4]		@ restore bp
-	mov	r14,#0
-	ldr	r8,[r0,#14*4]		@ restore n0
-	adc	r14,r14,#0
-	str	r12,[r0]		@ tp[num-1]=
-	mov	r7,sp
-	str	r14,[r0,#4]		@ tp[num]=
-
-.Louter:
-	sub	r7,r0,r7		@ "original" r0-1 value
-	sub	r1,r1,r7		@ "rewind" ap to &ap[1]
-	ldr	r2,[r4,#4]!		@ *(++bp)
-	sub	r3,r3,r7		@ "rewind" np to &np[1]
-	ldr	r5,[r1,#-4]		@ ap[0]
-	ldr	r10,[sp]		@ tp[0]
-	ldr	r6,[r3,#-4]		@ np[0]
-	ldr	r7,[sp,#4]		@ tp[1]
-
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[0]*bp[i]+tp[0]
-	str	r4,[r0,#13*4]		@ save bp
-	mul	r8,r10,r8
-	mov	r12,#0
-	umlal	r10,r12,r6,r8	@ np[0]*n0+"tp[0]"
-	mov	r4,sp
-
-.Linner:
-	ldr	r5,[r1],#4		@ ap[j],ap++
-	adds	r10,r11,r7		@ +=tp[j]
-	ldr	r6,[r3],#4		@ np[j],np++
-	mov	r11,#0
-	umlal	r10,r11,r5,r2	@ ap[j]*bp[i]
-	mov	r14,#0
-	umlal	r12,r14,r6,r8	@ np[j]*n0
-	adc	r11,r11,#0
-	ldr	r7,[r4,#8]		@ tp[j+1]
-	adds	r12,r12,r10
-	str	r12,[r4],#4		@ tp[j-1]=,tp++
-	adc	r12,r14,#0
-	cmp	r4,r0
-	bne	.Linner
-
-	adds	r12,r12,r11
-	mov	r14,#0
-	ldr	r4,[r0,#13*4]		@ restore bp
-	adc	r14,r14,#0
-	ldr	r8,[r0,#14*4]		@ restore n0
-	adds	r12,r12,r7
-	ldr	r7,[r0,#15*4]		@ restore &bp[num]
-	adc	r14,r14,#0
-	str	r12,[r0]		@ tp[num-1]=
-	str	r14,[r0,#4]		@ tp[num]=
-
-	cmp	r4,r7
-#ifdef	__thumb2__
-	itt	ne
-#endif
-	movne	r7,sp
-	bne	.Louter
-
-	ldr	r2,[r0,#12*4]		@ pull rp
-	mov	r5,sp
-	add	r0,r0,#4		@ r0 to point at &tp[num]
-	sub	r5,r0,r5		@ "original" num value
-	mov	r4,sp			@ "rewind" r4
-	mov	r1,r4			@ "borrow" r1
-	sub	r3,r3,r5		@ "rewind" r3 to &np[0]
-
-	subs	r7,r7,r7		@ "clear" carry flag
-.Lsub:	ldr	r7,[r4],#4
-	ldr	r6,[r3],#4
-	sbcs	r7,r7,r6		@ tp[j]-np[j]
-	str	r7,[r2],#4		@ rp[j]=
-	teq	r4,r0		@ preserve carry
-	bne	.Lsub
-	sbcs	r14,r14,#0		@ upmost carry
-	mov	r4,sp			@ "rewind" r4
-	sub	r2,r2,r5		@ "rewind" r2
-
-.Lcopy:	ldr	r7,[r4]		@ conditional copy
-	ldr	r5,[r2]
-	str	sp,[r4],#4		@ zap tp
-#ifdef	__thumb2__
-	it	cc
-#endif
-	movcc	r5,r7
-	str	r5,[r2],#4
-	teq	r4,r0		@ preserve carry
-	bne	.Lcopy
-
-	mov	sp,r0
-	add	sp,sp,#4		@ skip over tp[num+1]
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ restore registers
-	add	sp,sp,#2*4		@ skip over {r0,r2}
-	mov	r0,#1
-.Labrt:
-#if __ARM_ARCH__>=5
-	bx	lr				@ bx lr
-#else
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	bn_mul_mont,.-bn_mul_mont
-#if __ARM_MAX_ARCH__>=7
-.arch	armv7-a
-.fpu	neon
-
-.type	bn_mul8x_mont_neon,%function
-.align	5
-bn_mul8x_mont_neon:
-	mov	ip,sp
-	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
-	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ ABI specification says so
-	ldmia	ip,{r4,r5}		@ load rest of parameter block
-	mov	ip,sp
-
-	cmp	r5,#8
-	bhi	.LNEON_8n
-
-	@ special case for r5==8, everything is in register bank...
-
-	vld1.32	{d28[0]}, [r2,:32]!
-	veor	d8,d8,d8
-	sub	r7,sp,r5,lsl#4
-	vld1.32	{d0,d1,d2,d3},  [r1]!		@ can't specify :32 :-(
-	and	r7,r7,#-64
-	vld1.32	{d30[0]}, [r4,:32]
-	mov	sp,r7			@ alloca
-	vzip.16	d28,d8
-
-	vmull.u32	q6,d28,d0[0]
-	vmull.u32	q7,d28,d0[1]
-	vmull.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmull.u32	q9,d28,d1[1]
-
-	vadd.u64	d29,d29,d12
-	veor	d8,d8,d8
-	vmul.u32	d29,d29,d30
-
-	vmull.u32	q10,d28,d2[0]
-	vld1.32	{d4,d5,d6,d7}, [r3]!
-	vmull.u32	q11,d28,d2[1]
-	vmull.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmull.u32	q13,d28,d3[1]
-
-	vmlal.u32	q6,d29,d4[0]
-	sub	r9,r5,#1
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-
-	vmlal.u32	q10,d29,d6[0]
-	vmov	q5,q6
-	vmlal.u32	q11,d29,d6[1]
-	vmov	q6,q7
-	vmlal.u32	q12,d29,d7[0]
-	vmov	q7,q8
-	vmlal.u32	q13,d29,d7[1]
-	vmov	q8,q9
-	vmov	q9,q10
-	vshr.u64	d10,d10,#16
-	vmov	q10,q11
-	vmov	q11,q12
-	vadd.u64	d10,d10,d11
-	vmov	q12,q13
-	veor	q13,q13
-	vshr.u64	d10,d10,#16
-
-	b	.LNEON_outer8
-
-.align	4
-.LNEON_outer8:
-	vld1.32	{d28[0]}, [r2,:32]!
-	veor	d8,d8,d8
-	vzip.16	d28,d8
-	vadd.u64	d12,d12,d10
-
-	vmlal.u32	q6,d28,d0[0]
-	vmlal.u32	q7,d28,d0[1]
-	vmlal.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmlal.u32	q9,d28,d1[1]
-
-	vadd.u64	d29,d29,d12
-	veor	d8,d8,d8
-	subs	r9,r9,#1
-	vmul.u32	d29,d29,d30
-
-	vmlal.u32	q10,d28,d2[0]
-	vmlal.u32	q11,d28,d2[1]
-	vmlal.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q13,d28,d3[1]
-
-	vmlal.u32	q6,d29,d4[0]
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-
-	vmlal.u32	q10,d29,d6[0]
-	vmov	q5,q6
-	vmlal.u32	q11,d29,d6[1]
-	vmov	q6,q7
-	vmlal.u32	q12,d29,d7[0]
-	vmov	q7,q8
-	vmlal.u32	q13,d29,d7[1]
-	vmov	q8,q9
-	vmov	q9,q10
-	vshr.u64	d10,d10,#16
-	vmov	q10,q11
-	vmov	q11,q12
-	vadd.u64	d10,d10,d11
-	vmov	q12,q13
-	veor	q13,q13
-	vshr.u64	d10,d10,#16
-
-	bne	.LNEON_outer8
-
-	vadd.u64	d12,d12,d10
-	mov	r7,sp
-	vshr.u64	d10,d12,#16
-	mov	r8,r5
-	vadd.u64	d13,d13,d10
-	add	r6,sp,#96
-	vshr.u64	d10,d13,#16
-	vzip.16	d12,d13
-
-	b	.LNEON_tail_entry
-
-.align	4
-.LNEON_8n:
-	veor	q6,q6,q6
-	sub	r7,sp,#128
-	veor	q7,q7,q7
-	sub	r7,r7,r5,lsl#4
-	veor	q8,q8,q8
-	and	r7,r7,#-64
-	veor	q9,q9,q9
-	mov	sp,r7			@ alloca
-	veor	q10,q10,q10
-	add	r7,r7,#256
-	veor	q11,q11,q11
-	sub	r8,r5,#8
-	veor	q12,q12,q12
-	veor	q13,q13,q13
-
-.LNEON_8n_init:
-	vst1.64	{q6,q7},[r7,:256]!
-	subs	r8,r8,#8
-	vst1.64	{q8,q9},[r7,:256]!
-	vst1.64	{q10,q11},[r7,:256]!
-	vst1.64	{q12,q13},[r7,:256]!
-	bne	.LNEON_8n_init
-
-	add	r6,sp,#256
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	add	r10,sp,#8
-	vld1.32	{d30[0]},[r4,:32]
-	mov	r9,r5
-	b	.LNEON_8n_outer
-
-.align	4
-.LNEON_8n_outer:
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	veor	d8,d8,d8
-	vzip.16	d28,d8
-	add	r7,sp,#128
-	vld1.32	{d4,d5,d6,d7},[r3]!
-
-	vmlal.u32	q6,d28,d0[0]
-	vmlal.u32	q7,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q8,d28,d1[0]
-	vshl.i64	d29,d13,#16
-	vmlal.u32	q9,d28,d1[1]
-	vadd.u64	d29,d29,d12
-	vmlal.u32	q10,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q11,d28,d2[1]
-	vst1.32	{d28},[sp,:64]		@ put aside smashed b[8*i+0]
-	vmlal.u32	q12,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q13,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q6,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q7,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q8,d29,d5[0]
-	vshr.u64	d12,d12,#16
-	vmlal.u32	q9,d29,d5[1]
-	vmlal.u32	q10,d29,d6[0]
-	vadd.u64	d12,d12,d13
-	vmlal.u32	q11,d29,d6[1]
-	vshr.u64	d12,d12,#16
-	vmlal.u32	q12,d29,d7[0]
-	vmlal.u32	q13,d29,d7[1]
-	vadd.u64	d14,d14,d12
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+0]
-	vmlal.u32	q7,d28,d0[0]
-	vld1.64	{q6},[r6,:128]!
-	vmlal.u32	q8,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q9,d28,d1[0]
-	vshl.i64	d29,d15,#16
-	vmlal.u32	q10,d28,d1[1]
-	vadd.u64	d29,d29,d14
-	vmlal.u32	q11,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q12,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+1]
-	vmlal.u32	q13,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q6,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q7,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q8,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q9,d29,d5[0]
-	vshr.u64	d14,d14,#16
-	vmlal.u32	q10,d29,d5[1]
-	vmlal.u32	q11,d29,d6[0]
-	vadd.u64	d14,d14,d15
-	vmlal.u32	q12,d29,d6[1]
-	vshr.u64	d14,d14,#16
-	vmlal.u32	q13,d29,d7[0]
-	vmlal.u32	q6,d29,d7[1]
-	vadd.u64	d16,d16,d14
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+1]
-	vmlal.u32	q8,d28,d0[0]
-	vld1.64	{q7},[r6,:128]!
-	vmlal.u32	q9,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q10,d28,d1[0]
-	vshl.i64	d29,d17,#16
-	vmlal.u32	q11,d28,d1[1]
-	vadd.u64	d29,d29,d16
-	vmlal.u32	q12,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q13,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+2]
-	vmlal.u32	q6,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q7,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q8,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q9,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q10,d29,d5[0]
-	vshr.u64	d16,d16,#16
-	vmlal.u32	q11,d29,d5[1]
-	vmlal.u32	q12,d29,d6[0]
-	vadd.u64	d16,d16,d17
-	vmlal.u32	q13,d29,d6[1]
-	vshr.u64	d16,d16,#16
-	vmlal.u32	q6,d29,d7[0]
-	vmlal.u32	q7,d29,d7[1]
-	vadd.u64	d18,d18,d16
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+2]
-	vmlal.u32	q9,d28,d0[0]
-	vld1.64	{q8},[r6,:128]!
-	vmlal.u32	q10,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q11,d28,d1[0]
-	vshl.i64	d29,d19,#16
-	vmlal.u32	q12,d28,d1[1]
-	vadd.u64	d29,d29,d18
-	vmlal.u32	q13,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q6,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+3]
-	vmlal.u32	q7,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q8,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q9,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q10,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q11,d29,d5[0]
-	vshr.u64	d18,d18,#16
-	vmlal.u32	q12,d29,d5[1]
-	vmlal.u32	q13,d29,d6[0]
-	vadd.u64	d18,d18,d19
-	vmlal.u32	q6,d29,d6[1]
-	vshr.u64	d18,d18,#16
-	vmlal.u32	q7,d29,d7[0]
-	vmlal.u32	q8,d29,d7[1]
-	vadd.u64	d20,d20,d18
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+3]
-	vmlal.u32	q10,d28,d0[0]
-	vld1.64	{q9},[r6,:128]!
-	vmlal.u32	q11,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q12,d28,d1[0]
-	vshl.i64	d29,d21,#16
-	vmlal.u32	q13,d28,d1[1]
-	vadd.u64	d29,d29,d20
-	vmlal.u32	q6,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q7,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+4]
-	vmlal.u32	q8,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q9,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q10,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q11,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q12,d29,d5[0]
-	vshr.u64	d20,d20,#16
-	vmlal.u32	q13,d29,d5[1]
-	vmlal.u32	q6,d29,d6[0]
-	vadd.u64	d20,d20,d21
-	vmlal.u32	q7,d29,d6[1]
-	vshr.u64	d20,d20,#16
-	vmlal.u32	q8,d29,d7[0]
-	vmlal.u32	q9,d29,d7[1]
-	vadd.u64	d22,d22,d20
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+4]
-	vmlal.u32	q11,d28,d0[0]
-	vld1.64	{q10},[r6,:128]!
-	vmlal.u32	q12,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q13,d28,d1[0]
-	vshl.i64	d29,d23,#16
-	vmlal.u32	q6,d28,d1[1]
-	vadd.u64	d29,d29,d22
-	vmlal.u32	q7,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q8,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+5]
-	vmlal.u32	q9,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q10,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q11,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q12,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q13,d29,d5[0]
-	vshr.u64	d22,d22,#16
-	vmlal.u32	q6,d29,d5[1]
-	vmlal.u32	q7,d29,d6[0]
-	vadd.u64	d22,d22,d23
-	vmlal.u32	q8,d29,d6[1]
-	vshr.u64	d22,d22,#16
-	vmlal.u32	q9,d29,d7[0]
-	vmlal.u32	q10,d29,d7[1]
-	vadd.u64	d24,d24,d22
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+5]
-	vmlal.u32	q12,d28,d0[0]
-	vld1.64	{q11},[r6,:128]!
-	vmlal.u32	q13,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q6,d28,d1[0]
-	vshl.i64	d29,d25,#16
-	vmlal.u32	q7,d28,d1[1]
-	vadd.u64	d29,d29,d24
-	vmlal.u32	q8,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q9,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+6]
-	vmlal.u32	q10,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q11,d28,d3[1]
-	vld1.32	{d28[0]},[r2,:32]!	@ *b++
-	vmlal.u32	q12,d29,d4[0]
-	veor	d10,d10,d10
-	vmlal.u32	q13,d29,d4[1]
-	vzip.16	d28,d10
-	vmlal.u32	q6,d29,d5[0]
-	vshr.u64	d24,d24,#16
-	vmlal.u32	q7,d29,d5[1]
-	vmlal.u32	q8,d29,d6[0]
-	vadd.u64	d24,d24,d25
-	vmlal.u32	q9,d29,d6[1]
-	vshr.u64	d24,d24,#16
-	vmlal.u32	q10,d29,d7[0]
-	vmlal.u32	q11,d29,d7[1]
-	vadd.u64	d26,d26,d24
-	vst1.32	{d29},[r10,:64]!	@ put aside smashed m[8*i+6]
-	vmlal.u32	q13,d28,d0[0]
-	vld1.64	{q12},[r6,:128]!
-	vmlal.u32	q6,d28,d0[1]
-	veor	d8,d8,d8
-	vmlal.u32	q7,d28,d1[0]
-	vshl.i64	d29,d27,#16
-	vmlal.u32	q8,d28,d1[1]
-	vadd.u64	d29,d29,d26
-	vmlal.u32	q9,d28,d2[0]
-	vmul.u32	d29,d29,d30
-	vmlal.u32	q10,d28,d2[1]
-	vst1.32	{d28},[r10,:64]!	@ put aside smashed b[8*i+7]
-	vmlal.u32	q11,d28,d3[0]
-	vzip.16	d29,d8
-	vmlal.u32	q12,d28,d3[1]
-	vld1.32	{d28},[sp,:64]		@ pull smashed b[8*i+0]
-	vmlal.u32	q13,d29,d4[0]
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	vmlal.u32	q6,d29,d4[1]
-	vmlal.u32	q7,d29,d5[0]
-	vshr.u64	d26,d26,#16
-	vmlal.u32	q8,d29,d5[1]
-	vmlal.u32	q9,d29,d6[0]
-	vadd.u64	d26,d26,d27
-	vmlal.u32	q10,d29,d6[1]
-	vshr.u64	d26,d26,#16
-	vmlal.u32	q11,d29,d7[0]
-	vmlal.u32	q12,d29,d7[1]
-	vadd.u64	d12,d12,d26
-	vst1.32	{d29},[r10,:64]	@ put aside smashed m[8*i+7]
-	add	r10,sp,#8		@ rewind
-	sub	r8,r5,#8
-	b	.LNEON_8n_inner
-
-.align	4
-.LNEON_8n_inner:
-	subs	r8,r8,#8
-	vmlal.u32	q6,d28,d0[0]
-	vld1.64	{q13},[r6,:128]
-	vmlal.u32	q7,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+0]
-	vmlal.u32	q8,d28,d1[0]
-	vld1.32	{d4,d5,d6,d7},[r3]!
-	vmlal.u32	q9,d28,d1[1]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q10,d28,d2[0]
-	vmlal.u32	q11,d28,d2[1]
-	vmlal.u32	q12,d28,d3[0]
-	vmlal.u32	q13,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+1]
-	vmlal.u32	q6,d29,d4[0]
-	vmlal.u32	q7,d29,d4[1]
-	vmlal.u32	q8,d29,d5[0]
-	vmlal.u32	q9,d29,d5[1]
-	vmlal.u32	q10,d29,d6[0]
-	vmlal.u32	q11,d29,d6[1]
-	vmlal.u32	q12,d29,d7[0]
-	vmlal.u32	q13,d29,d7[1]
-	vst1.64	{q6},[r7,:128]!
-	vmlal.u32	q7,d28,d0[0]
-	vld1.64	{q6},[r6,:128]
-	vmlal.u32	q8,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+1]
-	vmlal.u32	q9,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q10,d28,d1[1]
-	vmlal.u32	q11,d28,d2[0]
-	vmlal.u32	q12,d28,d2[1]
-	vmlal.u32	q13,d28,d3[0]
-	vmlal.u32	q6,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+2]
-	vmlal.u32	q7,d29,d4[0]
-	vmlal.u32	q8,d29,d4[1]
-	vmlal.u32	q9,d29,d5[0]
-	vmlal.u32	q10,d29,d5[1]
-	vmlal.u32	q11,d29,d6[0]
-	vmlal.u32	q12,d29,d6[1]
-	vmlal.u32	q13,d29,d7[0]
-	vmlal.u32	q6,d29,d7[1]
-	vst1.64	{q7},[r7,:128]!
-	vmlal.u32	q8,d28,d0[0]
-	vld1.64	{q7},[r6,:128]
-	vmlal.u32	q9,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+2]
-	vmlal.u32	q10,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q11,d28,d1[1]
-	vmlal.u32	q12,d28,d2[0]
-	vmlal.u32	q13,d28,d2[1]
-	vmlal.u32	q6,d28,d3[0]
-	vmlal.u32	q7,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+3]
-	vmlal.u32	q8,d29,d4[0]
-	vmlal.u32	q9,d29,d4[1]
-	vmlal.u32	q10,d29,d5[0]
-	vmlal.u32	q11,d29,d5[1]
-	vmlal.u32	q12,d29,d6[0]
-	vmlal.u32	q13,d29,d6[1]
-	vmlal.u32	q6,d29,d7[0]
-	vmlal.u32	q7,d29,d7[1]
-	vst1.64	{q8},[r7,:128]!
-	vmlal.u32	q9,d28,d0[0]
-	vld1.64	{q8},[r6,:128]
-	vmlal.u32	q10,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+3]
-	vmlal.u32	q11,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q12,d28,d1[1]
-	vmlal.u32	q13,d28,d2[0]
-	vmlal.u32	q6,d28,d2[1]
-	vmlal.u32	q7,d28,d3[0]
-	vmlal.u32	q8,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+4]
-	vmlal.u32	q9,d29,d4[0]
-	vmlal.u32	q10,d29,d4[1]
-	vmlal.u32	q11,d29,d5[0]
-	vmlal.u32	q12,d29,d5[1]
-	vmlal.u32	q13,d29,d6[0]
-	vmlal.u32	q6,d29,d6[1]
-	vmlal.u32	q7,d29,d7[0]
-	vmlal.u32	q8,d29,d7[1]
-	vst1.64	{q9},[r7,:128]!
-	vmlal.u32	q10,d28,d0[0]
-	vld1.64	{q9},[r6,:128]
-	vmlal.u32	q11,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+4]
-	vmlal.u32	q12,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q13,d28,d1[1]
-	vmlal.u32	q6,d28,d2[0]
-	vmlal.u32	q7,d28,d2[1]
-	vmlal.u32	q8,d28,d3[0]
-	vmlal.u32	q9,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+5]
-	vmlal.u32	q10,d29,d4[0]
-	vmlal.u32	q11,d29,d4[1]
-	vmlal.u32	q12,d29,d5[0]
-	vmlal.u32	q13,d29,d5[1]
-	vmlal.u32	q6,d29,d6[0]
-	vmlal.u32	q7,d29,d6[1]
-	vmlal.u32	q8,d29,d7[0]
-	vmlal.u32	q9,d29,d7[1]
-	vst1.64	{q10},[r7,:128]!
-	vmlal.u32	q11,d28,d0[0]
-	vld1.64	{q10},[r6,:128]
-	vmlal.u32	q12,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+5]
-	vmlal.u32	q13,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q6,d28,d1[1]
-	vmlal.u32	q7,d28,d2[0]
-	vmlal.u32	q8,d28,d2[1]
-	vmlal.u32	q9,d28,d3[0]
-	vmlal.u32	q10,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+6]
-	vmlal.u32	q11,d29,d4[0]
-	vmlal.u32	q12,d29,d4[1]
-	vmlal.u32	q13,d29,d5[0]
-	vmlal.u32	q6,d29,d5[1]
-	vmlal.u32	q7,d29,d6[0]
-	vmlal.u32	q8,d29,d6[1]
-	vmlal.u32	q9,d29,d7[0]
-	vmlal.u32	q10,d29,d7[1]
-	vst1.64	{q11},[r7,:128]!
-	vmlal.u32	q12,d28,d0[0]
-	vld1.64	{q11},[r6,:128]
-	vmlal.u32	q13,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+6]
-	vmlal.u32	q6,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q7,d28,d1[1]
-	vmlal.u32	q8,d28,d2[0]
-	vmlal.u32	q9,d28,d2[1]
-	vmlal.u32	q10,d28,d3[0]
-	vmlal.u32	q11,d28,d3[1]
-	vld1.32	{d28},[r10,:64]!	@ pull smashed b[8*i+7]
-	vmlal.u32	q12,d29,d4[0]
-	vmlal.u32	q13,d29,d4[1]
-	vmlal.u32	q6,d29,d5[0]
-	vmlal.u32	q7,d29,d5[1]
-	vmlal.u32	q8,d29,d6[0]
-	vmlal.u32	q9,d29,d6[1]
-	vmlal.u32	q10,d29,d7[0]
-	vmlal.u32	q11,d29,d7[1]
-	vst1.64	{q12},[r7,:128]!
-	vmlal.u32	q13,d28,d0[0]
-	vld1.64	{q12},[r6,:128]
-	vmlal.u32	q6,d28,d0[1]
-	vld1.32	{d29},[r10,:64]!	@ pull smashed m[8*i+7]
-	vmlal.u32	q7,d28,d1[0]
-	it	ne
-	addne	r6,r6,#16	@ don't advance in last iteration
-	vmlal.u32	q8,d28,d1[1]
-	vmlal.u32	q9,d28,d2[0]
-	vmlal.u32	q10,d28,d2[1]
-	vmlal.u32	q11,d28,d3[0]
-	vmlal.u32	q12,d28,d3[1]
-	it	eq
-	subeq	r1,r1,r5,lsl#2	@ rewind
-	vmlal.u32	q13,d29,d4[0]
-	vld1.32	{d28},[sp,:64]		@ pull smashed b[8*i+0]
-	vmlal.u32	q6,d29,d4[1]
-	vld1.32	{d0,d1,d2,d3},[r1]!
-	vmlal.u32	q7,d29,d5[0]
-	add	r10,sp,#8		@ rewind
-	vmlal.u32	q8,d29,d5[1]
-	vmlal.u32	q9,d29,d6[0]
-	vmlal.u32	q10,d29,d6[1]
-	vmlal.u32	q11,d29,d7[0]
-	vst1.64	{q13},[r7,:128]!
-	vmlal.u32	q12,d29,d7[1]
-
-	bne	.LNEON_8n_inner
-	add	r6,sp,#128
-	vst1.64	{q6,q7},[r7,:256]!
-	veor	q2,q2,q2		@ d4-d5
-	vst1.64	{q8,q9},[r7,:256]!
-	veor	q3,q3,q3		@ d6-d7
-	vst1.64	{q10,q11},[r7,:256]!
-	vst1.64	{q12},[r7,:128]
-
-	subs	r9,r9,#8
-	vld1.64	{q6,q7},[r6,:256]!
-	vld1.64	{q8,q9},[r6,:256]!
-	vld1.64	{q10,q11},[r6,:256]!
-	vld1.64	{q12,q13},[r6,:256]!
-
-	itt	ne
-	subne	r3,r3,r5,lsl#2	@ rewind
-	bne	.LNEON_8n_outer
-
-	add	r7,sp,#128
-	vst1.64	{q2,q3}, [sp,:256]!	@ start wiping stack frame
-	vshr.u64	d10,d12,#16
-	vst1.64	{q2,q3},[sp,:256]!
-	vadd.u64	d13,d13,d10
-	vst1.64	{q2,q3}, [sp,:256]!
-	vshr.u64	d10,d13,#16
-	vst1.64	{q2,q3}, [sp,:256]!
-	vzip.16	d12,d13
-
-	mov	r8,r5
-	b	.LNEON_tail_entry
-
-.align	4
-.LNEON_tail:
-	vadd.u64	d12,d12,d10
-	vshr.u64	d10,d12,#16
-	vld1.64	{q8,q9}, [r6, :256]!
-	vadd.u64	d13,d13,d10
-	vld1.64	{q10,q11}, [r6, :256]!
-	vshr.u64	d10,d13,#16
-	vld1.64	{q12,q13}, [r6, :256]!
-	vzip.16	d12,d13
-
-.LNEON_tail_entry:
-	vadd.u64	d14,d14,d10
-	vst1.32	{d12[0]}, [r7, :32]!
-	vshr.u64	d10,d14,#16
-	vadd.u64	d15,d15,d10
-	vshr.u64	d10,d15,#16
-	vzip.16	d14,d15
-	vadd.u64	d16,d16,d10
-	vst1.32	{d14[0]}, [r7, :32]!
-	vshr.u64	d10,d16,#16
-	vadd.u64	d17,d17,d10
-	vshr.u64	d10,d17,#16
-	vzip.16	d16,d17
-	vadd.u64	d18,d18,d10
-	vst1.32	{d16[0]}, [r7, :32]!
-	vshr.u64	d10,d18,#16
-	vadd.u64	d19,d19,d10
-	vshr.u64	d10,d19,#16
-	vzip.16	d18,d19
-	vadd.u64	d20,d20,d10
-	vst1.32	{d18[0]}, [r7, :32]!
-	vshr.u64	d10,d20,#16
-	vadd.u64	d21,d21,d10
-	vshr.u64	d10,d21,#16
-	vzip.16	d20,d21
-	vadd.u64	d22,d22,d10
-	vst1.32	{d20[0]}, [r7, :32]!
-	vshr.u64	d10,d22,#16
-	vadd.u64	d23,d23,d10
-	vshr.u64	d10,d23,#16
-	vzip.16	d22,d23
-	vadd.u64	d24,d24,d10
-	vst1.32	{d22[0]}, [r7, :32]!
-	vshr.u64	d10,d24,#16
-	vadd.u64	d25,d25,d10
-	vshr.u64	d10,d25,#16
-	vzip.16	d24,d25
-	vadd.u64	d26,d26,d10
-	vst1.32	{d24[0]}, [r7, :32]!
-	vshr.u64	d10,d26,#16
-	vadd.u64	d27,d27,d10
-	vshr.u64	d10,d27,#16
-	vzip.16	d26,d27
-	vld1.64	{q6,q7}, [r6, :256]!
-	subs	r8,r8,#8
-	vst1.32	{d26[0]},   [r7, :32]!
-	bne	.LNEON_tail
-
-	vst1.32	{d10[0]}, [r7, :32]		@ top-most bit
-	sub	r3,r3,r5,lsl#2			@ rewind r3
-	subs	r1,sp,#0				@ clear carry flag
-	add	r2,sp,r5,lsl#2
-
-.LNEON_sub:
-	ldmia	r1!, {r4,r5,r6,r7}
-	ldmia	r3!, {r8,r9,r10,r11}
-	sbcs	r8, r4,r8
-	sbcs	r9, r5,r9
-	sbcs	r10,r6,r10
-	sbcs	r11,r7,r11
-	teq	r1,r2				@ preserves carry
-	stmia	r0!, {r8,r9,r10,r11}
-	bne	.LNEON_sub
-
-	ldr	r10, [r1]				@ load top-most bit
-	mov	r11,sp
-	veor	q0,q0,q0
-	sub	r11,r2,r11				@ this is num*4
-	veor	q1,q1,q1
-	mov	r1,sp
-	sub	r0,r0,r11				@ rewind r0
-	mov	r3,r2				@ second 3/4th of frame
-	sbcs	r10,r10,#0				@ result is carry flag
-
-.LNEON_copy_n_zap:
-	ldmia	r1!, {r4,r5,r6,r7}
-	ldmia	r0,  {r8,r9,r10,r11}
-	it	cc
-	movcc	r8, r4
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	itt	cc
-	movcc	r9, r5
-	movcc	r10,r6
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	it	cc
-	movcc	r11,r7
-	ldmia	r1, {r4,r5,r6,r7}
-	stmia	r0!, {r8,r9,r10,r11}
-	sub	r1,r1,#16
-	ldmia	r0, {r8,r9,r10,r11}
-	it	cc
-	movcc	r8, r4
-	vst1.64	{q0,q1}, [r1,:256]!			@ wipe
-	itt	cc
-	movcc	r9, r5
-	movcc	r10,r6
-	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
-	it	cc
-	movcc	r11,r7
-	teq	r1,r2				@ preserves carry
-	stmia	r0!, {r8,r9,r10,r11}
-	bne	.LNEON_copy_n_zap
-
-	mov	sp,ip
-	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
-	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
-	bx	lr						@ bx lr
-.size	bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
-#endif
-.byte	77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#if __ARM_MAX_ARCH__>=7
-.comm	OPENSSL_armcap_P,4,4
-.hidden	OPENSSL_armcap_P
-#endif
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/bsaes-armv7.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/bsaes-armv7.S
--- a/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/ghash-armv4.S
+++ b/contrib/boringssl-cmake/linux-arm/crypto/fipsmodule/ghash-armv4.S
@ -1,255 +0,0 @@
-// This file is generated from a similarly-named Perl script in the BoringSSL
-// source tree. Do not edit by hand.
-
-#if !defined(__has_feature)
-#define __has_feature(x) 0
-#endif
-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
-#define OPENSSL_NO_ASM
-#endif
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__arm__)
-#if defined(BORINGSSL_PREFIX)
-#include <boringssl_prefix_symbols_asm.h>
-#endif
-#include <openssl/arm_arch.h>
-
-@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
-@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
-@ instructions are in aesv8-armx.pl.)
-.arch	armv7-a
-
-.text
-#if defined(__thumb2__) || defined(__clang__)
-.syntax	unified
-#define ldrplb  ldrbpl
-#define ldrneb  ldrbne
-#endif
-#if defined(__thumb2__)
-.thumb
-#else
-.code	32
-#endif
-#if __ARM_MAX_ARCH__>=7
-.arch	armv7-a
-.fpu	neon
-
-.globl	gcm_init_neon
-.hidden	gcm_init_neon
-.type	gcm_init_neon,%function
-.align	4
-gcm_init_neon:
-	vld1.64	d7,[r1]!		@ load H
-	vmov.i8	q8,#0xe1
-	vld1.64	d6,[r1]
-	vshl.i64	d17,#57
-	vshr.u64	d16,#63		@ t0=0xc2....01
-	vdup.8	q9,d7[7]
-	vshr.u64	d26,d6,#63
-	vshr.s8	q9,#7			@ broadcast carry bit
-	vshl.i64	q3,q3,#1
-	vand	q8,q8,q9
-	vorr	d7,d26		@ H<<<=1
-	veor	q3,q3,q8		@ twisted H
-	vstmia	r0,{q3}
-
-	bx	lr					@ bx lr
-.size	gcm_init_neon,.-gcm_init_neon
-
-.globl	gcm_gmult_neon
-.hidden	gcm_gmult_neon
-.type	gcm_gmult_neon,%function
-.align	4
-gcm_gmult_neon:
-	vld1.64	d7,[r0]!		@ load Xi
-	vld1.64	d6,[r0]!
-	vmov.i64	d29,#0x0000ffffffffffff
-	vldmia	r1,{d26,d27}	@ load twisted H
-	vmov.i64	d30,#0x00000000ffffffff
-#ifdef __ARMEL__
-	vrev64.8	q3,q3
-#endif
-	vmov.i64	d31,#0x000000000000ffff
-	veor	d28,d26,d27		@ Karatsuba pre-processing
-	mov	r3,#16
-	b	.Lgmult_neon
-.size	gcm_gmult_neon,.-gcm_gmult_neon
-
-.globl	gcm_ghash_neon
-.hidden	gcm_ghash_neon
-.type	gcm_ghash_neon,%function
-.align	4
-gcm_ghash_neon:
-	vld1.64	d1,[r0]!		@ load Xi
-	vld1.64	d0,[r0]!
-	vmov.i64	d29,#0x0000ffffffffffff
-	vldmia	r1,{d26,d27}	@ load twisted H
-	vmov.i64	d30,#0x00000000ffffffff
-#ifdef __ARMEL__
-	vrev64.8	q0,q0
-#endif
-	vmov.i64	d31,#0x000000000000ffff
-	veor	d28,d26,d27		@ Karatsuba pre-processing
-
-.Loop_neon:
-	vld1.64	d7,[r2]!		@ load inp
-	vld1.64	d6,[r2]!
-#ifdef __ARMEL__
-	vrev64.8	q3,q3
-#endif
-	veor	q3,q0			@ inp^=Xi
-.Lgmult_neon:
-	vext.8	d16, d26, d26, #1	@ A1
-	vmull.p8	q8, d16, d6		@ F = A1*B
-	vext.8	d0, d6, d6, #1	@ B1
-	vmull.p8	q0, d26, d0		@ E = A*B1
-	vext.8	d18, d26, d26, #2	@ A2
-	vmull.p8	q9, d18, d6		@ H = A2*B
-	vext.8	d22, d6, d6, #2	@ B2
-	vmull.p8	q11, d26, d22		@ G = A*B2
-	vext.8	d20, d26, d26, #3	@ A3
-	veor	q8, q8, q0		@ L = E + F
-	vmull.p8	q10, d20, d6		@ J = A3*B
-	vext.8	d0, d6, d6, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q0, d26, d0		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d6, d6, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d26, d22		@ K = A*B4
-	veor	q10, q10, q0		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q0, d26, d6		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q0, q0, q8
-	veor	q0, q0, q10
-	veor	d6,d6,d7	@ Karatsuba pre-processing
-	vext.8	d16, d28, d28, #1	@ A1
-	vmull.p8	q8, d16, d6		@ F = A1*B
-	vext.8	d2, d6, d6, #1	@ B1
-	vmull.p8	q1, d28, d2		@ E = A*B1
-	vext.8	d18, d28, d28, #2	@ A2
-	vmull.p8	q9, d18, d6		@ H = A2*B
-	vext.8	d22, d6, d6, #2	@ B2
-	vmull.p8	q11, d28, d22		@ G = A*B2
-	vext.8	d20, d28, d28, #3	@ A3
-	veor	q8, q8, q1		@ L = E + F
-	vmull.p8	q10, d20, d6		@ J = A3*B
-	vext.8	d2, d6, d6, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q1, d28, d2		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d6, d6, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d28, d22		@ K = A*B4
-	veor	q10, q10, q1		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q1, d28, d6		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q1, q1, q8
-	veor	q1, q1, q10
-	vext.8	d16, d27, d27, #1	@ A1
-	vmull.p8	q8, d16, d7		@ F = A1*B
-	vext.8	d4, d7, d7, #1	@ B1
-	vmull.p8	q2, d27, d4		@ E = A*B1
-	vext.8	d18, d27, d27, #2	@ A2
-	vmull.p8	q9, d18, d7		@ H = A2*B
-	vext.8	d22, d7, d7, #2	@ B2
-	vmull.p8	q11, d27, d22		@ G = A*B2
-	vext.8	d20, d27, d27, #3	@ A3
-	veor	q8, q8, q2		@ L = E + F
-	vmull.p8	q10, d20, d7		@ J = A3*B
-	vext.8	d4, d7, d7, #3	@ B3
-	veor	q9, q9, q11		@ M = G + H
-	vmull.p8	q2, d27, d4		@ I = A*B3
-	veor	d16, d16, d17	@ t0 = (L) (P0 + P1) << 8
-	vand	d17, d17, d29
-	vext.8	d22, d7, d7, #4	@ B4
-	veor	d18, d18, d19	@ t1 = (M) (P2 + P3) << 16
-	vand	d19, d19, d30
-	vmull.p8	q11, d27, d22		@ K = A*B4
-	veor	q10, q10, q2		@ N = I + J
-	veor	d16, d16, d17
-	veor	d18, d18, d19
-	veor	d20, d20, d21	@ t2 = (N) (P4 + P5) << 24
-	vand	d21, d21, d31
-	vext.8	q8, q8, q8, #15
-	veor	d22, d22, d23	@ t3 = (K) (P6 + P7) << 32
-	vmov.i64	d23, #0
-	vext.8	q9, q9, q9, #14
-	veor	d20, d20, d21
-	vmull.p8	q2, d27, d7		@ D = A*B
-	vext.8	q11, q11, q11, #12
-	vext.8	q10, q10, q10, #13
-	veor	q8, q8, q9
-	veor	q10, q10, q11
-	veor	q2, q2, q8
-	veor	q2, q2, q10
-	veor	q1,q1,q0		@ Karatsuba post-processing
-	veor	q1,q1,q2
-	veor	d1,d1,d2
-	veor	d4,d4,d3	@ Xh|Xl - 256-bit result
-
-	@ equivalent of reduction_avx from ghash-x86_64.pl
-	vshl.i64	q9,q0,#57		@ 1st phase
-	vshl.i64	q10,q0,#62
-	veor	q10,q10,q9		@
-	vshl.i64	q9,q0,#63
-	veor	q10, q10, q9		@
-	veor	d1,d1,d20	@
-	veor	d4,d4,d21
-
-	vshr.u64	q10,q0,#1		@ 2nd phase
-	veor	q2,q2,q0
-	veor	q0,q0,q10		@
-	vshr.u64	q10,q10,#6
-	vshr.u64	q0,q0,#1		@
-	veor	q0,q0,q2		@
-	veor	q0,q0,q10		@
-
-	subs	r3,#16
-	bne	.Loop_neon
-
-#ifdef __ARMEL__
-	vrev64.8	q0,q0
-#endif
-	sub	r0,#16
-	vst1.64	d1,[r0]!		@ write out Xi
-	vst1.64	d0,[r0]
-
-	bx	lr					@ bx lr
-.size	gcm_ghash_neon,.-gcm_ghash_neon
-#endif
-.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align	2
-.align	2
-#endif
-#endif  // !OPENSSL_NO_ASM
-.section	.note.GNU-stack,"",%progbits
--- a/Show More
+++ b/Show More