Merge branch 'master' into fix-dropping-detached-parts-with-try-n-suffix

Commit 4f3d643a5f, authored by Alexander Tokmakov, 2024-10-31 16:41:50 +01:00, committed by GitHub.
GPG key ID: B5690EEEBB952194 (no known key found for this signature in the database).
4437 changed files with 123077 additions and 55245 deletions.


@@ -16,3 +16,6 @@
 # Applied Black formatter for Python code
 e6f5a3f98b21ba99cf274a9833797889e020a2b3
+
+# Enabling clang-tidy readability-else-no-return rule
+67c1e89d90ef576e62f8b1c68269742a3c6f9b1e


@@ -15,7 +15,7 @@ assignees: ''
 **Use case**
-> A clear and concise description of what is the intended usage scenario is.
+> A clear and concise description of what the intended usage scenario is.
 **Describe the solution you'd like**


@@ -4,7 +4,6 @@ self-hosted-runner:
   - func-tester
   - func-tester-aarch64
   - fuzzer-unit-tester
-  - stress-tester
   - style-checker
   - style-checker-aarch64
   - release-maker


@@ -4,15 +4,31 @@ description: Prints workflow debug info
 runs:
   using: "composite"
   steps:
-    - name: Print envs
+    - name: Envs, event.json and contexts
       shell: bash
       run: |
-        echo "::group::Envs"
-        env
-        echo "::endgroup::"
-    - name: Print Event.json
-      shell: bash
-      run: |
-        echo "::group::Event.json"
+        echo '::group::Environment variables'
+        env | sort
+        echo '::endgroup::'
+
+        echo '::group::event.json'
         python3 -m json.tool "$GITHUB_EVENT_PATH"
-        echo "::endgroup::"
+        echo '::endgroup::'
+
+        cat << 'EOF'
+        ::group::github context
+        ${{ toJSON(github) }}
+        ::endgroup::
+        ::group::env context
+        ${{ toJSON(env) }}
+        ::endgroup::
+        ::group::runner context
+        ${{ toJSON(runner) }}
+        ::endgroup::
+        ::group::job context
+        ${{ toJSON(job) }}
+        ::endgroup::
+        EOF


@@ -27,6 +27,8 @@ jobs:
           clear-repository: true  # to ensure correct digests
           fetch-depth: 0  # to get version
           filter: tree:0
+      - name: Debug Info
+        uses: ./.github/actions/debug
       - name: Labels check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
@@ -227,18 +229,26 @@ jobs:
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: Stress test (tsan)
-      runner_type: stress-tester
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   #############################################################################################
   ############################# INTEGRATION TESTS #############################################
   #############################################################################################
-  IntegrationTestsRelease:
-    needs: [RunConfig, BuilderDebRelease]
+  IntegrationTestsAsanOldAnalyzer:
+    needs: [RunConfig, BuilderDebAsan]
     if: ${{ !failure() && !cancelled() }}
     uses: ./.github/workflows/reusable_test.yml
     with:
-      test_name: Integration tests (release)
-      runner_type: stress-tester
+      test_name: Integration tests (asan, old analyzer)
+      runner_type: func-tester
+      data: ${{ needs.RunConfig.outputs.data }}
+  IntegrationTestsTsan:
+    needs: [RunConfig, BuilderDebTsan]
+    if: ${{ !failure() && !cancelled() }}
+    uses: ./.github/workflows/reusable_test.yml
+    with:
+      test_name: Integration tests (tsan)
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   FinishCheck:
     if: ${{ !cancelled() }}
@@ -248,7 +258,8 @@ jobs:
       - FunctionalStatelessTestAsan
      - FunctionalStatefulTestDebug
       - StressTestTsan
-      - IntegrationTestsRelease
+      - IntegrationTestsTsan
+      - IntegrationTestsAsanOldAnalyzer
       - CompatibilityCheckX86
       - CompatibilityCheckAarch64
     runs-on: [self-hosted, style-checker]


@@ -33,6 +33,8 @@ jobs:
           clear-repository: true
           token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
           fetch-depth: 0
+      - name: Debug Info
+        uses: ./.github/actions/debug
       - name: Cherry pick
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"


@@ -3,6 +3,9 @@ name: CreateRelease
 concurrency:
   group: release
+env:
+  PYTHONUNBUFFERED: 1
+
 'on':
   workflow_dispatch:
     inputs:
@@ -56,13 +59,13 @@
       GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
     runs-on: [self-hosted, release-maker]
     steps:
-      - name: DebugInfo
-        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
         with:
           token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
           fetch-depth: 0
+      - name: Debug Info
+        uses: ./.github/actions/debug
       - name: Prepare Release Info
         shell: bash
         run: |


@@ -1,4 +1,5 @@
 name: Build docker images
+
 'on':
   workflow_call:
     inputs:
@@ -11,6 +12,10 @@ name: Build docker images
       required: false
       type: boolean
       default: false
+
+env:
+  PYTHONUNBUFFERED: 1
+
 jobs:
   DockerBuildAarch64:
     runs-on: [self-hosted, style-checker-aarch64]


@@ -8,27 +8,28 @@ on:  # yamllint disable-line rule:truthy
   schedule:
     - cron: '0 */6 * * *'
   workflow_dispatch:
 jobs:
   RunConfig:
     runs-on: [self-hosted, style-checker-aarch64]
     outputs:
       data: ${{ steps.runconfig.outputs.CI_DATA }}
     steps:
-      - name: DebugInfo
-        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
         with:
           clear-repository: true  # to ensure correct digests
           fetch-depth: 0  # to get version
           filter: tree:0
+      - name: Debug Info
+        uses: ./.github/actions/debug
       - name: PrepareRunConfig
         id: runconfig
         run: |
           echo "::group::configure CI run"
           python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --workflow "$GITHUB_WORKFLOW" --outfile ${{ runner.temp }}/ci_run_data.json
           echo "::endgroup::"
           echo "::group::CI run configure results"
           python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
           echo "::endgroup::"


@@ -15,14 +15,14 @@ jobs:
     outputs:
       data: ${{ steps.runconfig.outputs.CI_DATA }}
     steps:
-      - name: DebugInfo
-        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
         with:
           clear-repository: true  # to ensure correct digests
           fetch-depth: 0  # to get version
           filter: tree:0
+      - name: Debug Info
+        uses: ./.github/actions/debug
       - name: Merge sync PR
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"


@@ -14,14 +14,14 @@ jobs:
     outputs:
       data: ${{ steps.runconfig.outputs.CI_DATA }}
     steps:
-      - name: DebugInfo
-        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
         with:
           clear-repository: true  # to ensure correct digests
           fetch-depth: 0  # to get a version
           filter: tree:0
+      - name: Debug Info
+        uses: ./.github/actions/debug
       - name: Cancel PR workflow
         run: |
           python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run


@@ -15,14 +15,14 @@ jobs:
     outputs:
       data: ${{ steps.runconfig.outputs.CI_DATA }}
     steps:
-      - name: DebugInfo
-        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
         with:
           clear-repository: true  # to ensure correct digests
           fetch-depth: 0  # to get version
           filter: tree:0
+      - name: Debug Info
+        uses: ./.github/actions/debug
       - name: PrepareRunConfig
         id: runconfig
         run: |


@@ -25,17 +25,14 @@ jobs:
     outputs:
       data: ${{ steps.runconfig.outputs.CI_DATA }}
     steps:
-      - name: DebugInfo
-        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
         with:
           clear-repository: true  # to ensure correct digests
           fetch-depth: 0  # to get a version
           filter: tree:0
-      - name: Cancel previous Sync PR workflow
-        run: |
-          python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run
+      - name: Debug Info
+        uses: ./.github/actions/debug
       - name: Set pending Sync status
         run: |
           python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --set-pending-status


@@ -24,6 +24,8 @@ jobs:
           clear-repository: true  # to ensure correct digests
           fetch-depth: 0  # to get version
           filter: tree:0
+      - name: Debug Info
+        uses: ./.github/actions/debug
       - name: Labels check
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
@@ -372,7 +374,7 @@ jobs:
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: Stress test (asan)
-      runner_type: stress-tester
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   StressTestTsan:
     needs: [RunConfig, BuilderDebTsan]
@@ -380,7 +382,7 @@ jobs:
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: Stress test (tsan)
-      runner_type: stress-tester
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   StressTestMsan:
     needs: [RunConfig, BuilderDebMsan]
@@ -388,7 +390,7 @@ jobs:
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: Stress test (msan)
-      runner_type: stress-tester
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   StressTestUBsan:
     needs: [RunConfig, BuilderDebUBsan]
@@ -396,7 +398,7 @@ jobs:
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: Stress test (ubsan)
-      runner_type: stress-tester
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   StressTestDebug:
     needs: [RunConfig, BuilderDebDebug]
@@ -404,7 +406,7 @@ jobs:
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: Stress test (debug)
-      runner_type: stress-tester
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   #############################################################################################
   ############################# INTEGRATION TESTS #############################################
@@ -415,7 +417,7 @@ jobs:
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: Integration tests (asan)
-      runner_type: stress-tester
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   IntegrationTestsAnalyzerAsan:
     needs: [RunConfig, BuilderDebAsan]
@@ -423,7 +425,7 @@ jobs:
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: Integration tests (asan, old analyzer)
-      runner_type: stress-tester
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   IntegrationTestsTsan:
     needs: [RunConfig, BuilderDebTsan]
@@ -431,7 +433,7 @@ jobs:
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: Integration tests (tsan)
-      runner_type: stress-tester
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   IntegrationTestsRelease:
     needs: [RunConfig, BuilderDebRelease]
@@ -439,7 +441,7 @@ jobs:
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: Integration tests (release)
-      runner_type: stress-tester
+      runner_type: func-tester
       data: ${{ needs.RunConfig.outputs.data }}
   FinishCheck:
     if: ${{ !cancelled() }}


@@ -1,7 +1,11 @@
 ### FIXME: merge reusable_test.yml and reusable_build.yml as they are almost identical
 # and then merge reusable_build_stage.yml and reusable_test_stage.yml
+
+env:
+  PYTHONUNBUFFERED: 1
+
 name: BuildStageWF
 'on':
   workflow_call:
     inputs:


@@ -62,8 +62,6 @@ jobs:
     env:
       GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}
     steps:
-      - name: DebugInfo
-        uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
         with:
@@ -72,6 +70,8 @@ jobs:
           submodules: ${{inputs.submodules}}
           fetch-depth: ${{inputs.checkout_depth}}
           filter: tree:0
+      - name: Debug Info
+        uses: ./.github/actions/debug
       - name: Set build envs
         run: |
           cat >> "$GITHUB_ENV" << 'EOF'


@@ -1,4 +1,8 @@
+env:
+  PYTHONUNBUFFERED: 1
+
 name: StageWF
 'on':
   workflow_call:
     inputs:

.gitignore

@@ -159,6 +159,7 @@ website/package-lock.json
 /programs/server/store
 /programs/server/uuid
 /programs/server/coordination
+/programs/server/workload
 # temporary test files
 tests/queries/0_stateless/test_*

.gitmodules

@@ -170,9 +170,6 @@
 [submodule "contrib/fast_float"]
     path = contrib/fast_float
     url = https://github.com/fastfloat/fast_float
-[submodule "contrib/libpq"]
-    path = contrib/libpq
-    url = https://github.com/ClickHouse/libpq
 [submodule "contrib/NuRaft"]
     path = contrib/NuRaft
     url = https://github.com/ClickHouse/NuRaft
@@ -230,12 +227,6 @@
 [submodule "contrib/minizip-ng"]
     path = contrib/minizip-ng
     url = https://github.com/zlib-ng/minizip-ng
-[submodule "contrib/qpl"]
-    path = contrib/qpl
-    url = https://github.com/intel/qpl
-[submodule "contrib/idxd-config"]
-    path = contrib/idxd-config
-    url = https://github.com/intel/idxd-config
 [submodule "contrib/QAT-ZSTD-Plugin"]
     path = contrib/QAT-ZSTD-Plugin
     url = https://github.com/intel/QAT-ZSTD-Plugin
@@ -366,6 +357,15 @@
 [submodule "contrib/double-conversion"]
     path = contrib/double-conversion
     url = https://github.com/ClickHouse/double-conversion.git
+[submodule "contrib/mongo-cxx-driver"]
+    path = contrib/mongo-cxx-driver
+    url = https://github.com/ClickHouse/mongo-cxx-driver.git
+[submodule "contrib/mongo-c-driver"]
+    path = contrib/mongo-c-driver
+    url = https://github.com/ClickHouse/mongo-c-driver.git
 [submodule "contrib/numactl"]
     path = contrib/numactl
     url = https://github.com/ClickHouse/numactl.git
+[submodule "contrib/postgres"]
+    path = contrib/postgres
+    url = https://github.com/ClickHouse/postgres.git


@@ -1,5 +1,7 @@
 ### Table of Contents
-**[ClickHouse release v24.8 LTS, 2024-08-20](#243)**<br/>
+**[ClickHouse release v24.10, 2024-10-31](#2410)**<br/>
+**[ClickHouse release v24.9, 2024-09-26](#249)**<br/>
+**[ClickHouse release v24.8 LTS, 2024-08-20](#248)**<br/>
 **[ClickHouse release v24.7, 2024-07-30](#247)**<br/>
 **[ClickHouse release v24.6, 2024-07-01](#246)**<br/>
 **[ClickHouse release v24.5, 2024-05-30](#245)**<br/>
@@ -11,6 +13,327 @@
# 2024 Changelog

### <a id="2410"></a> ClickHouse release 24.10, 2024-10-31
#### Backward Incompatible Change
* Allow to write `SETTINGS` before `FORMAT` in a chain of queries with `UNION` when subqueries are inside parentheses (a short sketch follows this list). This closes [#39712](https://github.com/ClickHouse/ClickHouse/issues/39712). Change the behavior when a query has the SETTINGS clause specified twice in a sequence. The closest SETTINGS clause will have a preference for the corresponding subquery. In previous versions, the outermost SETTINGS clause could take a preference over the inner one. [#68614](https://github.com/ClickHouse/ClickHouse/pull/68614) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Reordering of filter conditions from `[PRE]WHERE` clause is now allowed by default. It could be disabled by setting `allow_reorder_prewhere_conditions` to `false`. [#70657](https://github.com/ClickHouse/ClickHouse/pull/70657) ([Nikita Taranov](https://github.com/nickitat)).
* Remove the `idxd-config` library, which has an incompatible license. This also removes the experimental Intel DeflateQPL codec. [#70987](https://github.com/ClickHouse/ClickHouse/pull/70987) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
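
A minimal sketch of the `SETTINGS`-before-`FORMAT` placement from the first entry in this list; the queries and setting values are illustrative, not taken from the PR:

```sql
-- Now allowed: SETTINGS before FORMAT when the UNION subqueries are parenthesized.
(SELECT 1 UNION ALL SELECT 2)
SETTINGS max_threads = 1
FORMAT CSV;

-- With SETTINGS specified twice, the closest (inner) clause now wins for its subquery.
(SELECT 1 SETTINGS max_threads = 1)
UNION ALL
(SELECT 2)
SETTINGS max_threads = 4;
```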
#### New Feature
* Allow to grant access to the wildcard prefixes. `GRANT SELECT ON db.table_prefix_* TO user`. [#65311](https://github.com/ClickHouse/ClickHouse/pull/65311) ([pufit](https://github.com/pufit)).
* If you press the space bar during query runtime, the client will display a real-time table with detailed metrics. You can enable it globally with the new `--progress-table` option in clickhouse-client; a new `--enable-progress-table-toggle` option is associated with the `--progress-table` option and toggles the rendering of the progress table by pressing the control key (Space). [#63689](https://github.com/ClickHouse/ClickHouse/pull/63689) ([Maria Khristenko](https://github.com/mariaKhr)), [#70423](https://github.com/ClickHouse/ClickHouse/pull/70423) ([Julia Kartseva](https://github.com/jkartseva)).
* Allow to cache read files for object storage table engines and data lakes using hash from ETag + file path as cache key. [#70135](https://github.com/ClickHouse/ClickHouse/pull/70135) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support creating a table with a query: `CREATE TABLE ... CLONE AS ...`. It clones the source table's schema and then attaches all partitions to the newly created table. This feature is only supported with tables of the `MergeTree` family (sketched after this list). Closes [#65015](https://github.com/ClickHouse/ClickHouse/issues/65015). [#69091](https://github.com/ClickHouse/ClickHouse/pull/69091) ([tuanpach](https://github.com/tuanpach)).
* Add a new system table, `system.query_metric_log` which contains history of memory and metric values from table system.events for individual queries, periodically flushed to disk. [#66532](https://github.com/ClickHouse/ClickHouse/pull/66532) ([Pablo Marcos](https://github.com/pamarcos)).
* A simple SELECT query can be written with implicit SELECT to enable calculator-style expressions, e.g., `ch "1 + 2"`. This is controlled by a new setting, `implicit_select`. [#68502](https://github.com/ClickHouse/ClickHouse/pull/68502) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support the `--copy` mode for clickhouse local as a shortcut for format conversion [#68503](https://github.com/ClickHouse/ClickHouse/issues/68503). [#68583](https://github.com/ClickHouse/ClickHouse/pull/68583) ([Denis Hananein](https://github.com/denis-hananein)).
* Add a builtin HTML page for visualizing merges which is available at the `/merges` path. [#70821](https://github.com/ClickHouse/ClickHouse/pull/70821) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add support for `arrayUnion` function. [#68989](https://github.com/ClickHouse/ClickHouse/pull/68989) ([Peter Nguyen](https://github.com/petern48)).
* Allow parametrised SQL aliases. [#50665](https://github.com/ClickHouse/ClickHouse/pull/50665) ([Anton Kozlov](https://github.com/tonickkozlov)).
* A new aggregate function `quantileExactWeightedInterpolated`, which is an interpolated version based on `quantileExactWeighted`. Some people may wonder why we need a new `quantileExactWeightedInterpolated` since we already have `quantileExactInterpolatedWeighted`. The reason is that the new one is more accurate than the old one. This is for Spark compatibility. [#69619](https://github.com/ClickHouse/ClickHouse/pull/69619) ([李扬](https://github.com/taiyang-li)).
* A new function `arrayElementOrNull`. It returns `NULL` if the array index is out of range or the Map key is not found (sketched after this list). [#69646](https://github.com/ClickHouse/ClickHouse/pull/69646) ([李扬](https://github.com/taiyang-li)).
* Allows users to specify regular expressions through new `message_regexp` and `message_regexp_negative` fields in the `config.xml` file to filter out logging. The logging is applied to the formatted un-colored text for the most intuitive developer experience. [#69657](https://github.com/ClickHouse/ClickHouse/pull/69657) ([Peter Nguyen](https://github.com/petern48)).
* Added `RIPEMD160` function, which computes the RIPEMD-160 cryptographic hash of a string. Example: `SELECT HEX(RIPEMD160('The quick brown fox jumps over the lazy dog'))` returns `37F332F68DB77BD9D7EDD4969571AD671CF9DD3B`. [#70087](https://github.com/ClickHouse/ClickHouse/pull/70087) ([Dergousov Maxim](https://github.com/m7kss1)).
* Support reading `Iceberg` tables on `HDFS`. [#70268](https://github.com/ClickHouse/ClickHouse/pull/70268) ([flynn](https://github.com/ucasfl)).
* Support for CTE in the form of `WITH ... INSERT`, as previously we only supported `INSERT ... WITH ...`. [#70593](https://github.com/ClickHouse/ClickHouse/pull/70593) ([Shichao Jin](https://github.com/jsc0218)).
* MongoDB integration: support for all MongoDB types, support for WHERE and ORDER BY statements on the MongoDB side, restriction for expressions unsupported by MongoDB. Note that the new integration is disabled by default; to use it, please set `<use_legacy_mongodb_integration>` to `false` in the server config. [#63279](https://github.com/ClickHouse/ClickHouse/pull/63279) ([Kirill Nikiforov](https://github.com/allmazz)).
* A new function `getSettingOrDefault` added to return the default value and avoid exception if a custom setting is not found in the current profile. [#69917](https://github.com/ClickHouse/ClickHouse/pull/69917) ([Shankar](https://github.com/shiyer7474)).
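
Hedged sketches for several entries in this list (`CREATE TABLE ... CLONE AS ...`, `arrayUnion`, `arrayElementOrNull`, `getSettingOrDefault`); table names, argument values, and the custom setting name are illustrative assumptions, not taken from the PRs:

```sql
-- Clone a MergeTree table: copy the schema, then attach all partitions (hypothetical names).
CREATE TABLE events_copy CLONE AS events;

-- Set union of two arrays.
SELECT arrayUnion([1, 2], [2, 3]);

-- NULL instead of a default value when the index is out of range.
SELECT arrayElementOrNull([1, 2, 3], 5);

-- Fall back to a default instead of throwing when the custom setting is not set.
SELECT getSettingOrDefault('custom_x', 42);
```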
#### Experimental feature
* Refreshable materialized views are production ready. [#70550](https://github.com/ClickHouse/ClickHouse/pull/70550) ([Michael Kolupaev](https://github.com/al13n321)). Refreshable materialized views are now supported in Replicated databases. [#60669](https://github.com/ClickHouse/ClickHouse/pull/60669) ([Michael Kolupaev](https://github.com/al13n321)).
* Parallel replicas are moved from experimental to beta. Reworked settings that control the behavior of parallel replicas algorithms. A quick recap: ClickHouse has four different algorithms for parallel reading involving multiple replicas, which is reflected in the setting `parallel_replicas_mode`; the default value for it is `read_tasks`. Additionally, the toggle-switch setting `enable_parallel_replicas` has been added (a minimal sketch follows this list). [#63151](https://github.com/ClickHouse/ClickHouse/pull/63151) ([Alexey Milovidov](https://github.com/alexey-milovidov)), ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Support for the `Dynamic` type in most functions by executing them on internal types inside `Dynamic`. [#69691](https://github.com/ClickHouse/ClickHouse/pull/69691) ([Pavel Kruglov](https://github.com/Avogar)).
* Allow to read/write the `JSON` type as a binary string in `RowBinary` format under settings `input_format_binary_read_json_as_string/output_format_binary_write_json_as_string`. [#70288](https://github.com/ClickHouse/ClickHouse/pull/70288) ([Pavel Kruglov](https://github.com/Avogar)).
* Allow to serialize/deserialize `JSON` column as single String column in the Native format. For output use setting `output_format_native_write_json_as_string`. For input, use serialization version `1` before the column data. [#70312](https://github.com/ClickHouse/ClickHouse/pull/70312) ([Pavel Kruglov](https://github.com/Avogar)).
* Introduced a special (experimental) mode of a merge selector for MergeTree tables which makes it more aggressive for the partitions that are close to the limit by the number of parts. It is controlled by the `merge_selector_use_blurry_base` MergeTree-level setting. [#70645](https://github.com/ClickHouse/ClickHouse/pull/70645) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Implement generic ser/de between Avro's `Union` and ClickHouse's `Variant` types. Resolves [#69713](https://github.com/ClickHouse/ClickHouse/issues/69713). [#69712](https://github.com/ClickHouse/ClickHouse/pull/69712) ([Jiří Kozlovský](https://github.com/jirislav)).
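
A minimal sketch of the reworked parallel-replicas toggles named above, assuming a cluster with several replicas is already configured; the table name is illustrative:

```sql
-- Beta parallel replicas: one switch to enable, one setting to pick the algorithm.
SET enable_parallel_replicas = 1;
SET parallel_replicas_mode = 'read_tasks';  -- the default algorithm per the entry above
SELECT count() FROM big_table;
```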
#### Performance Improvement
* Refactor `IDisk` and `IObjectStorage` for better performance. Tables from `plain` and `plain_rewritable` object storages will initialize faster. [#68146](https://github.com/ClickHouse/ClickHouse/pull/68146) ([Alexey Milovidov](https://github.com/alexey-milovidov), [Julia Kartseva](https://github.com/jkartseva)). Do not call the LIST object storage API when determining if a file or directory exists on the plain rewritable disk, as it can be cost-inefficient. [#70852](https://github.com/ClickHouse/ClickHouse/pull/70852) ([Julia Kartseva](https://github.com/jkartseva)). Reduce the number of object storage HEAD API requests in the plain_rewritable disk. [#70915](https://github.com/ClickHouse/ClickHouse/pull/70915) ([Julia Kartseva](https://github.com/jkartseva)).
* Added an ability to parse data directly into sparse columns. [#69828](https://github.com/ClickHouse/ClickHouse/pull/69828) ([Anton Popov](https://github.com/CurtizJ)).
* Improved performance of parsing formats with high number of missed values (e.g. `JSONEachRow`). [#69875](https://github.com/ClickHouse/ClickHouse/pull/69875) ([Anton Popov](https://github.com/CurtizJ)).
* Supports parallel reading of parquet row groups and prefetching of row groups in single-threaded mode. [#69862](https://github.com/ClickHouse/ClickHouse/pull/69862) ([LiuNeng](https://github.com/liuneng1994)).
* Support minmax index for `pointInPolygon` (sketched after this list). [#62085](https://github.com/ClickHouse/ClickHouse/pull/62085) ([JackyWoo](https://github.com/JackyWoo)).
* Use bloom filters when reading Parquet files. [#62966](https://github.com/ClickHouse/ClickHouse/pull/62966) ([Arthur Passos](https://github.com/arthurpassos)).
* Lock-free parts rename to avoid INSERT affecting SELECT (due to the parts lock). Under normal circumstances with `fsync_part_directory`, QPS of SELECT with INSERT in parallel increased 2x; under heavy load the effect is even bigger. Note, this only includes `ReplicatedMergeTree` for now. [#64955](https://github.com/ClickHouse/ClickHouse/pull/64955) ([Azat Khuzhin](https://github.com/azat)).
* Respect `ttl_only_drop_parts` on `materialize ttl`; only read necessary columns to recalculate TTL and drop parts by replacing them with an empty one. [#65488](https://github.com/ClickHouse/ClickHouse/pull/65488) ([Andrey Zvonov](https://github.com/zvonand)).
* Optimized thread creation in the ThreadPool to minimize lock contention. Thread creation is now performed outside of the critical section to avoid delays in job scheduling and thread management under high load conditions. This leads to a much more responsive ClickHouse under heavy concurrent load. [#68694](https://github.com/ClickHouse/ClickHouse/pull/68694) ([filimonov](https://github.com/filimonov)).
* Enable reading `LowCardinality` string columns from `ORC`. [#69481](https://github.com/ClickHouse/ClickHouse/pull/69481) ([李扬](https://github.com/taiyang-li)).
* Use `LowCardinality` for `ProfileEvents` in system logs such as `part_log`, `query_views_log`, `filesystem_cache_log`. [#70152](https://github.com/ClickHouse/ClickHouse/pull/70152) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve performance of `fromUnixTimestamp`/`toUnixTimestamp` functions. [#71042](https://github.com/ClickHouse/ClickHouse/pull/71042) ([kevinyhzou](https://github.com/KevinyhZou)).
* Don't disable nonblocking read from page cache for the entire server when reading from a blocking I/O. This was leading to poorer performance when a single filesystem (e.g., tmpfs) didn't support the `preadv2` syscall while others did. [#70299](https://github.com/ClickHouse/ClickHouse/pull/70299) ([Antonio Andelic](https://github.com/antonio2368)).
* `ALTER TABLE .. REPLACE PARTITION` doesn't wait anymore for mutations/merges that happen in other partitions. [#59138](https://github.com/ClickHouse/ClickHouse/pull/59138) ([Vasily Nemkov](https://github.com/Enmk)).
* Don't do validation when synchronizing ACL from Keeper. It's validated during creation. It shouldn't matter that much, but there are installations with tens of thousands or even more users created, and the unnecessary hash validation can take a long time to finish during server startup (it synchronizes everything from Keeper). [#70644](https://github.com/ClickHouse/ClickHouse/pull/70644) ([Raúl Marín](https://github.com/Algunenano)).
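
One way the minmax-index support for `pointInPolygon` from this list could be exercised; the schema, index granularity, and polygon are illustrative assumptions:

```sql
-- A minmax skipping index over the coordinates lets pointInPolygon prune granules.
CREATE TABLE points
(
    x Float64,
    y Float64,
    INDEX xy_minmax (x, y) TYPE minmax GRANULARITY 4
)
ENGINE = MergeTree
ORDER BY (x, y);

SELECT count()
FROM points
WHERE pointInPolygon((x, y), [(0., 0.), (0., 10.), (10., 10.), (10., 0.)]);
```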
#### Improvement
* `CREATE TABLE AS` will copy `PRIMARY KEY`, `ORDER BY`, and similar clauses (of `MergeTree` tables); see the sketch after this list. [#69739](https://github.com/ClickHouse/ClickHouse/pull/69739) ([sakulali](https://github.com/sakulali)).
* Support 64-bit XID in Keeper. It can be enabled with the `use_xid_64` configuration value. [#69908](https://github.com/ClickHouse/ClickHouse/pull/69908) ([Antonio Andelic](https://github.com/antonio2368)).
* Command-line arguments for Bool settings are set to true when no value is provided for the argument (e.g. `clickhouse-client --optimize_aggregation_in_order --query "SELECT 1"`). [#70459](https://github.com/ClickHouse/ClickHouse/pull/70459) ([davidtsuk](https://github.com/davidtsuk)).
* Added user-level settings `min_free_disk_bytes_to_throw_insert` and `min_free_disk_ratio_to_throw_insert` to prevent insertions on disks that are almost full. [#69755](https://github.com/ClickHouse/ClickHouse/pull/69755) ([Marco Vilas Boas](https://github.com/marco-vb)).
* Embedded documentation for settings will be strictly more detailed and complete than the documentation on the website. This is the first step before making the website documentation always auto-generated from the source code. This has long-standing implications: - it will be guaranteed to have every setting; - there is no chance of having default values obsolete; - we can generate this documentation for each ClickHouse version; - the documentation can be displayed by the server itself even without Internet access. Generate the docs on the website from the source code. [#70289](https://github.com/ClickHouse/ClickHouse/pull/70289) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow empty needle in the function `replace`, matching the behavior of PostgreSQL (sketched after this list). [#69918](https://github.com/ClickHouse/ClickHouse/pull/69918) ([zhanglistar](https://github.com/zhanglistar)).
* Allow empty needle in functions `replaceRegexp*`. [#70053](https://github.com/ClickHouse/ClickHouse/pull/70053) ([zhanglistar](https://github.com/zhanglistar)).
* Symbolic links for tables in the `data/database_name/` directory are created for the actual paths to the table's data, depending on the storage policy, instead of the `store/...` directory on the default disk. [#61777](https://github.com/ClickHouse/ClickHouse/pull/61777) ([Kirill](https://github.com/kirillgarbar)).
* While parsing an `Enum` field from `JSON`, a string containing an integer will be interpreted as the corresponding `Enum` element. This closes [#65119](https://github.com/ClickHouse/ClickHouse/issues/65119). [#66801](https://github.com/ClickHouse/ClickHouse/pull/66801) ([scanhex12](https://github.com/scanhex12)).
* Allow `TRIM` -ing `LEADING` or `TRAILING` empty string as a no-op. Closes [#67792](https://github.com/ClickHouse/ClickHouse/issues/67792). [#68455](https://github.com/ClickHouse/ClickHouse/pull/68455) ([Peter Nguyen](https://github.com/petern48)).
* Improve compatibility of `cast(timestamp as String)` with Spark. [#69179](https://github.com/ClickHouse/ClickHouse/pull/69179) ([Wenzheng Liu](https://github.com/lwz9103)).
* Always use the new analyzer to calculate constant expressions when `enable_analyzer` is set to `true`. Support calculation of `executable` table function arguments without using `SELECT` query for constant expressions. [#69292](https://github.com/ClickHouse/ClickHouse/pull/69292) ([Dmitry Novik](https://github.com/novikd)).
* Add a setting `enable_secure_identifiers` to disallow identifiers with special characters. [#69411](https://github.com/ClickHouse/ClickHouse/pull/69411) ([tuanpach](https://github.com/tuanpach)).
* Add `show_create_query_identifier_quoting_rule` to define identifier quoting behavior in the `SHOW CREATE TABLE` query result. Possible values: `user_display`: when the identifier is a keyword; `when_necessary`: when the identifier is one of `{"distinct", "all", "table"}` or when it could lead to ambiguity (column names, dictionary attribute names); `always`: always quote identifiers. [#69448](https://github.com/ClickHouse/ClickHouse/pull/69448) ([tuanpach](https://github.com/tuanpach)).
* Improve restoring of access entities' dependencies [#69563](https://github.com/ClickHouse/ClickHouse/pull/69563) ([Vitaly Baranov](https://github.com/vitlibar)).
* If you run `clickhouse-client` or another CLI application, and it starts up slowly due to an overloaded server, and you start typing your query, such as `SELECT`, previous versions would display the remaining terminal echo contents before printing the greeting message, such as `SELECTClickHouse local version 24.10.1.1.` instead of `ClickHouse local version 24.10.1.1.`. Now it is fixed. This closes [#31696](https://github.com/ClickHouse/ClickHouse/issues/31696). [#69856](https://github.com/ClickHouse/ClickHouse/pull/69856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add new column `readonly_duration` to the `system.replicas` table. Needed to be able to distinguish actual readonly replicas from sentinel ones in alerts. [#69871](https://github.com/ClickHouse/ClickHouse/pull/69871) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Change the type of the `join_output_by_rowlist_perkey_rows_threshold` setting to unsigned integer. [#69886](https://github.com/ClickHouse/ClickHouse/pull/69886) ([kevinyhzou](https://github.com/KevinyhZou)).
* Enhance OpenTelemetry span logging to include query settings. [#70011](https://github.com/ClickHouse/ClickHouse/pull/70011) ([sharathks118](https://github.com/sharathks118)).
* Add diagnostic info about higher-order array functions if lambda result type is unexpected. [#70093](https://github.com/ClickHouse/ClickHouse/pull/70093) ([ttanay](https://github.com/ttanay)).
* Keeper improvement: less locking during cluster changes. [#70275](https://github.com/ClickHouse/ClickHouse/pull/70275) ([Antonio Andelic](https://github.com/antonio2368)).
* Add `WITH IMPLICIT` and `FINAL` keywords to the `SHOW GRANTS` command. Fix a minor bug with implicit grants: [#70094](https://github.com/ClickHouse/ClickHouse/issues/70094). [#70293](https://github.com/ClickHouse/ClickHouse/pull/70293) ([pufit](https://github.com/pufit)).
* Respect `compatibility` for MergeTree settings. The `compatibility` value is taken from the `default` profile on server startup, and default MergeTree settings are changed accordingly. Further changes of the `compatibility` setting do not affect MergeTree settings. [#70322](https://github.com/ClickHouse/ClickHouse/pull/70322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Avoid spamming the logs with large HTTP response bodies in case of errors during inter-server communication. [#70487](https://github.com/ClickHouse/ClickHouse/pull/70487) ([Vladimir Cherkasov](https://github.com/vdimir)).
* Added a new setting `max_parts_to_move` to control the maximum number of parts that can be moved at once. [#70520](https://github.com/ClickHouse/ClickHouse/pull/70520) ([Vladimir Cherkasov](https://github.com/vdimir)).
* Limit the frequency of certain log messages. [#70601](https://github.com/ClickHouse/ClickHouse/pull/70601) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* `CHECK TABLE` with `PART` qualifier was incorrectly formatted in the client. [#70660](https://github.com/ClickHouse/ClickHouse/pull/70660) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support writing the column index and the offset index using parquet native writer. [#70669](https://github.com/ClickHouse/ClickHouse/pull/70669) ([LiuNeng](https://github.com/liuneng1994)).
* Support parsing `DateTime64` for microsecond and timezone in joda syntax ("joda" is a popular Java library for date and time, and the "joda syntax" is that library's style). [#70737](https://github.com/ClickHouse/ClickHouse/pull/70737) ([kevinyhzou](https://github.com/KevinyhZou)).
* Changed an approach to figure out if a cloud storage supports [batch delete](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html) or not. [#70786](https://github.com/ClickHouse/ClickHouse/pull/70786) ([Vitaly Baranov](https://github.com/vitlibar)).
* Support for Parquet page v2 in the native reader. [#70807](https://github.com/ClickHouse/ClickHouse/pull/70807) ([Arthur Passos](https://github.com/arthurpassos)).
* Added a check for tables that have both `storage_policy` and `disk` set, and a check that a new storage policy is compatible with the old one when the `disk` setting is used. [#70839](https://github.com/ClickHouse/ClickHouse/pull/70839) ([Kirill](https://github.com/kirillgarbar)).
* Add `system.s3_queue_settings` and `system.azure_queue_settings`. [#70841](https://github.com/ClickHouse/ClickHouse/pull/70841) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Functions `base58Encode` and `base58Decode` now accept arguments of type `FixedString`. Example: `SELECT base58Encode(toFixedString('plaintext', 9));`. [#70846](https://github.com/ClickHouse/ClickHouse/pull/70846) ([Faizan Patel](https://github.com/faizan2786)).
* Add the `partition` column to every entry type of the part log. Previously, it was set only for some entries. This closes [#70819](https://github.com/ClickHouse/ClickHouse/issues/70819). [#70848](https://github.com/ClickHouse/ClickHouse/pull/70848) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add `MergeStart` and `MutateStart` events into `system.part_log` which helps with merges analysis and visualization. [#70850](https://github.com/ClickHouse/ClickHouse/pull/70850) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a profile event about the number of merged source parts. It allows the monitoring of the fanout of the merge tree in production. [#70908](https://github.com/ClickHouse/ClickHouse/pull/70908) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Background downloads to the filesystem cache were enabled back. [#70929](https://github.com/ClickHouse/ClickHouse/pull/70929) ([Nikita Taranov](https://github.com/nickitat)).
* Add a new merge selector algorithm, named `Trivial`, for professional usage only. It is worse than the `Simple` merge selector. [#70969](https://github.com/ClickHouse/ClickHouse/pull/70969) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support for atomic `CREATE OR REPLACE VIEW`. [#70536](https://github.com/ClickHouse/ClickHouse/pull/70536) ([tuanpach](https://github.com/tuanpach))
* Added `strict_once` mode to aggregate function `windowFunnel` to avoid counting one event several times in case it matches multiple conditions, close [#21835](https://github.com/ClickHouse/ClickHouse/issues/21835). [#69738](https://github.com/ClickHouse/ClickHouse/pull/69738) ([Vladimir Cherkasov](https://github.com/vdimir)).
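
Small hedged examples for two entries in this list (`CREATE TABLE AS` copying `ORDER BY`, and the empty needle in `replace`); table names and values are illustrative:

```sql
-- CREATE TABLE AS now also carries over PRIMARY KEY / ORDER BY of a MergeTree source.
CREATE TABLE dst AS src;

-- An empty needle is accepted and, as in PostgreSQL, leaves the haystack unchanged.
SELECT replace('clickhouse', '', '-');  -- 'clickhouse'
```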
#### Bug Fix (user-visible misbehavior in an official stable release)
* Apply configuration updates in global context object. It fixes issues like [#62308](https://github.com/ClickHouse/ClickHouse/issues/62308). [#62944](https://github.com/ClickHouse/ClickHouse/pull/62944) ([Amos Bird](https://github.com/amosbird)).
* Fix `ReadSettings` not using user set values, because defaults were only used. [#65625](https://github.com/ClickHouse/ClickHouse/pull/65625) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix type mismatch issue in `sumMapFiltered` when using signed arguments. [#58408](https://github.com/ClickHouse/ClickHouse/pull/58408) ([Chen768959](https://github.com/Chen768959)).
* Fix toHour-like conversion functions' monotonicity when optional time zone argument is passed. [#60264](https://github.com/ClickHouse/ClickHouse/pull/60264) ([Amos Bird](https://github.com/amosbird)).
* Relax `supportsPrewhere` check for `Merge` tables. This fixes [#61064](https://github.com/ClickHouse/ClickHouse/issues/61064). It was hardened unnecessarily in [#60082](https://github.com/ClickHouse/ClickHouse/issues/60082). [#61091](https://github.com/ClickHouse/ClickHouse/pull/61091) ([Amos Bird](https://github.com/amosbird)).
* Fix `use_concurrency_control` setting handling for proper `concurrent_threads_soft_limit_num` limit enforcing. This enables concurrency control by default because previously it was broken. [#61473](https://github.com/ClickHouse/ClickHouse/pull/61473) ([Sergei Trifonov](https://github.com/serxa)).
* Fix incorrect `JOIN ON` section optimization in case of `IS NULL` check under any other function (like `NOT`) that may lead to wrong results. Closes [#67915](https://github.com/ClickHouse/ClickHouse/issues/67915). [#68049](https://github.com/ClickHouse/ClickHouse/pull/68049) ([Vladimir Cherkasov](https://github.com/vdimir)).
* Prevent `ALTER` queries that would make the `CREATE` query of tables invalid. [#68574](https://github.com/ClickHouse/ClickHouse/pull/68574) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix inconsistent AST formatting for `negate` (`-`) and `NOT` functions with tuples and arrays. [#68600](https://github.com/ClickHouse/ClickHouse/pull/68600) ([Vladimir Cherkasov](https://github.com/vdimir)).
* Fix insertion of incomplete type into `Dynamic` during deserialization. It could lead to `Parameter out of bound` errors. [#69291](https://github.com/ClickHouse/ClickHouse/pull/69291) ([Pavel Kruglov](https://github.com/Avogar)).
* Zero-copy replication, which is experimental and should not be used in production: fix inf loop after `restore replica` in the replicated merge tree with zero copy. [#69293](https://github.com/ClickHouse/ClickHouse/pull/69293) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Return back default value of `processing_threads_num` as number of cpu cores in storage `S3Queue`. [#69384](https://github.com/ClickHouse/ClickHouse/pull/69384) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Bypass try/catch flow when de/serializing nested repeated protobuf to nested columns (fixes [#41971](https://github.com/ClickHouse/ClickHouse/issues/41971)). [#69556](https://github.com/ClickHouse/ClickHouse/pull/69556) ([Eliot Hautefeuille](https://github.com/hileef)).
* Fix crash during insertion into FixedString column in PostgreSQL engine. [#69584](https://github.com/ClickHouse/ClickHouse/pull/69584) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix crash when executing `create view t as (with recursive 42 as ttt select ttt);`. [#69676](https://github.com/ClickHouse/ClickHouse/pull/69676) ([Han Fei](https://github.com/hanfei1991)).
* Fixed `maxMapState` throwing 'Bad get' if value type is DateTime64. [#69787](https://github.com/ClickHouse/ClickHouse/pull/69787) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix `getSubcolumn` with `LowCardinality` columns by overriding `useDefaultImplementationForLowCardinalityColumns` to return `true`. [#69831](https://github.com/ClickHouse/ClickHouse/pull/69831) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Fix permanent blocked distributed sends if a DROP of distributed table failed. [#69843](https://github.com/ClickHouse/ClickHouse/pull/69843) ([Azat Khuzhin](https://github.com/azat)).
* Fix non-cancellable queries containing WITH FILL with NaN keys. This closes [#69261](https://github.com/ClickHouse/ClickHouse/issues/69261). [#69845](https://github.com/ClickHouse/ClickHouse/pull/69845) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix analyzer default with old compatibility value. [#69895](https://github.com/ClickHouse/ClickHouse/pull/69895) ([Raúl Marín](https://github.com/Algunenano)).
* Don't check dependencies during CREATE OR REPLACE VIEW during DROP of old table. Previously, a CREATE OR REPLACE query failed when there were dependent tables of the recreated view. [#69907](https://github.com/ClickHouse/ClickHouse/pull/69907) ([Pavel Kruglov](https://github.com/Avogar)).
* Something for Decimal. Fixes [#69730](https://github.com/ClickHouse/ClickHouse/issues/69730). [#69978](https://github.com/ClickHouse/ClickHouse/pull/69978) ([Arthur Passos](https://github.com/arthurpassos)).
* Now DEFINER/INVOKER will work with parameterized views. [#69984](https://github.com/ClickHouse/ClickHouse/pull/69984) ([pufit](https://github.com/pufit)).
* Fix parsing for view's definers. [#69985](https://github.com/ClickHouse/ClickHouse/pull/69985) ([pufit](https://github.com/pufit)).
* Fixed a bug when the timezone could change the result of the query with a `Date` or `Date32` arguments. [#70036](https://github.com/ClickHouse/ClickHouse/pull/70036) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fixes `Block structure mismatch` for queries with nested views and `WHERE` condition. Fixes [#66209](https://github.com/ClickHouse/ClickHouse/issues/66209). [#70054](https://github.com/ClickHouse/ClickHouse/pull/70054) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Avoid reusing columns among different named tuples when evaluating `tuple` functions. This fixes [#70022](https://github.com/ClickHouse/ClickHouse/issues/70022). [#70103](https://github.com/ClickHouse/ClickHouse/pull/70103) ([Amos Bird](https://github.com/amosbird)).
* Fix wrong LOGICAL_ERROR when replacing literals in ranges. [#70122](https://github.com/ClickHouse/ClickHouse/pull/70122) ([Pablo Marcos](https://github.com/pamarcos)).
* Check for Nullable(Nothing) type during ALTER TABLE MODIFY COLUMN/QUERY to prevent tables with such data type. [#70123](https://github.com/ClickHouse/ClickHouse/pull/70123) ([Pavel Kruglov](https://github.com/Avogar)).
* Proper error message for illegal query `JOIN ... ON *` , close [#68650](https://github.com/ClickHouse/ClickHouse/issues/68650). [#70124](https://github.com/ClickHouse/ClickHouse/pull/70124) ([Vladimir Cherkasov](https://github.com/vdimir)).
* Fix wrong result with skipping index. [#70127](https://github.com/ClickHouse/ClickHouse/pull/70127) ([Raúl Marín](https://github.com/Algunenano)).
* Fix data race in ColumnObject/ColumnTuple decompress method that could lead to heap use after free. [#70137](https://github.com/ClickHouse/ClickHouse/pull/70137) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix possible hung in ALTER COLUMN with Dynamic type. [#70144](https://github.com/ClickHouse/ClickHouse/pull/70144) ([Pavel Kruglov](https://github.com/Avogar)).
* Now ClickHouse will consider more errors as retriable and will not mark data parts as broken in case of such errors. [#70145](https://github.com/ClickHouse/ClickHouse/pull/70145) ([alesapin](https://github.com/alesapin)).
* Use correct `max_types` parameter during Dynamic type creation for JSON subcolumn. [#70147](https://github.com/ClickHouse/ClickHouse/pull/70147) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix the password being displayed in `system.query_log` for users with bcrypt password authentication method. [#70148](https://github.com/ClickHouse/ClickHouse/pull/70148) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix event counter for the native interface (InterfaceNativeSendBytes). [#70153](https://github.com/ClickHouse/ClickHouse/pull/70153) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix possible crash related to JSON columns. [#70172](https://github.com/ClickHouse/ClickHouse/pull/70172) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix multiple issues with arrayMin and arrayMax. [#70207](https://github.com/ClickHouse/ClickHouse/pull/70207) ([Raúl Marín](https://github.com/Algunenano)).
* Respect setting allow_simdjson in the JSON type parser. [#70218](https://github.com/ClickHouse/ClickHouse/pull/70218) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix a null pointer dereference on creating a materialized view with two selects and an `INTERSECT`, e.g. `CREATE MATERIALIZED VIEW v0 AS (SELECT 1) INTERSECT (SELECT 1);`. [#70264](https://github.com/ClickHouse/ClickHouse/pull/70264) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Don't modify global settings with startup scripts. Previously, changing a setting in a startup script would change it globally. [#70310](https://github.com/ClickHouse/ClickHouse/pull/70310) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix ALTER of `Dynamic` type with reducing max_types parameter that could lead to server crash. [#70328](https://github.com/ClickHouse/ClickHouse/pull/70328) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix crash when using WITH FILL incorrectly. [#70338](https://github.com/ClickHouse/ClickHouse/pull/70338) ([Raúl Marín](https://github.com/Algunenano)).
* Fix possible use-after-free in `SYSTEM DROP FORMAT SCHEMA CACHE FOR Protobuf`. [#70358](https://github.com/ClickHouse/ClickHouse/pull/70358) ([Azat Khuzhin](https://github.com/azat)).
* Fix crash during GROUP BY JSON sub-object subcolumn. [#70374](https://github.com/ClickHouse/ClickHouse/pull/70374) ([Pavel Kruglov](https://github.com/Avogar)).
* Don't prefetch parts for vertical merges if part has no rows. [#70452](https://github.com/ClickHouse/ClickHouse/pull/70452) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix crash in WHERE with lambda functions. [#70464](https://github.com/ClickHouse/ClickHouse/pull/70464) ([Raúl Marín](https://github.com/Algunenano)).
* Fix table creation with `CREATE ... AS table_function(...)` with database `Replicated` and unavailable table function source on secondary replica. [#70511](https://github.com/ClickHouse/ClickHouse/pull/70511) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Ignore all output on async insert with `wait_for_async_insert=1`. Closes [#62644](https://github.com/ClickHouse/ClickHouse/issues/62644). [#70530](https://github.com/ClickHouse/ClickHouse/pull/70530) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Ignore frozen_metadata.txt while traversing shadow directory from system.remote_data_paths. [#70590](https://github.com/ClickHouse/ClickHouse/pull/70590) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Fix creation of stateful window functions on misaligned memory. [#70631](https://github.com/ClickHouse/ClickHouse/pull/70631) ([Raúl Marín](https://github.com/Algunenano)).
* Fixed rare crashes in `SELECT`-s and merges after adding a column of `Array` type with non-empty default expression. [#70695](https://github.com/ClickHouse/ClickHouse/pull/70695) ([Anton Popov](https://github.com/CurtizJ)).
* Insert into table function s3 will respect query settings. [#70696](https://github.com/ClickHouse/ClickHouse/pull/70696) ([Vladimir Cherkasov](https://github.com/vdimir)).
* Fix infinite recursion when inferring a protobuf schema when skipping unsupported fields is enabled. [#70697](https://github.com/ClickHouse/ClickHouse/pull/70697) ([Raúl Marín](https://github.com/Algunenano)).
* Disable enable_named_columns_in_function_tuple by default. [#70833](https://github.com/ClickHouse/ClickHouse/pull/70833) ([Raúl Marín](https://github.com/Algunenano)).
* Fix S3Queue table engine setting processing_threads_num not being effective in case it was deduced from the number of cpu cores on the server. [#70837](https://github.com/ClickHouse/ClickHouse/pull/70837) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Normalize named tuple arguments in aggregation states. This fixes [#69732](https://github.com/ClickHouse/ClickHouse/issues/69732) . [#70853](https://github.com/ClickHouse/ClickHouse/pull/70853) ([Amos Bird](https://github.com/amosbird)).
* Fix a logical error due to negative zeros in the two-level hash table. This closes [#70973](https://github.com/ClickHouse/ClickHouse/issues/70973). [#70979](https://github.com/ClickHouse/ClickHouse/pull/70979) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `LIMIT BY` and `LIMIT WITH TIES` for distributed queries and parallel replicas. [#70880](https://github.com/ClickHouse/ClickHouse/pull/70880) ([Nikita Taranov](https://github.com/nickitat)).
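A minimal sketch of the asynchronous-insert behavior referenced above; the table name and inserted value are illustrative assumptions:

```sql
-- With wait_for_async_insert = 1, the INSERT returns only after the buffered data
-- is flushed to the table; any output produced by the flush itself is ignored.
CREATE TABLE async_target (x UInt64) ENGINE = MergeTree ORDER BY x;
INSERT INTO async_target SETTINGS async_insert = 1, wait_for_async_insert = 1 VALUES (1);
```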
### <a id="249"></a> ClickHouse release 24.9, 2024-09-26
#### Backward Incompatible Change
* Expressions like `a[b].c` are supported for named tuples, as well as named subscripts from arbitrary expressions, e.g., `expr().name`. This is useful for processing JSON. This closes [#54965](https://github.com/ClickHouse/ClickHouse/issues/54965). In previous versions, an expression of the form `expr().name` was parsed as `tupleElement(expr(), name)`, and the query analyzer was searching for a column `name` rather than for the corresponding tuple element; in the new version, it is changed to `tupleElement(expr(), 'name')`. In most cases, the previous version was not working, but it is possible to imagine a very unusual scenario when this change could lead to incompatibility: if you stored names of tuple elements in a column or an alias that was named differently from the tuple element's name: `SELECT 'b' AS a, CAST([tuple(123)] AS 'Array(Tuple(b UInt8))') AS t, t[1].a`. It is very unlikely that you used such queries, but we still have to mark this change as potentially backward incompatible (see the sketch after this list). [#68435](https://github.com/ClickHouse/ClickHouse/pull/68435) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* When the setting `print_pretty_type_names` is enabled, it will print `Tuple` data type in a pretty form in `SHOW CREATE TABLE` statements, `formatQuery` function, and in the interactive mode in `clickhouse-client` and `clickhouse-local`. In previous versions, this setting was only applied to `DESCRIBE` queries and `toTypeName`. This closes [#65753](https://github.com/ClickHouse/ClickHouse/issues/65753). [#68492](https://github.com/ClickHouse/ClickHouse/pull/68492) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Do not allow explicitly specifying UUID when creating a table in `Replicated` databases. Also, do not allow explicitly specifying Keeper path and replica name for *MergeTree tables in Replicated databases. It introduces a new setting `database_replicated_allow_explicit_uuid` and changes the type of `database_replicated_allow_replicated_engine_arguments` from Bool to UInt64. [#66104](https://github.com/ClickHouse/ClickHouse/pull/66104) ([Alexander Tokmakov](https://github.com/tavplubix)).
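A minimal sketch of the new tuple-element access described above (the values are illustrative):

```sql
-- t[1].b is now parsed as tupleElement(t[1], 'b'), i.e. the element named 'b'.
SELECT CAST([tuple(123)] AS 'Array(Tuple(b UInt8))') AS t, t[1].b;  -- returns 123
```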
#### New Feature
* Allow a user to have multiple authentication methods instead of only one. Allow authentication methods to be reset to the most recently added method. If you need to run some instances on 24.8 and others on 24.9 for some time, it is better to set `max_authentication_methods_per_user = 1` for that period to avoid potential incompatibilities. [#65277](https://github.com/ClickHouse/ClickHouse/pull/65277) ([Arthur Passos](https://github.com/arthurpassos)).
* Add support for `ATTACH PARTITION ALL FROM`. [#61987](https://github.com/ClickHouse/ClickHouse/pull/61987) ([Kirill Nikiforov](https://github.com/allmazz)).
* Add the `input_format_json_empty_as_default` setting which, when enabled, treats empty fields in JSON inputs as default values. Closes [#59339](https://github.com/ClickHouse/ClickHouse/issues/59339). [#66782](https://github.com/ClickHouse/ClickHouse/pull/66782) ([Alexis Arnaud](https://github.com/a-a-f)).
* Added functions `overlay` and `overlayUTF8` which replace parts of a string with another string (see the sketch after this list). Example: `SELECT overlay('Hello New York', 'Jersey', 11)` returns `Hello New Jersey`. [#66933](https://github.com/ClickHouse/ClickHouse/pull/66933) ([李扬](https://github.com/taiyang-li)).
* Add support for lightweight deletes within a partition: `DELETE FROM [db.]table [ON CLUSTER cluster] [IN PARTITION partition_expr] WHERE expr` (see the sketch after this list). [#67805](https://github.com/ClickHouse/ClickHouse/pull/67805) ([sunny](https://github.com/sunny19930321)).
* Implemented comparison for `Interval` data type values of different domains (such as seconds and minutes), so they are now converted to the least supertype. [#68057](https://github.com/ClickHouse/ClickHouse/pull/68057) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add `create_if_not_exists` setting to default to `IF NOT EXISTS` behavior during CREATE statements. [#68164](https://github.com/ClickHouse/ClickHouse/pull/68164) ([Peter Nguyen](https://github.com/petern48)).
* Makes it possible to read `Iceberg` tables in Azure and locally. [#68210](https://github.com/ClickHouse/ClickHouse/pull/68210) ([Daniil Ivanik](https://github.com/divanik)).
* Query cache entries can now be dropped by tag. For example, the query cache entry created by `SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'` can now be dropped by `SYSTEM DROP QUERY CACHE TAG 'abc'`. [#68477](https://github.com/ClickHouse/ClickHouse/pull/68477) ([Michał Tabaszewski](https://github.com/pinsvin00)).
* Add storage encryption for named collections. [#68615](https://github.com/ClickHouse/ClickHouse/pull/68615) ([Pablo Marcos](https://github.com/pamarcos)).
* Add virtual column `_headers` for the `URL` table engine. Closes [#65026](https://github.com/ClickHouse/ClickHouse/issues/65026). [#68867](https://github.com/ClickHouse/ClickHouse/pull/68867) ([flynn](https://github.com/ucasfl)).
* Add `system.projections` table to track available projections. [#68901](https://github.com/ClickHouse/ClickHouse/pull/68901) ([Jordi Villar](https://github.com/jrdi)).
* Add a new function `arrayZipUnaligned` for Spark compatibility (it is named `arrays_zip` in Spark); unlike the original `arrayZip`, it allows unaligned arrays. [#69030](https://github.com/ClickHouse/ClickHouse/pull/69030) ([李扬](https://github.com/taiyang-li)).
* Added `cp`/`mv` commands for the keeper client command line application, which atomically copy/move a node. [#69034](https://github.com/ClickHouse/ClickHouse/pull/69034) ([Mikhail Artemenko](https://github.com/Michicosun)).
* Adds an argument `scale` (default: `true`) to the function `arrayAUC`, which allows skipping the normalization step (issue [#69609](https://github.com/ClickHouse/ClickHouse/issues/69609)). [#69717](https://github.com/ClickHouse/ClickHouse/pull/69717) ([gabrielmcg44](https://github.com/gabrielmcg44)).
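Minimal sketches of two of the features above; the table name `db.events`, the partition value, and the `user_id` column are illustrative assumptions:

```sql
SELECT overlay('Hello New York', 'Jersey', 11);  -- 'Hello New Jersey'

-- A lightweight delete restricted to a single partition:
DELETE FROM db.events IN PARTITION '2024-09' WHERE user_id = 42;
```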
#### Experimental feature
* Adds a setting `input_format_try_infer_variants` which allows `Variant` type to be inferred during schema inference for text formats when there is more than one possible type for column/array elements. [#63798](https://github.com/ClickHouse/ClickHouse/pull/63798) ([Shaun Struwig](https://github.com/Blargian)).
* Add aggregate functions `distinctDynamicTypes`/`distinctJSONPaths`/`distinctJSONPathsAndTypes` for better introspection of the content of the JSON column type (see the sketch after this list). [#68463](https://github.com/ClickHouse/ClickHouse/pull/68463) ([Kruglov Pavel](https://github.com/Avogar)).
* New algorithm to determine the unit of marks distribution between parallel replicas by a consistent hash. Different numbers of marks are chosen for different read patterns to improve performance. [#68424](https://github.com/ClickHouse/ClickHouse/pull/68424) ([Nikita Taranov](https://github.com/nickitat)).
* Previously, the algorithmic complexity of the part deduplication logic in parallel replica announcement handling was O(n^2), which could take noticeable time for tables with many parts (or partitions). This change makes the complexity O(n*log(n)). [#69596](https://github.com/ClickHouse/ClickHouse/pull/69596) ([Alexander Gololobov](https://github.com/davenger)).
* Refreshable materialized view improvements: append mode (`... REFRESH EVERY 1 MINUTE APPEND ...`) to add rows to existing table instead of overwriting the whole table, retries (disabled by default, configured in SETTINGS section of the query), `SYSTEM WAIT VIEW <name>` query that waits for the currently running refresh, some fixes. [#58934](https://github.com/ClickHouse/ClickHouse/pull/58934) ([Michael Kolupaev](https://github.com/al13n321)).
* Added `min_max` as a new type of (experimental) statistics. It supports estimating range predicates over numeric columns, e.g. `x < 100`. [#67013](https://github.com/ClickHouse/ClickHouse/pull/67013) ([JackyWoo](https://github.com/JackyWoo)).
* Improve `castOrDefault` from `Variant`/`Dynamic` columns so it works when the inner types are not convertible at all. [#67150](https://github.com/ClickHouse/ClickHouse/pull/67150) ([Kruglov Pavel](https://github.com/Avogar)).
* Replication of a subset of columns is now available through MaterializedPostgreSQL. Closes [#33748](https://github.com/ClickHouse/ClickHouse/issues/33748). [#69092](https://github.com/ClickHouse/ClickHouse/pull/69092) ([Kruglov Kirill](https://github.com/1on)).
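A minimal sketch of the JSON introspection functions mentioned above; the table `events` and its JSON column `json` are illustrative assumptions:

```sql
-- Which paths occur inside the JSON column, and with which types:
SELECT distinctJSONPaths(json), distinctJSONPathsAndTypes(json)
FROM events;
```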
#### Performance Improvement
* Implemented reading of only the required files for Hive partitioning. [#68963](https://github.com/ClickHouse/ClickHouse/pull/68963) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Improve JOIN performance by rearranging the right table by keys when the table keys are dense in LEFT or INNER hash joins. [#60341](https://github.com/ClickHouse/ClickHouse/pull/60341) ([kevinyhzou](https://github.com/KevinyhZou)).
* Improve ALL JOIN performance by appending the list of rows lazily. [#63677](https://github.com/ClickHouse/ClickHouse/pull/63677) ([kevinyhzou](https://github.com/KevinyhZou)).
* Load filesystem cache metadata asynchronously during the boot process, in order to make restarts faster (controlled by setting `load_metadata_asynchronously`). [#65736](https://github.com/ClickHouse/ClickHouse/pull/65736) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Functions `array` and `map` were optimized to process certain common cases much faster. [#67707](https://github.com/ClickHouse/ClickHouse/pull/67707) ([李扬](https://github.com/taiyang-li)).
* A trivial optimization for reading ORC strings, especially when a column contains no NULLs. [#67794](https://github.com/ClickHouse/ClickHouse/pull/67794) ([李扬](https://github.com/taiyang-li)).
* Improved overall performance of merges by reducing the overhead of scheduling steps of merges. [#68016](https://github.com/ClickHouse/ClickHouse/pull/68016) ([Anton Popov](https://github.com/CurtizJ)).
* Speed up requests to S3 when a profile is not set, credentials are not set, and IMDS is not available (for example, when you are querying a public bucket on a machine outside of a cloud). This closes [#52771](https://github.com/ClickHouse/ClickHouse/issues/52771). [#68082](https://github.com/ClickHouse/ClickHouse/pull/68082) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Devirtualize format reader in `RowInputFormatWithNamesAndTypes` for some performance improvement. [#68437](https://github.com/ClickHouse/ClickHouse/pull/68437) ([李扬](https://github.com/taiyang-li)).
* Add parallel merging for the `uniq` aggregate function when aggregating with a GROUP BY key, to maximize CPU utilization. [#68441](https://github.com/ClickHouse/ClickHouse/pull/68441) ([Jiebin Sun](https://github.com/jiebinn)).
* Add the setting `output_format_orc_dictionary_key_size_threshold` to allow users to enable dictionary encoding for String columns in the `ORC` output format (see the sketch after this list). It helps to reduce the output `ORC` file size and significantly improve reading performance. [#68591](https://github.com/ClickHouse/ClickHouse/pull/68591) ([李扬](https://github.com/taiyang-li)).
* Introduce a new Keeper request, `RemoveRecursive`, which removes a node together with its entire subtree. [#69332](https://github.com/ClickHouse/ClickHouse/pull/69332) ([Mikhail Artemenko](https://github.com/Michicosun)).
* Speed up insertion into a table with a vector similarity index by adding data to the vector index in parallel. [#69493](https://github.com/ClickHouse/ClickHouse/pull/69493) ([flynn](https://github.com/ucasfl)).
* Reduce memory usage of inserts to JSON by using an adaptive write buffer size. Many files created by a JSON column in a wide part contain a small amount of data, and it doesn't make sense to allocate a 1MB buffer for them. [#69272](https://github.com/ClickHouse/ClickHouse/pull/69272) ([Kruglov Pavel](https://github.com/Avogar)).
* Avoid returning threads to the concurrent hash join thread pool, to prevent queries from excessively spawning threads. [#69406](https://github.com/ClickHouse/ClickHouse/pull/69406) ([Duc Canh Le](https://github.com/canhld94)).
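A minimal sketch of the ORC dictionary-encoding setting mentioned above; the threshold value, table, column, and output file name are illustrative assumptions:

```sql
SET output_format_orc_dictionary_key_size_threshold = 0.8;  -- illustrative value
SELECT str FROM events INTO OUTFILE 'out.orc' FORMAT ORC;
```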
#### Improvement
* CREATE TABLE AS now copies PRIMARY KEY, ORDER BY, and similar clauses. Currently, this is supported only for the MergeTree family of table engines. [#69076](https://github.com/ClickHouse/ClickHouse/pull/69076) ([sakulali](https://github.com/sakulali)).
* Hardened parts of the codebase related to parsing of small entities. The following (minor) bugs were found and fixed: if a `DeltaLake` table is partitioned by Bool, the partition value was always interpreted as false; the `ExternalDistributed` table was using only a single shard in the provided addresses; the value of the `max_threads` setting and similar ones was printed as `'auto(N)'` instead of `auto(N)`. [#52503](https://github.com/ClickHouse/ClickHouse/pull/52503) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Use cgroup-specific metrics for CPU usage accounting instead of system-wide metrics. [#62003](https://github.com/ClickHouse/ClickHouse/pull/62003) ([Nikita Taranov](https://github.com/nickitat)).
* IO scheduling for remote S3 disks is now done on the level of HTTP socket streams (instead of the whole S3 requests) to resolve `bandwidth_limit` throttling issues. [#65182](https://github.com/ClickHouse/ClickHouse/pull/65182) ([Sergei Trifonov](https://github.com/serxa)).
* Functions `upperUTF8` and `lowerUTF8` were previously only able to uppercase / lowercase Cyrillic characters. This limitation is now removed and characters in arbitrary languages are uppercased/lowercased. Example: `SELECT upperUTF8('Süden')` now returns `SÜDEN`. [#65761](https://github.com/ClickHouse/ClickHouse/pull/65761) ([李扬](https://github.com/taiyang-li)).
* When a lightweight delete happens on a table with projections, users previously had two choices: throw an exception (the default) or drop the projections. Now there is a third option: perform the lightweight delete and then rebuild the projections. [#66169](https://github.com/ClickHouse/ClickHouse/pull/66169) ([jsc0218](https://github.com/jsc0218)).
* Two options (`dns_allow_resolve_names_to_ipv4` and `dns_allow_resolve_names_to_ipv6`) have been added to allow blocking connections by IP family. [#66895](https://github.com/ClickHouse/ClickHouse/pull/66895) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Make ignoring Ctrl-Z configurable (`ignore_shell_suspend`) in clickhouse-client. [#67134](https://github.com/ClickHouse/ClickHouse/pull/67134) ([Azat Khuzhin](https://github.com/azat)).
* Improve UTF-8 validation in JSON output formats. Ensures that valid JSON is generated in the case of certain byte sequences in the result data. [#67938](https://github.com/ClickHouse/ClickHouse/pull/67938) ([mwoenker](https://github.com/mwoenker)).
* Added profile events for merges and mutations for better introspection. [#68015](https://github.com/ClickHouse/ClickHouse/pull/68015) ([Anton Popov](https://github.com/CurtizJ)).
* ODBC: get `http_max_tries` from the server configuration. [#68128](https://github.com/ClickHouse/ClickHouse/pull/68128) ([Rodolphe Dugé de Bernonville](https://github.com/RodolpheDuge)).
* Add wildcard support for user identification in X.509 SubjectAltName extension. [#68236](https://github.com/ClickHouse/ClickHouse/pull/68236) ([Marco Vilas Boas](https://github.com/marco-vb)).
* Improve schema inference of date-times. Now `DateTime64` is used only when the date-time value has a fractional part; otherwise, regular `DateTime` is used. Inference of Date/DateTime is more strict now, especially when `date_time_input_format='best_effort'`, to avoid inferring date-times from strings in corner cases. [#68382](https://github.com/ClickHouse/ClickHouse/pull/68382) ([Kruglov Pavel](https://github.com/Avogar)).
* Delete the old named-collections code from dictionaries and replace it with the new one, which allows using DDL-created named collections in dictionaries. Closes [#60936](https://github.com/ClickHouse/ClickHouse/issues/60936), closes [#36890](https://github.com/ClickHouse/ClickHouse/issues/36890). [#68412](https://github.com/ClickHouse/ClickHouse/pull/68412) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Use HTTP/1.1 instead of HTTP/1.0 (set by default) for external HTTP authenticators. [#68456](https://github.com/ClickHouse/ClickHouse/pull/68456) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Added a new set of metrics for thread pool introspection, providing deeper insights into thread pool performance and behavior. [#68674](https://github.com/ClickHouse/ClickHouse/pull/68674) ([filimonov](https://github.com/filimonov)).
* Support query parameters in async inserts with format `Values`. [#68741](https://github.com/ClickHouse/ClickHouse/pull/68741) ([Anton Popov](https://github.com/CurtizJ)).
* Support `Date32` on `dateTrunc` and `toStartOfInterval` (see the sketch after this list). [#68874](https://github.com/ClickHouse/ClickHouse/pull/68874) ([LiuNeng](https://github.com/liuneng1994)).
* Add `plan_step_name` and `plan_step_description` columns to `system.processors_profile_log`. [#68954](https://github.com/ClickHouse/ClickHouse/pull/68954) ([Alexander Gololobov](https://github.com/davenger)).
* Support for the Spanish language in the embedded dictionaries. [#69035](https://github.com/ClickHouse/ClickHouse/pull/69035) ([Vasily Okunev](https://github.com/VOkunev)).
* Add CPU arch to the short fault information message. [#69037](https://github.com/ClickHouse/ClickHouse/pull/69037) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Queries will fail faster if a new Keeper connection cannot be established during retries. [#69148](https://github.com/ClickHouse/ClickHouse/pull/69148) ([Raúl Marín](https://github.com/Algunenano)).
* Update the database factory so that user-defined database engines can have arguments, settings, and table overrides (similar to `StorageFactory`). [#69201](https://github.com/ClickHouse/ClickHouse/pull/69201) ([NikBarykin](https://github.com/NikBarykin)).
* The restore mode that replaces all external table engines and functions with the `Null` engine (the `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` settings) was failing if the table had SETTINGS. Now it removes the settings from the table definition in this case and allows restoring such tables. [#69253](https://github.com/ClickHouse/ClickHouse/pull/69253) ([Ilya Yatsishin](https://github.com/qoega)).
* `CLICKHOUSE_PASSWORD` is now correctly escaped for XML in the ClickHouse image's entrypoint. [#69301](https://github.com/ClickHouse/ClickHouse/pull/69301) ([aohoyd](https://github.com/aohoyd)).
* Allow empty arguments for `arrayZip`/`arrayZipUnaligned`, as `concat` did in https://github.com/ClickHouse/ClickHouse/pull/65887. It is for Spark compatibility in the Gluten ClickHouse backend. [#69576](https://github.com/ClickHouse/ClickHouse/pull/69576) ([李扬](https://github.com/taiyang-li)).
* Support more advanced SSL options for Keeper's internal communication (e.g. private keys with passphrase). [#69582](https://github.com/ClickHouse/ClickHouse/pull/69582) ([Antonio Andelic](https://github.com/antonio2368)).
* Index analysis can take noticeable time for big tables with many parts or partitions. This change should enable killing a heavy query at that stage. [#69606](https://github.com/ClickHouse/ClickHouse/pull/69606) ([Alexander Gololobov](https://github.com/davenger)).
* Masking sensitive info in `gcs` table function. [#69611](https://github.com/ClickHouse/ClickHouse/pull/69611) ([Vitaly Baranov](https://github.com/vitlibar)).
* Rebuild projections for merges that reduce the number of rows. [#62364](https://github.com/ClickHouse/ClickHouse/pull/62364) ([cangyin](https://github.com/cangyin)).
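A minimal sketch of the `Date32` support mentioned above (the date is illustrative):

```sql
SELECT dateTrunc('month', toDate32('1950-02-17')),
       toStartOfInterval(toDate32('1950-02-17'), INTERVAL 1 MONTH);
```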
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix attaching a table when the PostgreSQL database name contains `-` in the experimental and unsupported MaterializedPostgreSQL engine. [#62730](https://github.com/ClickHouse/ClickHouse/pull/62730) ([takakawa](https://github.com/takakawa)).
* Fixed an error on generated columns in the experimental and totally unsupported MaterializedPostgreSQL engine when `adnum` ordering is broken [#63161](https://github.com/ClickHouse/ClickHouse/issues/63161). Fixed an error on an `id` column with a `nextval` expression as default in the experimental and totally unsupported MaterializedPostgreSQL engine when there are generated columns in the table. Fixed an error on dropping a publication with symbols other than \[a-z1-9-\]. [#67664](https://github.com/ClickHouse/ClickHouse/pull/67664) ([Kruglov Kirill](https://github.com/1on)).
* Storage `Join` now supports Nullable columns in the left table. Closes [#61247](https://github.com/ClickHouse/ClickHouse/issues/61247). [#66926](https://github.com/ClickHouse/ClickHouse/pull/66926) ([vdimir](https://github.com/vdimir)).
* Fix incorrect query results with parallel replicas (distributed queries as well) when the `IN` operator contains a conversion to Decimal. The bug was introduced with the new analyzer. [#67234](https://github.com/ClickHouse/ClickHouse/pull/67234) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix the problem that `ALTER ... MODIFY ORDER BY` causes inconsistent metadata. [#67436](https://github.com/ClickHouse/ClickHouse/pull/67436) ([iceFireser](https://github.com/iceFireser)).
* Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
* Fix when the index is not at the beginning of the tuple during `IN` query. [#67626](https://github.com/ClickHouse/ClickHouse/pull/67626) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix expiration in `RoleCache`. [#67748](https://github.com/ClickHouse/ClickHouse/pull/67748) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix window view missing blocks due to slow flush to view. [#67983](https://github.com/ClickHouse/ClickHouse/pull/67983) ([Raúl Marín](https://github.com/Algunenano)).
* Fix MSan issue caused by incorrect date format. [#68105](https://github.com/ClickHouse/ClickHouse/pull/68105) ([JackyWoo](https://github.com/JackyWoo)).
* Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix crash in `lag`/`lead` which is introduced in [#67091](https://github.com/ClickHouse/ClickHouse/issues/67091). [#68262](https://github.com/ClickHouse/ClickHouse/pull/68262) ([lgbo](https://github.com/lgbo-ustc)).
* Try to fix a PostgreSQL crash when a query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
* After https://github.com/ClickHouse/ClickHouse/pull/61984, `schema_inference_make_columns_nullable=0` could still make columns `Nullable` in Parquet/Arrow formats. The change was backward incompatible, and users noticed the change in behaviour. This PR makes `schema_inference_make_columns_nullable=0` work as before (no Nullable columns will be inferred) and introduces a new value `auto` for this setting that makes columns `Nullable` only if the data has information about nullability (see the sketch after this list). [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
* Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
* Fix bug in key condition. [#68354](https://github.com/ClickHouse/ClickHouse/pull/68354) ([Han Fei](https://github.com/hanfei1991)).
* Fix a crash on dropping or renaming a role that is used in an LDAP external user directory. [#68355](https://github.com/ClickHouse/ClickHouse/pull/68355) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix the `progress` column value of `system.view_refreshes` being greater than 1 [#68377](https://github.com/ClickHouse/ClickHouse/issues/68377). [#68378](https://github.com/ClickHouse/ClickHouse/pull/68378) ([megao](https://github.com/jetgm)).
* Process regexp flags correctly. [#68389](https://github.com/ClickHouse/ClickHouse/pull/68389) ([Han Fei](https://github.com/hanfei1991)).
* PostgreSQL-style cast operator (`::`) works correctly even for SQL-style hex and binary string literals (e.g., `SELECT x'414243'::String`). This closes [#68324](https://github.com/ClickHouse/ClickHouse/issues/68324). [#68482](https://github.com/ClickHouse/ClickHouse/pull/68482) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Minor patch for https://github.com/ClickHouse/ClickHouse/pull/68131. [#68494](https://github.com/ClickHouse/ClickHouse/pull/68494) ([Chang chen](https://github.com/baibaichen)).
* Fix [#68239](https://github.com/ClickHouse/ClickHouse/issues/68239): `SAMPLE n` where `n` is an integer. [#68499](https://github.com/ClickHouse/ClickHouse/pull/68499) ([Denis Hananein](https://github.com/denis-hananein)).
* Fix a bug in `mannWhitneyUTest` when the sizes of the two distributions are not equal. [#68556](https://github.com/ClickHouse/ClickHouse/pull/68556) ([Han Fei](https://github.com/hanfei1991)).
* Fix a failure to start replication of ReplicatedMergeTree after an unexpected restart, caused by abnormal handling of a covered-by-broken part. [#68584](https://github.com/ClickHouse/ClickHouse/pull/68584) ([baolin](https://github.com/baolinhuang)).
* Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
* The full-text index could produce wrong filtering results when indexing multiple columns, because it didn't reset `row_id` between different columns; the reproduction procedure is in tests/queries/0_stateless/03228_full_text_with_multi_col.sql. [#68644](https://github.com/ClickHouse/ClickHouse/pull/68644) ([siyuan](https://github.com/linkwk7)).
* Fix invalid characters `\t` and `\n` in `replica_name` when creating a Replicated table, which caused incorrect parsing of 'source replica' in LogEntry. Mentioned in issue [#68640](https://github.com/ClickHouse/ClickHouse/issues/68640). [#68645](https://github.com/ClickHouse/ClickHouse/pull/68645) ([Zhigao Hong](https://github.com/zghong)).
* Added back virtual columns `_table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible error `DB::Exception: Block structure mismatch in joined block stream: different columns:` with new JSON column. [#68686](https://github.com/ClickHouse/ClickHouse/pull/68686) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix issue with materialized constant keys when hashing maps with arrays as keys in functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Make `ColumnsDescription::toString` format each column using the same `IAST::FormatState` object. This results in uniform columns metadata being written to disk and ZooKeeper. [#68733](https://github.com/ClickHouse/ClickHouse/pull/68733) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Fix merging of aggregated data for grouping sets. [#68744](https://github.com/ClickHouse/ClickHouse/pull/68744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix a logical error when we create a replicated merge tree table, alter a column, and then execute `MODIFY STATISTICS`. [#68820](https://github.com/ClickHouse/ClickHouse/pull/68820) ([Han Fei](https://github.com/hanfei1991)).
* Fix resolving dynamic subcolumns from subqueries in analyzer. [#68824](https://github.com/ClickHouse/ClickHouse/pull/68824) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix complex types metadata parsing in DeltaLake. Closes [#68739](https://github.com/ClickHouse/ClickHouse/issues/68739). [#68836](https://github.com/ClickHouse/ClickHouse/pull/68836) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed asynchronous inserts in case when metadata of table is changed (by `ALTER ADD/MODIFY COLUMN` queries) after insert but before flush to the table. [#68837](https://github.com/ClickHouse/ClickHouse/pull/68837) ([Anton Popov](https://github.com/CurtizJ)).
* Fix unexpected exception when passing empty tuple in array. This fixes [#68618](https://github.com/ClickHouse/ClickHouse/issues/68618). [#68848](https://github.com/ClickHouse/ClickHouse/pull/68848) ([Amos Bird](https://github.com/amosbird)).
* Fix parsing pure metadata mutations commands. [#68935](https://github.com/ClickHouse/ClickHouse/pull/68935) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix possible wrong result during `anyHeavy` state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
* Fixed writing to Materialized Views with enabled setting `optimize_functions_to_subcolumns`. [#68951](https://github.com/ClickHouse/ClickHouse/pull/68951) ([Anton Popov](https://github.com/CurtizJ)).
* Don't use the serializations cache in const `Dynamic` column methods. It could lead to a use-of-uninitialized-value error or even a race condition during aggregations. [#68953](https://github.com/ClickHouse/ClickHouse/pull/68953) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix parsing error when null should be inserted as default in some cases during JSON type parsing. [#68955](https://github.com/ClickHouse/ClickHouse/pull/68955) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix `Content-Encoding` not sent in some compressed responses. [#64802](https://github.com/ClickHouse/ClickHouse/issues/64802). [#68975](https://github.com/ClickHouse/ClickHouse/pull/68975) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* There were cases when a path was concatenated incorrectly and contained the `//` part; this is solved using path normalization. [#69066](https://github.com/ClickHouse/ClickHouse/pull/69066) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix a logical error when we have an empty async insert. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).
* Fixed data race of progress indication in clickhouse-client during query canceling. [#69081](https://github.com/ClickHouse/ClickHouse/pull/69081) ([Sergei Trifonov](https://github.com/serxa)).
* Fix a bug that the vector similarity index (currently experimental) was not utilized when used with cosine distance as distance function. [#69090](https://github.com/ClickHouse/ClickHouse/pull/69090) ([flynn](https://github.com/ucasfl)).
* This change addresses an issue where attempting to create a Replicated database again after a server failure during the initial creation process could result in error. [#69102](https://github.com/ClickHouse/ClickHouse/pull/69102) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Don't infer Bool type from String in CSV when `input_format_csv_try_infer_numbers_from_strings = 1` because we don't allow reading bool values from strings. [#69109](https://github.com/ClickHouse/ClickHouse/pull/69109) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix parsing errors of `EXPLAIN AST` insert queries on the client when `--multiquery` is enabled. [#69123](https://github.com/ClickHouse/ClickHouse/pull/69123) ([wxybear](https://github.com/wxybear)).
* The `UNION` clause in subqueries wasn't handled correctly in queries with parallel replicas and led to a LOGICAL_ERROR `Duplicate announcement received for replica`. [#69146](https://github.com/ClickHouse/ClickHouse/pull/69146) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix propagating the structure argument in `s3Cluster`. Previously, the `DEFAULT` expression of the column could be lost when sending the query to the replicas in `s3Cluster`. [#69147](https://github.com/ClickHouse/ClickHouse/pull/69147) ([Kruglov Pavel](https://github.com/Avogar)).
* Respect format settings in Values format during conversion from expression to the destination type. [#69149](https://github.com/ClickHouse/ClickHouse/pull/69149) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix `clickhouse-client --queries-file` for readonly users (previously fails with `Cannot modify 'log_comment' setting in readonly mode`). [#69175](https://github.com/ClickHouse/ClickHouse/pull/69175) ([Azat Khuzhin](https://github.com/azat)).
* Fix data race in clickhouse-client when it's piped to a process that terminated early. [#69186](https://github.com/ClickHouse/ClickHouse/pull/69186) ([vdimir](https://github.com/vdimir)).
* Fix incorrect results of `uniq` and GROUP BY for JSON/Dynamic types. [#69203](https://github.com/ClickHouse/ClickHouse/pull/69203) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix the INFILE format detection for asynchronous inserts. If the format is not explicitly defined in the FORMAT clause, it can be detected from the INFILE file extension. [#69237](https://github.com/ClickHouse/ClickHouse/pull/69237) ([Julia Kartseva](https://github.com/jkartseva)).
* After [this issue](https://github.com/ClickHouse/ClickHouse/pull/59946#issuecomment-1943653197) there are quite a few table replicas in production such that their `metadata_version` node value is both equal to `0` and is different from the respective table's `metadata` node version. This leads to `alter` queries failing on such replicas. [#69274](https://github.com/ClickHouse/ClickHouse/pull/69274) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Mark the `Dynamic` type as an unsafe primary key type to avoid issues with `Field`s. [#69311](https://github.com/ClickHouse/ClickHouse/pull/69311) ([Kruglov Pavel](https://github.com/Avogar)).
* Improve restoring of access entities' dependencies. [#69346](https://github.com/ClickHouse/ClickHouse/pull/69346) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix undefined behavior when all connection attempts fail getting a connection for insertions. [#69390](https://github.com/ClickHouse/ClickHouse/pull/69390) ([Pablo Marcos](https://github.com/pamarcos)).
* Close [#69135](https://github.com/ClickHouse/ClickHouse/issues/69135). We tried to reuse joined data for a `CROSS` join, but this cannot happen in ClickHouse at present. It's better to keep `have_compressed` in `reuseJoinedData`. [#69404](https://github.com/ClickHouse/ClickHouse/pull/69404) ([lgbo](https://github.com/lgbo-ustc)).
* Make `materialize()` function return full column when parameter is a sparse column. [#69429](https://github.com/ClickHouse/ClickHouse/pull/69429) ([Alexander Gololobov](https://github.com/davenger)).
* Fixed a `LOGICAL_ERROR` with function `sqidDecode` ([#69450](https://github.com/ClickHouse/ClickHouse/issues/69450)). [#69451](https://github.com/ClickHouse/ClickHouse/pull/69451) ([Robert Schulze](https://github.com/rschu1ze)).
* A quick fix for an `S3Queue` problem on 24.6 with a CREATE query in a `Replicated` database. [#69454](https://github.com/ClickHouse/ClickHouse/pull/69454) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed case when memory consumption was too high because of the squashing in `INSERT INTO ... SELECT` or `CREATE TABLE AS SELECT` queries. [#69469](https://github.com/ClickHouse/ClickHouse/pull/69469) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Statements `SHOW COLUMNS` and `SHOW INDEX` now work properly if the table has dots in its name. [#69514](https://github.com/ClickHouse/ClickHouse/pull/69514) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Usage of the query cache for queries with an overflow mode != 'throw' is now disallowed. This prevents situations where potentially truncated and incorrect query results could be stored in the query cache. (issue [#67476](https://github.com/ClickHouse/ClickHouse/issues/67476)). [#69549](https://github.com/ClickHouse/ClickHouse/pull/69549) ([Robert Schulze](https://github.com/rschu1ze)).
* Keep original order of conditions during move to prewhere. Previously the order could change and it could lead to failing queries when the order is important. [#69560](https://github.com/ClickHouse/ClickHouse/pull/69560) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix Keeper multi-request preprocessing after ZNOAUTH error. [#69627](https://github.com/ClickHouse/ClickHouse/pull/69627) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix METADATA_MISMATCH that might have happened due to TTL with a WHERE clause in DatabaseReplicated when creating a new replica. [#69736](https://github.com/ClickHouse/ClickHouse/pull/69736) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix `StorageS3(Azure)Queue` settings `tracked_file_ttl_sec`. We wrote it to keeper with key `tracked_file_ttl_sec`, but read as `tracked_files_ttl_sec`, which was a typo. [#69742](https://github.com/ClickHouse/ClickHouse/pull/69742) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Use `tryConvertFieldToType` in `getHyperrectangleForRowGroup`. [#69745](https://github.com/ClickHouse/ClickHouse/pull/69745) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Revert "Fix prewhere without columns and without adaptive index granularity (almost w/o anything)". Due to the reverted changes, some errors might happen when reading data parts produced by old ClickHouse releases (presumably 2021 or older). [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).
### <a id="248"></a> ClickHouse release 24.8 LTS, 2024-08-20 ### <a id="248"></a> ClickHouse release 24.8 LTS, 2024-08-20
#### Backward Incompatible Change #### Backward Incompatible Change
@ -28,7 +351,7 @@
* Add `_etag` virtual column for S3 table engine. Fixes [#65312](https://github.com/ClickHouse/ClickHouse/issues/65312). [#65386](https://github.com/ClickHouse/ClickHouse/pull/65386) ([skyoct](https://github.com/skyoct)). * Add `_etag` virtual column for S3 table engine. Fixes [#65312](https://github.com/ClickHouse/ClickHouse/issues/65312). [#65386](https://github.com/ClickHouse/ClickHouse/pull/65386) ([skyoct](https://github.com/skyoct)).
* Added a tagging (namespace) mechanism for the query cache. The same queries with different tags are considered different by the query cache. Example: `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'abc'` and `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'def'` now create different query cache entries. [#68235](https://github.com/ClickHouse/ClickHouse/pull/68235) ([sakulali](https://github.com/sakulali)). * Added a tagging (namespace) mechanism for the query cache. The same queries with different tags are considered different by the query cache. Example: `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'abc'` and `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'def'` now create different query cache entries. [#68235](https://github.com/ClickHouse/ClickHouse/pull/68235) ([sakulali](https://github.com/sakulali)).
* Support more variants of JOIN strictness (`LEFT/RIGHT SEMI/ANTI/ANY JOIN`) with inequality conditions which involve columns from both left and right table. e.g. `t1.y < t2.y` (see the setting `allow_experimental_join_condition`). [#64281](https://github.com/ClickHouse/ClickHouse/pull/64281) ([lgbo](https://github.com/lgbo-ustc)). * Support more variants of JOIN strictness (`LEFT/RIGHT SEMI/ANTI/ANY JOIN`) with inequality conditions which involve columns from both left and right table. e.g. `t1.y < t2.y` (see the setting `allow_experimental_join_condition`). [#64281](https://github.com/ClickHouse/ClickHouse/pull/64281) ([lgbo](https://github.com/lgbo-ustc)).
* Intrpret Hive-style partitioning for different engines (`File`, `URL`, `S3`, `AzureBlobStorage`, `HDFS`). Hive-style partitioning organizes data into partitioned sub-directories, making it efficient to query and manage large datasets. Currently, it only creates virtual columns with the appropriate name and data. The follow-up PR will introduce the appropriate data filtering (performance speedup). [#65997](https://github.com/ClickHouse/ClickHouse/pull/65997) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). * Interpret Hive-style partitioning for different engines (`File`, `URL`, `S3`, `AzureBlobStorage`, `HDFS`). Hive-style partitioning organizes data into partitioned sub-directories, making it efficient to query and manage large datasets. Currently, it only creates virtual columns with the appropriate name and data. The follow-up PR will introduce the appropriate data filtering (performance speedup). [#65997](https://github.com/ClickHouse/ClickHouse/pull/65997) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add function `printf` for Spark compatiability (but you can use the existing `format` function). [#66257](https://github.com/ClickHouse/ClickHouse/pull/66257) ([李扬](https://github.com/taiyang-li)). * Add function `printf` for Spark compatiability (but you can use the existing `format` function). [#66257](https://github.com/ClickHouse/ClickHouse/pull/66257) ([李扬](https://github.com/taiyang-li)).
* Add options `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` to replace external engines and table_engines to `Null` engine that can be useful for testing. It should work for RESTORE and explicit table creation. [#66536](https://github.com/ClickHouse/ClickHouse/pull/66536) ([Ilya Yatsishin](https://github.com/qoega)). * Add options `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` to replace external engines and table_engines to `Null` engine that can be useful for testing. It should work for RESTORE and explicit table creation. [#66536](https://github.com/ClickHouse/ClickHouse/pull/66536) ([Ilya Yatsishin](https://github.com/qoega)).
* Added support for reading `MULTILINESTRING` geometry in `WKT` format using function `readWKTLineString`. [#67647](https://github.com/ClickHouse/ClickHouse/pull/67647) ([Jacob Reckhard](https://github.com/jacobrec)). * Added support for reading `MULTILINESTRING` geometry in `WKT` format using function `readWKTLineString`. [#67647](https://github.com/ClickHouse/ClickHouse/pull/67647) ([Jacob Reckhard](https://github.com/jacobrec)).
@ -80,7 +403,6 @@
* Automatically retry Keeper requests in KeeperMap if they happen because of timeout or connection loss. [#67448](https://github.com/ClickHouse/ClickHouse/pull/67448) ([Antonio Andelic](https://github.com/antonio2368)). * Automatically retry Keeper requests in KeeperMap if they happen because of timeout or connection loss. [#67448](https://github.com/ClickHouse/ClickHouse/pull/67448) ([Antonio Andelic](https://github.com/antonio2368)).
* Add `-no-pie` to Aarch64 Linux builds to allow proper introspection and symbolizing of stacktraces after a ClickHouse restart. [#67916](https://github.com/ClickHouse/ClickHouse/pull/67916) ([filimonov](https://github.com/filimonov)). * Add `-no-pie` to Aarch64 Linux builds to allow proper introspection and symbolizing of stacktraces after a ClickHouse restart. [#67916](https://github.com/ClickHouse/ClickHouse/pull/67916) ([filimonov](https://github.com/filimonov)).
* Added profile events for merges and mutations for better introspection. [#68015](https://github.com/ClickHouse/ClickHouse/pull/68015) ([Anton Popov](https://github.com/CurtizJ)). * Added profile events for merges and mutations for better introspection. [#68015](https://github.com/ClickHouse/ClickHouse/pull/68015) ([Anton Popov](https://github.com/CurtizJ)).
* Fix settings and `current_database` in `system.processes` for async BACKUP/RESTORE. [#68163](https://github.com/ClickHouse/ClickHouse/pull/68163) ([Azat Khuzhin](https://github.com/azat)).
* Remove unnecessary logs for non-replicated `MergeTree`. [#68238](https://github.com/ClickHouse/ClickHouse/pull/68238) ([Daniil Ivanik](https://github.com/divanik)). * Remove unnecessary logs for non-replicated `MergeTree`. [#68238](https://github.com/ClickHouse/ClickHouse/pull/68238) ([Daniil Ivanik](https://github.com/divanik)).
#### Build/Testing/Packaging Improvement #### Build/Testing/Packaging Improvement

CITATION.cff Normal file
View File

@@ -0,0 +1,31 @@
# This CITATION.cff file was generated with cffinit.

cff-version: 1.2.0
title: "ClickHouse"
message: "If you use this software, please cite it as below."
type: software
authors:
  - family-names: "Milovidov"
    given-names: "Alexey"
repository-code: 'https://github.com/ClickHouse/ClickHouse'
url: 'https://clickhouse.com'
license: Apache-2.0
preferred-citation:
  type: article
  authors:
    - family-names: "Schulze"
      given-names: "Robert"
    - family-names: "Schreiber"
      given-names: "Tom"
    - family-names: "Yatsishin"
      given-names: "Ilya"
    - family-names: "Dahimene"
      given-names: "Ryadh"
    - family-names: "Milovidov"
      given-names: "Alexey"
  journal: "Proceedings of the VLDB Endowment"
  title: "ClickHouse - Lightning Fast Analytics for Everyone"
  year: 2024
  volume: 17
  issue: 12
  doi: 10.14778/3685800.3685802
View File
@@ -339,7 +339,6 @@ set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3
set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")

if (OS_DARWIN)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main")
endif()
@@ -546,7 +545,7 @@ endif()
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
    AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND NOT ENABLE_FUZZING
-    AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
+    AND OMIT_HEAVY_DEBUG_SYMBOLS AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
    set(CHECK_LARGE_OBJECT_SIZES_DEFAULT ON)
else ()
    set(CHECK_LARGE_OBJECT_SIZES_DEFAULT OFF)
View File
@@ -34,37 +34,27 @@ curl https://clickhouse.com/ | sh
Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.

-* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 26
+* [v24.10 Community Call](https://clickhouse.com/company/events/v24-10-community-release-call) - October 31

## Upcoming Events

Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.

-The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey Milovidov:
-* [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25
-* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5
-* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
-* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
-* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
-Other upcoming meetups
-* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27
-* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27
-* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5
-* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5
-* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
-* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
-* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
-* [Tel Aviv Meetup](https://www.meetup.com/clickhouse-meetup-israel/events/303095121) - September 22
-* [Madrid Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096564/) - October 22
-* [Barcelona Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096876/) - October 29
-* [Oslo Meetup](https://www.meetup.com/open-source-real-time-data-warehouse-real-time-analytics/events/302938622) - October 31
+Upcoming meetups
+* [Barcelona Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096876/) - November 12
* [Ghent Meetup](https://www.meetup.com/clickhouse-belgium-user-group/events/303049405/) - November 19
* [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26
+* [Amsterdam Meetup](https://www.meetup.com/clickhouse-netherlands-user-group/events/303638814) - December 3
+* [New York Meetup](https://www.meetup.com/clickhouse-new-york-user-group/events/304268174) - December 9
+* [San Francisco Meetup](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/304286951/) - December 12
+Recently completed meetups
+* [Madrid Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096564/) - October 22
+* [Singapore Meetup](https://www.meetup.com/clickhouse-singapore-meetup-group/events/303212064/) - October 3
+* [Jakarta Meetup](https://www.meetup.com/clickhouse-indonesia-user-group/events/303191359/) - October 1

## Recent Recordings

* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
View File
@@ -14,9 +14,10 @@ The following versions of ClickHouse server are currently supported with securit
| Version | Supported |
|:-|:-|
+| 24.9 | ✔️ |
| 24.8 | ✔️ |
| 24.7 | ✔️ |
-| 24.6 | ✔️ |
+| 24.6 | ❌ |
| 24.5 | ❌ |
| 24.4 | ❌ |
| 24.3 | ✔️ |
View File
@@ -110,8 +110,7 @@ struct DecomposedFloat
    {
        if (!isNegative())
            return rhs > 0 ? -1 : 1;
-        else
-            return rhs >= 0 ? -1 : 1;
+        return rhs >= 0 ? -1 : 1;
    }

    /// The case of the most negative integer
@@ -128,8 +127,7 @@ struct DecomposedFloat
        if (mantissa() == 0)
            return 0;
-        else
-            return -1;
+        return -1;
    }
}
@@ -169,9 +167,8 @@ struct DecomposedFloat
        /// Float has no fractional part means that the numbers are equal.
        if (large_and_always_integer || (mantissa() & ((1ULL << (Traits::mantissa_bits - normalizedExponent())) - 1)) == 0)
            return 0;
-        else
-            /// Float has fractional part means its abs value is larger.
-            return isNegative() ? -1 : 1;
+        /// Float has fractional part means its abs value is larger.
+        return isNegative() ? -1 : 1;
    }
View File
@@ -205,8 +205,7 @@ JSON::ElementType JSON::getType() const
            Pos after_string = skipString();
            if (after_string < ptr_end && *after_string == ':')
                return TYPE_NAME_VALUE_PAIR;
-            else
-                return TYPE_STRING;
+            return TYPE_STRING;
        }
        default:
            throw JSONException(std::string("JSON: unexpected char ") + *ptr_begin + ", expected one of '{[tfn-0123456789\"'");
@@ -474,8 +473,7 @@ JSON::Pos JSON::searchField(const char * data, size_t size) const
    if (it == end())
        return nullptr;
-    else
-        return it->data();
+    return it->data();
}
@@ -487,7 +485,7 @@ bool JSON::hasEscapes() const
    if (*pos == '"')
        return false;
-    else if (*pos == '\\')
+    if (*pos == '\\')
        return true;
    throw JSONException("JSON: unexpected end of data.");
}
@@ -503,7 +501,7 @@ bool JSON::hasSpecialChars() const
    if (*pos == '"')
        return false;
-    else if (pos < ptr_end)
+    if (pos < ptr_end)
        return true;
    throw JSONException("JSON: unexpected end of data.");
}
@@ -682,10 +680,9 @@ double JSON::toDouble() const
    if (type == TYPE_NUMBER)
        return getDouble();
-    else if (type == TYPE_STRING)
+    if (type == TYPE_STRING)
        return JSON(ptr_begin + 1, ptr_end, level + 1).getDouble();
-    else
-        throw JSONException("JSON: cannot convert value to double.");
+    throw JSONException("JSON: cannot convert value to double.");
}

Int64 JSON::toInt() const
@@ -694,10 +691,9 @@ Int64 JSON::toInt() const
    if (type == TYPE_NUMBER)
        return getInt();
-    else if (type == TYPE_STRING)
+    if (type == TYPE_STRING)
        return JSON(ptr_begin + 1, ptr_end, level + 1).getInt();
-    else
-        throw JSONException("JSON: cannot convert value to signed integer.");
+    throw JSONException("JSON: cannot convert value to signed integer.");
}

UInt64 JSON::toUInt() const
@@ -706,10 +702,9 @@ UInt64 JSON::toUInt() const
    if (type == TYPE_NUMBER)
        return getUInt();
-    else if (type == TYPE_STRING)
+    if (type == TYPE_STRING)
        return JSON(ptr_begin + 1, ptr_end, level + 1).getUInt();
-    else
-        throw JSONException("JSON: cannot convert value to unsigned integer.");
+    throw JSONException("JSON: cannot convert value to unsigned integer.");
}

std::string JSON::toString() const
@@ -718,11 +713,9 @@ std::string JSON::toString() const
    if (type == TYPE_STRING)
        return getString();
-    else
-    {
-        Pos pos = skipElement();
-        return std::string(ptr_begin, pos - ptr_begin);
-    }
+    Pos pos = skipElement();
+    return std::string(ptr_begin, pos - ptr_begin);
}
View File

@ -203,9 +203,7 @@ T JSON::getWithDefault(const std::string & key, const T & default_) const
if (key_json.isType<T>()) if (key_json.isType<T>())
return key_json.get<T>(); return key_json.get<T>();
else
return default_;
}
else
return default_; return default_;
}
return default_;
} }

View File

@ -151,19 +151,19 @@ inline bool memequalWide(const char * p1, const char * p2, size_t size)
return unalignedLoad<uint64_t>(p1) == unalignedLoad<uint64_t>(p2) return unalignedLoad<uint64_t>(p1) == unalignedLoad<uint64_t>(p2)
&& unalignedLoad<uint64_t>(p1 + size - 8) == unalignedLoad<uint64_t>(p2 + size - 8); && unalignedLoad<uint64_t>(p1 + size - 8) == unalignedLoad<uint64_t>(p2 + size - 8);
} }
else if (size >= 4) if (size >= 4)
{ {
/// Chunks of 4..7 bytes. /// Chunks of 4..7 bytes.
return unalignedLoad<uint32_t>(p1) == unalignedLoad<uint32_t>(p2) return unalignedLoad<uint32_t>(p1) == unalignedLoad<uint32_t>(p2)
&& unalignedLoad<uint32_t>(p1 + size - 4) == unalignedLoad<uint32_t>(p2 + size - 4); && unalignedLoad<uint32_t>(p1 + size - 4) == unalignedLoad<uint32_t>(p2 + size - 4);
} }
else if (size >= 2) if (size >= 2)
{ {
/// Chunks of 2..3 bytes. /// Chunks of 2..3 bytes.
return unalignedLoad<uint16_t>(p1) == unalignedLoad<uint16_t>(p2) return unalignedLoad<uint16_t>(p1) == unalignedLoad<uint16_t>(p2)
&& unalignedLoad<uint16_t>(p1 + size - 2) == unalignedLoad<uint16_t>(p2 + size - 2); && unalignedLoad<uint16_t>(p1 + size - 2) == unalignedLoad<uint16_t>(p2 + size - 2);
} }
else if (size >= 1) if (size >= 1)
{ {
/// A single byte. /// A single byte.
return *p1 == *p2; return *p1 == *p2;
@ -369,11 +369,15 @@ namespace PackedZeroTraits
{ {
template <typename Second, template <typename, typename> class PackedPairNoInit> template <typename Second, template <typename, typename> class PackedPairNoInit>
inline bool check(const PackedPairNoInit<StringRef, Second> p) inline bool check(const PackedPairNoInit<StringRef, Second> p)
{ return 0 == p.key.size; } {
return 0 == p.key.size;
}
template <typename Second, template <typename, typename> class PackedPairNoInit> template <typename Second, template <typename, typename> class PackedPairNoInit>
inline void set(PackedPairNoInit<StringRef, Second> & p) inline void set(PackedPairNoInit<StringRef, Second> & p)
{ p.key.size = 0; } {
p.key.size = 0;
}
} }
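The memequalWide change above relies on overlapping loads: for 4..7 bytes, loads of [0, 4) and [size - 4, size) together touch every byte, so two 32-bit comparisons decide equality. A hedged standalone sketch of that case (equal4to7 is a hypothetical name):

#include <cstddef>
#include <cstdint>
#include <cstring>

/// Compare 4..7 bytes with two possibly overlapping 32-bit loads.
bool equal4to7(const char * p1, const char * p2, size_t size)
{
    uint32_t a_head;
    uint32_t b_head;
    uint32_t a_tail;
    uint32_t b_tail;
    std::memcpy(&a_head, p1, 4);             /// bytes [0, 4)
    std::memcpy(&b_head, p2, 4);
    std::memcpy(&a_tail, p1 + size - 4, 4);  /// bytes [size - 4, size)
    std::memcpy(&b_tail, p2 + size - 4, 4);
    return a_head == b_head && a_tail == b_tail;
}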

View File

@ -53,10 +53,9 @@ void argsToConfig(const Poco::Util::Application::ArgVec & argv,
key = arg.substr(key_start); key = arg.substr(key_start);
continue; continue;
} }
else
{ key = "";
key = "";
}
if (key_start == std::string::npos) if (key_start == std::string::npos)
continue; continue;

View File

@ -330,9 +330,8 @@ inline const char * find_first_symbols_dispatch(const char * begin, const char *
#if defined(__SSE4_2__) #if defined(__SSE4_2__)
if (sizeof...(symbols) >= 5) if (sizeof...(symbols) >= 5)
return find_first_symbols_sse42<positive, return_mode, sizeof...(symbols), symbols...>(begin, end); return find_first_symbols_sse42<positive, return_mode, sizeof...(symbols), symbols...>(begin, end);
else
#endif #endif
return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end); return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end);
} }
template <bool positive, ReturnMode return_mode> template <bool positive, ReturnMode return_mode>
@ -341,9 +340,8 @@ inline const char * find_first_symbols_dispatch(const std::string_view haystack,
#if defined(__SSE4_2__) #if defined(__SSE4_2__)
if (symbols.str.size() >= 5) if (symbols.str.size() >= 5)
return find_first_symbols_sse42<positive, return_mode>(haystack.begin(), haystack.end(), symbols); return find_first_symbols_sse42<positive, return_mode>(haystack.begin(), haystack.end(), symbols);
else
#endif #endif
return find_first_symbols_sse2<positive, return_mode>(haystack.begin(), haystack.end(), symbols.str.data(), symbols.str.size()); return find_first_symbols_sse2<positive, return_mode>(haystack.begin(), haystack.end(), symbols.str.data(), symbols.str.size());
} }
} }

View File

@ -33,8 +33,7 @@ std::optional<uint64_t> getCgroupsV2MemoryLimit()
uint64_t value; uint64_t value;
if (setting_file >> value) if (setting_file >> value)
return {value}; return {value};
else return {}; /// e.g. the cgroups default "max"
return {}; /// e.g. the cgroups default "max"
} }
current_cgroup = current_cgroup.parent_path(); current_cgroup = current_cgroup.parent_path();
} }

View File

@ -11,6 +11,9 @@ if (GLIBC_COMPATIBILITY)
if (ARCH_AARCH64) if (ARCH_AARCH64)
list (APPEND glibc_compatibility_sources musl/aarch64/syscall.s musl/aarch64/longjmp.s) list (APPEND glibc_compatibility_sources musl/aarch64/syscall.s musl/aarch64/longjmp.s)
set (musl_arch_include_dir musl/aarch64) set (musl_arch_include_dir musl/aarch64)
# Disable getauxval in aarch64. ARM glibc minimum requirement for the project is 2.18 and getauxval is present
# in 2.16. Having a custom one introduces issues with sanitizers
list (REMOVE_ITEM glibc_compatibility_sources musl/getauxval.c)
elseif (ARCH_AMD64) elseif (ARCH_AMD64)
list (APPEND glibc_compatibility_sources musl/x86_64/syscall.s musl/x86_64/longjmp.s) list (APPEND glibc_compatibility_sources musl/x86_64/syscall.s musl/x86_64/longjmp.s)
set (musl_arch_include_dir musl/x86_64) set (musl_arch_include_dir musl/x86_64)
@ -18,7 +21,7 @@ if (GLIBC_COMPATIBILITY)
message (FATAL_ERROR "glibc_compatibility can only be used on x86_64 or aarch64.") message (FATAL_ERROR "glibc_compatibility can only be used on x86_64 or aarch64.")
endif () endif ()
if (SANITIZE STREQUAL thread) if (SANITIZE STREQUAL thread AND ARCH_AMD64)
# Disable TSAN instrumentation that conflicts with re-exec due to high ASLR entropy using getauxval # Disable TSAN instrumentation that conflicts with re-exec due to high ASLR entropy using getauxval
# See longer comment in __auxv_init_procfs # See longer comment in __auxv_init_procfs
# In the case of tsan we need to make sure getauxval is not instrumented as that would introduce tsan # In the case of tsan we need to make sure getauxval is not instrumented as that would introduce tsan

View File

@ -25,9 +25,10 @@
// We don't have libc struct available here. // We don't have libc struct available here.
// Compute aux vector manually (from /proc/self/auxv). // Compute aux vector manually (from /proc/self/auxv).
// //
// Right now there is only 51 AT_* constants, // Right now there are 51 AT_* constants. Custom kernels have been encountered
// so 64 should be enough until this implementation will be replaced with musl. // making use of up to 71. 128 should be enough until this implementation is
static unsigned long __auxv_procfs[64]; // replaced with musl.
static unsigned long __auxv_procfs[128];
static unsigned long __auxv_secure = 0; static unsigned long __auxv_secure = 0;
// Common // Common
static unsigned long * __auxv_environ = NULL; static unsigned long * __auxv_environ = NULL;
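Background for the hunk above: /proc/self/auxv is a flat sequence of (type, value) pairs of unsigned long, terminated by an AT_NULL (type 0) entry, which is why a fixed-size table indexed by the AT_* type works. A hedged sketch of such a reader, assuming hypothetical names readAuxvTable and aux_table:

#include <fcntl.h>
#include <unistd.h>

static unsigned long aux_table[128];

static void readAuxvTable()
{
    int fd = open("/proc/self/auxv", O_RDONLY);
    if (fd < 0)
        return;
    unsigned long pair[2];  /// pair[0] is the AT_* type, pair[1] the value
    while (read(fd, pair, sizeof(pair)) == sizeof(pair) && pair[0] != 0)
        if (pair[0] < sizeof(aux_table) / sizeof(aux_table[0]))
            aux_table[pair[0]] = pair[1];
    (void)close(fd);  /// errors deliberately ignored in this sketch
}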

View File

@ -66,13 +66,11 @@ TRAP(gethostbyname)
TRAP(gethostbyname2) TRAP(gethostbyname2)
TRAP(gethostent) TRAP(gethostent)
TRAP(getlogin) TRAP(getlogin)
TRAP(getmntent)
TRAP(getnetbyaddr) TRAP(getnetbyaddr)
TRAP(getnetbyname) TRAP(getnetbyname)
TRAP(getnetent) TRAP(getnetent)
TRAP(getnetgrent) TRAP(getnetgrent)
TRAP(getnetgrent_r) TRAP(getnetgrent_r)
TRAP(getopt)
TRAP(getopt_long) TRAP(getopt_long)
TRAP(getopt_long_only) TRAP(getopt_long_only)
TRAP(getpass) TRAP(getpass)
@ -133,7 +131,6 @@ TRAP(nrand48)
TRAP(__ppc_get_timebase_freq) TRAP(__ppc_get_timebase_freq)
TRAP(ptsname) TRAP(ptsname)
TRAP(putchar_unlocked) TRAP(putchar_unlocked)
TRAP(putenv)
TRAP(pututline) TRAP(pututline)
TRAP(pututxline) TRAP(pututxline)
TRAP(putwchar_unlocked) TRAP(putwchar_unlocked)
@ -148,7 +145,6 @@ TRAP(sethostent)
TRAP(sethostid) TRAP(sethostid)
TRAP(setkey) TRAP(setkey)
//TRAP(setlocale) // Used by replxx at startup //TRAP(setlocale) // Used by replxx at startup
TRAP(setlogmask)
TRAP(setnetent) TRAP(setnetent)
TRAP(setnetgrent) TRAP(setnetgrent)
TRAP(setprotoent) TRAP(setprotoent)
@ -203,7 +199,6 @@ TRAP(lgammal)
TRAP(nftw) TRAP(nftw)
TRAP(nl_langinfo) TRAP(nl_langinfo)
TRAP(putc_unlocked) TRAP(putc_unlocked)
TRAP(rand)
/** In the current POSIX.1 specification (POSIX.1-2008), readdir() is not required to be thread-safe. However, in modern /** In the current POSIX.1 specification (POSIX.1-2008), readdir() is not required to be thread-safe. However, in modern
* implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams * implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams
* are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external * are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external
@ -288,4 +283,14 @@ TRAP(tss_get)
TRAP(tss_set) TRAP(tss_set)
TRAP(tss_delete) TRAP(tss_delete)
#ifndef USE_MUSL
/// These produce duplicate symbol errors when statically linking with musl.
/// Maybe we can remove them from the musl fork.
TRAP(getopt)
TRAP(putenv)
TRAP(setlogmask)
TRAP(rand)
TRAP(getmntent)
#endif
#endif #endif
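For context, a trap of this kind is usually a stub that overrides the libc symbol and aborts loudly if anything ever calls it. A hedged sketch of what such a macro could look like (the repository's actual TRAP definition may differ):

#include <cstdio>
#include <cstdlib>

#define TRAP(func) \
    extern "C" void func() \
    { \
        std::fprintf(stderr, "Call to forbidden function: %s\n", #func); \
        std::abort(); \
    }

TRAP(example_forbidden_function)  /// hypothetical name, not a real libc symbol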

View File

@ -3,7 +3,11 @@ add_subdirectory (Data)
add_subdirectory (Data/ODBC) add_subdirectory (Data/ODBC)
add_subdirectory (Foundation) add_subdirectory (Foundation)
add_subdirectory (JSON) add_subdirectory (JSON)
add_subdirectory (MongoDB)
if (USE_MONGODB)
add_subdirectory(MongoDB)
endif()
add_subdirectory (Net) add_subdirectory (Net)
add_subdirectory (NetSSL_OpenSSL) add_subdirectory (NetSSL_OpenSSL)
add_subdirectory (Redis) add_subdirectory (Redis)

View File

@ -188,8 +188,9 @@ namespace Crypto
pFile = fopen(keyFile.c_str(), "r"); pFile = fopen(keyFile.c_str(), "r");
if (pFile) if (pFile)
{ {
pem_password_cb * pCB = pass.empty() ? (pem_password_cb *)0 : &passCB; pem_password_cb * pCB = &passCB;
void * pPassword = pass.empty() ? (void *)0 : (void *)pass.c_str(); static constexpr const char * no_password = "";
void * pPassword = pass.empty() ? (void *)no_password : (void *)pass.c_str();
if (readFunc(pFile, &pKey, pCB, pPassword)) if (readFunc(pFile, &pKey, pCB, pPassword))
{ {
fclose(pFile); fclose(pFile);
@ -225,6 +226,13 @@ namespace Crypto
error: error:
if (pFile) if (pFile)
fclose(pFile); fclose(pFile);
if (*ppKey)
{
if constexpr (std::is_same_v<K, EVP_PKEY>)
EVP_PKEY_free(*ppKey);
else
EC_KEY_free(*ppKey);
}
throw OpenSSLException("EVPKey::loadKey(string)"); throw OpenSSLException("EVPKey::loadKey(string)");
} }
@ -286,6 +294,13 @@ namespace Crypto
error: error:
if (pBIO) if (pBIO)
BIO_free(pBIO); BIO_free(pBIO);
if (*ppKey)
{
if constexpr (std::is_same_v<K, EVP_PKEY>)
EVP_PKEY_free(*ppKey);
else
EC_KEY_free(*ppKey);
}
throw OpenSSLException("EVPKey::loadKey(stream)"); throw OpenSSLException("EVPKey::loadKey(stream)");
} }

View File

@ -952,6 +952,8 @@ private:
static std::pair<LoggerMapIterator, bool> add(Logger * pLogger); static std::pair<LoggerMapIterator, bool> add(Logger * pLogger);
static std::optional<LoggerMapIterator> find(const std::string & name); static std::optional<LoggerMapIterator> find(const std::string & name);
static Logger * findRawPtr(const std::string & name); static Logger * findRawPtr(const std::string & name);
void unsafeSetChannel(Channel * pChannel);
Channel* unsafeGetChannel() const;
Logger(); Logger();
Logger(const Logger &); Logger(const Logger &);

View File

@ -61,6 +61,13 @@ Logger::~Logger()
void Logger::setChannel(Channel* pChannel) void Logger::setChannel(Channel* pChannel)
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
unsafeSetChannel(pChannel);
}
void Logger::unsafeSetChannel(Channel* pChannel)
{ {
if (_pChannel) _pChannel->release(); if (_pChannel) _pChannel->release();
_pChannel = pChannel; _pChannel = pChannel;
@ -69,6 +76,14 @@ void Logger::setChannel(Channel* pChannel)
Channel* Logger::getChannel() const Channel* Logger::getChannel() const
{
std::lock_guard<std::mutex> lock(getLoggerMutex());
return unsafeGetChannel();
}
Channel* Logger::unsafeGetChannel() const
{ {
return _pChannel; return _pChannel;
} }
@ -89,7 +104,7 @@ void Logger::setLevel(const std::string& level)
void Logger::setProperty(const std::string& name, const std::string& value) void Logger::setProperty(const std::string& name, const std::string& value)
{ {
if (name == "channel") if (name == "channel")
setChannel(LoggingRegistry::defaultRegistry().channelForName(value)); unsafeSetChannel(LoggingRegistry::defaultRegistry().channelForName(value));
else if (name == "level") else if (name == "level")
setLevel(value); setLevel(value);
else else
@ -160,7 +175,7 @@ void Logger::setChannel(const std::string& name, Channel* pChannel)
if (len == 0 || if (len == 0 ||
(it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.'))) (it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.')))
{ {
it.second.logger->setChannel(pChannel); it.second.logger->unsafeSetChannel(pChannel);
} }
} }
} }
@ -393,7 +408,7 @@ std::pair<Logger::LoggerMapIterator, bool> Logger::unsafeGet(const std::string&
else else
{ {
Logger& par = parent(name); Logger& par = parent(name);
logger = new Logger(name, par.getChannel(), par.getLevel()); logger = new Logger(name, par.unsafeGetChannel(), par.getLevel());
} }
return add(logger); return add(logger);
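The Logger changes above follow a common locking pattern: the public method takes the mutex once and delegates to a private unsafe* method, so code paths that already hold the lock (such as setChannel(name, pChannel) iterating the logger map) call the unsafe variant and avoid recursive locking. A minimal hedged sketch with hypothetical names:

#include <cstddef>
#include <mutex>

class Registry
{
public:
    void set(int v)
    {
        std::lock_guard<std::mutex> lock(mutex);
        unsafeSet(v);
    }

    void setMany(const int * values, size_t n)
    {
        std::lock_guard<std::mutex> lock(mutex);  /// lock once for the batch
        for (size_t i = 0; i < n; ++i)
            unsafeSet(values[i]);                 /// no recursive locking
    }

private:
    void unsafeSet(int v) { value = v; }  /// caller must hold `mutex`

    std::mutex mutex;
    int value = 0;
};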

View File

@ -48,25 +48,17 @@ std::string PathImpl::currentImpl()
std::string PathImpl::homeImpl() std::string PathImpl::homeImpl()
{ {
std::string path; std::string path;
#if defined(_POSIX_C_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE)
size_t buf_size = 1024; // Same as glibc use for getpwuid size_t buf_size = 1024; // Same as glibc use for getpwuid
std::vector<char> buf(buf_size); std::vector<char> buf(buf_size);
struct passwd res; struct passwd res;
struct passwd* pwd = nullptr; struct passwd* pwd = nullptr;
getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd); getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd);
#else
struct passwd* pwd = getpwuid(getuid());
#endif
if (pwd) if (pwd)
path = pwd->pw_dir; path = pwd->pw_dir;
else else
{ {
#if defined(_POSIX_C_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE)
getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd); getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd);
#else
pwd = getpwuid(geteuid());
#endif
if (pwd) if (pwd)
path = pwd->pw_dir; path = pwd->pw_dir;
else else
@ -82,7 +74,7 @@ std::string PathImpl::configHomeImpl()
{ {
std::string path = PathImpl::homeImpl(); std::string path = PathImpl::homeImpl();
std::string::size_type n = path.size(); std::string::size_type n = path.size();
if (n > 0 && path[n - 1] == '/') if (n > 0 && path[n - 1] == '/')
#if POCO_OS == POCO_OS_MAC_OS_X #if POCO_OS == POCO_OS_MAC_OS_X
path.append("Library/Preferences/"); path.append("Library/Preferences/");
#else #else
@ -97,7 +89,7 @@ std::string PathImpl::dataHomeImpl()
{ {
std::string path = PathImpl::homeImpl(); std::string path = PathImpl::homeImpl();
std::string::size_type n = path.size(); std::string::size_type n = path.size();
if (n > 0 && path[n - 1] == '/') if (n > 0 && path[n - 1] == '/')
#if POCO_OS == POCO_OS_MAC_OS_X #if POCO_OS == POCO_OS_MAC_OS_X
path.append("Library/Application Support/"); path.append("Library/Application Support/");
#else #else
@ -112,7 +104,7 @@ std::string PathImpl::cacheHomeImpl()
{ {
std::string path = PathImpl::homeImpl(); std::string path = PathImpl::homeImpl();
std::string::size_type n = path.size(); std::string::size_type n = path.size();
if (n > 0 && path[n - 1] == '/') if (n > 0 && path[n - 1] == '/')
#if POCO_OS == POCO_OS_MAC_OS_X #if POCO_OS == POCO_OS_MAC_OS_X
path.append("Library/Caches/"); path.append("Library/Caches/");
#else #else
@ -127,7 +119,7 @@ std::string PathImpl::tempHomeImpl()
{ {
std::string path = PathImpl::homeImpl(); std::string path = PathImpl::homeImpl();
std::string::size_type n = path.size(); std::string::size_type n = path.size();
if (n > 0 && path[n - 1] == '/') if (n > 0 && path[n - 1] == '/')
#if POCO_OS == POCO_OS_MAC_OS_X #if POCO_OS == POCO_OS_MAC_OS_X
path.append("Library/Caches/"); path.append("Library/Caches/");
#else #else
@ -159,7 +151,7 @@ std::string PathImpl::tempImpl()
std::string PathImpl::configImpl() std::string PathImpl::configImpl()
{ {
std::string path; std::string path;
#if POCO_OS == POCO_OS_MAC_OS_X #if POCO_OS == POCO_OS_MAC_OS_X
path = "/Library/Preferences/"; path = "/Library/Preferences/";
#else #else

View File

@ -248,6 +248,9 @@ namespace Net
SSL_CTX * sslContext() const; SSL_CTX * sslContext() const;
/// Returns the underlying OpenSSL SSL Context object. /// Returns the underlying OpenSSL SSL Context object.
SSL_CTX * takeSslContext();
/// Takes ownership of the underlying OpenSSL SSL Context object.
Usage usage() const; Usage usage() const;
/// Returns whether the context is for use by a client or by a server /// Returns whether the context is for use by a client or by a server
/// and whether TLSv1 is required. /// and whether TLSv1 is required.
@ -401,6 +404,13 @@ namespace Net
return _pSSLContext; return _pSSLContext;
} }
inline SSL_CTX * Context::takeSslContext()
{
auto * result = _pSSLContext;
_pSSLContext = nullptr;
return result;
}
inline bool Context::extendedCertificateVerificationEnabled() const inline bool Context::extendedCertificateVerificationEnabled() const
{ {

View File

@ -106,6 +106,11 @@ Context::Context(
Context::~Context() Context::~Context()
{ {
if (_pSSLContext == nullptr)
{
return;
}
try try
{ {
SSL_CTX_free(_pSSLContext); SSL_CTX_free(_pSSLContext);
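takeSslContext() above is a release-style ownership transfer: the raw pointer goes to the caller, the member is nulled, and the destructor's new early return turns cleanup into a no-op. A hedged sketch of the shape of this pattern (Handle is a hypothetical type):

struct Handle
{
    void * ptr = nullptr;

    /// Transfer ownership to the caller; the destructor then does nothing.
    void * take()
    {
        void * result = ptr;
        ptr = nullptr;
        return result;
    }

    ~Handle()
    {
        if (ptr == nullptr)
            return;  /// ownership was taken, nothing left to free
        /// free the resource here (e.g. SSL_CTX_free for the real class)
    }
};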

1
ci/README.md Normal file
View File

@ -0,0 +1 @@
Note: This directory is under active development for CI improvements and is not currently used by the existing CI pipeline.

View File

@ -0,0 +1,109 @@
# docker build -t clickhouse/fasttest .
FROM ubuntu:22.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=18
RUN apt-get update \
&& apt-get install \
apt-transport-https \
apt-utils \
ca-certificates \
curl \
gnupg \
lsb-release \
wget \
git \
--yes --no-install-recommends --verbose-versions \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list \
&& apt-get update \
&& apt-get install --yes --no-install-recommends --verbose-versions llvm-${LLVM_VERSION} \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# moreutils - provides ts for FT
# expect, bzip2 - required by FT
# bsdmainutils - provides hexdump for FT
# nasm - NASM compiler for one of the submodules, required for a normal build
# yasm - assembler for libhdfs3, required for a normal build
RUN apt-get update \
&& apt-get install \
clang-${LLVM_VERSION} \
cmake \
libclang-${LLVM_VERSION}-dev \
libclang-rt-${LLVM_VERSION}-dev \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
lsof \
ninja-build \
python3 \
python3-pip \
zstd \
moreutils \
expect \
bsdmainutils \
pv \
jq \
bzip2 \
nasm \
yasm \
--yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
# Give suid to gdb to grant it attach permissions
RUN chmod u+s /opt/gdb/bin/gdb
ENV PATH="/opt/gdb/bin:${PATH}"
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# FIXME: workaround for "The imported target "merge-fdata" references the file" error
# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake
# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot pick up libraries from the default install path.
# It's a very dirty workaround; better to build the compiler and LLVM ourselves and use them. Details: https://github.com/llvm/llvm-project/issues/95792
RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu
ARG TARGETARCH
ARG SCCACHE_VERSION=v0.7.7
ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1
# sccache requires a value for the region. By default we use us-east-1, AWS's default region.
ENV SCCACHE_REGION=us-east-1
RUN arch=${TARGETARCH} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \
tar xz -C /tmp \
&& mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \
&& rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt
# chmod 777 to make the container user-independent
RUN mkdir -p /var/lib/clickhouse \
&& chmod 777 /var/lib/clickhouse
ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN groupadd --system --gid 1000 clickhouse \
&& useradd --system --gid 1000 --uid 1000 -m clickhouse \
&& mkdir -p /.cache/sccache && chmod 777 /.cache/sccache
ENV PYTHONPATH="/wd"
ENV PYTHONUNBUFFERED=1

View File

@ -0,0 +1,6 @@
Jinja2==3.1.3
numpy==1.26.4
requests==2.32.3
pandas==1.5.3
scipy==1.12.0
#https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl

View File

@ -0,0 +1,17 @@
# docker build -t clickhouse/style-test .
FROM ubuntu:22.04
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
aspell \
libxml2-utils \
python3-pip \
locales \
git \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL=en_US.UTF-8
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r requirements.txt

View File

@ -0,0 +1,5 @@
requests==2.32.3
yamllint==1.26.3
codespell==2.2.1
#use praktika from CH repo
#https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl

102
ci/jobs/build_clickhouse.py Normal file
View File

@ -0,0 +1,102 @@
import argparse
from praktika.result import Result
from praktika.settings import Settings
from praktika.utils import MetaClasses, Shell, Utils
class JobStages(metaclass=MetaClasses.WithIter):
CHECKOUT_SUBMODULES = "checkout"
CMAKE = "cmake"
BUILD = "build"
def parse_args():
parser = argparse.ArgumentParser(description="ClickHouse Build Job")
parser.add_argument("BUILD_TYPE", help="Type: <amd|arm_debug|release_sanitizer>")
parser.add_argument("--param", help="Optional custom job start stage", default=None)
return parser.parse_args()
def main():
args = parse_args()
stop_watch = Utils.Stopwatch()
stages = list(JobStages)
stage = args.param or JobStages.CHECKOUT_SUBMODULES
if stage:
assert stage in JobStages, f"--param must be one of [{list(JobStages)}]"
print(f"Job will start from stage [{stage}]")
while stage in stages:
stages.pop(0)
stages.insert(0, stage)
cmake_build_type = "Release"
sanitizer = ""
if "debug" in args.BUILD_TYPE.lower():
print("Build type set: debug")
cmake_build_type = "Debug"
if "asan" in args.BUILD_TYPE.lower():
print("Sanitizer set: address")
sanitizer = "address"
# if Environment.is_local_run():
# build_cache_type = "disabled"
# else:
build_cache_type = "sccache"
current_directory = Utils.cwd()
build_dir = f"{Settings.TEMP_DIR}/build"
res = True
results = []
if res and JobStages.CHECKOUT_SUBMODULES in stages:
Shell.check(f"rm -rf {build_dir} && mkdir -p {build_dir}")
results.append(
Result.create_from_command_execution(
name="Checkout Submodules",
command=f"git submodule sync --recursive && git submodule init && git submodule update --depth 1 --recursive --jobs {min([Utils.cpu_count(), 20])}",
)
)
res = results[-1].is_ok()
if res and JobStages.CMAKE in stages:
results.append(
Result.create_from_command_execution(
name="Cmake configuration",
command=f"cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_BUILD_TYPE={cmake_build_type} \
-DSANITIZE={sanitizer} -DENABLE_CHECK_HEAVY_BUILDS=1 -DENABLE_CLICKHOUSE_SELF_EXTRACTING=1 -DENABLE_TESTS=0 \
-DENABLE_UTILS=0 -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON -DCMAKE_INSTALL_PREFIX=/usr \
-DCMAKE_INSTALL_SYSCONFDIR=/etc -DCMAKE_INSTALL_LOCALSTATEDIR=/var -DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=ON \
-DCMAKE_C_COMPILER=clang-18 -DCMAKE_CXX_COMPILER=clang++-18 -DCOMPILER_CACHE={build_cache_type} -DENABLE_TESTS=1 \
-DENABLE_BUILD_PROFILING=1 {current_directory}",
workdir=build_dir,
with_log=True,
)
)
res = results[-1].is_ok()
if res and JobStages.BUILD in stages:
Shell.check("sccache --show-stats")
results.append(
Result.create_from_command_execution(
name="Build ClickHouse",
command="ninja clickhouse-bundle clickhouse-odbc-bridge clickhouse-library-bridge",
workdir=build_dir,
with_log=True,
)
)
Shell.check("sccache --show-stats")
Shell.check(f"ls -l {build_dir}/programs/")
res = results[-1].is_ok()
Result.create_from(results=results, stopwatch=stop_watch).finish_job_accordingly()
if __name__ == "__main__":
main()

382
ci/jobs/check_style.py Normal file
View File

@ -0,0 +1,382 @@
import math
import multiprocessing
import os
import re
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from praktika.result import Result
from praktika.utils import Shell, Utils
NPROC = multiprocessing.cpu_count()
def chunk_list(data, n):
"""Split the data list into n nearly equal-sized chunks."""
chunk_size = math.ceil(len(data) / n)
for i in range(0, len(data), chunk_size):
yield data[i : i + chunk_size]
def run_check_concurrent(check_name, check_function, files, nproc=NPROC):
stop_watch = Utils.Stopwatch()
if not files:
print(f"File list is empty [{files}]")
raise
file_chunks = list(chunk_list(files, nproc))
results = []
# Run check_function concurrently on each chunk
with ProcessPoolExecutor(max_workers=NPROC) as executor:
futures = [executor.submit(check_function, chunk) for chunk in file_chunks]
# Wait for results and process them (optional)
for future in futures:
try:
res = future.result()
if res and res not in results:
results.append(res)
except Exception as e:
results.append(f"Exception in {check_name}: {e}")
result = Result(
name=check_name,
status=Result.Status.SUCCESS if not results else Result.Status.FAILED,
start_time=stop_watch.start_time,
duration=stop_watch.duration,
info=f"errors: {results}" if results else "",
)
return result
def check_duplicate_includes(file_path):
includes = []
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
for line in f:
if re.match(r"^#include ", line):
includes.append(line.strip())
include_counts = {line: includes.count(line) for line in includes}
duplicates = {line: count for line, count in include_counts.items() if count > 1}
if duplicates:
return f"{file_path}: {duplicates}"
return ""
def check_whitespaces(file_paths):
for file in file_paths:
exit_code, out, err = Shell.get_res_stdout_stderr(
f'./ci/jobs/scripts/check_style/double_whitespaces.pl "{file}"',
verbose=False,
)
if out or err:
return out + " err: " + err
return ""
def check_yamllint(file_paths):
file_paths = " ".join([f"'{file}'" for file in file_paths])
exit_code, out, err = Shell.get_res_stdout_stderr(
f"yamllint --config-file=./.yamllint {file_paths}", verbose=False
)
return out or err
def check_xmllint(file_paths):
if not isinstance(file_paths, list):
file_paths = [file_paths]
file_paths = " ".join([f"'{file}'" for file in file_paths])
exit_code, out, err = Shell.get_res_stdout_stderr(
f"xmllint --noout --nonet {file_paths}", verbose=False
)
return out or err
def check_functional_test_cases(files):
"""
Queries with event_date should have yesterday() not today()
NOTE: it is not that accurate, but at least something.
"""
patterns = [
re.compile(
r"(?i)where.*?\bevent_date\s*(=|>=)\s*today\(\)(?!\s*-\s*1)",
re.IGNORECASE | re.DOTALL,
)
]
errors = []
for test_case in files:
try:
with open(test_case, "r", encoding="utf-8", errors="replace") as f:
file_content = " ".join(
f.read().splitlines()
) # Combine lines into a single string
# Check if any pattern matches in the concatenated string
if any(pattern.search(file_content) for pattern in patterns):
errors.append(
f"event_date should be filtered using >=yesterday() in {test_case} (to avoid flakiness)"
)
except Exception as e:
errors.append(f"Error checking {test_case}: {e}")
for test_case in files:
if "fail" in test_case:
errors.append(f"test case {test_case} includes 'fail' in its name")
return " ".join(errors)
def check_gaps_in_tests_numbers(file_paths, gap_threshold=100):
test_numbers = set()
pattern = re.compile(r"(\d+)")
for file in file_paths:
file_name = os.path.basename(file)
match = pattern.search(file_name)
if match:
test_numbers.add(int(match.group(1)))
sorted_numbers = sorted(test_numbers)
large_gaps = []
for i in range(1, len(sorted_numbers)):
prev_num = sorted_numbers[i - 1]
next_num = sorted_numbers[i]
diff = next_num - prev_num
if diff >= gap_threshold:
large_gaps.append(f"Gap ({prev_num}, {next_num}) > {gap_threshold}")
return large_gaps
def check_broken_links(path, exclude_paths):
broken_symlinks = []
for path in Path(path).rglob("*"):
if any(exclude_path in str(path) for exclude_path in exclude_paths):
continue
if path.is_symlink():
if not path.exists():
broken_symlinks.append(str(path))
if broken_symlinks:
for symlink in broken_symlinks:
print(symlink)
return f"Broken symlinks found: {broken_symlinks}"
else:
return ""
def check_cpp_code():
res, out, err = Shell.get_res_stdout_stderr(
"./ci/jobs/scripts/check_style/check_cpp.sh"
)
if err:
out += err
return out
def check_repo_submodules():
res, out, err = Shell.get_res_stdout_stderr(
"./ci/jobs/scripts/check_style/check_submodules.sh"
)
if err:
out += err
return out
def check_other():
res, out, err = Shell.get_res_stdout_stderr(
"./ci/jobs/scripts/check_style/checks_to_refactor.sh"
)
if err:
out += err
return out
def check_codespell():
res, out, err = Shell.get_res_stdout_stderr(
"./ci/jobs/scripts/check_style/check_typos.sh"
)
if err:
out += err
return out
def check_aspell():
res, out, err = Shell.get_res_stdout_stderr(
"./ci/jobs/scripts/check_style/check_aspell.sh"
)
if err:
out += err
return out
def check_mypy():
res, out, err = Shell.get_res_stdout_stderr(
"./ci/jobs/scripts/check_style/check-mypy"
)
if err:
out += err
return out
def check_pylint():
res, out, err = Shell.get_res_stdout_stderr(
"./ci/jobs/scripts/check_style/check-pylint"
)
if err:
out += err
return out
def check_file_names(files):
files_set = set()
for file in files:
file_ = file.lower()
if file_ in files_set:
return f"Non-uniq file name in lower case: {file}"
files_set.add(file_)
return ""
if __name__ == "__main__":
results = []
stop_watch = Utils.Stopwatch()
all_files = Utils.traverse_paths(
include_paths=["."],
exclude_paths=[
"./.git",
"./contrib",
"./build",
],
not_exists_ok=True, # ./build may exist if runs locally
)
cpp_files = Utils.traverse_paths(
include_paths=["./src", "./base", "./programs", "./utils"],
exclude_paths=[
"./base/glibc-compatibility",
"./contrib/consistent-hashing",
"./base/widechar_width",
],
file_suffixes=[".h", ".cpp"],
)
yaml_workflow_files = Utils.traverse_paths(
include_paths=["./.github"],
exclude_paths=[],
file_suffixes=[".yaml", ".yml"],
)
xml_files = Utils.traverse_paths(
include_paths=["."],
exclude_paths=["./.git", "./contrib/"],
file_suffixes=[".xml"],
)
functional_test_files = Utils.traverse_paths(
include_paths=["./tests/queries"],
exclude_paths=[],
file_suffixes=[".sql", ".sh", ".py", ".j2"],
)
results.append(
Result(
name="Read Files",
status=Result.Status.SUCCESS,
start_time=stop_watch.start_time,
duration=stop_watch.duration,
)
)
results.append(
run_check_concurrent(
check_name="Whitespace Check",
check_function=check_whitespaces,
files=cpp_files,
)
)
results.append(
run_check_concurrent(
check_name="YamlLint Check",
check_function=check_yamllint,
files=yaml_workflow_files,
)
)
results.append(
run_check_concurrent(
check_name="XmlLint Check",
check_function=check_xmllint,
files=xml_files,
)
)
results.append(
run_check_concurrent(
check_name="Functional Tests scripts smoke check",
check_function=check_functional_test_cases,
files=functional_test_files,
)
)
results.append(
Result.create_from_command_execution(
name="Check Tests Numbers",
command=check_gaps_in_tests_numbers,
command_args=[functional_test_files],
)
)
results.append(
Result.create_from_command_execution(
name="Check Broken Symlinks",
command=check_broken_links,
command_kwargs={
"path": "./",
"exclude_paths": ["contrib/", "metadata/", "programs/server/data"],
},
)
)
results.append(
Result.create_from_command_execution(
name="Check CPP code",
command=check_cpp_code,
)
)
results.append(
Result.create_from_command_execution(
name="Check Submodules",
command=check_repo_submodules,
)
)
results.append(
Result.create_from_command_execution(
name="Check File Names",
command=check_file_names,
command_args=[all_files],
)
)
results.append(
Result.create_from_command_execution(
name="Check Many Different Things",
command=check_other,
)
)
results.append(
Result.create_from_command_execution(
name="Check Codespell",
command=check_codespell,
)
)
results.append(
Result.create_from_command_execution(
name="Check Aspell",
command=check_aspell,
)
)
Result.create_from(results=results, stopwatch=stop_watch).finish_job_accordingly()

337
ci/jobs/fast_test.py Normal file
View File

@ -0,0 +1,337 @@
import argparse
import threading
from pathlib import Path
from praktika.result import Result
from praktika.settings import Settings
from praktika.utils import MetaClasses, Shell, Utils
from ci.jobs.scripts.functional_tests_results import FTResultsProcessor
class ClickHouseProc:
def __init__(self):
self.ch_config_dir = f"{Settings.TEMP_DIR}/etc/clickhouse-server"
self.pid_file = f"{self.ch_config_dir}/clickhouse-server.pid"
self.config_file = f"{self.ch_config_dir}/config.xml"
self.user_files_path = f"{self.ch_config_dir}/user_files"
self.test_output_file = f"{Settings.OUTPUT_DIR}/test_result.txt"
self.command = f"clickhouse-server --config-file {self.config_file} --pid-file {self.pid_file} -- --path {self.ch_config_dir} --user_files_path {self.user_files_path} --top_level_domains_path {self.ch_config_dir}/top_level_domains --keeper_server.storage_path {self.ch_config_dir}/coordination"
self.proc = None
self.pid = 0
nproc = int(Utils.cpu_count() / 2)
self.fast_test_command = f"clickhouse-test --hung-check --fast-tests-only --no-random-settings --no-random-merge-tree-settings --no-long --testname --shard --zookeeper --check-zookeeper-session --order random --print-time --report-logs-stats --jobs {nproc} -- '' | ts '%Y-%m-%d %H:%M:%S' \
| tee -a \"{self.test_output_file}\""
# TODO: store info in case of failure
self.info = ""
self.info_file = ""
Utils.set_env("CLICKHOUSE_CONFIG_DIR", self.ch_config_dir)
Utils.set_env("CLICKHOUSE_CONFIG", self.config_file)
Utils.set_env("CLICKHOUSE_USER_FILES", self.user_files_path)
Utils.set_env("CLICKHOUSE_SCHEMA_FILES", f"{self.ch_config_dir}/format_schemas")
def start(self):
print("Starting ClickHouse server")
Shell.check(f"rm {self.pid_file}")
def run_clickhouse():
self.proc = Shell.run_async(
self.command, verbose=True, suppress_output=True
)
thread = threading.Thread(target=run_clickhouse)
thread.daemon = True # Allow program to exit even if thread is still running
thread.start()
# self.proc = Shell.run_async(self.command, verbose=True)
started = False
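# Poll the pid file (up to 5 attempts, 1s apart); the server writes its pid once it is up.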
try:
for _ in range(5):
pid = Shell.get_output(f"cat {self.pid_file}").strip()
if not pid:
Utils.sleep(1)
continue
started = True
print(f"Got pid from fs [{pid}]")
_ = int(pid)
break
except Exception:
pass
if not started:
stdout = self.proc.stdout.read().strip() if self.proc.stdout else ""
stderr = self.proc.stderr.read().strip() if self.proc.stderr else ""
Utils.print_formatted_error("Failed to start ClickHouse", stdout, stderr)
return False
print(f"ClickHouse server started successfully, pid [{pid}]")
return True
def wait_ready(self):
res, out, err = 0, "", ""
attempts = 30
delay = 2
for attempt in range(attempts):
res, out, err = Shell.get_res_stdout_stderr(
'clickhouse-client --query "select 1"', verbose=True
)
if out.strip() == "1":
print("Server ready")
break
else:
print(f"Server not ready, wait")
Utils.sleep(delay)
else:
Utils.print_formatted_error(
f"Server not ready after [{attempts*delay}s]", out, err
)
return False
return True
def run_fast_test(self):
if Path(self.test_output_file).exists():
Path(self.test_output_file).unlink()
exit_code = Shell.run(self.fast_test_command)
return exit_code == 0
def terminate(self):
print("Terminate ClickHouse process")
timeout = 10
if self.proc:
Utils.terminate_process_group(self.proc.pid)
self.proc.terminate()
try:
self.proc.wait(timeout=10)
print(f"Process {self.proc.pid} terminated gracefully.")
except Exception:
print(
f"Process {self.proc.pid} did not terminate in {timeout} seconds, killing it..."
)
Utils.terminate_process_group(self.proc.pid, force=True)
self.proc.wait() # Wait for the process to be fully killed
print(f"Process {self.proc} was killed.")
def clone_submodules():
submodules_to_update = [
"contrib/sysroot",
"contrib/magic_enum",
"contrib/abseil-cpp",
"contrib/boost",
"contrib/zlib-ng",
"contrib/libxml2",
"contrib/libunwind",
"contrib/fmtlib",
"contrib/aklomp-base64",
"contrib/cctz",
"contrib/libcpuid",
"contrib/libdivide",
"contrib/double-conversion",
"contrib/llvm-project",
"contrib/lz4",
"contrib/zstd",
"contrib/fastops",
"contrib/rapidjson",
"contrib/re2",
"contrib/sparsehash-c11",
"contrib/croaring",
"contrib/miniselect",
"contrib/xz",
"contrib/dragonbox",
"contrib/fast_float",
"contrib/NuRaft",
"contrib/jemalloc",
"contrib/replxx",
"contrib/wyhash",
"contrib/c-ares",
"contrib/morton-nd",
"contrib/xxHash",
"contrib/expected",
"contrib/simdjson",
"contrib/liburing",
"contrib/libfiu",
"contrib/incbin",
"contrib/yaml-cpp",
]
res = Shell.check("git submodule sync", verbose=True, strict=True)
res = res and Shell.check("git submodule init", verbose=True, strict=True)
res = res and Shell.check(
command=f"xargs --max-procs={min([Utils.cpu_count(), 20])} --null --no-run-if-empty --max-args=1 git submodule update --depth 1 --single-branch",
stdin_str="\0".join(submodules_to_update) + "\0",
timeout=120,
retries=3,
verbose=True,
)
res = res and Shell.check("git submodule foreach git reset --hard", verbose=True)
res = res and Shell.check("git submodule foreach git checkout @ -f", verbose=True)
res = res and Shell.check("git submodule foreach git clean -xfd", verbose=True)
return res
def update_path_ch_config(config_file_path=""):
print("Updating path in clickhouse config")
config_file_path = (
config_file_path or f"{Settings.TEMP_DIR}/etc/clickhouse-server/config.xml"
)
ssl_config_file_path = (
f"{Settings.TEMP_DIR}/etc/clickhouse-server/config.d/ssl_certs.xml"
)
try:
with open(config_file_path, "r", encoding="utf-8") as file:
content = file.read()
with open(ssl_config_file_path, "r", encoding="utf-8") as file:
ssl_config_content = file.read()
content = content.replace(">/var/", f">{Settings.TEMP_DIR}/var/")
content = content.replace(">/etc/", f">{Settings.TEMP_DIR}/etc/")
ssl_config_content = ssl_config_content.replace(
">/etc/", f">{Settings.TEMP_DIR}/etc/"
)
with open(config_file_path, "w", encoding="utf-8") as file:
file.write(content)
with open(ssl_config_file_path, "w", encoding="utf-8") as file:
file.write(ssl_config_content)
except Exception as e:
print(f"ERROR: failed to update config, exception: {e}")
return False
return True
class JobStages(metaclass=MetaClasses.WithIter):
CHECKOUT_SUBMODULES = "checkout"
CMAKE = "cmake"
BUILD = "build"
CONFIG = "config"
TEST = "test"
def parse_args():
parser = argparse.ArgumentParser(description="ClickHouse Fast Test Job")
parser.add_argument("--param", help="Optional custom job start stage", default=None)
return parser.parse_args()
def main():
args = parse_args()
stop_watch = Utils.Stopwatch()
stages = list(JobStages)
stage = args.param or JobStages.CHECKOUT_SUBMODULES
if stage:
assert stage in JobStages, f"--param must be one of [{list(JobStages)}]"
print(f"Job will start from stage [{stage}]")
while stage in stages:
stages.pop(0)
stages.insert(0, stage)
current_directory = Utils.cwd()
build_dir = f"{Settings.TEMP_DIR}/build"
Utils.add_to_PATH(f"{build_dir}/programs:{current_directory}/tests")
res = True
results = []
if res and JobStages.CHECKOUT_SUBMODULES in stages:
Shell.check(f"rm -rf {build_dir} && mkdir -p {build_dir}")
results.append(
Result.create_from_command_execution(
name="Checkout Submodules for Minimal Build",
command=clone_submodules,
)
)
res = results[-1].is_ok()
if res and JobStages.CMAKE in stages:
results.append(
Result.create_from_command_execution(
name="Cmake configuration",
command=f"cmake {current_directory} -DCMAKE_CXX_COMPILER=clang++-18 -DCMAKE_C_COMPILER=clang-18 \
-DCMAKE_TOOLCHAIN_FILE={current_directory}/cmake/linux/toolchain-x86_64-musl.cmake -DENABLE_LIBRARIES=0 \
-DENABLE_TESTS=0 -DENABLE_UTILS=0 -DENABLE_THINLTO=0 -DENABLE_NURAFT=1 -DENABLE_SIMDJSON=1 \
-DENABLE_JEMALLOC=1 -DENABLE_LIBURING=1 -DENABLE_YAML_CPP=1 -DCOMPILER_CACHE=sccache",
workdir=build_dir,
with_log=True,
)
)
res = results[-1].is_ok()
if res and JobStages.BUILD in stages:
Shell.check("sccache --show-stats")
results.append(
Result.create_from_command_execution(
name="Build ClickHouse",
command="ninja clickhouse-bundle clickhouse-stripped",
workdir=build_dir,
with_log=True,
)
)
Shell.check("sccache --show-stats")
res = results[-1].is_ok()
if res and JobStages.BUILD in stages:
commands = [
f"mkdir -p {Settings.OUTPUT_DIR}/binaries",
f"cp ./programs/clickhouse {Settings.OUTPUT_DIR}/binaries/clickhouse",
f"zstd --threads=0 --force programs/clickhouse-stripped -o {Settings.OUTPUT_DIR}/binaries/clickhouse-stripped.zst",
"sccache --show-stats",
"clickhouse-client --version",
"clickhouse-test --help",
]
results.append(
Result.create_from_command_execution(
name="Check and Compress binary",
command=commands,
workdir=build_dir,
with_log=True,
)
)
res = results[-1].is_ok()
if res and JobStages.CONFIG in stages:
commands = [
f"rm -rf {Settings.TEMP_DIR}/etc/ && mkdir -p {Settings.TEMP_DIR}/etc/clickhouse-client {Settings.TEMP_DIR}/etc/clickhouse-server",
f"cp {current_directory}/programs/server/config.xml {current_directory}/programs/server/users.xml {Settings.TEMP_DIR}/etc/clickhouse-server/",
f"{current_directory}/tests/config/install.sh {Settings.TEMP_DIR}/etc/clickhouse-server {Settings.TEMP_DIR}/etc/clickhouse-client",
# f"cp -a {current_directory}/programs/server/config.d/log_to_console.xml {Settings.TEMP_DIR}/etc/clickhouse-server/config.d/",
f"rm -f {Settings.TEMP_DIR}/etc/clickhouse-server/config.d/secure_ports.xml",
update_path_ch_config,
]
results.append(
Result.create_from_command_execution(
name="Install ClickHouse Config",
command=commands,
with_log=True,
)
)
res = results[-1].is_ok()
CH = ClickHouseProc()
if res and JobStages.TEST in stages:
stop_watch_ = Utils.Stopwatch()
step_name = "Start ClickHouse Server"
print(step_name)
res = CH.start()
res = res and CH.wait_ready()
results.append(
Result.create_from(name=step_name, status=res, stopwatch=stop_watch_)
)
if res and JobStages.TEST in stages:
step_name = "Tests"
print(step_name)
res = res and CH.run_fast_test()
if res:
results.append(FTResultsProcessor(wd=Settings.OUTPUT_DIR).run())
CH.terminate()
Result.create_from(results=results, stopwatch=stop_watch).finish_job_accordingly()
if __name__ == "__main__":
main()

File diff suppressed because it is too large

View File

@ -0,0 +1,59 @@
#!/usr/bin/env bash
# force-enable double star globbing
shopt -s globstar
# Perform spell checking on the docs
if [[ ${1:-} == "--help" ]] || [[ ${1:-} == "-h" ]]; then
echo "Usage $0 [--help|-h] [-i [filename]]"
echo " --help|-h: print this help"
echo " -i: interactive mode. If filename is specified, check only this file, otherwise check all files"
exit 0
fi
ROOT_PATH="."
CHECK_LANG=en
ASPELL_IGNORE_PATH="${ROOT_PATH}/utils/check-style/aspell-ignore/${CHECK_LANG}"
if [[ ${1:-} == "-i" ]]; then
if [[ ! -z ${2:-} ]]; then
FILES_TO_CHECK=${ROOT_PATH}/docs/${CHECK_LANG}/${2}
else
FILES_TO_CHECK=${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md
fi
for fname in ${FILES_TO_CHECK}; do
echo "Checking $fname"
aspell --personal=aspell-dict.txt --add-sgml-skip=code --encoding=utf-8 --mode=markdown -W 3 --lang=${CHECK_LANG} --home-dir=${ASPELL_IGNORE_PATH} -c "$fname"
done
exit
fi
STATUS=0
for fname in ${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md; do
errors=$(cat "$fname" \
| aspell list \
-W 3 \
--personal=aspell-dict.txt \
--add-sgml-skip=code \
--encoding=utf-8 \
--mode=markdown \
--lang=${CHECK_LANG} \
--home-dir=${ASPELL_IGNORE_PATH} \
| sort | uniq)
if [ ! -z "$errors" ]; then
STATUS=1
echo "====== $fname ======"
echo "$errors"
fi
done
if (( STATUS != 0 )); then
echo "====== Errors found ======"
echo "To exclude some words add them to the dictionary file \"${ASPELL_IGNORE_PATH}/aspell-dict.txt\""
echo "You can also run ${0} -i to see the errors interactively and fix them or add to the dictionary file"
fi
exit ${STATUS}

View File

@ -0,0 +1,321 @@
#!/usr/bin/env bash
# For code formatting we have clang-format.
#
# But it's not sane to apply clang-format to the whole code base,
# because it sometimes makes properly formatted files worse.
#
# It's only reasonable to blindly apply clang-format in cases
# when the code is likely to be out of style.
#
# For this purpose we have a script that uses very primitive heuristics
# (simple regexps) to check if the code is likely to have basic style violations,
# and then runs the formatter only on the affected files.
LC_ALL="en_US.UTF-8"
ROOT_PATH="."
EXCLUDE='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml'
EXCLUDE_DOCS='Settings\.cpp|FormatFactorySettingsDeclaration\.h'
# From [1]:
# But since array_to_string_internal() in array.c still loops over array
# elements and concatenates them into a string, it's probably not more
# efficient than the looping solutions proposed, but it's more readable.
#
# [1]: https://stackoverflow.com/a/15394738/328260
function in_array()
{
local IFS="|"
local value=$1 && shift
[[ "${IFS}${*}${IFS}" =~ "${IFS}${value}${IFS}" ]]
}
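# Example: in_array "b" "a" "b" "c" && echo "found"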
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
grep -vP $EXCLUDE |
grep -vP $EXCLUDE_DOCS |
xargs grep $@ -P '((class|struct|namespace|enum|if|for|while|else|throw|switch).*|\)(\s*const)?(\s*override)?\s*)\{$|\s$|^ {1,3}[^\* ]\S|\t|^\s*(if|else if|if constexpr|else if constexpr|for|while|catch|switch)\(|\( [^\s\\]|\S \)' |
# a curly brace not in a new line, but not for the case of C++11 init or agg. initialization | trailing whitespace | number of ws not a multiple of 4, but not in the case of comment continuation | missing whitespace after for/if/while... before opening brace | whitespaces inside braces
grep -v -P '(//|:\s+\*|\$\(\()| \)"'
# single-line comment | continuation of a multiline comment | a typical piece of embedded shell code | something like ending of raw string literal
# Tabs
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
grep -vP $EXCLUDE |
xargs grep $@ -F $'\t' && echo '^ tabs are not allowed'
# // namespace comments are unneeded
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
grep -vP $EXCLUDE |
xargs grep $@ -P '}\s*//+\s*namespace\s*'
# Broken symlinks
find -L $ROOT_PATH -type l 2>/dev/null | grep -v contrib && echo "^ Broken symlinks found"
# Duplicated or incorrect setting declarations
bash $ROOT_PATH/utils/check-style/check-settings-style
# Unused/Undefined/Duplicates ErrorCodes/ProfileEvents/CurrentMetrics
declare -A EXTERN_TYPES
EXTERN_TYPES[ErrorCodes]=int
EXTERN_TYPES[ProfileEvents]=Event
EXTERN_TYPES[CurrentMetrics]=Metric
EXTERN_TYPES_EXCLUDES=(
ProfileEvents::global_counters
ProfileEvents::Event
ProfileEvents::Count
ProfileEvents::Counters
ProfileEvents::end
ProfileEvents::increment
ProfileEvents::incrementForLogMessage
ProfileEvents::getName
ProfileEvents::Timer
ProfileEvents::Type
ProfileEvents::TypeEnum
ProfileEvents::ValueType
ProfileEvents::dumpToMapColumn
ProfileEvents::getProfileEvents
ProfileEvents::ThreadIdToCountersSnapshot
ProfileEvents::LOCAL_NAME
ProfileEvents::keeper_profile_events
ProfileEvents::CountersIncrement
ProfileEvents::size
CurrentMetrics::add
CurrentMetrics::sub
CurrentMetrics::get
CurrentMetrics::set
CurrentMetrics::end
CurrentMetrics::Increment
CurrentMetrics::Metric
CurrentMetrics::values
CurrentMetrics::Value
CurrentMetrics::keeper_metrics
CurrentMetrics::size
ErrorCodes::ErrorCode
ErrorCodes::getName
ErrorCodes::increment
ErrorCodes::end
ErrorCodes::values
ErrorCodes::values[i]
ErrorCodes::getErrorCodeByName
ErrorCodes::Value
)
for extern_type in ${!EXTERN_TYPES[@]}; do
type_of_extern=${EXTERN_TYPES[$extern_type]}
allowed_chars='[_A-Za-z]+'
# Unused
# NOTE: to fix automatically, replace echo with:
# sed -i "/extern const $type_of_extern $val/d" $file
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
# NOTE: the check is pretty dumb and distinguishes only by the type_of_extern,
# and this matches with zkutil::CreateMode
grep -v -e 'src/Common/ZooKeeper/Types.h' -e 'src/Coordination/KeeperConstants.cpp'
} | {
grep -vP $EXCLUDE | xargs grep -l -P "extern const $type_of_extern $allowed_chars"
} | while read file; do
grep -P "extern const $type_of_extern $allowed_chars;" $file | sed -r -e "s/^.*?extern const $type_of_extern ($allowed_chars);.*?$/\1/" | while read val; do
if ! grep -q "$extern_type::$val" $file; then
# Excludes for SOFTWARE_EVENT/HARDWARE_EVENT/CACHE_EVENT in ThreadProfileEvents.cpp
if [[ ! $extern_type::$val =~ ProfileEvents::Perf.* ]]; then
echo "$extern_type::$val is defined but not used in file $file"
fi
fi
done
done
# Undefined
# NOTE: to fix automatically, replace echo with:
# ( grep -q -F 'namespace $extern_type' $file && \
# sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || \
# awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file )
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
grep -vP $EXCLUDE | xargs grep -l -P "$extern_type::$allowed_chars"
} | while read file; do
grep -P "$extern_type::$allowed_chars" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::($allowed_chars).*?$/\1/" | while read val; do
if ! grep -q "extern const $type_of_extern $val" $file; then
if ! in_array "$extern_type::$val" "${EXTERN_TYPES_EXCLUDES[@]}"; then
echo "$extern_type::$val is used in file $file but not defined"
fi
fi
done
done
# Duplicates
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
grep -vP $EXCLUDE | xargs grep -l -P "$extern_type::$allowed_chars"
} | while read file; do
grep -P "extern const $type_of_extern $allowed_chars;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file"
done
done
# Three or more consecutive empty lines
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
while read file; do awk '/^$/ { ++i; if (i > 2) { print "More than two consecutive empty lines in file '$file'" } } /./ { i = 0 }' $file; done
# Check that every header file has #pragma once in first line
find $ROOT_PATH/{src,programs,utils} -name '*.h' |
grep -vP $EXCLUDE |
while read file; do [[ $(head -n1 $file) != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"; done
# Too many exclamation marks
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
xargs grep -F '!!!' | grep -P '.' && echo "Too many exclamation marks (looks dirty, unconfident)."
# Exclamation mark in a message
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
xargs grep -F '!",' | grep -P '.' && echo "No need for an exclamation mark (looks dirty, unconfident)."
# Trailing whitespaces
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
xargs grep -n -P ' $' | grep -n -P '.' && echo "^ Trailing whitespaces."
# Forbid stringstream because it's easy to use them incorrectly and hard to debug possible issues
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
xargs grep -P 'std::[io]?stringstream' | grep -v "STYLE_CHECK_ALLOW_STD_STRING_STREAM" && echo "Use WriteBufferFromOwnString or ReadBufferFromString instead of std::stringstream"
# Forbid std::cerr/std::cout in src (fine in programs/utils)
std_cerr_cout_excludes=(
/examples/
/tests/
_fuzzer
# OK
src/Common/ProgressIndication.cpp
src/Common/ProgressTable.cpp
# only under #ifdef DBMS_HASH_MAP_DEBUG_RESIZES, that is used only in tests
src/Common/HashTable/HashTable.h
# SensitiveDataMasker::printStats()
src/Common/SensitiveDataMasker.cpp
# StreamStatistics::print()
src/Compression/LZ4_decompress_faster.cpp
# ContextSharedPart with subsequent std::terminate()
src/Interpreters/Context.cpp
# IProcessor::dump()
src/Processors/IProcessor.cpp
src/Client/ClientApplicationBase.cpp
src/Client/ClientBase.cpp
src/Client/LineReader.cpp
src/Client/QueryFuzzer.cpp
src/Client/Suggest.cpp
src/Client/ClientBase.h
src/Client/LineReader.h
src/Client/ReplxxLineReader.h
src/Bridge/IBridge.cpp
src/Daemon/BaseDaemon.cpp
src/Loggers/Loggers.cpp
src/Common/GWPAsan.cpp
src/Common/ProgressIndication.h
)
sources_with_std_cerr_cout=( $(
find $ROOT_PATH/{src,base} -name '*.h' -or -name '*.cpp' | \
grep -vP $EXCLUDE | \
grep -F -v $(printf -- "-e %s " "${std_cerr_cout_excludes[@]}") | \
xargs grep -F --with-filename -e std::cerr -e std::cout | cut -d: -f1 | sort -u
) )
# Exclude comments
for src in "${sources_with_std_cerr_cout[@]}"; do
# suppress stderr, since it may contain a warning for #pragma once in headers
if gcc -fpreprocessed -dD -E "$src" 2>/dev/null | grep -F -q -e std::cerr -e std::cout; then
echo "$src: uses std::cerr/std::cout"
fi
done
expect_tests=( $(find $ROOT_PATH/tests/queries -name '*.expect') )
for test_case in "${expect_tests[@]}"; do
pattern="^exp_internal -f \$CLICKHOUSE_TMP/\$basename.debuglog 0$"
grep -q "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"
if grep -q "^spawn.*CLICKHOUSE_CLIENT_BINARY$" "$test_case"; then
pattern="^spawn.*CLICKHOUSE_CLIENT_BINARY.*--history_file$"
grep -q "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"
fi
# Otherwise expect_after/expect_before will not bail without stdin attached
# (and actually this is a hack anyway; the correct way is to use $any_spawn_id)
pattern="-i \$any_spawn_id timeout"
grep -q -- "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"
pattern="-i \$any_spawn_id eof"
grep -q -- "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"
done
# Forbid non-unique error codes
if [[ "$(grep -Po "M\([0-9]*," $ROOT_PATH/src/Common/ErrorCodes.cpp | wc -l)" != "$(grep -Po "M\([0-9]*," $ROOT_PATH/src/Common/ErrorCodes.cpp | sort | uniq | wc -l)" ]]
then
echo "ErrorCodes.cpp contains non-unique error codes"
fi
# Check that there is no system-wide libraries/headers in use.
#
# NOTE: it is better to override find_path/find_library in cmake, but right now
# it is not possible, see [1] for the reference.
#
# [1]: git grep --recurse-submodules -e find_library -e find_path contrib
if git grep -e find_path -e find_library -- :**CMakeLists.txt; then
echo "There is find_path/find_library usage. ClickHouse should use everything bundled. Consider adding one more contrib module."
fi
# Forbid std::filesystem::is_symlink and std::filesystem::read_symlink, because it's easy to use them incorrectly
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
xargs grep -P '::(is|read)_symlink' | grep -v "STYLE_CHECK_ALLOW_STD_FS_SYMLINK" && echo "Use DB::FS::isSymlink and DB::FS::readSymlink instead"
# Forbid __builtin_unreachable(), because it's hard to debug when it becomes reachable
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
xargs grep -P '__builtin_unreachable' && echo "Use UNREACHABLE() from defines.h instead"
# Forbid mt19937() and random_device() which are outdated and slow
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
xargs grep -P '(std::mt19937|std::mersenne_twister_engine|std::random_device)' && echo "Use pcg64_fast (from pcg_random.h) and randomSeed (from Common/randomSeed.h) instead"
# Require checking return value of close(),
# since it can hide fd misuse and break other places.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
xargs grep -e ' close(.*fd' -e ' ::close(' | grep -v = && echo "Return value of close() should be checked"
# A small typo can lead to debug code in release builds, see https://github.com/ClickHouse/ClickHouse/pull/47647
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -l -F '#ifdef NDEBUG' | xargs -I@FILE awk '/#ifdef NDEBUG/ { inside = 1; dirty = 1 } /#endif/ { if (inside && dirty) { print "File @FILE has suspicious #ifdef NDEBUG, possibly confused with #ifndef NDEBUG" }; inside = 0 } /#else/ { dirty = 0 }' @FILE
# If a user is doing dynamic or typeid cast with a pointer, and immediately dereferencing it, it is unsafe.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep --line-number -P '(dynamic|typeid)_cast<[^>]+\*>\([^\(\)]+\)->' | grep -P '.' && echo "It's suspicious when you are doing a dynamic_cast or typeid_cast with a pointer and immediately dereferencing it. Use references instead of pointers or check a pointer to nullptr."
# Check for bad punctuation: whitespace before comma.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'"
# Check usage of std::regex which is too bloated and slow.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number 'std::regex' | grep -P '.' && echo "^ Please use re2 instead of std::regex"
# Cyrillic characters hiding inside Latin.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | grep -v StorageSystemContributors.generated.cpp | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place."
# Orphaned header files.
join -v1 <(find $ROOT_PATH/{src,programs,utils} -name '*.h' -printf '%f\n' | sort | uniq) <(find $ROOT_PATH/{src,programs,utils} -name '*.cpp' -or -name '*.c' -or -name '*.h' -or -name '*.S' | xargs grep --no-filename -o -P '[\w-]+\.h' | sort | uniq) |
grep . && echo '^ Found orphan header files.'
# Don't allow dynamic compiler check with CMake, because we are using hermetic, reproducible, cross-compiled, static (TLDR, good) builds.
ls -1d $ROOT_PATH/contrib/*-cmake | xargs -I@ find @ -name 'CMakeLists.txt' -or -name '*.cmake' | xargs grep --with-filename -i -P 'check_c_compiler_flag|check_cxx_compiler_flag|check_c_source_compiles|check_cxx_source_compiles|check_include_file|check_symbol_exists|cmake_push_check_state|cmake_pop_check_state|find_package|CMAKE_REQUIRED_FLAGS|CheckIncludeFile|CheckCCompilerFlag|CheckCXXCompilerFlag|CheckCSourceCompiles|CheckCXXSourceCompiles|CheckCSymbolExists|CheckCXXSymbolExists' | grep -v Rust && echo "^ It's not allowed to have dynamic compiler checks with CMake."
# Wrong spelling of abbreviations, e.g. SQL is right, Sql is wrong. XMLHttpRequest is very wrong.
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
xargs grep -P 'Sql|Html|Xml|Cpu|Tcp|Udp|Http|Db|Json|Yaml' | grep -v -P 'RabbitMQ|Azure|Aws|aws|Avro|IO/S3' &&
echo "Abbreviations such as SQL, XML, HTTP, should be in all caps. For example, SQL is right, Sql is wrong. XMLHttpRequest is very wrong."
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
grep -vP $EXCLUDE |
xargs grep -F -i 'ErrorCodes::LOGICAL_ERROR, "Logical error:' &&
echo "If an exception has LOGICAL_ERROR code, there is no need to include the text 'Logical error' in the exception message, because then the phrase 'Logical error' will be printed twice."
PATTERN="allow_";
DIFF=$(comm -3 <(grep -o "\b$PATTERN\w*\b" $ROOT_PATH/src/Core/Settings.cpp | sort -u) <(grep -o -h "\b$PATTERN\w*\b" $ROOT_PATH/src/Databases/enableAllExperimentalSettings.cpp $ROOT_PATH/utils/check-style/experimental_settings_ignore.txt | sort -u));
[ -n "$DIFF" ] && echo "$DIFF" && echo "^^ Detected 'allow_*' settings that might need to be included in src/Databases/enableAllExperimentalSettings.cpp" && echo "Alternatively, consider adding an exception to utils/check-style/experimental_settings_ignore.txt"


@ -0,0 +1,37 @@
#!/usr/bin/env bash
# The script checks if all submodules defined in $GIT_ROOT/.gitmodules exist in $GIT_ROOT/contrib
set -e
GIT_ROOT="."
cd "$GIT_ROOT"
# Strip the keys from the submodule.*.path parameters (the values are \0-separated)
# and check that each submodule directory exists
git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \
xargs -P100 -0 --no-run-if-empty -I{} bash -c 'if ! test -d '"'{}'"'; then echo Directory for submodule {} is not found; exit 1; fi' 2>&1
# And check that the submodule is fine
git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \
xargs -P100 -0 --no-run-if-empty -I{} git submodule status -q '{}' 2>&1
# All submodules should be from https://github.com/
git config --file "$ROOT_PATH/.gitmodules" --get-regexp 'submodule\..+\.url' | \
while read -r line; do
name=${line#submodule.}; name=${name%.url*}
url=${line#* }
[[ "$url" != 'https://github.com/'* ]] && echo "All submodules should be from https://github.com/, submodule '$name' has '$url'"
done
# All submodules should be of this form: [submodule "contrib/libxyz"] (for consistency, the submodule name should match its path)
# - restrict the check to top-level .gitmodules file
git config --file "$ROOT_PATH/.gitmodules" --get-regexp 'submodule\..+\.path' | \
while read -r line; do
name=${line#submodule.}; name=${name%.path*}
path=${line#* }
[ "$name" != "$path" ] && echo "Submodule name '$name' is not equal to it's path '$path'"
done
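# Example of a line this loop parses (hypothetical submodule):
#   submodule.contrib/libfoo.path contrib/libfoo
# ${line#submodule.} strips the leading "submodule." and ${name%.path*} drops the ".path <value>" tail,
# leaving name="contrib/libfoo" and path="contrib/libfoo" for the comparison above.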


@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Check for typos in code.
ROOT_PATH="."
#FIXME: check all (or almost all) repo
codespell \
--skip "*generated*,*gperf*,*.bin,*.mrk*,*.idx,checksums.txt,*.dat,*.pyc,*.kate-swp,*obfuscateQueries.cpp,d3-*.js,*.min.js,*.sum,${ROOT_PATH}/utils/check-style/aspell-ignore" \
--ignore-words "${ROOT_PATH}/utils/check-style/codespell-ignore-words.list" \
--exclude-file "${ROOT_PATH}/utils/check-style/codespell-ignore-lines.list" \
--quiet-level 2 \
"$ROOT_PATH"/{src,base,programs,utils} \
"$@" | grep -P '.' \
&& echo -e "\nFound some typos in code.\nSee the files utils/check-style/codespell* if you want to add an exception."


@ -0,0 +1,98 @@
#!/bin/bash
ROOT_PATH="."
# Queries to system.query_log/system.query_thread_log should have current_database = currentDatabase() condition
# NOTE: it is not that accurate, but at least something.
tests_with_query_log=( $(
find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
xargs grep --with-filename -e system.query_log -e system.query_thread_log | cut -d: -f1 | sort -u
) )
for test_case in "${tests_with_query_log[@]}"; do
grep -qE current_database.*currentDatabase "$test_case" || {
grep -qE 'current_database.*\$CLICKHOUSE_DATABASE' "$test_case"
} || echo "Queries to system.query_log/system.query_thread_log does not have current_database = currentDatabase() condition in $test_case"
done
grep -iE 'SYSTEM STOP MERGES;?$' -R $ROOT_PATH/tests/queries && echo "Merges cannot be disabled globally in fast/stateful/stateless tests, because it will break concurrently running queries"
# Queries to:
tables_with_database_column=(
system.tables
system.parts
system.detached_parts
system.parts_columns
system.columns
system.projection_parts
system.mutations
)
# should have database = currentDatabase() condition
#
# NOTE: it is not that accurate, but at least something.
tests_with_database_column=( $(
find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
xargs grep --with-filename $(printf -- "-e %s " "${tables_with_database_column[@]}") |
grep -v -e ':--' -e ':#' |
cut -d: -f1 | sort -u
) )
for test_case in "${tests_with_database_column[@]}"; do
grep -qE database.*currentDatabase "$test_case" || {
grep -qE 'database.*\$CLICKHOUSE_DATABASE' "$test_case"
} || {
# explicit database
grep -qE "database[ ]*=[ ]*'" "$test_case"
} || {
echo "Queries to ${tables_with_database_column[*]} does not have database = currentDatabase()/\$CLICKHOUSE_DATABASE condition in $test_case"
}
done
# Queries with ReplicatedMergeTree
# NOTE: it is not that accurate, but at least something.
tests_with_replicated_merge_tree=( $(
find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
xargs grep --with-filename -e "Replicated.*MergeTree[ ]*(.*" | cut -d: -f1 | sort -u
) )
for test_case in "${tests_with_replicated_merge_tree[@]}"; do
case "$test_case" in
*.gen.*)
;;
*.sh)
test_case_zk_prefix="\(\$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX\|{database}\)"
grep -q -e "Replicated.*MergeTree[ ]*(.*$test_case_zk_prefix" "$test_case" || echo "Replicated.*MergeTree should contain '$test_case_zk_prefix' in zookeeper path to avoid overlaps ($test_case)"
;;
*.sql|*.sql.j2)
test_case_zk_prefix="\({database}\|currentDatabase()\|{uuid}\|{default_path_test}\)"
grep -q -e "Replicated.*MergeTree[ ]*(.*$test_case_zk_prefix" "$test_case" || echo "Replicated.*MergeTree should contain '$test_case_zk_prefix' in zookeeper path to avoid overlaps ($test_case)"
;;
*.py)
# Right now there are no such tests anyway
echo "No ReplicatedMergeTree style check for *.py ($test_case)"
;;
esac
done
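# Example of a definition that satisfies this check (hypothetical table):
#   CREATE TABLE t (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/t', 'r1') ORDER BY x;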
# The stateful directory should only contain the tests that depend on the test dataset (hits or visits).
find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -v '00076_system_columns_bytes' | xargs -I{} bash -c 'grep -q -P "hits|visits" "{}" || echo "The test {} does not depend on the test dataset (hits or visits table) and should be located in the 0_stateless directory. You can also add an exception to the check-style script."'
# Check for existence of __init__.py files
for i in "${ROOT_PATH}"/tests/integration/test_*; do FILE="${i}/__init__.py"; [ ! -f "${FILE}" ] && echo "${FILE} should exist for every integration test"; done
# Check for executable bit on non-executable files
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable."
# Check for BOM
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM"
# Conflict markers
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' |
xargs grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' && echo "Conflict markers are found in files"
# DOS/Windows newlines
find $ROOT_PATH/{base,src,programs,utils,docs} -name '*.md' -or -name '*.h' -or -name '*.cpp' -or -name '*.js' -or -name '*.py' -or -name '*.html' | xargs grep -l -P '\r$' && echo "^ Files contain DOS/Windows newlines (\r\n instead of \n)."
# # workflows check
# act --list --directory="$ROOT_PATH" 1>/dev/null 2>&1 || act --list --directory="$ROOT_PATH" 2>&1
# actionlint -ignore 'reusable workflow call.+' || :


@ -0,0 +1,37 @@
#!/usr/bin/perl
use strict;
# Find double whitespace such as "a,  b,  c" that looks very ugly and annoying.
# But skip double whitespaces if they are used as an alignment - by comparing to surrounding lines.
my $ret = 0;
foreach my $file (@ARGV)
{
my @array;
open (FH,'<',$file);
while (<FH>)
{
push @array, $_;
}
for (my $i = 1; $i < $#array; ++$i)
{
if ($array[$i] =~ ',( {2,3})[^ /]')
{
# https://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl
if ((substr($array[$i - 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) # whitespaces are not part of alignment
&& (substr($array[$i + 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/)
&& $array[$i] !~ /(-?\d+\w*,\s+){3,}/) # this is not a number table like { 10, -1, 2 }
{
print($file . ":" . ($i + 1) . $array[$i]);
$ret = 1;
}
}
}
}
exit $ret;
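# Example (hypothetical): the middle line below triggers the check, because the double space
# after the comma does not line up with anything on the neighboring lines:
#   f(a, b);
#   f(a,  b);
#   f(a, b);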


@ -0,0 +1,284 @@
import dataclasses
from typing import List
from praktika.environment import Environment
from praktika.result import Result
OK_SIGN = "[ OK "
FAIL_SIGN = "[ FAIL "
TIMEOUT_SIGN = "[ Timeout! "
UNKNOWN_SIGN = "[ UNKNOWN "
SKIPPED_SIGN = "[ SKIPPED "
HUNG_SIGN = "Found hung queries in processlist"
SERVER_DIED_SIGN = "Server died, terminating all processes"
SERVER_DIED_SIGN2 = "Server does not respond to health check"
DATABASE_SIGN = "Database: "
SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"]
RETRIES_SIGN = "Some tests were restarted"
# def write_results(results_file, status_file, results, status):
# with open(results_file, "w", encoding="utf-8") as f:
# out = csv.writer(f, delimiter="\t")
# out.writerows(results)
# with open(status_file, "w", encoding="utf-8") as f:
# out = csv.writer(f, delimiter="\t")
# out.writerow(status)
BROKEN_TESTS_ANALYZER_TECH_DEBT = [
"01624_soft_constraints",
# Check after ConstantNode refactoring
"02944_variant_as_common_type",
]
class FTResultsProcessor:
@dataclasses.dataclass
class Summary:
total: int
skipped: int
unknown: int
failed: int
success: int
test_results: List[Result]
hung: bool = False
server_died: bool = False
retries: bool = False
success_finish: bool = False
test_end: bool = True
def __init__(self, wd):
self.tests_output_file = f"{wd}/test_result.txt"
# self.test_results_parsed_file = f"{wd}/test_result.tsv"
# self.status_file = f"{wd}/check_status.tsv"
self.broken_tests = BROKEN_TESTS_ANALYZER_TECH_DEBT
def _process_test_output(self):
total = 0
skipped = 0
unknown = 0
failed = 0
success = 0
hung = False
server_died = False
retries = False
success_finish = False
test_results = []
test_end = True
with open(self.tests_output_file, "r", encoding="utf-8") as test_file:
for line in test_file:
original_line = line
line = line.strip()
if any(s in line for s in SUCCESS_FINISH_SIGNS):
success_finish = True
# Ignore hung check report, since it may be quite large.
# (and may break python parser which has limit of 128KiB for each row).
if HUNG_SIGN in line:
hung = True
break
if SERVER_DIED_SIGN in line or SERVER_DIED_SIGN2 in line:
server_died = True
if RETRIES_SIGN in line:
retries = True
if any(
sign in line
for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)
):
test_name = line.split(" ")[2].split(":")[0]
test_time = ""
try:
time_token = line.split("]")[1].strip().split()[0]
float(time_token)
test_time = time_token
except (ValueError, IndexError):  # the token after ']' is not always a parsable time
pass
total += 1
if TIMEOUT_SIGN in line:
if test_name in self.broken_tests:
success += 1
test_results.append((test_name, "BROKEN", test_time, []))
else:
failed += 1
test_results.append((test_name, "Timeout", test_time, []))
elif FAIL_SIGN in line:
if test_name in self.broken_tests:
success += 1
test_results.append((test_name, "BROKEN", test_time, []))
else:
failed += 1
test_results.append((test_name, "FAIL", test_time, []))
elif UNKNOWN_SIGN in line:
unknown += 1
test_results.append((test_name, "FAIL", test_time, []))
elif SKIPPED_SIGN in line:
skipped += 1
test_results.append((test_name, "SKIPPED", test_time, []))
else:
if OK_SIGN in line and test_name in self.broken_tests:
skipped += 1
test_results.append(
(
test_name,
"NOT_FAILED",
test_time,
[
"This test passed. Update analyzer_tech_debt.txt.\n"
],
)
)
else:
success += int(OK_SIGN in line)
test_results.append((test_name, "OK", test_time, []))
test_end = False
elif (
len(test_results) > 0
and test_results[-1][1] == "FAIL"
and not test_end
):
test_results[-1][3].append(original_line)
# Database printed after everything else in case of failures,
# so this is a stop marker for capturing test output.
#
# And it is handled after everything else to include the line with the database in the report.
if DATABASE_SIGN in line:
test_end = True
test_results = [
Result(
name=test[0],
status=test[1],
start_time=None,
duration=float(test[2]) if test[2] else 0.0,  # the time token may be missing
info="".join(test[3])[:8192],
)
for test in test_results
]
s = self.Summary(
total=total,
skipped=skipped,
unknown=unknown,
failed=failed,
success=success,
test_results=test_results,
hung=hung,
server_died=server_died,
success_finish=success_finish,
retries=retries,
)
return s
def run(self):
state = Result.Status.SUCCESS
s = self._process_test_output()
test_results = s.test_results
# # Check test_results.tsv for sanitizer asserts, crashes and other critical errors.
# # If the file is present, it's expected to be generated by stress_test.lib check for critical errors
# # In the end this file will be fully regenerated, including both results from critical errors check and
# # functional test results.
# if test_results_path and os.path.exists(test_results_path):
# with open(test_results_path, "r", encoding="utf-8") as test_results_file:
# existing_test_results = list(
# csv.reader(test_results_file, delimiter="\t")
# )
# for test in existing_test_results:
# if len(test) < 2:
# unknown += 1
# else:
# test_results.append(test)
#
# if test[1] != "OK":
# failed += 1
# else:
# success += 1
# is_flaky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
# logging.info("Is flaky check: %s", is_flaky_check)
# # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
# # But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
# if failed != 0 or unknown != 0 or (success == 0 and (not is_flaky_check)):
if s.failed != 0 or s.unknown != 0:
state = Result.Status.FAILED
if s.hung:
state = Result.Status.FAILED
test_results.append(
Result("Some queries hung", "FAIL", info="Some queries hung")
)
elif s.server_died:
state = Result.Status.FAILED
# When ClickHouse server crashes, some tests are still running
# and fail because they cannot connect to server
for result in test_results:
if result.status == "FAIL":
result.status = "SERVER_DIED"
test_results.append(Result("Server died", "FAIL", info="Server died"))
elif not s.success_finish:
state = Result.Status.FAILED
test_results.append(
Result("Tests are not finished", "FAIL", info="Tests are not finished")
)
elif s.retries:
test_results.append(
Result("Some tests restarted", "SKIPPED", info="Some tests restarted")
)
else:
pass
# TODO: !!!
# def test_result_comparator(item):
# # sort by status then by check name
# order = {
# "FAIL": 0,
# "SERVER_DIED": 1,
# "Timeout": 2,
# "NOT_FAILED": 3,
# "BROKEN": 4,
# "OK": 5,
# "SKIPPED": 6,
# }
# return order.get(item[1], 10), str(item[0]), item[1]
#
# test_results.sort(key=test_result_comparator)
return Result.create_from(
name=Environment.JOB_NAME,
results=test_results,
status=state,
files=[self.tests_output_file],
with_info_from_results=False,
)
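# A minimal usage sketch (illustrative path; assumes the functional test runner
# has already produced <wd>/test_result.txt):
#   processor = FTResultsProcessor("/tmp/ft")
#   result = processor.run()
#   print(result.status, len(result.test_results))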
# if __name__ == "__main__":
# logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
# parser = argparse.ArgumentParser(
# description="ClickHouse script for parsing results of functional tests"
# )
#
# parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
# parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
# args = parser.parse_args()
#
# broken_tests = []
# state, description, test_results = process_result(
# args.in_results_dir,
# broken_tests,
# args.in_test_result_file,
# args.in_results_file,
# )
# logging.info("Result parsed")
# status = (state, description)
#
#
#
# write_results(args.out_results_file, args.out_status_file, test_results, status)
# logging.info("Result written")

ci/praktika/__init__.py Normal file

@ -0,0 +1,5 @@
from .artifact import Artifact
from .docker import Docker
from .job import Job
from .secret import Secret
from .workflow import Workflow

ci/praktika/__main__.py Normal file

@ -0,0 +1,94 @@
import argparse
import sys
from praktika.html_prepare import Html
from praktika.utils import Utils
from praktika.validator import Validator
from praktika.yaml_generator import YamlGenerator
def create_parser():
parser = argparse.ArgumentParser(prog="python3 -m praktika")
subparsers = parser.add_subparsers(dest="command", help="Available subcommands")
run_parser = subparsers.add_parser("run", help="Job Runner")
run_parser.add_argument("--job", help="Job Name", type=str, required=True)
run_parser.add_argument(
"--workflow",
help="Workflow Name (required if job name is not uniq per config)",
type=str,
default="",
)
run_parser.add_argument(
"--no-docker",
help="Do not run job in docker even if job config says so, for local test",
action="store_true",
)
run_parser.add_argument(
"--docker",
help="Custom docker image for job run, for local test",
type=str,
default="",
)
run_parser.add_argument(
"--param",
help="Custom parameter to pass into a job script, it's up to job script how to use it, for local test",
type=str,
default=None,
)
run_parser.add_argument(
"--ci",
help="When not set - dummy env will be generated, for local test",
action="store_true",
default="",
)
_yaml_parser = subparsers.add_parser("yaml", help="Generates Yaml Workflows")
_html_parser = subparsers.add_parser("html", help="Uploads HTML page for reports")
return parser
if __name__ == "__main__":
parser = create_parser()
args = parser.parse_args()
if args.command == "yaml":
Validator().validate()
YamlGenerator().generate()
elif args.command == "html":
Html.prepare()
elif args.command == "run":
from praktika.mangle import _get_workflows
from praktika.runner import Runner
workflows = _get_workflows(name=args.workflow or None)
job_workflow_pairs = []
for workflow in workflows:
job = workflow.find_job(args.job, lazy=True)
if job:
job_workflow_pairs.append((job, workflow))
if not job_workflow_pairs:
Utils.raise_with_error(
f"Failed to find job [{args.job}] workflow [{args.workflow}]"
)
elif len(job_workflow_pairs) > 1:
Utils.raise_with_error(
f"More than one job [{args.job}] found - try specifying workflow name with --workflow"
)
else:
job, workflow = job_workflow_pairs[0][0], job_workflow_pairs[0][1]
print(f"Going to run job [{job.name}], workflow [{workflow.name}]")
Runner().run(
workflow=workflow,
job=job,
docker=args.docker,
dummy_env=not args.ci,
no_docker=args.no_docker,
param=args.param,
)
else:
parser.print_help()
sys.exit(1)
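# Example invocations (illustrative):
#   python3 -m praktika yaml                          # validate configs and regenerate workflow YAMLs
#   python3 -m praktika run --job "Style check" --ci  # run a single job the way CI would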

ci/praktika/_environment.py Normal file

@ -0,0 +1,198 @@
import dataclasses
import json
import os
from pathlib import Path
from types import SimpleNamespace
from typing import Any, Dict, List, Type
from praktika import Workflow
from praktika._settings import _Settings
from praktika.utils import MetaClasses, T
@dataclasses.dataclass
class _Environment(MetaClasses.Serializable):
WORKFLOW_NAME: str
JOB_NAME: str
REPOSITORY: str
BRANCH: str
SHA: str
PR_NUMBER: int
EVENT_TYPE: str
JOB_OUTPUT_STREAM: str
EVENT_FILE_PATH: str
CHANGE_URL: str
COMMIT_URL: str
BASE_BRANCH: str
RUN_ID: str
RUN_URL: str
INSTANCE_TYPE: str
INSTANCE_ID: str
INSTANCE_LIFE_CYCLE: str
LOCAL_RUN: bool = False
PARAMETER: Any = None
REPORT_INFO: List[str] = dataclasses.field(default_factory=list)
name = "environment"
@classmethod
def file_name_static(cls, _name=""):
return f"{_Settings.TEMP_DIR}/{cls.name}.json"
@classmethod
def from_dict(cls: Type[T], obj: Dict[str, Any]) -> T:
JOB_OUTPUT_STREAM = os.getenv("GITHUB_OUTPUT", "")
obj["JOB_OUTPUT_STREAM"] = JOB_OUTPUT_STREAM
if "PARAMETER" in obj:
obj["PARAMETER"] = _to_object(obj["PARAMETER"])
return cls(**obj)
def add_info(self, info):
self.REPORT_INFO.append(info)
self.dump()
@classmethod
def get(cls):
if Path(cls.file_name_static()).is_file():
return cls.from_fs("environment")
else:
print("WARNING: Environment: get from env")
env = cls.from_env()
env.dump()
return env
def set_job_name(self, job_name):
self.JOB_NAME = job_name
self.dump()
return self
@staticmethod
def get_needs_statuses():
if Path(_Settings.WORKFLOW_STATUS_FILE).is_file():
with open(_Settings.WORKFLOW_STATUS_FILE, "r", encoding="utf8") as f:
return json.load(f)
else:
print(
f"ERROR: Status file [{_Settings.WORKFLOW_STATUS_FILE}] does not exist"
)
raise RuntimeError()
@classmethod
def from_env(cls) -> "_Environment":
WORKFLOW_NAME = os.getenv("GITHUB_WORKFLOW", "")
JOB_NAME = os.getenv("JOB_NAME", "")
REPOSITORY = os.getenv("GITHUB_REPOSITORY", "")
BRANCH = os.getenv("GITHUB_HEAD_REF", "")
EVENT_FILE_PATH = os.getenv("GITHUB_EVENT_PATH", "")
JOB_OUTPUT_STREAM = os.getenv("GITHUB_OUTPUT", "")
RUN_ID = os.getenv("GITHUB_RUN_ID", "0")
RUN_URL = f"https://github.com/{REPOSITORY}/actions/runs/{RUN_ID}"
BASE_BRANCH = os.getenv("GITHUB_BASE_REF", "")
if EVENT_FILE_PATH:
with open(EVENT_FILE_PATH, "r", encoding="utf-8") as f:
github_event = json.load(f)
if "pull_request" in github_event:
EVENT_TYPE = Workflow.Event.PULL_REQUEST
PR_NUMBER = github_event["pull_request"]["number"]
SHA = github_event["pull_request"]["head"]["sha"]
CHANGE_URL = github_event["pull_request"]["html_url"]
COMMIT_URL = CHANGE_URL + f"/commits/{SHA}"
elif "commits" in github_event:
EVENT_TYPE = Workflow.Event.PUSH
SHA = github_event["after"]
CHANGE_URL = github_event["head_commit"]["url"] # commit url
PR_NUMBER = 0
COMMIT_URL = CHANGE_URL
else:
assert False, "TODO: not supported"
else:
print("WARNING: Local execution - dummy Environment will be generated")
SHA = "TEST"
PR_NUMBER = -1
EVENT_TYPE = Workflow.Event.PUSH
CHANGE_URL = ""
COMMIT_URL = ""
INSTANCE_TYPE = (
os.getenv("INSTANCE_TYPE", None)
# or Shell.get_output("ec2metadata --instance-type")
or ""
)
INSTANCE_ID = (
os.getenv("INSTANCE_ID", None)
# or Shell.get_output("ec2metadata --instance-id")
or ""
)
INSTANCE_LIFE_CYCLE = (
os.getenv("INSTANCE_LIFE_CYCLE", None)
# or Shell.get_output(
# "curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle"
# )
or ""
)
return _Environment(
WORKFLOW_NAME=WORKFLOW_NAME,
JOB_NAME=JOB_NAME,
REPOSITORY=REPOSITORY,
BRANCH=BRANCH,
EVENT_FILE_PATH=EVENT_FILE_PATH,
JOB_OUTPUT_STREAM=JOB_OUTPUT_STREAM,
SHA=SHA,
EVENT_TYPE=EVENT_TYPE,
PR_NUMBER=PR_NUMBER,
RUN_ID=RUN_ID,
CHANGE_URL=CHANGE_URL,
COMMIT_URL=COMMIT_URL,
RUN_URL=RUN_URL,
BASE_BRANCH=BASE_BRANCH,
INSTANCE_TYPE=INSTANCE_TYPE,
INSTANCE_ID=INSTANCE_ID,
INSTANCE_LIFE_CYCLE=INSTANCE_LIFE_CYCLE,
REPORT_INFO=[],
)
def get_s3_prefix(self, latest=False):
return self.get_s3_prefix_static(self.PR_NUMBER, self.BRANCH, self.SHA, latest)
@classmethod
def get_s3_prefix_static(cls, pr_number, branch, sha, latest=False):
prefix = ""
if pr_number > 0:
prefix += f"{pr_number}"
else:
prefix += f"{branch}"
if latest:
prefix += f"/latest"
elif sha:
prefix += f"/{sha}"
return prefix
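# For example (hypothetical values): PR 123 with sha "abc1" -> "123/abc1";
# a push to master with latest=True -> "master/latest".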
# TODO: find a better place for the function. This file should not import praktika.settings
# as it requires reading the user's config; that's why the imports are nested inside the function
def get_report_url(self):
import urllib
from praktika.settings import Settings
from praktika.utils import Utils
path = Settings.HTML_S3_PATH
for bucket, endpoint in Settings.S3_BUCKET_TO_HTTP_ENDPOINT.items():
if bucket in path:
path = path.replace(bucket, endpoint)
break
REPORT_URL = f"https://{path}/{Path(Settings.HTML_PAGE_FILE).name}?PR={self.PR_NUMBER}&sha={self.SHA}&name_0={urllib.parse.quote(self.WORKFLOW_NAME, safe='')}&name_1={urllib.parse.quote(self.JOB_NAME, safe='')}"
return REPORT_URL
def is_local_run(self):
return self.LOCAL_RUN
def _to_object(data):
if isinstance(data, dict):
return SimpleNamespace(**{k: _to_object(v) for k, v in data.items()})
elif isinstance(data, list):
return [_to_object(i) for i in data]
else:
return data

ci/praktika/_settings.py Normal file

@ -0,0 +1,124 @@
import dataclasses
from pathlib import Path
from typing import Dict, Iterable, List, Optional
@dataclasses.dataclass
class _Settings:
######################################
# Pipeline generation settings #
######################################
CI_PATH = "./ci"
WORKFLOW_PATH_PREFIX: str = "./.github/workflows"
WORKFLOWS_DIRECTORY: str = f"{CI_PATH}/workflows"
SETTINGS_DIRECTORY: str = f"{CI_PATH}/settings"
CI_CONFIG_JOB_NAME = "Config Workflow"
DOCKER_BUILD_JOB_NAME = "Docker Builds"
FINISH_WORKFLOW_JOB_NAME = "Finish Workflow"
READY_FOR_MERGE_STATUS_NAME = "Ready for Merge"
CI_CONFIG_RUNS_ON: Optional[List[str]] = None
DOCKER_BUILD_RUNS_ON: Optional[List[str]] = None
VALIDATE_FILE_PATHS: bool = True
######################################
# Runtime Settings #
######################################
MAX_RETRIES_S3 = 3
MAX_RETRIES_GH = 3
######################################
# S3 (artifact storage) settings #
######################################
S3_ARTIFACT_PATH: str = ""
######################################
# CI workspace settings #
######################################
TEMP_DIR: str = "/tmp/praktika"
OUTPUT_DIR: str = f"{TEMP_DIR}/output"
INPUT_DIR: str = f"{TEMP_DIR}/input"
PYTHON_INTERPRETER: str = "python3"
PYTHON_PACKET_MANAGER: str = "pip3"
PYTHON_VERSION: str = "3.9"
INSTALL_PYTHON_FOR_NATIVE_JOBS: bool = False
INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS: str = "./ci/requirements.txt"
ENVIRONMENT_VAR_FILE: str = f"{TEMP_DIR}/environment.json"
RUN_LOG: str = f"{TEMP_DIR}/praktika_run.log"
SECRET_GH_APP_ID: str = "GH_APP_ID"
SECRET_GH_APP_PEM_KEY: str = "GH_APP_PEM_KEY"
ENV_SETUP_SCRIPT: str = "/tmp/praktika_setup_env.sh"
WORKFLOW_STATUS_FILE: str = f"{TEMP_DIR}/workflow_status.json"
######################################
# CI Cache settings #
######################################
CACHE_VERSION: int = 1
CACHE_DIGEST_LEN: int = 20
CACHE_S3_PATH: str = ""
CACHE_LOCAL_PATH: str = f"{TEMP_DIR}/ci_cache"
######################################
# Report settings #
######################################
HTML_S3_PATH: str = ""
HTML_PAGE_FILE: str = "./praktika/json.html"
TEXT_CONTENT_EXTENSIONS: Iterable[str] = frozenset([".txt", ".log"])
S3_BUCKET_TO_HTTP_ENDPOINT: Optional[Dict[str, str]] = None
DOCKERHUB_USERNAME: str = ""
DOCKERHUB_SECRET: str = ""
DOCKER_WD: str = "/wd"
######################################
# CI DB Settings #
######################################
SECRET_CI_DB_URL: str = "CI_DB_URL"
SECRET_CI_DB_PASSWORD: str = "CI_DB_PASSWORD"
CI_DB_DB_NAME = ""
CI_DB_TABLE_NAME = ""
CI_DB_INSERT_TIMEOUT_SEC = 5
_USER_DEFINED_SETTINGS = [
"S3_ARTIFACT_PATH",
"CACHE_S3_PATH",
"HTML_S3_PATH",
"S3_BUCKET_TO_HTTP_ENDPOINT",
"TEXT_CONTENT_EXTENSIONS",
"TEMP_DIR",
"OUTPUT_DIR",
"INPUT_DIR",
"CI_CONFIG_RUNS_ON",
"DOCKER_BUILD_RUNS_ON",
"CI_CONFIG_JOB_NAME",
"PYTHON_INTERPRETER",
"PYTHON_VERSION",
"PYTHON_PACKET_MANAGER",
"INSTALL_PYTHON_FOR_NATIVE_JOBS",
"INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS",
"MAX_RETRIES_S3",
"MAX_RETRIES_GH",
"VALIDATE_FILE_PATHS",
"DOCKERHUB_USERNAME",
"DOCKERHUB_SECRET",
"READY_FOR_MERGE_STATUS_NAME",
"SECRET_CI_DB_URL",
"SECRET_CI_DB_PASSWORD",
"CI_DB_DB_NAME",
"CI_DB_TABLE_NAME",
"CI_DB_INSERT_TIMEOUT_SEC",
"SECRET_GH_APP_PEM_KEY",
"SECRET_GH_APP_ID",
]
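# A project overrides the settings listed above from its own settings module; a sketch with illustrative values:
#   S3_ARTIFACT_PATH = "my-bucket/praktika/artifacts"
#   CI_CONFIG_RUNS_ON = ["style-checker"]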
class GHRunners:
ubuntu = "ubuntu-latest"
if __name__ == "__main__":
for setting in _USER_DEFINED_SETTINGS:
print(_Settings().__getattribute__(setting))
# print(dataclasses.asdict(_Settings()))

ci/praktika/artifact.py Normal file

@ -0,0 +1,33 @@
from dataclasses import dataclass
class Artifact:
class Type:
GH = "github"
S3 = "s3"
PHONY = "phony"
@dataclass
class Config:
"""
name - artifact name
type - artifact type, see Artifact.Type
path - file path or glob, e.g. "path/**/[abc]rtifac?/*"
"""
name: str
type: str
path: str
_provided_by: str = ""
_s3_path: str = ""
def is_s3_artifact(self):
return self.type == Artifact.Type.S3
@classmethod
def define_artifact(cls, name, type, path):
return cls.Config(name=name, type=type, path=path)
@classmethod
def define_gh_artifact(cls, name, path):
return cls.define_artifact(name=name, type=cls.Type.GH, path=path)
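# Example (illustrative): declare an S3 artifact matching build outputs
#   build_artifact = Artifact.define_artifact(
#       name="build", type=Artifact.Type.S3, path="/tmp/praktika/output/*.deb"
#   )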

ci/praktika/cache.py Normal file

@ -0,0 +1,127 @@
import dataclasses
import json
from pathlib import Path
from praktika import Artifact, Job, Workflow
from praktika._environment import _Environment
from praktika.digest import Digest
from praktika.s3 import S3
from praktika.settings import Settings
from praktika.utils import Utils
class Cache:
@dataclasses.dataclass
class CacheRecord:
class Type:
SUCCESS = "success"
type: str
sha: str
pr_number: int
branch: str
def dump(self, path):
with open(path, "w", encoding="utf8") as f:
json.dump(dataclasses.asdict(self), f)
@classmethod
def from_fs(cls, path):
with open(path, "r", encoding="utf8") as f:
return Cache.CacheRecord(**json.load(f))
@classmethod
def from_dict(cls, obj):
return Cache.CacheRecord(**obj)
def __init__(self):
self.digest = Digest()
self.success = {} # type Dict[str, Any]
@classmethod
def push_success_record(cls, job_name, job_digest, sha):
type_ = Cache.CacheRecord.Type.SUCCESS
record = Cache.CacheRecord(
type=type_,
sha=sha,
pr_number=_Environment.get().PR_NUMBER,
branch=_Environment.get().BRANCH,
)
assert (
Settings.CACHE_S3_PATH
), f"Setting CACHE_S3_PATH must be defined with enabled CI Cache"
record_path = f"{Settings.CACHE_S3_PATH}/v{Settings.CACHE_VERSION}/{Utils.normalize_string(job_name)}/{job_digest}"
record_file = Path(Settings.TEMP_DIR) / type_
record.dump(record_file)
S3.copy_file_to_s3(s3_path=record_path, local_path=record_file)
record_file.unlink()
def fetch_success(self, job_name, job_digest):
type_ = Cache.CacheRecord.Type.SUCCESS
assert (
Settings.CACHE_S3_PATH
), f"Setting CACHE_S3_PATH must be defined with enabled CI Cache"
record_path = f"{Settings.CACHE_S3_PATH}/v{Settings.CACHE_VERSION}/{Utils.normalize_string(job_name)}/{job_digest}/{type_}"
record_file_local_dir = (
f"{Settings.CACHE_LOCAL_PATH}/{Utils.normalize_string(job_name)}/"
)
Path(record_file_local_dir).mkdir(parents=True, exist_ok=True)
if S3.head_object(record_path):
res = S3.copy_file_from_s3(
s3_path=record_path, local_path=record_file_local_dir
)
else:
res = None
if res:
print(f"Cache record found, job [{job_name}], digest [{job_digest}]")
self.success[job_name] = True
return Cache.CacheRecord.from_fs(Path(record_file_local_dir) / type_)
return None
if __name__ == "__main__":
# test
c = Cache()
workflow = Workflow.Config(
name="TEST",
event=Workflow.Event.PULL_REQUEST,
jobs=[
Job.Config(
name="JobA",
runs_on=["some"],
command="python -m unittest ./ci/tests/example_1/test_example_produce_artifact.py",
provides=["greet"],
job_requirements=Job.Requirements(
python_requirements_txt="./ci/requirements.txt"
),
digest_config=Job.CacheDigestConfig(
# example: use glob to include files
include_paths=["./ci/tests/example_1/test_example_consume*.py"],
),
),
Job.Config(
name="JobB",
runs_on=["some"],
command="python -m unittest ./ci/tests/example_1/test_example_consume_artifact.py",
requires=["greet"],
job_requirements=Job.Requirements(
python_requirements_txt="./ci/requirements.txt"
),
digest_config=Job.CacheDigestConfig(
# example: use dir to include files recursively
include_paths=["./ci/tests/example_1"],
# example: use glob to exclude files from digest
exclude_paths=[
"./ci/tests/example_1/test_example_consume*",
"./**/*.pyc",
],
),
),
],
artifacts=[Artifact.Config(type="s3", name="greet", path="hello")],
enable_cache=True,
)
for job in workflow.jobs:
print(c.digest.calc_job_digest(job))

ci/praktika/cidb.py Normal file

@ -0,0 +1,136 @@
import copy
import dataclasses
import json
from typing import Optional
import requests
from praktika._environment import _Environment
from praktika.result import Result
from praktika.settings import Settings
from praktika.utils import Utils
class CIDB:
@dataclasses.dataclass
class TableRecord:
pull_request_number: int
commit_sha: str
commit_url: str
check_name: str
check_status: str
check_duration_ms: int
check_start_time: int
report_url: str
pull_request_url: str
base_ref: str
base_repo: str
head_ref: str
head_repo: str
task_url: str
instance_type: str
instance_id: str
test_name: str
test_status: str
test_duration_ms: Optional[int]
test_context_raw: str
def __init__(self, url, passwd):
self.url = url
self.auth = {
"X-ClickHouse-User": "default",
"X-ClickHouse-Key": passwd,
}
@classmethod
def json_data_generator(cls, result: Result):
env = _Environment.get()
base_record = cls.TableRecord(
pull_request_number=env.PR_NUMBER,
commit_sha=env.SHA,
commit_url=env.COMMIT_URL,
check_name=result.name,
check_status=result.status,
check_duration_ms=int(result.duration * 1000),
check_start_time=Utils.timestamp_to_str(result.start_time),
report_url=env.get_report_url(),
pull_request_url=env.CHANGE_URL,
base_ref=env.BASE_BRANCH,
base_repo=env.REPOSITORY,
head_ref=env.BRANCH,
# TODO: remove from table?
head_repo=env.REPOSITORY,
# TODO: remove from table?
task_url="",
instance_type=",".join([env.INSTANCE_TYPE, env.INSTANCE_LIFE_CYCLE]),
instance_id=env.INSTANCE_ID,
test_name="",
test_status="",
test_duration_ms=None,
test_context_raw=result.info,
)
yield json.dumps(dataclasses.asdict(base_record))
for result_ in result.results:
record = copy.deepcopy(base_record)
record.test_name = result_.name
if result_.start_time:
record.check_start_time = Utils.timestamp_to_str(result_.start_time)
record.test_status = result_.status
record.test_duration_ms = int(result_.duration * 1000)
record.test_context_raw = result_.info
yield json.dumps(dataclasses.asdict(record))
def insert(self, result: Result):
# Create a session object
params = {
"database": Settings.CI_DB_DB_NAME,
"query": f"INSERT INTO {Settings.CI_DB_TABLE_NAME} FORMAT JSONEachRow",
"date_time_input_format": "best_effort",
"send_logs_level": "warning",
}
session = requests.Session()
for json_str in self.json_data_generator(result):
response = session.post(
url=self.url,
params=params,
data=json_str,
headers=self.auth,
timeout=Settings.CI_DB_INSERT_TIMEOUT_SEC,
)
# fail loudly on HTTP errors instead of silently dropping CI DB records
response.raise_for_status()
session.close()
def check(self):
# Create a session object
params = {
"database": Settings.CI_DB_DB_NAME,
"query": f"SELECT 1",
}
try:
response = requests.post(
url=self.url,
params=params,
data="",
headers=self.auth,
timeout=Settings.CI_DB_INSERT_TIMEOUT_SEC,
)
if not response.ok:
print("ERROR: No connection to CI DB")
return (
False,
f"ERROR: No connection to CI DB [{response.status_code}/{response.reason}]",
)
if response.json() != 1:
print("ERROR: CI DB smoke test failed select 1 == 1")
return (
False,
f"ERROR: CI DB smoke test failed [select 1 ==> {response.json()}]",
)
except Exception as ex:
print(f"ERROR: Exception [{ex}]")
return False, "CIDB: ERROR: Exception [{ex}]"
return True, ""

ci/praktika/digest.py Normal file

@ -0,0 +1,112 @@
import dataclasses
import hashlib
import os
from hashlib import md5
from pathlib import Path
from typing import List
from praktika import Job
from praktika.docker import Docker
from praktika.settings import Settings
from praktika.utils import Utils
class Digest:
def __init__(self):
self.digest_cache = {}
@staticmethod
def _hash_digest_config(digest_config: Job.CacheDigestConfig) -> str:
data_dict = dataclasses.asdict(digest_config)
hash_obj = md5()
hash_obj.update(str(data_dict).encode())
hash_string = hash_obj.hexdigest()
return hash_string
def calc_job_digest(self, job_config: Job.Config):
config = job_config.digest_config
if not config:
return "f" * Settings.CACHE_DIGEST_LEN
cache_key = self._hash_digest_config(config)
if cache_key in self.digest_cache:
return self.digest_cache[cache_key]
included_files = Utils.traverse_paths(
job_config.digest_config.include_paths,
job_config.digest_config.exclude_paths,
sorted=True,
)
print(
f"calc digest for job [{job_config.name}]: hash_key [{cache_key}], include [{len(included_files)}] files"
)
# Sort files to ensure consistent hash calculation
included_files.sort()
# Calculate MD5 hash
res = ""
if not included_files:
res = "f" * Settings.CACHE_DIGEST_LEN
print(f"NOTE: empty digest config [{config}] - return dummy digest")
else:
hash_md5 = hashlib.md5()
for file_path in included_files:
res = self._calc_file_digest(file_path, hash_md5)
assert res
self.digest_cache[cache_key] = res
return res
def calc_docker_digest(
self,
docker_config: Docker.Config,
dependency_configs: List[Docker.Config],
hash_md5=None,
):
"""
:param hash_md5:
:param dependency_configs: list of Docker.Config(s) that :param docker_config: depends on
:param docker_config: Docker.Config to calculate digest for
:return:
"""
print(f"Calculate digest for docker [{docker_config.name}]")
paths = Utils.traverse_path(docker_config.path, sorted=True)
if not hash_md5:
hash_md5 = hashlib.md5()
dependencies = []
for dependency_name in docker_config.depends_on:
for dependency_config in dependency_configs:
if dependency_config.name == dependency_name:
print(
f"Add docker [{dependency_config.name}] as dependency for docker [{docker_config.name}] digest calculation"
)
dependencies.append(dependency_config)
for dependency in dependencies:
_ = self.calc_docker_digest(dependency, dependency_configs, hash_md5)
for path in paths:
_ = self._calc_file_digest(path, hash_md5=hash_md5)
return hash_md5.hexdigest()[: Settings.CACHE_DIGEST_LEN]
@staticmethod
def _calc_file_digest(file_path, hash_md5):
# Resolve file path if it's a symbolic link
resolved_path = file_path
if Path(file_path).is_symlink():
resolved_path = os.path.realpath(file_path)
if not Path(resolved_path).is_file():
print(
f"WARNING: No valid file resolved by link {file_path} -> {resolved_path} - skipping digest calculation"
)
return hash_md5.hexdigest()[: Settings.CACHE_DIGEST_LEN]
with open(resolved_path, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()[: Settings.CACHE_DIGEST_LEN]

ci/praktika/docker.py Normal file

@ -0,0 +1,60 @@
import dataclasses
from typing import List
from praktika.utils import Shell
class Docker:
class Platforms:
ARM = "linux/arm64"
AMD = "linux/amd64"
arm_amd = [ARM, AMD]
@dataclasses.dataclass
class Config:
name: str
path: str
depends_on: List[str]
platforms: List[str]
@classmethod
def build(cls, config: "Docker.Config", log_file, digests, add_latest):
tags_substr = f" -t {config.name}:{digests[config.name]}"
if add_latest:
tags_substr += f" -t {config.name}:latest"  # tag "latest" in addition to the digest tag
from_tag = ""
if config.depends_on:
assert (
len(config.depends_on) == 1
), f"Only one dependency in depends_on is currently supported, docker [{config}]"
from_tag = f" --build-arg FROM_TAG={digests[config.depends_on[0]]}"
command = f"docker buildx build --platform {','.join(config.platforms)} {tags_substr} {from_tag} --cache-to type=inline --cache-from type=registry,ref={config.name} --push {config.path}"
return Shell.run(command, log_file=log_file, verbose=True)
@classmethod
def sort_in_build_order(cls, dockers: List["Docker.Config"]):
ready_names = []
i = 0
while i < len(dockers):
docker = dockers[i]
if not docker.depends_on or all(
dep in ready_names for dep in docker.depends_on
):
ready_names.append(docker.name)
i += 1
else:
dockers.append(dockers.pop(i))
return dockers
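# Example (illustrative): with B depending on A, [b, a] is reordered to [a, b]:
#   a = Docker.Config(name="A", path="./a", depends_on=[], platforms=Docker.Platforms.arm_amd)
#   b = Docker.Config(name="B", path="./b", depends_on=["A"], platforms=Docker.Platforms.arm_amd)
#   Docker.sort_in_build_order([b, a])
# Note: the loop assumes the dependency graph is acyclic; a cycle would make it rotate forever.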
@classmethod
def login(cls, user_name, user_password):
print("Docker: log in to dockerhub")
return Shell.check(
f"docker login --username '{user_name}' --password-stdin",
strict=True,
stdin_str=user_password,
encoding="utf-8",
verbose=True,
)


@ -0,0 +1,3 @@
from praktika._environment import _Environment
Environment = _Environment.get()


@ -0,0 +1,4 @@
from praktika.execution.machine_init import run
if __name__ == "__main__":
run()


@ -0,0 +1,31 @@
import os
from praktika.utils import MetaClasses
class ScalingType(metaclass=MetaClasses.WithIter):
DISABLED = "disabled"
AUTOMATIC_SCALE_DOWN = "scale_down"
AUTOMATIC_SCALE_UP_DOWN = "scale"
class DefaultExecutionSettings:
GH_ACTIONS_DIRECTORY: str = "/home/ubuntu/gh_actions"
RUNNER_SCALING_TYPE: str = ScalingType.AUTOMATIC_SCALE_UP_DOWN
MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC: int = 30
class ExecutionSettings:
GH_ACTIONS_DIRECTORY = os.getenv(
"GH_ACTIONS_DIRECTORY", DefaultExecutionSettings.GH_ACTIONS_DIRECTORY
)
RUNNER_SCALING_TYPE = os.getenv(
"RUNNER_SCALING_TYPE", DefaultExecutionSettings.RUNNER_SCALING_TYPE
)
MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC = int(
os.getenv(
"MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC",
DefaultExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC,
)
)
LOCAL_EXECUTION = bool(os.getenv("CLOUD", "0") == "0")


@ -0,0 +1,338 @@
import os
import platform
import signal
import time
import traceback
import requests
from praktika.execution.execution_settings import ExecutionSettings, ScalingType
from praktika.utils import ContextManager, Shell
class StateMachine:
class StateNames:
INIT = "init"
WAIT = "wait"
RUN = "run"
def __init__(self):
self.state = self.StateNames.INIT
self.scale_type = ExecutionSettings.RUNNER_SCALING_TYPE
self.machine = Machine(scaling_type=self.scale_type).update_instance_info()
self.state_updated_at = int(time.time())
self.forked = False
def kick(self):
if self.state == self.StateNames.INIT:
self.machine.config_actions().run_actions_async()
print("State Machine: INIT -> WAIT")
self.state = self.StateNames.WAIT
self.state_updated_at = int(time.time())
# TODO: add monitoring
if not self.machine.is_actions_process_healthy():
print(f"ERROR: GH runner process unexpectedly died")
self.machine.self_terminate(decrease_capacity=False)
elif self.state == self.StateNames.WAIT:
res = self.machine.check_job_assigned()
if res:
print("State Machine: WAIT -> RUN")
self.state = self.StateNames.RUN
self.state_updated_at = int(time.time())
self.check_scale_up()
else:
self.check_scale_down()
elif self.state == self.StateNames.RUN:
res = self.machine.check_job_running()
if res:
pass
else:
print("State Machine: RUN -> INIT")
self.state = self.StateNames.INIT
self.state_updated_at = int(time.time())
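# State transitions, for reference: INIT -> WAIT once the runner is configured and started;
# WAIT -> RUN when a job is assigned (possibly scaling the ASG up); RUN -> INIT when the job
# process exits; in WAIT the instance may also terminate itself (scale down) after a timeout.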
def check_scale_down(self):
if self.scale_type not in (
ScalingType.AUTOMATIC_SCALE_DOWN,
ScalingType.AUTOMATIC_SCALE_UP_DOWN,
):
return
if self.scale_type == ScalingType.AUTOMATIC_SCALE_UP_DOWN and not self.forked:
print(
f"Scaling type is AUTOMATIC_SCALE_UP_DOWN and machine has not run a job - do not scale down"
)
return
if (
int(time.time()) - self.state_updated_at
> ExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC
):
print(
f"No job assigned for more than MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC [{ExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC}] - scale down the instance"
)
if not ExecutionSettings.LOCAL_EXECUTION:
self.machine.self_terminate(decrease_capacity=True)
else:
print("Local execution - skip scaling operation")
def check_scale_up(self):
if self.scale_type not in (ScalingType.AUTOMATIC_SCALE_UP_DOWN,):
return
if self.forked:
print("This instance already forked once - do not scale up")
return
self.machine.self_fork()
self.forked = True
def run(self):
self.machine.unconfig_actions()
while True:
self.kick()
time.sleep(5)
def terminate(self):
try:
self.machine.unconfig_actions()
except:
print("WARNING: failed to unconfig runner")
if not ExecutionSettings.LOCAL_EXECUTION:
if self.machine is not None:
self.machine.self_terminate(decrease_capacity=False)
time.sleep(10)
# wait termination
print("ERROR: failed to terminate instance via aws cli - try os call")
os.system("sudo shutdown now")
else:
print("NOTE: Local execution - machine won't be terminated")
class Machine:
@staticmethod
def get_latest_gh_actions_release():
url = f"https://api.github.com/repos/actions/runner/releases/latest"
response = requests.get(url, timeout=5)
if response.status_code == 200:
latest_release = response.json()
return latest_release["tag_name"].removeprefix("v")
else:
print(f"Failed to get the latest release: {response.status_code}")
return None
def __init__(self, scaling_type):
self.os_name = platform.system().lower()
assert self.os_name == "linux", f"Unsupported OS [{self.os_name}]"
if platform.machine() == "x86_64":
self.arch = "x64"
elif "aarch64" in platform.machine().lower():
self.arch = "arm64"
else:
assert False, f"Unsupported arch [{platform.machine()}]"
self.instance_id = None
self.asg_name = None
self.runner_api_endpoint = None
self.runner_type = None
self.labels = []
self.proc = None
assert scaling_type in ScalingType
self.scaling_type = scaling_type
def install_gh_actions_runner(self):
gh_actions_version = self.get_latest_gh_actions_release()
assert self.os_name and gh_actions_version and self.arch
Shell.check(
f"rm -rf {ExecutionSettings.GH_ACTIONS_DIRECTORY}",
strict=True,
verbose=True,
)
Shell.check(
f"mkdir {ExecutionSettings.GH_ACTIONS_DIRECTORY}", strict=True, verbose=True
)
with ContextManager.cd(ExecutionSettings.GH_ACTIONS_DIRECTORY):
Shell.check(
f"curl -O -L https://github.com/actions/runner/releases/download/v{gh_actions_version}/actions-runner-{self.os_name}-{self.arch}-{gh_actions_version}.tar.gz",
strict=True,
verbose=True,
)
Shell.check(f"tar xzf *tar.gz", strict=True, verbose=True)
Shell.check(f"rm -f *tar.gz", strict=True, verbose=True)
Shell.check(f"sudo ./bin/installdependencies.sh", strict=True, verbose=True)
Shell.check(
f"chown -R ubuntu:ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}",
strict=True,
verbose=True,
)
def _get_gh_token_from_ssm(self):
gh_token = Shell.get_output_or_raise(
"/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value"
)
return gh_token
def update_instance_info(self):
self.instance_id = Shell.get_output_or_raise("ec2metadata --instance-id")
assert self.instance_id
self.asg_name = Shell.get_output(
f"aws ec2 describe-instances --instance-id {self.instance_id} --query \"Reservations[].Instances[].Tags[?Key=='aws:autoscaling:groupName'].Value\" --output text"
)
# self.runner_type = Shell.get_output_or_raise(
# f'/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values={self.instance_id}" --query "Tags[?Key==\'github:runner-type\'].Value" --output text'
# )
self.runner_type = self.asg_name
if (
self.scaling_type != ScalingType.DISABLED
and not ExecutionSettings.LOCAL_EXECUTION
):
assert (
self.asg_name and self.runner_type
), f"Failed to retrieve ASG name, which is required for scaling_type [{self.scaling_type}]"
org = os.getenv("MY_ORG", "")
assert (
org
), "MY_ORG env variable myst be set to use init script for runner machine"
self.runner_api_endpoint = f"https://github.com/{org}"
self.labels = ["self-hosted", self.runner_type]
return self
@classmethod
def check_job_assigned(cls):
runner_pid = Shell.get_output_or_raise("pgrep Runner.Listener")
if not runner_pid:
print("check_job_assigned: No runner pid")
return False
log_file = Shell.get_output_or_raise(
f"lsof -p {runner_pid} | grep -o {ExecutionSettings.GH_ACTIONS_DIRECTORY}/_diag/Runner.*log"
)
if not log_file:
print("check_job_assigned: No log file")
return False
return Shell.check(f"grep -q 'Terminal] .* Running job:' {log_file}")
def check_job_running(self):
if self.proc is None:
print(f"WARNING: No job started")
return False
exit_code = self.proc.poll()
if exit_code is None:
return True
else:
print(f"Job runner finished with exit code [{exit_code}]")
self.proc = None
return False
def config_actions(self):
if not self.instance_id:
self.update_instance_info()
token = self._get_gh_token_from_ssm()
assert token and self.instance_id and self.runner_api_endpoint and self.labels
command = f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/config.sh --token {token} \
--url {self.runner_api_endpoint} --ephemeral --unattended --replace \
--runnergroup Default --labels {','.join(self.labels)} --work wd --name {self.instance_id}"
res = 1
i = 0
while i < 10 and res != 0:
res = Shell.run(command)
i += 1
if res != 0:
print(
f"ERROR: failed to configure GH actions runner after [{i}] attempts, exit code [{res}], retry after 10s"
)
time.sleep(10)
# refresh the registration token and rebuild the command before the next attempt
token = self._get_gh_token_from_ssm()
command = f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/config.sh --token {token} \
--url {self.runner_api_endpoint} --ephemeral --unattended --replace \
--runnergroup Default --labels {','.join(self.labels)} --work wd --name {self.instance_id}"
if res == 0:
print("GH action runner has been configured")
else:
assert False, "GH actions runner configuration failed"
return self
def unconfig_actions(self):
token = self._get_gh_token_from_ssm()
command = f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/config.sh remove --token {token}"
Shell.check(command, strict=True)
return self
def run_actions_async(self):
command = f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/run.sh"
self.proc = Shell.run_async(command)
assert self.proc is not None
return self
def is_actions_process_healthy(self):
try:
if self.proc.poll() is None:
return True
stdout, stderr = self.proc.communicate()
if self.proc.returncode != 0:
# Handle failure
print(
f"GH Action process failed with return code {self.proc.returncode}"
)
print(f"Error output: {stderr}")
return False
else:
print(f"GH Action process is not running")
return False
except Exception as e:
print(f"GH Action process exception: {e}")
return False
def self_terminate(self, decrease_capacity):
print(
f"WARNING: Self terminate is called, decrease_capacity [{decrease_capacity}]"
)
traceback.print_stack()
if not self.instance_id:
self.update_instance_info()
assert self.instance_id
command = f"aws autoscaling terminate-instance-in-auto-scaling-group --instance-id {self.instance_id}"
if decrease_capacity:
command += " --should-decrement-desired-capacity"
else:
command += " --no-should-decrement-desired-capacity"
Shell.check(
command=command,
verbose=True,
)
def self_fork(self):
current_capacity = Shell.get_output(
f'aws autoscaling describe-auto-scaling-groups --auto-scaling-group-name {self.asg_name} \
--query "AutoScalingGroups[0].DesiredCapacity" --output text'
)
current_capacity = int(current_capacity)
if not current_capacity:
print("ERROR: failed to get current capacity - cannot scale up")
return
desired_capacity = current_capacity + 1
command = f"aws autoscaling set-desired-capacity --auto-scaling-group-name {self.asg_name} --desired-capacity {desired_capacity}"
print(f"Increase capacity [{current_capacity} -> {desired_capacity}]")
res = Shell.check(
command=command,
verbose=True,
)
if not res:
print("ERROR: failed to increase capacity - cannot scale up")
def handle_signal(signum, _frame):
print(f"FATAL: Received signal {signum}")
raise RuntimeError(f"killed by signal {signum}")
def run():
signal.signal(signal.SIGINT, handle_signal)
signal.signal(signal.SIGTERM, handle_signal)
m = None
try:
m = StateMachine()
m.run()
except Exception as e:
print(f"FATAL: Exception [{e}] - terminate instance")
time.sleep(10)
if m:
m.terminate()
raise e
if __name__ == "__main__":
run()


@ -0,0 +1,102 @@
import base64
import random
import struct
import zlib
def create_favicon():
# Image dimensions
width = 32
height = 32
# Initialize a transparent background image (RGBA: 4 bytes per pixel)
image_data = bytearray(
[0, 0, 0, 0] * width * height
) # Set alpha to 0 for transparency
# Draw 4 vertical lines with color #FAFF68 (RGB: 250, 255, 104)
line_color = [250, 255, 104, 255] # RGBA for #FAFF68 with full opacity
line_width = 4
space_width = 3
x_start = space_width
line_number = 4
line_height = height - space_width
for i in range(line_number):
# Randomly pick a starting y position for each line
y_start = random.randint(0, height - 1)
# Draw the line with random shift along Y-axis
for y in range(line_height):
y_pos = (y + y_start) % height
for x in range(line_width):
pixel_index = (y_pos * width + x_start + x) * 4
image_data[pixel_index : pixel_index + 4] = line_color
x_start += line_width + space_width
# Convert the RGBA image to PNG format
png_data = create_png(width, height, image_data)
# Convert PNG to ICO format
ico_data = create_ico(png_data)
return ico_data
def create_png(width, height, image_data):
def write_chunk(chunk_type, data):
chunk_len = struct.pack(">I", len(data))
chunk_crc = struct.pack(">I", zlib.crc32(chunk_type + data) & 0xFFFFFFFF)
return chunk_len + chunk_type + data + chunk_crc
png_signature = b"\x89PNG\r\n\x1a\n"
ihdr_chunk = struct.pack(">IIBBBBB", width, height, 8, 6, 0, 0, 0)
idat_data = zlib.compress(
b"".join(
b"\x00" + image_data[y * width * 4 : (y + 1) * width * 4]
for y in range(height)
),
9,
)
idat_chunk = write_chunk(b"IDAT", idat_data)
iend_chunk = write_chunk(b"IEND", b"")
return png_signature + write_chunk(b"IHDR", ihdr_chunk) + idat_chunk + iend_chunk
def create_ico(png_data):
# ICO header: reserved (2 bytes), type (2 bytes), image count (2 bytes)
ico_header = struct.pack("<HHH", 0, 1, 1)
# ICO entry: width, height, color count, reserved, color planes, bits per pixel, size, offset
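    # offset 22 = 6-byte ICONDIR header + 16-byte ICONDIRENTRY, so the PNG payload starts right after them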
ico_entry = struct.pack("<BBBBHHII", 32, 32, 0, 0, 1, 32, len(png_data), 22)
return ico_header + ico_entry + png_data
def save_favicon_to_disk(ico_data, file_path="favicon.ico"):
with open(file_path, "wb") as f:
f.write(ico_data)
print(f"Favicon saved to {file_path}")
def lambda_handler(event, context):
# Generate the favicon
favicon_data = create_favicon()
# Return the favicon as a binary response
return {
"statusCode": 200,
"headers": {
"Content-Type": "image/x-icon",
"Content-Disposition": 'inline; filename="favicon.ico"',
},
"body": base64.b64encode(favicon_data).decode("utf-8"),
"isBase64Encoded": True,
}
# Optional: Call the function directly to generate and save favicon locally (if running outside Lambda)
if __name__ == "__main__":
favicon_data = create_favicon()
save_favicon_to_disk(favicon_data)

ci/praktika/gh.py Normal file
View File

@ -0,0 +1,105 @@
import json
import time
from praktika._environment import _Environment
from praktika.result import Result
from praktika.settings import Settings
from praktika.utils import Shell
class GH:
@classmethod
def do_command_with_retries(cls, command):
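        # Runs a gh command with up to Settings.MAX_RETRIES_GH attempts (5s apart),
        # bailing out early on validation or credential errors that retries cannot fix.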
res = False
retry_count = 0
out, err = "", ""
while retry_count < Settings.MAX_RETRIES_GH and not res:
ret_code, out, err = Shell.get_res_stdout_stderr(command, verbose=True)
res = ret_code == 0
if not res and "Validation Failed" in err:
print("ERROR: GH command validation error")
break
if not res and "Bad credentials" in err:
print("ERROR: GH credentials/auth failure")
break
if not res:
retry_count += 1
time.sleep(5)
if not res:
print(
f"ERROR: Failed to execute gh command [{command}] out:[{out}] err:[{err}] after [{retry_count}] attempts"
)
return res
@classmethod
def post_pr_comment(
cls, comment_body, or_update_comment_with_substring, repo=None, pr=None
):
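        # Posts a PR comment; if or_update_comment_with_substring matches an existing
        # comment, the first matching comment is updated in place instead.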
if not repo:
repo = _Environment.get().REPOSITORY
if not pr:
pr = _Environment.get().PR_NUMBER
if or_update_comment_with_substring:
print(f"check comment [{comment_body}] created")
cmd_check_created = f'gh api -H "Accept: application/vnd.github.v3+json" \
"/repos/{repo}/issues/{pr}/comments" \
--jq \'.[] | {{id: .id, body: .body}}\' | grep -F "{or_update_comment_with_substring}"'
output = Shell.get_output(cmd_check_created)
if output:
comment_ids = []
try:
comment_ids = [
json.loads(item.strip())["id"] for item in output.split("\n")
]
except Exception as ex:
print(f"Failed to retrieve PR comments with [{ex}]")
                for comment_id in comment_ids:
                    cmd = f'gh api \
                        -X PATCH \
                        -H "Accept: application/vnd.github.v3+json" \
                        "/repos/{repo}/issues/comments/{comment_id}" \
                        -f body=\'{comment_body}\''
                    print(f"Update existing comment [{comment_id}]")
                    return cls.do_command_with_retries(cmd)
cmd = f'gh pr comment {pr} --body "{comment_body}"'
return cls.do_command_with_retries(cmd)
@classmethod
def post_commit_status(cls, name, status, description, url):
status = cls.convert_to_gh_status(status)
command = (
f"gh api -X POST -H 'Accept: application/vnd.github.v3+json' "
f"/repos/{_Environment.get().REPOSITORY}/statuses/{_Environment.get().SHA} "
f"-f state='{status}' -f target_url='{url}' "
f"-f description='{description}' -f context='{name}'"
)
return cls.do_command_with_retries(command)
@classmethod
def convert_to_gh_status(cls, status):
if status in (
Result.Status.PENDING,
Result.Status.SUCCESS,
Result.Status.FAILED,
Result.Status.ERROR,
):
return status
        if status == Result.Status.RUNNING:
            return Result.Status.PENDING
        assert False, f"Invalid status [{status}] to be set as GH commit status.state"
if __name__ == "__main__":
# test
GH.post_pr_comment(
comment_body="foobar",
or_update_comment_with_substring="CI",
repo="ClickHouse/praktika",
pr=15,
)

ci/praktika/gh_auth.py Normal file
View File

@ -0,0 +1,71 @@
import sys
import time
from typing import List
import requests
from jwt import JWT, jwk_from_pem
from praktika import Workflow
from praktika.mangle import _get_workflows
from praktika.settings import Settings
from praktika.utils import Shell
class GHAuth:
@staticmethod
def _generate_jwt(client_id, pem):
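        # Builds a short-lived (10 minute) RS256 JWT for the GitHub App, signed with its private key.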
        signing_key = jwk_from_pem(pem.encode())
payload = {
"iat": int(time.time()),
"exp": int(time.time()) + 600,
"iss": client_id,
}
# Create JWT
jwt_instance = JWT()
encoded_jwt = jwt_instance.encode(payload, signing_key, alg="RS256")
return encoded_jwt
@staticmethod
def _get_installation_id(jwt_token):
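        # Assumes the GitHub App has a single installation and returns the first one listed.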
headers = {
"Authorization": f"Bearer {jwt_token}",
"Accept": "application/vnd.github.v3+json",
}
response = requests.get(
"https://api.github.com/app/installations", headers=headers, timeout=10
)
response.raise_for_status()
installations = response.json()
assert installations, "No installations found for the GitHub App"
return installations[0]["id"]
@staticmethod
def _get_access_token(jwt_token, installation_id):
headers = {
"Authorization": f"Bearer {jwt_token}",
"Accept": "application/vnd.github.v3+json",
}
url = (
f"https://api.github.com/app/installations/{installation_id}/access_tokens"
)
response = requests.post(url, headers=headers, timeout=10)
response.raise_for_status()
return response.json()["token"]
@classmethod
def auth(cls, workflow_name) -> None:
wf = _get_workflows(workflow_name) # type: List[Workflow.Config]
pem = wf[0].get_secret(Settings.SECRET_GH_APP_PEM_KEY).get_value()
assert pem
app_id = wf[0].get_secret(Settings.SECRET_GH_APP_ID).get_value()
# Generate JWT
jwt_token = cls._generate_jwt(app_id, pem)
# Get Installation ID
installation_id = cls._get_installation_id(jwt_token)
# Get Installation Access Token
access_token = cls._get_access_token(jwt_token, installation_id)
Shell.check(f"echo {access_token} | gh auth login --with-token", strict=True)
if __name__ == "__main__":
GHAuth.auth(sys.argv[1])

ci/praktika/hook_cache.py Normal file
View File

@ -0,0 +1,124 @@
from praktika._environment import _Environment
from praktika.cache import Cache
from praktika.mangle import _get_workflows
from praktika.runtime import RunConfig
from praktika.settings import Settings
from praktika.utils import Utils
class CacheRunnerHooks:
@classmethod
def configure(cls, _workflow):
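        # Calculates a digest per job (shared with the artifacts the job provides), folds in
        # the digests of required artifacts, then checks the remote cache for reusable results.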
workflow_config = RunConfig.from_fs(_workflow.name)
cache = Cache()
assert _Environment.get().WORKFLOW_NAME
workflow = _get_workflows(name=_Environment.get().WORKFLOW_NAME)[0]
print(f"Workflow Configure, workflow [{workflow.name}]")
assert (
workflow.enable_cache
), f"Outdated yaml pipelines or BUG. Configuration must be run only for workflow with enabled cache, workflow [{workflow.name}]"
artifact_digest_map = {}
job_digest_map = {}
for job in workflow.jobs:
if not job.digest_config:
print(
f"NOTE: job [{job.name}] has no Config.digest_config - skip cache check, always run"
)
digest = cache.digest.calc_job_digest(job_config=job)
job_digest_map[job.name] = digest
if job.provides:
# assign the job digest also to the artifacts it provides
for artifact in job.provides:
artifact_digest_map[artifact] = digest
for job in workflow.jobs:
digests_combined_list = []
if job.requires:
                # include the digests of required artifacts in the job digest, so that they affect the job state
                for artifact_name in job.requires:
                    if artifact_name not in [
                        artifact.name for artifact in workflow.artifacts
                    ]:
                        # a phony artifact is assumed not to affect the jobs that depend on it
                        continue
digests_combined_list.append(artifact_digest_map[artifact_name])
digests_combined_list.append(job_digest_map[job.name])
final_digest = "-".join(digests_combined_list)
workflow_config.digest_jobs[job.name] = final_digest
assert (
workflow_config.digest_jobs
), f"BUG, Workflow with enabled cache must have job digests after configuration, wf [{workflow.name}]"
print("Check remote cache")
job_to_cache_record = {}
for job_name, job_digest in workflow_config.digest_jobs.items():
record = cache.fetch_success(job_name=job_name, job_digest=job_digest)
if record:
assert (
Utils.normalize_string(job_name)
not in workflow_config.cache_success
)
workflow_config.cache_success.append(job_name)
workflow_config.cache_success_base64.append(Utils.to_base64(job_name))
job_to_cache_record[job_name] = record
print("Check artifacts to reuse")
for job in workflow.jobs:
if job.name in workflow_config.cache_success:
if job.provides:
for artifact_name in job.provides:
workflow_config.cache_artifacts[artifact_name] = (
job_to_cache_record[job.name]
)
print(f"Write config to GH's job output")
with open(_Environment.get().JOB_OUTPUT_STREAM, "a", encoding="utf8") as f:
print(
f"DATA={workflow_config.to_json()}",
file=f,
)
print(f"WorkflowRuntimeConfig: [{workflow_config.to_json(pretty=True)}]")
print(
"Dump WorkflowConfig to fs, the next hooks in this job might want to see it"
)
workflow_config.dump()
return workflow_config
@classmethod
def pre_run(cls, _workflow, _job, _required_artifacts=None):
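        # Returns the S3 path prefixes to fetch required artifacts from: cached
        # artifacts are taken from the prefix of the run that originally produced them.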
path_prefixes = []
if _job.name == Settings.CI_CONFIG_JOB_NAME:
            # SPECIAL handling: the config job itself never reuses cached artifacts
return path_prefixes
env = _Environment.get()
runtime_config = RunConfig.from_fs(_workflow.name)
required_artifacts = []
if _required_artifacts:
required_artifacts = _required_artifacts
for artifact in required_artifacts:
if artifact.name in runtime_config.cache_artifacts:
record = runtime_config.cache_artifacts[artifact.name]
print(f"Reuse artifact [{artifact.name}] from [{record}]")
path_prefixes.append(
env.get_s3_prefix_static(
record.pr_number, record.branch, record.sha
)
)
else:
path_prefixes.append(env.get_s3_prefix())
return path_prefixes
@classmethod
def run(cls, workflow, job):
pass
@classmethod
def post_run(cls, workflow, job):
if job.name == Settings.CI_CONFIG_JOB_NAME:
return
if job.digest_config:
            # cache is enabled and this job is supposed to be cached (it has a digest config defined)
workflow_runtime = RunConfig.from_fs(workflow.name)
job_digest = workflow_runtime.digest_jobs[job.name]
Cache.push_success_record(job.name, job_digest, workflow_runtime.sha)

ci/praktika/hook_html.py Normal file
View File

@ -0,0 +1,198 @@
import dataclasses
import json
import urllib.parse
from pathlib import Path
from typing import List
from praktika._environment import _Environment
from praktika.gh import GH
from praktika.parser import WorkflowConfigParser
from praktika.result import Result, ResultInfo
from praktika.runtime import RunConfig
from praktika.s3 import S3
from praktika.settings import Settings
from praktika.utils import Shell, Utils
@dataclasses.dataclass
class GitCommit:
date: str
message: str
sha: str
@staticmethod
def from_json(json_data: str) -> List["GitCommit"]:
commits = []
try:
data = json.loads(json_data)
commits = [
GitCommit(
message=commit["messageHeadline"],
sha=commit["oid"],
date=commit["committedDate"],
)
for commit in data.get("commits", [])
]
except Exception as e:
print(
f"ERROR: Failed to deserialize commit's data: [{json_data}], ex: [{e}]"
)
return commits
class HtmlRunnerHooks:
@classmethod
def configure(cls, _workflow):
def _get_pr_commits(pr_number):
res = []
if not pr_number:
return res
output = Shell.get_output(f"gh pr view {pr_number} --json commits")
if output:
res = GitCommit.from_json(output)
return res
# generate pending Results for all jobs in the workflow
if _workflow.enable_cache:
skip_jobs = RunConfig.from_fs(_workflow.name).cache_success
else:
skip_jobs = []
env = _Environment.get()
results = []
for job in _workflow.jobs:
if job.name not in skip_jobs:
result = Result.generate_pending(job.name)
else:
result = Result.generate_skipped(job.name)
results.append(result)
summary_result = Result.generate_pending(_workflow.name, results=results)
summary_result.aux_links.append(env.CHANGE_URL)
summary_result.aux_links.append(env.RUN_URL)
summary_result.start_time = Utils.timestamp()
page_url = "/".join(
["https:/", Settings.HTML_S3_PATH, str(Path(Settings.HTML_PAGE_FILE).name)]
)
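        # note: the single slash in "https:/" becomes "https://" after joining with "/"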
for bucket, endpoint in Settings.S3_BUCKET_TO_HTTP_ENDPOINT.items():
page_url = page_url.replace(bucket, endpoint)
# TODO: add support for non-PRs (use branch?)
page_url += f"?PR={env.PR_NUMBER}&sha=latest&name_0={urllib.parse.quote(env.WORKFLOW_NAME, safe='')}"
summary_result.html_link = page_url
# clean the previous latest results in PR if any
if env.PR_NUMBER:
S3.clean_latest_result()
S3.copy_result_to_s3(
summary_result,
unlock=False,
)
print(f"CI Status page url [{page_url}]")
res1 = GH.post_commit_status(
name=_workflow.name,
status=Result.Status.PENDING,
description="",
url=page_url,
)
res2 = GH.post_pr_comment(
comment_body=f"Workflow [[{_workflow.name}]({page_url})], commit [{_Environment.get().SHA[:8]}]",
            or_update_comment_with_substring="Workflow [",
)
if not (res1 or res2):
Utils.raise_with_error(
"Failed to set both GH commit status and PR comment with Workflow Status, cannot proceed"
)
if env.PR_NUMBER:
commits = _get_pr_commits(env.PR_NUMBER)
# TODO: upload commits data to s3 to visualise it on a report page
print(commits)
@classmethod
def pre_run(cls, _workflow, _job):
result = Result.from_fs(_job.name)
S3.copy_result_from_s3(
Result.file_name_static(_workflow.name),
)
workflow_result = Result.from_fs(_workflow.name)
workflow_result.update_sub_result(result)
S3.copy_result_to_s3(
workflow_result,
unlock=True,
)
@classmethod
def run(cls, _workflow, _job):
pass
@classmethod
def post_run(cls, _workflow, _job, info_errors):
result = Result.from_fs(_job.name)
env = _Environment.get()
S3.copy_result_from_s3(
Result.file_name_static(_workflow.name),
lock=True,
)
workflow_result = Result.from_fs(_workflow.name)
print(f"Workflow info [{workflow_result.info}], info_errors [{info_errors}]")
env_info = env.REPORT_INFO
if env_info:
print(
f"WARNING: some info lines are set in Environment - append to report [{env_info}]"
)
info_errors += env_info
if info_errors:
info_errors = [f" | {error}" for error in info_errors]
info_str = f"{_job.name}:\n"
info_str += "\n".join(info_errors)
print("Update workflow results with new info")
workflow_result.set_info(info_str)
old_status = workflow_result.status
S3.upload_result_files_to_s3(result)
workflow_result.update_sub_result(result)
skipped_job_results = []
if not result.is_ok():
            print(
                "Current job failed - find dependent jobs in the workflow and set their statuses to skipped"
            )
            workflow_config_parsed = WorkflowConfigParser(_workflow).parse()
            for dependent_job in workflow_config_parsed.workflow_yaml_config.jobs:
                if _job.name in dependent_job.needs:
                    if _workflow.get_job(dependent_job.name).run_unless_cancelled:
                        continue
                    print(
                        f"NOTE: Set job [{dependent_job.name}] status to [{Result.Status.SKIPPED}] due to current failure"
                    )
                    skipped_job_results.append(
                        Result(
                            name=dependent_job.name,
status=Result.Status.SKIPPED,
info=ResultInfo.SKIPPED_DUE_TO_PREVIOUS_FAILURE
+ f" [{_job.name}]",
)
)
for skipped_job_result in skipped_job_results:
workflow_result.update_sub_result(skipped_job_result)
S3.copy_result_to_s3(
workflow_result,
unlock=True,
)
if workflow_result.status != old_status:
print(
f"Update GH commit status [{result.name}]: [{old_status} -> {workflow_result.status}], link [{workflow_result.html_link}]"
)
GH.post_commit_status(
name=workflow_result.name,
status=GH.convert_to_gh_status(workflow_result.status),
description="",
url=workflow_result.html_link,
)

View File

@ -0,0 +1,43 @@
from abc import ABC, abstractmethod
from praktika import Workflow
class HookInterface(ABC):
@abstractmethod
def pre_run(self, _workflow, _job):
"""
runs in pre-run step
:param _workflow:
:param _job:
:return:
"""
pass
@abstractmethod
def run(self, _workflow, _job):
"""
runs in run step
:param _workflow:
:param _job:
:return:
"""
pass
@abstractmethod
def post_run(self, _workflow, _job):
"""
runs in post-run step
:param _workflow:
:param _job:
:return:
"""
pass
@abstractmethod
def configure(self, _workflow: Workflow.Config):
"""
runs in initial WorkflowConfig job in run step
:return:
"""
pass

View File

@ -0,0 +1,10 @@
from praktika.s3 import S3
from praktika.settings import Settings
class Html:
@classmethod
def prepare(cls):
S3.copy_file_to_s3(
s3_path=Settings.HTML_S3_PATH, local_path=Settings.HTML_PAGE_FILE
)

ci/praktika/job.py Normal file
View File

@ -0,0 +1,102 @@
import copy
import json
from dataclasses import dataclass, field
from typing import Any, List, Optional
class Job:
@dataclass
class Requirements:
python: bool = False
python_requirements_txt: str = ""
@dataclass
class CacheDigestConfig:
include_paths: List[str] = field(default_factory=list)
exclude_paths: List[str] = field(default_factory=list)
@dataclass
class Config:
# Job Name
name: str
# Machine's label to run job on. For instance [ubuntu-latest] for free gh runner
runs_on: List[str]
# Job Run Command
command: str
# What job requires
# May be phony or physical names
requires: List[str] = field(default_factory=list)
# What job provides
# May be phony or physical names
provides: List[str] = field(default_factory=list)
job_requirements: Optional["Job.Requirements"] = None
timeout: int = 1 * 3600
digest_config: Optional["Job.CacheDigestConfig"] = None
run_in_docker: str = ""
run_unless_cancelled: bool = False
allow_merge_on_failure: bool = False
parameter: Any = None
def parametrize(
self,
parameter: Optional[List[Any]] = None,
runs_on: Optional[List[List[str]]] = None,
timeout: Optional[List[int]] = None,
):
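        # Produces one deep copy of this job per entry; the parameter, runs_on and
        # timeout lists are matched up by index and must be of equal length.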
        assert (
            parameter or runs_on
        ), "Either :parameter or :runs_on must be a non-empty list for parametrization"
if not parameter:
parameter = [None] * len(runs_on)
if not runs_on:
runs_on = [None] * len(parameter)
if not timeout:
timeout = [None] * len(parameter)
assert (
len(parameter) == len(runs_on) == len(timeout)
), "Parametrization lists must be of the same size"
res = []
for parameter_, runs_on_, timeout_ in zip(parameter, runs_on, timeout):
obj = copy.deepcopy(self)
if parameter_:
obj.parameter = parameter_
if runs_on_:
obj.runs_on = runs_on_
if timeout_:
obj.timeout = timeout_
obj.name = obj.get_job_name_with_parameter()
res.append(obj)
return res
def get_job_name_with_parameter(self):
name, parameter, runs_on = self.name, self.parameter, self.runs_on
res = name
name_params = []
        if isinstance(parameter, (list, dict)):
name_params.append(json.dumps(parameter))
elif parameter is not None:
name_params.append(parameter)
if runs_on:
assert isinstance(runs_on, list)
name_params.append(json.dumps(runs_on))
if name_params:
name_params = [str(param) for param in name_params]
res += f" ({', '.join(name_params)})"
self.name = res
return res
def __repr__(self):
return self.name

ci/praktika/json.html Normal file
View File

@ -0,0 +1,745 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>praktika report</title>
<link rel="icon" href="https://w4z3pajszlbkfcw2wcylfei5km0xmwag.lambda-url.us-east-1.on.aws/" type="image/x-icon">
<style>
/* Default (Day Theme) */
:root {
--background-color: white;
--text-color: #000;
--tile-background: #f9f9f9;
--footer-background: #f1f1f1;
--footer-text-color: #000;
--status-width: 300px;
}
body {
background-color: var(--background-color);
color: var(--text-color);
height: 100%;
margin: 0;
display: flex;
flex-direction: column;
font-family: 'IBM Plex Mono Condensed', monospace, sans-serif;
--header-background-color: #f4f4f4;
}
body.night-theme {
--background-color: #1F1F1C;
--text-color: #fff;
--tile-background: black;
--header-background-color: #1F1F1C;
}
#info-container {
margin-left: calc(var(--status-width) + 20px);
margin-bottom: 10px;
background-color: var(--tile-background);
padding: 10px;
text-align: left;
}
#status-container {
position: fixed;
top: 0;
bottom: 0;
left: 0;
width: var(--status-width);
background-color: var(--tile-background);
padding: 20px;
box-sizing: border-box;
font-size: 18px;
margin: 0;
}
#status-container a {
color: #007bff;
text-decoration: underline;
font-weight: bold;
cursor: pointer;
display: inline-block;
margin-top: 5px;
margin-left: 20px;
padding: 2px 0;
font-size: 0.8em;
}
#status-container a:hover {
color: #0056b3;
text-decoration: none;
}
.key-value-pair {
display: flex; /* Enable Flexbox for alignment */
justify-content: space-between; /* Distribute space between key and value */
margin-bottom: 20px; /* Add space between each pair */
}
.json-key {
font-weight: bold;
}
.json-value {
font-weight: normal;
font-family: 'Source Code Pro', monospace, sans-serif;
letter-spacing: -0.5px;
}
#result-container {
background-color: var(--tile-background);
margin-left: calc(var(--status-width) + 20px);
padding: 20px;
box-sizing: border-box;
text-align: center;
font-size: 18px;
font-weight: normal;
flex-grow: 1;
}
#footer {
padding: 10px;
position: fixed;
bottom: 0;
left: 0;
right: 0;
background-color: #1F1F1C;
color: white;
font-size: 14px;
display: flex;
justify-content: space-between; /* Ensure the .left expands, and .right and .settings are aligned to the right */
align-items: center;
}
#footer a {
color: white;
text-decoration: none;
}
#footer .left {
flex-grow: 1; /* Takes up all the available space */
}
/* make some space around '/' in the navigation line */
#footer .left span.separator {
margin-left: 5px;
margin-right: 5px;
}
#footer .right, #footer .settings {
display: flex;
align-items: center;
}
#footer .right a::before {
content: "#";
margin-left: 10px;
color: #e0e0e0;
}
#footer .right::before, #footer .settings::before {
content: "|"; /* Add separator before right and settings sections */
margin-left: 10px;
margin-right: 10px;
color: #e0e0e0;
}
#theme-toggle {
cursor: pointer;
font-size: 20px;
color: white;
}
#theme-toggle:hover {
color: #e0e0e0;
}
#footer a:hover {
text-decoration: underline;
}
#links {
margin-top: 10px;
padding: 15px;
border: 1px solid #ccc;
border-radius: 5px;
background-color: #f9f9f9;
}
#links a {
display: block;
margin-bottom: 5px;
padding: 5px 10px;
background-color: #D5D5D5;
color: black;
text-decoration: none;
border-radius: 5px;
}
#links a:hover {
background-color: #D5D5D5;
}
table {
width: 100%;
border-collapse: collapse;
}
th.name-column, td.name-column {
max-width: 400px; /* Set the maximum width for the column */
white-space: nowrap; /* Prevent text from wrapping */
overflow: hidden; /* Hide the overflowed text */
text-overflow: ellipsis; /* Show ellipsis (...) for overflowed text */
}
th.status-column, td.status-column {
max-width: 100px; /* Set the maximum width for the column */
white-space: nowrap; /* Prevent text from wrapping */
overflow: hidden; /* Hide the overflowed text */
text-overflow: ellipsis; /* Show ellipsis (...) for overflowed text */
}
th.time-column, td.time-column {
max-width: 120px; /* Set the maximum width for the column */
white-space: nowrap; /* Prevent text from wrapping */
text-align: right;
}
th.info-column, td.info-column {
width: 100%; /* Allow the column to take all the remaining space */
}
th, td {
padding: 8px;
border: 1px solid #ddd;
text-align: left;
}
th {
background-color: var(--header-background-color);
}
.status-success {
color: green;
font-weight: bold;
}
.status-fail {
color: red;
font-weight: bold;
}
.status-pending {
color: #d4a017;
font-weight: bold;
}
.status-broken {
color: purple;
font-weight: bold;
}
.status-run {
color: blue;
font-weight: bold;
}
.status-error {
color: darkred;
font-weight: bold;
}
.status-other {
color: grey;
font-weight: bold;
}
</style>
</head>
<body>
<div id="info-container"></div>
<div id="status-container"></div>
<div id="result-container"></div>
<footer id="footer">
<div class="left"></div>
<div class="right"></div>
<div class="settings">
<span id="theme-toggle">☀️</span>
</div>
</footer>
<script>
function toggleTheme() {
document.body.classList.toggle('night-theme');
const toggleIcon = document.getElementById('theme-toggle');
if (document.body.classList.contains('night-theme')) {
toggleIcon.textContent = '☾'; // Moon for night mode
} else {
toggleIcon.textContent = '☀️'; // Sun for day mode
}
}
// Attach the toggle function to the click event of the icon
document.getElementById('theme-toggle').addEventListener('click', toggleTheme);
function formatTimestamp(timestamp, showDate = true) {
const date = new Date(timestamp * 1000);
const day = String(date.getDate()).padStart(2, '0');
const monthNames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
const month = monthNames[date.getMonth()];
const year = date.getFullYear();
const hours = String(date.getHours()).padStart(2, '0');
const minutes = String(date.getMinutes()).padStart(2, '0');
const seconds = String(date.getSeconds()).padStart(2, '0');
//const milliseconds = String(date.getMilliseconds()).padStart(2, '0');
return showDate
? `${day}-${month}-${year} ${hours}:${minutes}:${seconds}`
: `${hours}:${minutes}:${seconds}`;
}
function formatDuration(durationInSeconds, detailed = false) {
// Check if the duration is empty, null, or not a number
if (!durationInSeconds || isNaN(durationInSeconds)) {
return '';
}
// Ensure duration is a floating-point number
const duration = parseFloat(durationInSeconds);
if (detailed) {
// Format in the detailed format with hours, minutes, and seconds
const hours = Math.floor(duration / 3600);
const minutes = Math.floor((duration % 3600) / 60);
const seconds = Math.floor(duration % 60);
const formattedHours = hours > 0 ? `${hours}h ` : '';
const formattedMinutes = minutes > 0 ? `${minutes}m ` : '';
const formattedSeconds = `${String(seconds).padStart(2, '0')}s`;
return `${formattedHours}${formattedMinutes}${formattedSeconds}`.trim();
} else {
// Format in the default format with seconds and milliseconds
const seconds = Math.floor(duration);
const milliseconds = Math.floor((duration % 1) * 1000);
const formattedSeconds = String(seconds);
const formattedMilliseconds = String(milliseconds).padStart(3, '0');
return `${formattedSeconds}.${formattedMilliseconds}`;
}
}
// Function to determine status class based on value
function getStatusClass(status) {
const lowerStatus = status.toLowerCase();
if (lowerStatus.includes('success') || lowerStatus === 'ok') return 'status-success';
if (lowerStatus.includes('fail')) return 'status-fail';
if (lowerStatus.includes('pending')) return 'status-pending';
if (lowerStatus.includes('broken')) return 'status-broken';
if (lowerStatus.includes('run')) return 'status-run';
if (lowerStatus.includes('error')) return 'status-error';
return 'status-other';
}
function addKeyValueToStatus(key, value) {
const statusContainer = document.getElementById('status-container');
let keyValuePair = document.createElement('div');
keyValuePair.className = 'key-value-pair';
const keyElement = document.createElement('div');
keyElement.className = 'json-key';
keyElement.textContent = key + ':';
const valueElement = document.createElement('div');
valueElement.className = 'json-value';
valueElement.textContent = value;
keyValuePair.appendChild(keyElement)
keyValuePair.appendChild(valueElement)
statusContainer.appendChild(keyValuePair);
}
function addFileButtonToStatus(key, links) {
if (links == null) {
return
}
const statusContainer = document.getElementById('status-container');
const keyElement = document.createElement('div');
keyElement.className = 'json-key';
        keyElement.textContent = (columnSymbols[key] || key) + ':';
statusContainer.appendChild(keyElement);
if (Array.isArray(links) && links.length > 0) {
links.forEach(link => {
const textLink = document.createElement('a');
textLink.href = link;
textLink.textContent = link.split('/').pop();
textLink.target = '_blank';
statusContainer.appendChild(textLink);
statusContainer.appendChild(document.createElement('br'));
});
}
}
function addStatusToStatus(status, start_time, duration) {
const statusContainer = document.getElementById('status-container')
let keyValuePair = document.createElement('div');
keyValuePair.className = 'key-value-pair';
let keyElement = document.createElement('div');
let valueElement = document.createElement('div');
keyElement.className = 'json-key';
valueElement.className = 'json-value';
        keyElement.textContent = (columnSymbols['status'] || 'status') + ':';
valueElement.classList.add('status-value');
valueElement.classList.add(getStatusClass(status));
valueElement.textContent = status;
keyValuePair.appendChild(keyElement);
keyValuePair.appendChild(valueElement);
statusContainer.appendChild(keyValuePair);
keyValuePair = document.createElement('div');
keyValuePair.className = 'key-value-pair';
keyElement = document.createElement('div');
valueElement = document.createElement('div');
keyElement.className = 'json-key';
valueElement.className = 'json-value';
        keyElement.textContent = (columnSymbols['start_time'] || 'start_time') + ':';
valueElement.textContent = formatTimestamp(start_time);
keyValuePair.appendChild(keyElement);
keyValuePair.appendChild(valueElement);
statusContainer.appendChild(keyValuePair);
keyValuePair = document.createElement('div');
keyValuePair.className = 'key-value-pair';
keyElement = document.createElement('div');
valueElement = document.createElement('div');
keyElement.className = 'json-key';
valueElement.className = 'json-value';
        keyElement.textContent = (columnSymbols['duration'] || 'duration') + ':';
if (duration === null) {
// Set initial value to 0 and add a unique ID or data attribute to identify the duration element
valueElement.textContent = '00:00:00';
valueElement.setAttribute('id', 'duration-value');
} else {
// Format the duration if it's a valid number
valueElement.textContent = formatDuration(duration, true);
}
keyValuePair.appendChild(keyElement);
keyValuePair.appendChild(valueElement);
statusContainer.appendChild(keyValuePair);
}
function navigatePath(jsonObj, nameArray) {
let baseParams = new URLSearchParams(window.location.search);
let keysToDelete = [];
baseParams.forEach((value, key) => {
if (key.startsWith('name_')) {
keysToDelete.push(key); // Collect the keys to delete
}
});
keysToDelete.forEach((key) => baseParams.delete(key));
let pathNames = [];
let pathLinks = [];
let currentObj = jsonObj;
// Add the first entry (root level)
baseParams.set(`name_0`, currentObj.name);
pathNames.push(currentObj.name);
pathLinks.push(`<span class="separator">/</span><a href="${window.location.pathname}?${baseParams.toString()}">${currentObj.name}</a>`);
// Iterate through the nameArray starting at index 0
for (const [index, name] of nameArray.entries()) {
if (index === 0) continue;
if (currentObj && Array.isArray(currentObj.results)) {
const nextResult = currentObj.results.find(result => result.name === name);
if (nextResult) {
baseParams.set(`name_${index}`, nextResult.name);
pathNames.push(nextResult.name); // Correctly push nextResult name, not currentObj.name
pathLinks.push(`<span class="separator">/</span><a href="${window.location.pathname}?${baseParams.toString()}">${nextResult.name}</a>`);
currentObj = nextResult; // Move to the next object in the hierarchy
} else {
console.error(`Name "${name}" not found in results array.`);
return null; // Name not found in results array
}
} else {
console.error(`Current object is not structured as expected.`);
return null; // Current object is not structured as expected
}
}
const footerLeft = document.querySelector('#footer .left');
footerLeft.innerHTML = pathLinks.join('');
return currentObj;
}
// Define the fixed columns globally, so both functions can use it
const columns = ['name', 'status', 'start_time', 'duration', 'info'];
const columnSymbols = {
name: '📂',
status: '✔️',
start_time: '🕒',
duration: '⏳',
info: '',
files: '📄'
};
function createResultsTable(results, nest_level) {
if (results && Array.isArray(results) && results.length > 0) {
const table = document.createElement('table');
const thead = document.createElement('thead');
const tbody = document.createElement('tbody');
// Get the current URL parameters
const currentUrl = new URL(window.location.href);
// Create table headers based on the fixed columns
const headerRow = document.createElement('tr');
            columns.forEach(column => {
                const th = document.createElement('th');
                th.textContent = columnSymbols[column] || column;
                th.dataset.column = column; // remember the column key so sortTable can find this header
                th.style.cursor = 'pointer'; // Make headers clickable
                th.addEventListener('click', () => sortTable(results, column, tbody, nest_level)); // Add click event to sort the table
                headerRow.appendChild(th);
            });
thead.appendChild(headerRow);
// Create table rows
populateTableRows(tbody, results, columns, nest_level);
table.appendChild(thead);
table.appendChild(tbody);
return table;
}
return null;
}
function populateTableRows(tbody, results, columns, nest_level) {
const currentUrl = new URL(window.location.href); // Get the current URL
// Clear existing rows if re-rendering (used in sorting)
tbody.innerHTML = '';
results.forEach((result, index) => {
const row = document.createElement('tr');
columns.forEach(column => {
const td = document.createElement('td');
const value = result[column];
if (column === 'name') {
// Create a link for the name field, using name_X
const link = document.createElement('a');
const newUrl = new URL(currentUrl); // Create a fresh copy of the URL for each row
newUrl.searchParams.set(`name_${nest_level}`, value); // Use backticks for string interpolation
link.href = newUrl.toString();
link.textContent = value;
td.classList.add('name-column');
td.appendChild(link);
} else if (column === 'status') {
// Apply status formatting
const span = document.createElement('span');
span.className = getStatusClass(value);
span.textContent = value;
td.classList.add('status-column');
td.appendChild(span);
} else if (column === 'start_time') {
td.classList.add('time-column');
td.textContent = value ? formatTimestamp(value, false) : '';
} else if (column === 'duration') {
td.classList.add('time-column');
td.textContent = value ? formatDuration(value) : '';
} else if (column === 'info') {
// For info and other columns, just display the value
td.textContent = value || '';
td.classList.add('info-column');
}
row.appendChild(td);
});
tbody.appendChild(row);
});
}
function sortTable(results, key, tbody, nest_level) {
        // Find the table header for the given key; headers display symbols, so match on the data-column attribute
        const th = document.querySelector(`th[data-column="${key}"]`);
        if (!th) {
            console.error(`No table header found for key: ${key}`);
            return;
        }
        // Determine the current sort direction and toggle it for the next click
        let ascending = th.getAttribute('data-sort-direction') !== 'asc';
        th.setAttribute('data-sort-direction', ascending ? 'asc' : 'desc');
// Sort the results array by the given key
results.sort((a, b) => {
if (a[key] < b[key]) return ascending ? -1 : 1;
if (a[key] > b[key]) return ascending ? 1 : -1;
return 0;
});
// Re-populate the table with sorted data
populateTableRows(tbody, results, columns, nest_level);
}
function loadJSON(PR, sha, nameParams) {
const infoElement = document.getElementById('info-container');
let lastModifiedTime = null;
const task = nameParams[0].toLowerCase();
// Construct the URL dynamically based on PR, sha, and name_X
const baseUrl = window.location.origin + window.location.pathname.replace('/json.html', '');
const path = `${baseUrl}/${encodeURIComponent(PR)}/${encodeURIComponent(sha)}/result_${task}.json`;
fetch(path, {cache: "no-cache"})
.then(response => {
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
lastModifiedTime = response.headers.get('Last-Modified');
return response.json();
})
.then(data => {
const linksDiv = document.getElementById('links');
const resultsDiv = document.getElementById('result-container');
const footerRight = document.querySelector('#footer .right');
let targetData = navigatePath(data, nameParams);
let nest_level = nameParams.length;
if (targetData) {
infoElement.style.display = 'none';
// Handle footer links if present
if (Array.isArray(data.aux_links) && data.aux_links.length > 0) {
data.aux_links.forEach(link => {
const a = document.createElement('a');
a.href = link;
a.textContent = link.split('/').pop();
a.target = '_blank';
footerRight.appendChild(a);
});
}
addStatusToStatus(targetData.status, targetData.start_time, targetData.duration)
// Handle links
addFileButtonToStatus('files', targetData.links)
// Handle duration update if duration is null and start_time exists
if (targetData.duration === null && targetData.start_time) {
let duration = Math.floor(Date.now() / 1000 - targetData.start_time);
const durationElement = document.getElementById('duration-value');
const intervalId = setInterval(() => {
duration++;
durationElement.textContent = formatDuration(duration, true);
}, 1000);
}
// If 'results' exists and is non-empty, create the table
const resultsData = targetData.results;
if (Array.isArray(resultsData) && resultsData.length > 0) {
const table = createResultsTable(resultsData, nest_level);
if (table) {
resultsDiv.appendChild(table);
}
}
} else {
infoElement.textContent = 'Object Not Found';
infoElement.style.display = 'block';
}
// Set up auto-reload if Last-Modified header is present
if (lastModifiedTime) {
setInterval(() => {
checkForUpdate(path, lastModifiedTime);
}, 30000); // 30000 milliseconds = 30 seconds
}
})
.catch(error => {
console.error('Error loading JSON:', error);
infoElement.textContent = 'Error loading data';
infoElement.style.display = 'block';
});
}
// Function to check if the JSON file is updated
function checkForUpdate(path, lastModifiedTime) {
fetch(path, {method: 'HEAD'})
.then(response => {
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const newLastModifiedTime = response.headers.get('Last-Modified');
if (newLastModifiedTime && new Date(newLastModifiedTime) > new Date(lastModifiedTime)) {
// If the JSON file has been updated, reload the page
window.location.reload();
}
})
.catch(error => {
console.error('Error checking for update:', error);
});
}
// Initialize the page and load JSON from URL parameter
function init() {
const urlParams = new URLSearchParams(window.location.search);
const PR = urlParams.get('PR');
const sha = urlParams.get('sha');
const root_name = urlParams.get('name_0');
const nameParams = [];
urlParams.forEach((value, key) => {
if (key.startsWith('name_')) {
const index = parseInt(key.split('_')[1], 10);
nameParams[index] = value;
}
});
if (PR) {
addKeyValueToStatus("PR", PR)
} else {
console.error("TODO")
}
addKeyValueToStatus("sha", sha);
if (nameParams[1]) {
addKeyValueToStatus("job", nameParams[1]);
}
addKeyValueToStatus("workflow", nameParams[0]);
if (PR && sha && root_name) {
loadJSON(PR, sha, nameParams);
} else {
            const infoElement = document.getElementById('info-container');
            infoElement.textContent = 'Error: Missing required URL parameters: PR, sha, or name_0';
            infoElement.style.display = 'block';
}
}
window.onload = init;
</script>
</body>
</html>

ci/praktika/mangle.py Normal file
View File

@ -0,0 +1,137 @@
import copy
import importlib.util
from pathlib import Path
from typing import Any, Dict
from praktika import Job
from praktika._settings import _USER_DEFINED_SETTINGS, _Settings
from praktika.utils import ContextManager, Utils
def _get_workflows(name=None, file=None):
"""
Gets user's workflow configs
"""
res = []
with ContextManager.cd():
directory = Path(_Settings.WORKFLOWS_DIRECTORY)
for py_file in directory.glob("*.py"):
if file and file not in str(py_file):
continue
            module_name = py_file.stem
            spec = importlib.util.spec_from_file_location(module_name, py_file)
assert spec
foo = importlib.util.module_from_spec(spec)
assert spec.loader
spec.loader.exec_module(foo)
try:
for workflow in foo.WORKFLOWS:
if name:
if name == workflow.name:
print(f"Read workflow [{name}] config from [{module_name}]")
res = [workflow]
break
else:
continue
else:
res += foo.WORKFLOWS
print(f"Read workflow configs from [{module_name}]")
except Exception as e:
print(
f"WARNING: Failed to add WORKFLOWS config from [{module_name}], exception [{e}]"
)
if not res:
Utils.raise_with_error(f"Failed to find workflow [{name or file}]")
for workflow in res:
# add native jobs
_update_workflow_with_native_jobs(workflow)
# fill in artifact properties, e.g. _provided_by
_update_workflow_artifacts(workflow)
return res
def _update_workflow_artifacts(workflow):
artifact_job = {}
for job in workflow.jobs:
for artifact_name in job.provides:
assert artifact_name not in artifact_job
artifact_job[artifact_name] = job.name
for artifact in workflow.artifacts:
artifact._provided_by = artifact_job[artifact.name]
def _update_workflow_with_native_jobs(workflow):
if workflow.dockers:
from praktika.native_jobs import _docker_build_job
print(f"Enable native job [{_docker_build_job.name}] for [{workflow.name}]")
aux_job = copy.deepcopy(_docker_build_job)
if workflow.enable_cache:
print(
f"Add automatic digest config for [{aux_job.name}] job since cache is enabled"
)
docker_digest_config = Job.CacheDigestConfig()
for docker_config in workflow.dockers:
docker_digest_config.include_paths.append(docker_config.path)
aux_job.digest_config = docker_digest_config
workflow.jobs.insert(0, aux_job)
for job in workflow.jobs[1:]:
if not job.requires:
job.requires = []
job.requires.append(aux_job.name)
if (
workflow.enable_cache
or workflow.enable_report
or workflow.enable_merge_ready_status
):
from praktika.native_jobs import _workflow_config_job
print(f"Enable native job [{_workflow_config_job.name}] for [{workflow.name}]")
aux_job = copy.deepcopy(_workflow_config_job)
workflow.jobs.insert(0, aux_job)
for job in workflow.jobs[1:]:
if not job.requires:
job.requires = []
job.requires.append(aux_job.name)
if workflow.enable_merge_ready_status:
from praktika.native_jobs import _final_job
print(f"Enable native job [{_final_job.name}] for [{workflow.name}]")
aux_job = copy.deepcopy(_final_job)
for job in workflow.jobs:
aux_job.requires.append(job.name)
workflow.jobs.append(aux_job)
def _get_user_settings() -> Dict[str, Any]:
"""
Gets user's settings
"""
res = {} # type: Dict[str, Any]
directory = Path(_Settings.SETTINGS_DIRECTORY)
for py_file in directory.glob("*.py"):
        module_name = py_file.stem
        spec = importlib.util.spec_from_file_location(module_name, py_file)
assert spec
foo = importlib.util.module_from_spec(spec)
assert spec.loader
spec.loader.exec_module(foo)
for setting in _USER_DEFINED_SETTINGS:
try:
value = getattr(foo, setting)
res[setting] = value
print(f"Apply user defined setting [{setting} = {value}]")
            except Exception:
                pass  # this settings module does not define this setting
return res

ci/praktika/native_jobs.py Normal file
View File

@ -0,0 +1,378 @@
import sys
from typing import Dict
from praktika import Job, Workflow
from praktika._environment import _Environment
from praktika.cidb import CIDB
from praktika.digest import Digest
from praktika.docker import Docker
from praktika.gh import GH
from praktika.hook_cache import CacheRunnerHooks
from praktika.hook_html import HtmlRunnerHooks
from praktika.mangle import _get_workflows
from praktika.result import Result, ResultInfo
from praktika.runtime import RunConfig
from praktika.s3 import S3
from praktika.settings import Settings
from praktika.utils import Shell, Utils
assert Settings.CI_CONFIG_RUNS_ON
_workflow_config_job = Job.Config(
name=Settings.CI_CONFIG_JOB_NAME,
runs_on=Settings.CI_CONFIG_RUNS_ON,
job_requirements=(
Job.Requirements(
python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS,
python_requirements_txt=Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS,
)
if Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS
else None
),
command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.CI_CONFIG_JOB_NAME}'",
)
_docker_build_job = Job.Config(
name=Settings.DOCKER_BUILD_JOB_NAME,
runs_on=Settings.DOCKER_BUILD_RUNS_ON,
job_requirements=Job.Requirements(
python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS,
python_requirements_txt="",
),
timeout=4 * 3600,
command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.DOCKER_BUILD_JOB_NAME}'",
)
_final_job = Job.Config(
name=Settings.FINISH_WORKFLOW_JOB_NAME,
runs_on=Settings.CI_CONFIG_RUNS_ON,
job_requirements=Job.Requirements(
python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS,
python_requirements_txt="",
),
command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.FINISH_WORKFLOW_JOB_NAME}'",
run_unless_cancelled=True,
)
def _build_dockers(workflow, job_name):
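    # Builds every docker image in dependency order, skipping any whose
    # digest-tagged manifest already exists in the registry.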
print(f"Start [{job_name}], workflow [{workflow.name}]")
dockers = workflow.dockers
ready = []
results = []
job_status = Result.Status.SUCCESS
job_info = ""
dockers = Docker.sort_in_build_order(dockers)
docker_digests = {} # type: Dict[str, str]
for docker in dockers:
docker_digests[docker.name] = Digest().calc_docker_digest(docker, dockers)
if not Shell.check(
"docker buildx inspect --bootstrap | grep -q docker-container", verbose=True
):
print("Install docker container driver")
if not Shell.check(
"docker buildx create --use --name mybuilder --driver docker-container",
verbose=True,
):
job_status = Result.Status.FAILED
job_info = "Failed to install docker buildx driver"
if job_status == Result.Status.SUCCESS:
if not Docker.login(
Settings.DOCKERHUB_USERNAME,
user_password=workflow.get_secret(Settings.DOCKERHUB_SECRET).get_value(),
):
job_status = Result.Status.FAILED
job_info = "Failed to login to dockerhub"
if job_status == Result.Status.SUCCESS:
for docker in dockers:
assert (
docker.name not in ready
), f"All docker names must be uniq [{dockers}]"
stopwatch = Utils.Stopwatch()
info = f"{docker.name}:{docker_digests[docker.name]}"
log_file = f"{Settings.OUTPUT_DIR}/docker_{Utils.normalize_string(docker.name)}.log"
files = []
code, out, err = Shell.get_res_stdout_stderr(
f"docker manifest inspect {docker.name}:{docker_digests[docker.name]}"
)
print(
f"Docker inspect results for {docker.name}:{docker_digests[docker.name]}: exit code [{code}], out [{out}], err [{err}]"
)
if "no such manifest" in err:
ret_code = Docker.build(
docker, log_file=log_file, digests=docker_digests, add_latest=False
)
if ret_code == 0:
status = Result.Status.SUCCESS
else:
status = Result.Status.FAILED
job_status = Result.Status.FAILED
info += f", failed with exit code: {ret_code}, see log"
files.append(log_file)
else:
print(
f"Docker image [{docker.name}:{docker_digests[docker.name]} exists - skip build"
)
status = Result.Status.SKIPPED
ready.append(docker.name)
results.append(
Result(
name=docker.name,
status=status,
info=info,
duration=stopwatch.duration,
start_time=stopwatch.start_time,
files=files,
)
)
Result.from_fs(job_name).set_status(job_status).set_results(results).set_info(
job_info
)
if job_status != Result.Status.SUCCESS:
sys.exit(1)
def _config_workflow(workflow: Workflow.Config, job_name):
def _check_yaml_up_to_date():
print("Check workflows are up to date")
stop_watch = Utils.Stopwatch()
exit_code, output, err = Shell.get_res_stdout_stderr(
f"git diff-index HEAD -- {Settings.WORKFLOW_PATH_PREFIX}"
)
info = ""
status = Result.Status.SUCCESS
if exit_code != 0:
info = f"workspace has uncommitted files unexpectedly [{output}]"
status = Result.Status.ERROR
print("ERROR: ", info)
else:
Shell.check(f"{Settings.PYTHON_INTERPRETER} -m praktika --generate")
exit_code, output, err = Shell.get_res_stdout_stderr(
f"git diff-index HEAD -- {Settings.WORKFLOW_PATH_PREFIX}"
)
if exit_code != 0:
info = f"workspace has outdated workflows [{output}] - regenerate with [python -m praktika --generate]"
status = Result.Status.ERROR
print("ERROR: ", info)
return (
Result(
name="Check Workflows updated",
status=status,
start_time=stop_watch.start_time,
duration=stop_watch.duration,
info=info,
),
info,
)
def _check_secrets(secrets):
print("Check Secrets")
stop_watch = Utils.Stopwatch()
infos = []
for secret_config in secrets:
value = secret_config.get_value()
if not value:
info = f"ERROR: Failed to read secret [{secret_config.name}]"
infos.append(info)
print(info)
info = "\n".join(infos)
return (
Result(
name="Check Secrets",
status=(Result.Status.FAILED if infos else Result.Status.SUCCESS),
start_time=stop_watch.start_time,
duration=stop_watch.duration,
info=info,
),
info,
)
def _check_db(workflow):
stop_watch = Utils.Stopwatch()
res, info = CIDB(
workflow.get_secret(Settings.SECRET_CI_DB_URL).get_value(),
workflow.get_secret(Settings.SECRET_CI_DB_PASSWORD).get_value(),
).check()
return (
Result(
name="Check CI DB",
status=(Result.Status.FAILED if not res else Result.Status.SUCCESS),
start_time=stop_watch.start_time,
duration=stop_watch.duration,
info=info,
),
info,
)
print(f"Start [{job_name}], workflow [{workflow.name}]")
results = []
files = []
info_lines = []
job_status = Result.Status.SUCCESS
workflow_config = RunConfig(
name=workflow.name,
digest_jobs={},
digest_dockers={},
sha=_Environment.get().SHA,
cache_success=[],
cache_success_base64=[],
cache_artifacts={},
).dump()
# checks:
result_, info = _check_yaml_up_to_date()
if result_.status != Result.Status.SUCCESS:
print("ERROR: yaml files are outdated - regenerate, commit and push")
job_status = Result.Status.ERROR
info_lines.append(job_name + ": " + info)
results.append(result_)
if workflow.secrets:
result_, info = _check_secrets(workflow.secrets)
if result_.status != Result.Status.SUCCESS:
print(f"ERROR: Invalid secrets in workflow [{workflow.name}]")
job_status = Result.Status.ERROR
info_lines.append(job_name + ": " + info)
results.append(result_)
if workflow.enable_cidb:
result_, info = _check_db(workflow)
if result_.status != Result.Status.SUCCESS:
job_status = Result.Status.ERROR
info_lines.append(job_name + ": " + info)
results.append(result_)
# config:
if workflow.dockers:
print("Calculate docker's digests")
dockers = workflow.dockers
dockers = Docker.sort_in_build_order(dockers)
for docker in dockers:
workflow_config.digest_dockers[docker.name] = Digest().calc_docker_digest(
docker, dockers
)
workflow_config.dump()
if workflow.enable_cache:
print("Cache Lookup")
stop_watch = Utils.Stopwatch()
workflow_config = CacheRunnerHooks.configure(workflow)
results.append(
Result(
name="Cache Lookup",
status=Result.Status.SUCCESS,
start_time=stop_watch.start_time,
duration=stop_watch.duration,
)
)
files.append(RunConfig.file_name_static(workflow.name))
workflow_config.dump()
if workflow.enable_report:
print("Init report")
stop_watch = Utils.Stopwatch()
HtmlRunnerHooks.configure(workflow)
results.append(
Result(
name="Init Report",
status=Result.Status.SUCCESS,
start_time=stop_watch.start_time,
duration=stop_watch.duration,
)
)
files.append(Result.file_name_static(workflow.name))
Result.from_fs(job_name).set_status(job_status).set_results(results).set_files(
files
).set_info("\n".join(info_lines))
if job_status != Result.Status.SUCCESS:
sys.exit(1)
def _finish_workflow(workflow, job_name):
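    # Aggregates all job results, downgrades unfinished jobs to ERROR, and posts the
    # final "ready for merge" commit status based on failed jobs that don't allow merge-on-failure.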
print(f"Start [{job_name}], workflow [{workflow.name}]")
env = _Environment.get()
print("Check Actions statuses")
print(env.get_needs_statuses())
print("Check Workflow results")
S3.copy_result_from_s3(
Result.file_name_static(workflow.name),
lock=False,
)
workflow_result = Result.from_fs(workflow.name)
ready_for_merge_status = Result.Status.SUCCESS
ready_for_merge_description = ""
failed_results = []
update_final_report = False
for result in workflow_result.results:
if result.name == job_name or result.status in (
Result.Status.SUCCESS,
Result.Status.SKIPPED,
):
continue
if not result.is_completed():
print(
f"ERROR: not finished job [{result.name}] in the workflow - set status to error"
)
result.status = Result.Status.ERROR
# dump workflow result after update - to have an updated result in post
workflow_result.dump()
            # add error into env - should appear in the report
env.add_info(ResultInfo.NOT_FINALIZED + f" [{result.name}]")
update_final_report = True
job = workflow.get_job(result.name)
if not job or not job.allow_merge_on_failure:
print(
f"NOTE: Result for [{result.name}] has not ok status [{result.status}]"
)
ready_for_merge_status = Result.Status.FAILED
            failed_results.append(result.name.split("(", maxsplit=1)[0])  # drop the parameter part of the name
if failed_results:
ready_for_merge_description = f"failed: {', '.join(failed_results)}"
if not GH.post_commit_status(
name=Settings.READY_FOR_MERGE_STATUS_NAME + f" [{workflow.name}]",
status=ready_for_merge_status,
description=ready_for_merge_description,
url="",
):
print(f"ERROR: failed to set status [{Settings.READY_FOR_MERGE_STATUS_NAME}]")
env.add_info(ResultInfo.GH_STATUS_ERROR)
if update_final_report:
S3.copy_result_to_s3(
workflow_result,
unlock=False,
) # no lock - no unlock
Result.from_fs(job_name).set_status(Result.Status.SUCCESS).set_info(
ready_for_merge_description
)
if __name__ == "__main__":
job_name = sys.argv[1]
assert job_name, "Job name must be provided as input argument"
workflow = _get_workflows(name=_Environment.get().WORKFLOW_NAME)[0]
if job_name == Settings.DOCKER_BUILD_JOB_NAME:
_build_dockers(workflow, job_name)
elif job_name == Settings.CI_CONFIG_JOB_NAME:
_config_workflow(workflow, job_name)
elif job_name == Settings.FINISH_WORKFLOW_JOB_NAME:
_finish_workflow(workflow, job_name)
else:
assert False, f"BUG, job name [{job_name}]"

ci/praktika/parser.py Normal file
View File

@ -0,0 +1,258 @@
import dataclasses
from typing import Any, Dict, List
from praktika import Artifact, Workflow
from praktika.mangle import _get_workflows
class AddonType:
PY = "py"
@dataclasses.dataclass
class WorkflowYaml:
@dataclasses.dataclass
class JobYaml:
name: str
needs: List[str]
runs_on: List[str]
artifacts_gh_requires: List["WorkflowYaml.ArtifactYaml"]
artifacts_gh_provides: List["WorkflowYaml.ArtifactYaml"]
addons: List["WorkflowYaml.JobAddonYaml"]
gh_app_auth: bool
run_unless_cancelled: bool
parameter: Any
def __repr__(self):
return self.name
@dataclasses.dataclass
class ArtifactYaml:
name: str
provided_by: str
required_by: List[str]
path: str
type: str
def __repr__(self):
return self.name
@dataclasses.dataclass
class JobAddonYaml:
install_python: bool
requirements_txt_path: str
name: str
event: str
branches: List[str]
jobs: List[JobYaml]
job_to_config: Dict[str, JobYaml]
artifact_to_config: Dict[str, ArtifactYaml]
secret_names_gh: List[str]
enable_cache: bool
class WorkflowConfigParser:
def __init__(self, config: Workflow.Config):
self.workflow_name = config.name
self.config = config
self.requires_all = [] # type: List[str]
self.provides_all = [] # type: List[str]
self.job_names_all = [] # type: List[str]
self.artifact_to_providing_job_map = {} # type: Dict[str, List[str]]
self.artifact_to_job_requires_map = {} # type: Dict[str, List[str]]
self.artifact_map = {} # type: Dict[str, List[Artifact.Config]]
self.job_to_provides_artifacts = {} # type: Dict[str, List[Artifact.Config]]
self.job_to_requires_artifacts = {} # type: Dict[str, List[Artifact.Config]]
self.workflow_yaml_config = WorkflowYaml(
name=self.workflow_name,
event=config.event,
branches=[],
jobs=[],
secret_names_gh=[],
job_to_config={},
artifact_to_config={},
enable_cache=False,
)
def parse(self):
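        # Resolves the workflow config into a YAML-ready structure: validates branch
        # settings, wires artifacts to their providing/requiring jobs, and derives job needs.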
self.workflow_yaml_config.enable_cache = self.config.enable_cache
# populate WorkflowYaml.branches
if self.config.event in (Workflow.Event.PUSH,):
assert (
self.config.branches
), f'Workflow.Config.branches (e.g. ["main"]) must be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]'
assert (
not self.config.base_branches
), f'Workflow.Config.base_branches (e.g. ["main"]) must not be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]'
assert isinstance(
self.config.branches, list
), f'Workflow.Config.branches must be of type list (e.g. ["main"]), workflow [{self.workflow_name}]'
self.workflow_yaml_config.branches = self.config.branches
elif self.config.event in (Workflow.Event.PULL_REQUEST,):
assert (
self.config.base_branches
), f'Workflow.Config.base_branches (e.g. ["main"]) must be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]'
assert (
not self.config.branches
), f'Workflow.Config.branches (e.g. ["main"]) must not be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]'
assert isinstance(
self.config.base_branches, list
), f'Workflow.Config.base_branches must be of type list (e.g. ["main"]), workflow [{self.workflow_name}]'
self.workflow_yaml_config.branches = self.config.base_branches
# populate WorkflowYaml.artifact_to_config with phony artifacts
for job in self.config.jobs:
assert (
job.name not in self.workflow_yaml_config.artifact_to_config
), f"Not uniq Job name [{job.name}], workflow [{self.workflow_name}]"
self.workflow_yaml_config.artifact_to_config[job.name] = (
WorkflowYaml.ArtifactYaml(
name=job.name,
provided_by=job.name,
required_by=[],
path="",
type=Artifact.Type.PHONY,
)
)
# populate jobs
for job in self.config.jobs:
job_yaml_config = WorkflowYaml.JobYaml(
name=job.name,
addons=[],
artifacts_gh_requires=[],
artifacts_gh_provides=[],
needs=[],
runs_on=[],
gh_app_auth=False,
run_unless_cancelled=job.run_unless_cancelled,
parameter=None,
)
self.workflow_yaml_config.jobs.append(job_yaml_config)
assert (
job.name not in self.workflow_yaml_config.job_to_config
), f"Job name [{job.name}] is not uniq, workflow [{self.workflow_name}]"
self.workflow_yaml_config.job_to_config[job.name] = job_yaml_config
# populate WorkflowYaml.artifact_to_config
if self.config.artifacts:
for artifact in self.config.artifacts:
assert (
artifact.name not in self.workflow_yaml_config.artifact_to_config
), f"Artifact name [{artifact.name}] is not uniq, workflow [{self.workflow_name}]"
artifact_yaml_config = WorkflowYaml.ArtifactYaml(
name=artifact.name,
provided_by="",
required_by=[],
path=artifact.path,
type=artifact.type,
)
self.workflow_yaml_config.artifact_to_config[artifact.name] = (
artifact_yaml_config
)
# populate ArtifactYaml.provided_by
for job in self.config.jobs:
if job.provides:
for artifact_name in job.provides:
assert (
artifact_name in self.workflow_yaml_config.artifact_to_config
), f"Artifact [{artifact_name}] has no config, job [{job.name}], workflow [{self.workflow_name}]"
assert not self.workflow_yaml_config.artifact_to_config[
artifact_name
].provided_by, f"Artifact [{artifact_name}] provided by multiple jobs [{self.workflow_yaml_config.artifact_to_config[artifact_name].provided_by}] and [{job.name}]"
self.workflow_yaml_config.artifact_to_config[
artifact_name
].provided_by = job.name
# populate ArtifactYaml.required_by
for job in self.config.jobs:
if job.requires:
for artifact_name in job.requires:
assert (
artifact_name in self.workflow_yaml_config.artifact_to_config
), f"Artifact [{artifact_name}] has no config, job [{job.name}], workflow [{self.workflow_name}]"
assert self.workflow_yaml_config.artifact_to_config[
artifact_name
].provided_by, f"Artifact [{artifact_name}] has no job providing it, required by job [{job.name}], workflow [{self.workflow_name}]"
self.workflow_yaml_config.artifact_to_config[
artifact_name
].required_by.append(job.name)
# populate JobYaml.addons
for job in self.config.jobs:
if job.job_requirements:
addon_yaml = WorkflowYaml.JobAddonYaml(
requirements_txt_path=job.job_requirements.python_requirements_txt,
install_python=job.job_requirements.python,
)
self.workflow_yaml_config.job_to_config[job.name].addons.append(
addon_yaml
)
if self.config.enable_report:
for job in self.config.jobs:
# auth required for every job with enabled HTML, so that workflow summary status can be updated
self.workflow_yaml_config.job_to_config[job.name].gh_app_auth = True
# populate JobYaml.runs_on
for job in self.config.jobs:
self.workflow_yaml_config.job_to_config[job.name].runs_on = job.runs_on
# populate JobYaml.artifacts_gh_requires, JobYaml.artifacts_gh_provides and JobYaml.needs
for (
artifact_name,
artifact,
) in self.workflow_yaml_config.artifact_to_config.items():
# assert (
# artifact.provided_by
# and artifact.provided_by in self.workflow_yaml_config.job_to_config
# ), f"Artifact [{artifact_name}] has no valid job providing it [{artifact.provided_by}]"
for job_name in artifact.required_by:
if (
artifact.provided_by
not in self.workflow_yaml_config.job_to_config[job_name].needs
):
self.workflow_yaml_config.job_to_config[job_name].needs.append(
artifact.provided_by
)
if artifact.type in (Artifact.Type.GH,):
self.workflow_yaml_config.job_to_config[
job_name
].artifacts_gh_requires.append(artifact)
elif artifact.type in (Artifact.Type.PHONY, Artifact.Type.S3):
pass
else:
assert (
False
), f"Artifact [{artifact_name}] has unsupported type [{artifact.type}]"
if not artifact.required_by and artifact.type != Artifact.Type.PHONY:
print(
f"WARNING: Artifact [{artifact_name}] provided by job [{artifact.provided_by}] not required by any job in workflow [{self.workflow_name}]"
)
if artifact.type == Artifact.Type.GH:
self.workflow_yaml_config.job_to_config[
artifact.provided_by
].artifacts_gh_provides.append(artifact)
        # populate JobYaml.parameter
for job in self.config.jobs:
self.workflow_yaml_config.job_to_config[job.name].parameter = job.parameter
# populate secrets
for secret_config in self.config.secrets:
if secret_config.is_gh():
self.workflow_yaml_config.secret_names_gh.append(secret_config.name)
return self
if __name__ == "__main__":
# test
workflows = _get_workflows()
for workflow in workflows:
WorkflowConfigParser(workflow).parse()
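    # A hedged end-to-end sketch: assuming Job.Config accepts the fields the
    # parser reads above (name, runs_on, command, provides, requires), a
    # "requires" edge on an artifact becomes a "needs" edge between jobs.
    from praktika import Artifact, Job, Workflow

    demo = Workflow.Config(
        name="Demo",
        event=Workflow.Event.PULL_REQUEST,
        base_branches=["main"],
        jobs=[
            Job.Config(
                name="Build",
                runs_on=["builder"],
                command="ci/build.sh",
                provides=["binary"],
            ),
            Job.Config(
                name="Test",
                runs_on=["func-tester"],
                command="ci/test.sh",
                requires=["binary"],
            ),
        ],
        artifacts=[
            Artifact.Config(name="binary", type=Artifact.Type.S3, path="out/bin")
        ],
    )
    parsed = WorkflowConfigParser(demo).parse()
    assert parsed.workflow_yaml_config.job_to_config["Test"].needs == ["Build"]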

354
ci/praktika/result.py Normal file

@ -0,0 +1,354 @@
import dataclasses
import datetime
import sys
from collections.abc import Container
from pathlib import Path
from typing import Any, Dict, List, Optional
from praktika._environment import _Environment
from praktika._settings import _Settings
from praktika.utils import ContextManager, MetaClasses, Shell, Utils
@dataclasses.dataclass
class Result(MetaClasses.Serializable):
"""
Represents the outcome of a workflow/job/task or any operation, along with associated metadata.
This class supports nesting of results to represent tasks with sub-tasks, and includes
various attributes to track status, timing, files, and links.
Attributes:
name (str): The name of the task.
status (str): The current status of the task. Should be one of the values defined in the Status class.
start_time (Optional[float]): The start time of the task in Unix timestamp format. None if not started.
duration (Optional[float]): The duration of the task in seconds. None if not completed.
results (List[Result]): A list of sub-results representing nested tasks.
files (List[str]): A list of file paths or names related to the result.
links (List[str]): A list of URLs related to the result (e.g., links to reports or resources).
info (str): Additional information about the result. Free-form text.
# TODO: rename
aux_links (List[str]): A list of auxiliary links that provide additional context for the result.
# TODO: remove
html_link (str): A direct link to an HTML representation of the result (e.g., a detailed report page).
Inner Class:
Status: Defines possible statuses for the task, such as "success", "failure", etc.
"""
class Status:
SKIPPED = "skipped"
SUCCESS = "success"
FAILED = "failure"
PENDING = "pending"
RUNNING = "running"
ERROR = "error"
name: str
status: str
start_time: Optional[float] = None
duration: Optional[float] = None
results: List["Result"] = dataclasses.field(default_factory=list)
files: List[str] = dataclasses.field(default_factory=list)
links: List[str] = dataclasses.field(default_factory=list)
info: str = ""
aux_links: List[str] = dataclasses.field(default_factory=list)
html_link: str = ""
@staticmethod
def create_from(
name="",
results: List["Result"] = None,
stopwatch: Utils.Stopwatch = None,
status="",
files=None,
info="",
with_info_from_results=True,
):
if isinstance(status, bool):
status = Result.Status.SUCCESS if status else Result.Status.FAILED
        if not results and not status:
            print("ERROR: Either .results or .status must be provided")
            raise ValueError("Either .results or .status must be provided")
        if not name:
            name = _Environment.get().JOB_NAME
            if not name:
                print("ERROR: Failed to guess the .name")
                raise ValueError("Failed to guess the .name")
result_status = status or Result.Status.SUCCESS
infos = []
if info:
            # note: a str is also a Container - treat it as a single message
            if isinstance(info, Container) and not isinstance(info, str):
infos += info
else:
infos.append(info)
if results and not status:
for result in results:
if result.status not in (Result.Status.SUCCESS, Result.Status.FAILED):
Utils.raise_with_error(
f"Unexpected result status [{result.status}] for Result.create_from call"
)
if result.status != Result.Status.SUCCESS:
result_status = Result.Status.FAILED
if results:
for result in results:
if result.info and with_info_from_results:
infos.append(f"{result.name}: {result.info}")
return Result(
name=name,
status=result_status,
start_time=stopwatch.start_time if stopwatch else None,
duration=stopwatch.duration if stopwatch else None,
info="\n".join(infos) if infos else "",
results=results or [],
files=files or [],
)
@staticmethod
def get():
return Result.from_fs(_Environment.get().JOB_NAME)
def is_completed(self):
return self.status not in (Result.Status.PENDING, Result.Status.RUNNING)
    def is_running(self):
        return self.status in (Result.Status.RUNNING,)
def is_ok(self):
return self.status in (Result.Status.SKIPPED, Result.Status.SUCCESS)
def set_status(self, status) -> "Result":
self.status = status
self.dump()
return self
def set_success(self) -> "Result":
return self.set_status(Result.Status.SUCCESS)
def set_results(self, results: List["Result"]) -> "Result":
self.results = results
self.dump()
return self
def set_files(self, files) -> "Result":
for file in files:
            assert Path(
                file
            ).is_file(), f"Not a valid file [{file}] from file list [{files}]"
if not self.files:
self.files = []
self.files += files
self.dump()
return self
def set_info(self, info: str) -> "Result":
if self.info:
self.info += "\n"
self.info += info
self.dump()
return self
def set_link(self, link) -> "Result":
self.links.append(link)
self.dump()
return self
@classmethod
def file_name_static(cls, name):
return f"{_Settings.TEMP_DIR}/result_{Utils.normalize_string(name)}.json"
@classmethod
def from_dict(cls, obj: Dict[str, Any]) -> "Result":
sub_results = []
for result_dict in obj["results"] or []:
sub_res = cls.from_dict(result_dict)
sub_results.append(sub_res)
obj["results"] = sub_results
return Result(**obj)
    def update_duration(self):
        if not self.duration and self.start_time:
            self.duration = datetime.datetime.utcnow().timestamp() - self.start_time
        else:
            if not self.duration:
                print(
                    f"NOTE: start_time is not set for job [{self.name}] Result - do not update duration"
                )
            else:
                print(
                    f"NOTE: duration is set for job [{self.name}] Result - do not update by CI"
                )
return self
def update_sub_result(self, result: "Result"):
assert self.results, "BUG?"
for i, result_ in enumerate(self.results):
if result_.name == result.name:
self.results[i] = result
self._update_status()
return self
def _update_status(self):
was_pending = False
was_running = False
if self.status == self.Status.PENDING:
was_pending = True
if self.status == self.Status.RUNNING:
was_running = True
has_pending, has_running, has_failed = False, False, False
for result_ in self.results:
if result_.status in (self.Status.RUNNING,):
has_running = True
if result_.status in (self.Status.PENDING,):
has_pending = True
if result_.status in (self.Status.ERROR, self.Status.FAILED):
has_failed = True
if has_running:
self.status = self.Status.RUNNING
elif has_pending:
self.status = self.Status.PENDING
elif has_failed:
self.status = self.Status.FAILED
else:
self.status = self.Status.SUCCESS
if (was_pending or was_running) and self.status not in (
self.Status.PENDING,
self.Status.RUNNING,
):
print("Pipeline finished")
self.update_duration()
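    # A small illustration of the aggregation rules above (names are examples):
    # any running child forces "running"; otherwise pending wins over failed,
    # and failed wins over success:
    #
    #   parent = Result.generate_pending(
    #       "Workflow",
    #       results=[
    #           Result(name="Build", status=Result.Status.SUCCESS),
    #           Result(name="Tests", status=Result.Status.RUNNING),
    #       ],
    #   )
    #   parent._update_status()
    #   assert parent.status == Result.Status.RUNNING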
@classmethod
def generate_pending(cls, name, results=None):
return Result(
name=name,
status=Result.Status.PENDING,
start_time=None,
duration=None,
results=results or [],
files=[],
links=[],
info="",
)
@classmethod
def generate_skipped(cls, name, results=None):
return Result(
name=name,
status=Result.Status.SKIPPED,
start_time=None,
duration=None,
results=results or [],
files=[],
links=[],
info="from cache",
)
@classmethod
def create_from_command_execution(
cls,
name,
command,
with_log=False,
fail_fast=True,
workdir=None,
command_args=None,
command_kwargs=None,
):
"""
Executes shell commands or Python callables, optionally logging output, and handles errors.
:param name: Check name
:param command: Shell command (str) or Python callable, or list of them.
:param workdir: Optional working directory.
:param with_log: Boolean flag to log output to a file.
:param fail_fast: Boolean flag to stop execution if one command fails.
:param command_args: Positional arguments for the callable command.
:param command_kwargs: Keyword arguments for the callable command.
:return: Result object with status and optional log file.
"""
# Stopwatch to track execution time
stop_watch_ = Utils.Stopwatch()
command_args = command_args or []
command_kwargs = command_kwargs or {}
# Set log file path if logging is enabled
log_file = (
f"{_Settings.TEMP_DIR}/{Utils.normalize_string(name)}.log"
if with_log
else None
)
# Ensure the command is a list for consistent iteration
if not isinstance(command, list):
fail_fast = False
command = [command]
print(f"> Starting execution for [{name}]")
res = True # Track success/failure status
error_infos = []
for command_ in command:
if callable(command_):
# If command is a Python function, call it with provided arguments
result = command_(*command_args, **command_kwargs)
if isinstance(result, bool):
res = result
elif result:
error_infos.append(str(result))
res = False
else:
# Run shell command in a specified directory with logging and verbosity
with ContextManager.cd(workdir):
exit_code = Shell.run(command_, verbose=True, log_file=log_file)
res = exit_code == 0
# If fail_fast is enabled, stop on first failure
if not res and fail_fast:
print(f"Execution stopped due to failure in [{command_}]")
break
# Create and return the result object with status and log file (if any)
return Result.create_from(
name=name,
status=res,
stopwatch=stop_watch_,
info=error_infos,
files=[log_file] if log_file else None,
)
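    # Usage sketch - a shell command mixed with a Python callable (the command
    # and the callable below are illustrative, not part of praktika):
    #
    #   result = Result.create_from_command_execution(
    #       name="Style check",
    #       command=["python3 -m black --check ci/", my_check_function],
    #       with_log=True,
    #       fail_fast=True,
    #   )
    #   result.dump()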
    def finish_job_accordingly(self):
        self.dump()
        if not self.is_ok():
            print("ERROR: Job Failed")
            print("Failed checks:")
            for result in self.results:
                if not result.is_ok():
                    print(" | ", result)
            sys.exit(1)
        else:
            print("ok")
class ResultInfo:
    SETUP_ENV_JOB_FAILED = (
        "Failed to set up job env, it's a praktika bug or misconfiguration"
    )
    PRE_JOB_FAILED = (
        "Failed to do a job pre-run step, it's a praktika bug or misconfiguration"
    )
KILLED = "Job killed or terminated, no Result provided"
NOT_FOUND_IMPOSSIBLE = (
"No Result file (bug, or job misbehaviour, must not ever happen)"
)
SKIPPED_DUE_TO_PREVIOUS_FAILURE = "Skipped due to previous failure"
TIMEOUT = "Timeout"
GH_STATUS_ERROR = "Failed to set GH commit status"
    NOT_FINALIZED = (
        "Job did not provide Result: job script bug, dead CI runner, or praktika bug"
    )
S3_ERROR = "S3 call failure"

348
ci/praktika/runner.py Normal file

@ -0,0 +1,348 @@
import os
import re
import sys
import traceback
from pathlib import Path
from praktika._environment import _Environment
from praktika.artifact import Artifact
from praktika.cidb import CIDB
from praktika.digest import Digest
from praktika.hook_cache import CacheRunnerHooks
from praktika.hook_html import HtmlRunnerHooks
from praktika.result import Result, ResultInfo
from praktika.runtime import RunConfig
from praktika.s3 import S3
from praktika.settings import Settings
from praktika.utils import Shell, TeePopen, Utils
class Runner:
@staticmethod
def generate_dummy_environment(workflow, job):
print("WARNING: Generate dummy env for local test")
Shell.check(
f"mkdir -p {Settings.TEMP_DIR} {Settings.INPUT_DIR} {Settings.OUTPUT_DIR}"
)
_Environment(
WORKFLOW_NAME=workflow.name,
JOB_NAME=job.name,
REPOSITORY="",
BRANCH="",
SHA="",
PR_NUMBER=-1,
EVENT_TYPE="",
JOB_OUTPUT_STREAM="",
EVENT_FILE_PATH="",
CHANGE_URL="",
COMMIT_URL="",
BASE_BRANCH="",
RUN_URL="",
RUN_ID="",
INSTANCE_ID="",
INSTANCE_TYPE="",
INSTANCE_LIFE_CYCLE="",
LOCAL_RUN=True,
).dump()
workflow_config = RunConfig(
name=workflow.name,
digest_jobs={},
digest_dockers={},
sha="",
cache_success=[],
cache_success_base64=[],
cache_artifacts={},
)
for docker in workflow.dockers:
workflow_config.digest_dockers[docker.name] = Digest().calc_docker_digest(
docker, workflow.dockers
)
workflow_config.dump()
Result.generate_pending(job.name).dump()
def _setup_env(self, _workflow, job):
# source env file to write data into fs (workflow config json, workflow status json)
Shell.check(f". {Settings.ENV_SETUP_SCRIPT}", verbose=True, strict=True)
# parse the same env script and apply envs from python so that this process sees them
with open(Settings.ENV_SETUP_SCRIPT, "r") as f:
content = f.read()
export_pattern = re.compile(
r"export (\w+)=\$\(cat<<\'EOF\'\n(.*?)EOF\n\)", re.DOTALL
)
matches = export_pattern.findall(content)
for key, value in matches:
value = value.strip()
os.environ[key] = value
print(f"Set environment variable {key}.")
print("Read GH Environment")
env = _Environment.from_env()
env.JOB_NAME = job.name
env.PARAMETER = job.parameter
env.dump()
print(env)
return 0
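    # The export parser above matches heredoc-style blocks, e.g. a hypothetical
    # fragment of Settings.ENV_SETUP_SCRIPT:
    #
    #   export WORKFLOW_NAME=$(cat<<'EOF'
    #   MyWorkflow
    #   EOF
    #   )
    #
    # findall() yields ("WORKFLOW_NAME", "MyWorkflow\n"), which is stripped and
    # applied to os.environ so this process sees what the sourced shell saw.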
def _pre_run(self, workflow, job):
env = _Environment.get()
result = Result(
name=job.name,
status=Result.Status.RUNNING,
start_time=Utils.timestamp(),
)
result.dump()
if workflow.enable_report and job.name != Settings.CI_CONFIG_JOB_NAME:
print("Update Job and Workflow Report")
HtmlRunnerHooks.pre_run(workflow, job)
print("Download required artifacts")
required_artifacts = []
if job.requires and workflow.artifacts:
for requires_artifact_name in job.requires:
for artifact in workflow.artifacts:
if (
artifact.name == requires_artifact_name
and artifact.type == Artifact.Type.S3
):
required_artifacts.append(artifact)
print(f"--- Job requires s3 artifacts [{required_artifacts}]")
if workflow.enable_cache:
prefixes = CacheRunnerHooks.pre_run(
_job=job, _workflow=workflow, _required_artifacts=required_artifacts
)
else:
prefixes = [env.get_s3_prefix()] * len(required_artifacts)
for artifact, prefix in zip(required_artifacts, prefixes):
s3_path = f"{Settings.S3_ARTIFACT_PATH}/{prefix}/{Utils.normalize_string(artifact._provided_by)}/{Path(artifact.path).name}"
assert S3.copy_file_from_s3(s3_path=s3_path, local_path=Settings.INPUT_DIR)
return 0
def _run(self, workflow, job, docker="", no_docker=False, param=None):
if param:
if not isinstance(param, str):
Utils.raise_with_error(
f"Custom param for local tests must be of type str, got [{type(param)}]"
)
env = _Environment.get()
env.dump()
if job.run_in_docker and not no_docker:
# TODO: add support for any image, including not from ci config (e.g. ubuntu:latest)
docker_tag = RunConfig.from_fs(workflow.name).digest_dockers[
job.run_in_docker
]
docker = docker or f"{job.run_in_docker}:{docker_tag}"
cmd = f"docker run --rm --user \"$(id -u):$(id -g)\" -e PYTHONPATH='{Settings.DOCKER_WD}:{Settings.DOCKER_WD}/ci' --volume ./:{Settings.DOCKER_WD} --volume {Settings.TEMP_DIR}:{Settings.TEMP_DIR} --workdir={Settings.DOCKER_WD} {docker} {job.command}"
else:
cmd = job.command
if param:
print(f"Custom --param [{param}] will be passed to job's script")
cmd += f" --param {param}"
print(f"--- Run command [{cmd}]")
with TeePopen(cmd, timeout=job.timeout) as process:
exit_code = process.wait()
result = Result.from_fs(job.name)
if exit_code != 0:
if not result.is_completed():
if process.timeout_exceeded:
print(
f"WARNING: Job timed out: [{job.name}], timeout [{job.timeout}], exit code [{exit_code}]"
)
result.set_status(Result.Status.ERROR).set_info(
ResultInfo.TIMEOUT
)
elif result.is_running():
info = f"ERROR: Job terminated with an error, exit code [{exit_code}] - set status to [{Result.Status.ERROR}]"
print(info)
result.set_status(Result.Status.ERROR).set_info(info)
else:
info = f"ERROR: Invalid status [{result.status}] for exit code [{exit_code}] - switch to [{Result.Status.ERROR}]"
print(info)
result.set_status(Result.Status.ERROR).set_info(info)
result.dump()
return exit_code
def _post_run(
self, workflow, job, setup_env_exit_code, prerun_exit_code, run_exit_code
):
info_errors = []
env = _Environment.get()
result_exist = Result.exist(job.name)
if setup_env_exit_code != 0:
info = f"ERROR: {ResultInfo.SETUP_ENV_JOB_FAILED}"
print(info)
# set Result with error and logs
Result(
name=job.name,
status=Result.Status.ERROR,
start_time=Utils.timestamp(),
duration=0.0,
info=info,
).dump()
elif prerun_exit_code != 0:
info = f"ERROR: {ResultInfo.PRE_JOB_FAILED}"
print(info)
# set Result with error and logs
Result(
name=job.name,
status=Result.Status.ERROR,
start_time=Utils.timestamp(),
duration=0.0,
info=info,
).dump()
elif not result_exist:
info = f"ERROR: {ResultInfo.NOT_FOUND_IMPOSSIBLE}"
print(info)
Result(
name=job.name,
start_time=Utils.timestamp(),
duration=None,
status=Result.Status.ERROR,
info=ResultInfo.NOT_FOUND_IMPOSSIBLE,
).dump()
result = Result.from_fs(job.name)
if not result.is_completed():
info = f"ERROR: {ResultInfo.KILLED}"
print(info)
result.set_info(info).set_status(Result.Status.ERROR).dump()
result.set_files(files=[Settings.RUN_LOG])
result.update_duration().dump()
if result.info and result.status != Result.Status.SUCCESS:
# provide job info to workflow level
info_errors.append(result.info)
if run_exit_code == 0:
providing_artifacts = []
if job.provides and workflow.artifacts:
for provides_artifact_name in job.provides:
for artifact in workflow.artifacts:
if (
artifact.name == provides_artifact_name
and artifact.type == Artifact.Type.S3
):
providing_artifacts.append(artifact)
if providing_artifacts:
print(f"Job provides s3 artifacts [{providing_artifacts}]")
for artifact in providing_artifacts:
try:
assert Shell.check(
f"ls -l {artifact.path}", verbose=True
), f"Artifact {artifact.path} not found"
s3_path = f"{Settings.S3_ARTIFACT_PATH}/{env.get_s3_prefix()}/{Utils.normalize_string(env.JOB_NAME)}"
link = S3.copy_file_to_s3(
s3_path=s3_path, local_path=artifact.path
)
result.set_link(link)
except Exception as e:
error = (
f"ERROR: Failed to upload artifact [{artifact}], ex [{e}]"
)
print(error)
info_errors.append(error)
result.set_status(Result.Status.ERROR)
if workflow.enable_cidb:
print("Insert results to CIDB")
try:
CIDB(
url=workflow.get_secret(Settings.SECRET_CI_DB_URL).get_value(),
passwd=workflow.get_secret(
Settings.SECRET_CI_DB_PASSWORD
).get_value(),
).insert(result)
except Exception as ex:
error = f"ERROR: Failed to insert data into CI DB, exception [{ex}]"
print(error)
info_errors.append(error)
result.dump()
# always in the end
        if workflow.enable_cache:
            print("Run CI cache hook")
            if result.is_ok():
                CacheRunnerHooks.post_run(workflow, job)
        if workflow.enable_report:
            print("Run html report hook")
            HtmlRunnerHooks.post_run(workflow, job, info_errors)
return True
def run(
self, workflow, job, docker="", dummy_env=False, no_docker=False, param=None
):
res = True
setup_env_code = -10
prerun_code = -10
run_code = -10
if res and not dummy_env:
print(
f"\n\n=== Setup env script [{job.name}], workflow [{workflow.name}] ==="
)
try:
setup_env_code = self._setup_env(workflow, job)
# Source the bash script and capture the environment variables
res = setup_env_code == 0
if not res:
print(
f"ERROR: Setup env script failed with exit code [{setup_env_code}]"
)
except Exception as e:
print(f"ERROR: Setup env script failed with exception [{e}]")
traceback.print_exc()
print(f"=== Setup env finished ===\n\n")
else:
self.generate_dummy_environment(workflow, job)
if res and not dummy_env:
res = False
print(f"=== Pre run script [{job.name}], workflow [{workflow.name}] ===")
try:
prerun_code = self._pre_run(workflow, job)
res = prerun_code == 0
if not res:
print(f"ERROR: Pre-run failed with exit code [{prerun_code}]")
except Exception as e:
print(f"ERROR: Pre-run script failed with exception [{e}]")
traceback.print_exc()
print(f"=== Pre run finished ===\n\n")
if res:
res = False
print(f"=== Run script [{job.name}], workflow [{workflow.name}] ===")
try:
run_code = self._run(
workflow, job, docker=docker, no_docker=no_docker, param=param
)
res = run_code == 0
if not res:
print(f"ERROR: Run failed with exit code [{run_code}]")
except Exception as e:
print(f"ERROR: Run script failed with exception [{e}]")
traceback.print_exc()
print(f"=== Run scrip finished ===\n\n")
if not dummy_env:
print(f"=== Post run script [{job.name}], workflow [{workflow.name}] ===")
self._post_run(workflow, job, setup_env_code, prerun_code, run_code)
print(f"=== Post run scrip finished ===")
if not res:
sys.exit(1)

35
ci/praktika/runtime.py Normal file

@ -0,0 +1,35 @@
from dataclasses import dataclass
from typing import Dict, List
from praktika.cache import Cache
from praktika.settings import Settings
from praktika.utils import MetaClasses, Utils
@dataclass
class RunConfig(MetaClasses.Serializable):
name: str
digest_jobs: Dict[str, str]
digest_dockers: Dict[str, str]
cache_success: List[str]
    # there might be issues with special characters in job names if used directly in yaml syntax - create a base64-encoded list to avoid this
cache_success_base64: List[str]
cache_artifacts: Dict[str, Cache.CacheRecord]
sha: str
@classmethod
def from_dict(cls, obj):
cache_artifacts = obj["cache_artifacts"]
cache_artifacts_deserialized = {}
for artifact_name, cache_artifact in cache_artifacts.items():
cache_artifacts_deserialized[artifact_name] = Cache.CacheRecord.from_dict(
cache_artifact
)
obj["cache_artifacts"] = cache_artifacts_deserialized
return RunConfig(**obj)
@classmethod
def file_name_static(cls, name):
return (
f"{Settings.TEMP_DIR}/workflow_config_{Utils.normalize_string(name)}.json"
)
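if __name__ == "__main__":
    # round-trip demo of the inherited Serializable machinery (values are
    # illustrative; assumes Settings.TEMP_DIR exists)
    RunConfig(
        name="Demo",
        digest_jobs={},
        digest_dockers={},
        cache_success=[],
        cache_success_base64=[],
        cache_artifacts={},
        sha="abc123",
    ).dump()
    assert RunConfig.from_fs("Demo").sha == "abc123"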

295
ci/praktika/s3.py Normal file

@ -0,0 +1,295 @@
import dataclasses
import json
import time
from pathlib import Path
from typing import Dict
from praktika._environment import _Environment
from praktika.settings import Settings
from praktika.utils import Shell, Utils
class S3:
@dataclasses.dataclass
class Object:
AcceptRanges: str
Expiration: str
LastModified: str
ContentLength: int
ETag: str
ContentType: str
ServerSideEncryption: str
Metadata: Dict
def has_tags(self, tags):
meta = self.Metadata
for k, v in tags.items():
if k not in meta or meta[k] != v:
print(f"tag [{k}={v}] does not match meta [{meta}]")
return False
return True
@classmethod
def clean_s3_directory(cls, s3_path):
assert len(s3_path.split("/")) > 2, "check to not delete too much"
cmd = f"aws s3 rm s3://{s3_path} --recursive"
cls.run_command_with_retries(cmd, retries=1)
return
@classmethod
def copy_file_to_s3(cls, s3_path, local_path, text=False):
assert Path(local_path).exists(), f"Path [{local_path}] does not exist"
assert Path(s3_path), f"Invalid S3 Path [{s3_path}]"
assert Path(
local_path
).is_file(), f"Path [{local_path}] is not file. Only files are supported"
file_name = Path(local_path).name
s3_full_path = s3_path
if not s3_full_path.endswith(file_name):
s3_full_path = f"{s3_path}/{Path(local_path).name}"
cmd = f"aws s3 cp {local_path} s3://{s3_full_path}"
if text:
cmd += " --content-type text/plain"
        res = cls.run_command_with_retries(cmd)
        if not res:
            raise RuntimeError(f"Failed to copy [{local_path}] to [{s3_full_path}]")
bucket = s3_path.split("/")[0]
endpoint = Settings.S3_BUCKET_TO_HTTP_ENDPOINT[bucket]
assert endpoint
return f"https://{s3_full_path}".replace(bucket, endpoint)
@classmethod
def put(cls, s3_path, local_path, text=False, metadata=None):
assert Path(local_path).exists(), f"Path [{local_path}] does not exist"
assert Path(s3_path), f"Invalid S3 Path [{s3_path}]"
assert Path(
local_path
).is_file(), f"Path [{local_path}] is not file. Only files are supported"
file_name = Path(local_path).name
s3_full_path = s3_path
if not s3_full_path.endswith(file_name):
s3_full_path = f"{s3_path}/{Path(local_path).name}"
s3_full_path = str(s3_full_path).removeprefix("s3://")
bucket, key = s3_full_path.split("/", maxsplit=1)
command = (
f"aws s3api put-object --bucket {bucket} --key {key} --body {local_path}"
)
        if metadata:
            for k, v in metadata.items():
                command += f" --metadata {k}={v}"
        if text:
            command += " --content-type text/plain"
        res = cls.run_command_with_retries(command)
        assert res
@classmethod
def run_command_with_retries(cls, command, retries=Settings.MAX_RETRIES_S3):
i = 0
res = False
while not res and i < retries:
i += 1
ret_code, stdout, stderr = Shell.get_res_stdout_stderr(
command, verbose=True
)
if "aws sso login" in stderr:
print("ERROR: aws login expired")
break
elif "does not exist" in stderr:
print("ERROR: requested file does not exist")
break
            if ret_code != 0:
                print(
                    f"ERROR: command failed with exit code [{ret_code}], stderr: [{stderr}], stdout: [{stdout}]"
                )
res = ret_code == 0
return res
@classmethod
def get_link(cls, s3_path, local_path):
s3_full_path = f"{s3_path}/{Path(local_path).name}"
bucket = s3_path.split("/")[0]
endpoint = Settings.S3_BUCKET_TO_HTTP_ENDPOINT[bucket]
return f"https://{s3_full_path}".replace(bucket, endpoint)
@classmethod
def copy_file_from_s3(cls, s3_path, local_path):
assert Path(s3_path), f"Invalid S3 Path [{s3_path}]"
if Path(local_path).is_dir():
local_path = Path(local_path) / Path(s3_path).name
else:
assert Path(
local_path
).parent.is_dir(), f"Parent path for [{local_path}] does not exist"
cmd = f"aws s3 cp s3://{s3_path} {local_path}"
res = cls.run_command_with_retries(cmd)
return res
@classmethod
def head_object(cls, s3_path):
s3_path = str(s3_path).removeprefix("s3://")
bucket, key = s3_path.split("/", maxsplit=1)
output = Shell.get_output(
f"aws s3api head-object --bucket {bucket} --key {key}", verbose=True
)
if not output:
return None
else:
return cls.Object(**json.loads(output))
@classmethod
def delete(cls, s3_path):
assert Path(s3_path), f"Invalid S3 Path [{s3_path}]"
return Shell.check(
f"aws s3 rm s3://{s3_path}",
verbose=True,
)
    # TODO: apparently should be moved into a separate file to be used only inside praktika,
    # keeping this module clean of Settings/Environment imports and easy to use externally
@classmethod
def copy_result_to_s3(cls, result, unlock=True):
result.dump()
env = _Environment.get()
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}"
s3_path_full = f"{s3_path}/{Path(result.file_name()).name}"
url = S3.copy_file_to_s3(s3_path=s3_path, local_path=result.file_name())
if env.PR_NUMBER:
print("Duplicate Result for latest commit alias in PR")
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix(latest=True)}"
url = S3.copy_file_to_s3(s3_path=s3_path, local_path=result.file_name())
if unlock:
if not cls.unlock(s3_path_full):
print(f"ERROR: File [{s3_path_full}] unlock failure")
assert False # TODO: investigate
return url
@classmethod
def copy_result_from_s3(cls, local_path, lock=True):
env = _Environment.get()
file_name = Path(local_path).name
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}/{file_name}"
if lock:
cls.lock(s3_path)
        if not S3.copy_file_from_s3(s3_path=s3_path, local_path=local_path):
            print(f"ERROR: failed to cp file [{s3_path}] from s3")
            raise RuntimeError(f"Failed to copy file [{s3_path}] from s3")
@classmethod
def lock(cls, s3_path, level=0):
assert level < 3, "Never"
env = _Environment.get()
s3_path_lock = s3_path + f".lock"
file_path_lock = f"{Settings.TEMP_DIR}/{Path(s3_path_lock).name}"
assert Shell.check(
f"echo '''{env.JOB_NAME}''' > {file_path_lock}", verbose=True
), "Never"
i = 20
meta = S3.head_object(s3_path_lock)
while meta:
print(f"WARNING: Failed to acquire lock, meta [{meta}] - wait")
i -= 5
if i < 0:
info = f"ERROR: lock acquire failure - unlock forcefully"
print(info)
env.add_info(info)
break
time.sleep(5)
metadata = {"job": Utils.to_base64(env.JOB_NAME)}
S3.put(
s3_path=s3_path_lock,
local_path=file_path_lock,
metadata=metadata,
)
time.sleep(1)
obj = S3.head_object(s3_path_lock)
if not obj or not obj.has_tags(tags=metadata):
print(f"WARNING: locked by another job [{obj}]")
env.add_info("S3 lock file failure")
cls.lock(s3_path, level=level + 1)
print("INFO: lock acquired")
@classmethod
def unlock(cls, s3_path):
s3_path_lock = s3_path + ".lock"
env = _Environment.get()
obj = S3.head_object(s3_path_lock)
if not obj:
print("ERROR: lock file is removed")
assert False # investigate
elif not obj.has_tags({"job": Utils.to_base64(env.JOB_NAME)}):
print("ERROR: lock file was acquired by another job")
assert False # investigate
if not S3.delete(s3_path_lock):
print(f"ERROR: File [{s3_path_lock}] delete failure")
print("INFO: lock released")
return True
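    # A sketch of the protocol above (the path is illustrative): the owner is
    # recorded as base64(JOB_NAME) in the lock object's S3 metadata and verified
    # with head-object before the protected section runs.
    #
    #   report_path = f"{Settings.HTML_S3_PATH}/pr-123/abc123/result_workflow.json"
    #   S3.lock(report_path)
    #   try:
    #       ...  # read-modify-write the shared report file
    #   finally:
    #       S3.unlock(report_path)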
@classmethod
def get_result_link(cls, result):
env = _Environment.get()
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix(latest=True if env.PR_NUMBER else False)}"
return S3.get_link(s3_path=s3_path, local_path=result.file_name())
@classmethod
def clean_latest_result(cls):
env = _Environment.get()
env.SHA = "latest"
assert env.PR_NUMBER
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}"
S3.clean_s3_directory(s3_path=s3_path)
@classmethod
def _upload_file_to_s3(
cls, local_file_path, upload_to_s3: bool, text: bool = False, s3_subprefix=""
) -> str:
if upload_to_s3:
env = _Environment.get()
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}"
if s3_subprefix:
s3_subprefix.removeprefix("/").removesuffix("/")
s3_path += f"/{s3_subprefix}"
html_link = S3.copy_file_to_s3(
s3_path=s3_path, local_path=local_file_path, text=text
)
return html_link
return f"file://{Path(local_file_path).absolute()}"
@classmethod
def upload_result_files_to_s3(cls, result):
if result.results:
for result_ in result.results:
cls.upload_result_files_to_s3(result_)
for file in result.files:
if not Path(file).is_file():
print(f"ERROR: Invalid file [{file}] in [{result.name}] - skip upload")
result.info += f"\nWARNING: Result file [{file}] was not found"
file_link = cls._upload_file_to_s3(file, upload_to_s3=False)
else:
is_text = False
for text_file_suffix in Settings.TEXT_CONTENT_EXTENSIONS:
if file.endswith(text_file_suffix):
print(
f"File [{file}] matches Settings.TEXT_CONTENT_EXTENSIONS [{Settings.TEXT_CONTENT_EXTENSIONS}] - add text attribute for s3 object"
)
is_text = True
break
file_link = cls._upload_file_to_s3(
file,
upload_to_s3=True,
text=is_text,
s3_subprefix=Utils.normalize_string(result.name),
)
result.links.append(file_link)
if result.files:
print(
f"Result files [{result.files}] uploaded to s3 [{result.links[-len(result.files):]}] - clean files list"
)
result.files = []
result.dump()

61
ci/praktika/secret.py Normal file

@ -0,0 +1,61 @@
import dataclasses
import os
from praktika.utils import Shell
class Secret:
class Type:
AWS_SSM_VAR = "aws parameter"
AWS_SSM_SECRET = "aws secret"
GH_SECRET = "gh secret"
@dataclasses.dataclass
class Config:
name: str
type: str
def is_gh(self):
return self.type == Secret.Type.GH_SECRET
        def get_value(self):
            if self.type == Secret.Type.AWS_SSM_VAR:
                return self.get_aws_ssm_var()
            elif self.type == Secret.Type.AWS_SSM_SECRET:
                return self.get_aws_ssm_secret()
            elif self.type == Secret.Type.GH_SECRET:
                return self.get_gh_secret()
            else:
                assert False, f"Unsupported secret type, secret [{self}]"
def get_aws_ssm_var(self):
res = Shell.get_output(
f"aws ssm get-parameter --name {self.name} --with-decryption --output text --query Parameter.Value",
)
if not res:
print(f"ERROR: Failed to get secret [{self.name}]")
raise RuntimeError()
return res
def get_aws_ssm_secret(self):
name, secret_key_name = self.name, ""
if "." in self.name:
name, secret_key_name = self.name.split(".")
cmd = f"aws secretsmanager get-secret-value --secret-id {name} --query SecretString --output text"
if secret_key_name:
cmd += f" | jq -r '.[\"{secret_key_name}\"]'"
res = Shell.get_output(cmd, verbose=True)
if not res:
print(f"ERROR: Failed to get secret [{self.name}]")
raise RuntimeError()
return res
def get_gh_secret(self):
res = os.getenv(f"{self.name}")
if not res:
print(f"ERROR: Failed to get secret [{self.name}]")
raise RuntimeError()
return res
def __repr__(self):
return self.name
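if __name__ == "__main__":
    # demo: a GH-type secret resolves via the environment (the name below is an
    # illustrative example, not a real secret)
    os.environ["MY_TOKEN"] = "dummy"
    secret = Secret.Config(name="MY_TOKEN", type=Secret.Type.GH_SECRET)
    assert secret.is_gh() and secret.get_value() == "dummy"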

8
ci/praktika/settings.py Normal file

@ -0,0 +1,8 @@
from praktika._settings import _Settings
from praktika.mangle import _get_user_settings
Settings = _Settings()
user_settings = _get_user_settings()
for setting, value in user_settings.items():
Settings.__setattr__(setting, value)

597
ci/praktika/utils.py Normal file

@ -0,0 +1,597 @@
import base64
import dataclasses
import glob
import json
import multiprocessing
import os
import re
import signal
import subprocess
import sys
import time
from abc import ABC, abstractmethod
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
from threading import Thread
from types import SimpleNamespace
from typing import Any, Dict, Iterator, List, Optional, Type, TypeVar, Union
from praktika._settings import _Settings
T = TypeVar("T", bound="Serializable")
class MetaClasses:
class WithIter(type):
def __iter__(cls):
return (v for k, v in cls.__dict__.items() if not k.startswith("_"))
@dataclasses.dataclass
class Serializable(ABC):
@classmethod
def to_dict(cls, obj):
if dataclasses.is_dataclass(obj):
return {k: cls.to_dict(v) for k, v in dataclasses.asdict(obj).items()}
elif isinstance(obj, SimpleNamespace):
return {k: cls.to_dict(v) for k, v in vars(obj).items()}
elif isinstance(obj, list):
return [cls.to_dict(i) for i in obj]
elif isinstance(obj, dict):
return {k: cls.to_dict(v) for k, v in obj.items()}
else:
return obj
@classmethod
def from_dict(cls: Type[T], obj: Dict[str, Any]) -> T:
return cls(**obj)
@classmethod
def from_fs(cls: Type[T], name) -> T:
with open(cls.file_name_static(name), "r", encoding="utf8") as f:
try:
return cls.from_dict(json.load(f))
except json.decoder.JSONDecodeError as ex:
print(f"ERROR: failed to parse json, ex [{ex}]")
print(f"JSON content [{cls.file_name_static(name)}]")
Shell.check(f"cat {cls.file_name_static(name)}")
raise ex
@classmethod
@abstractmethod
def file_name_static(cls, name):
pass
def file_name(self):
return self.file_name_static(self.name)
def dump(self):
with open(self.file_name(), "w", encoding="utf8") as f:
json.dump(self.to_dict(self), f, indent=4)
return self
@classmethod
def exist(cls, name):
return Path(cls.file_name_static(name)).is_file()
def to_json(self, pretty=False):
return json.dumps(dataclasses.asdict(self), indent=4 if pretty else None)
class ContextManager:
@staticmethod
@contextmanager
    def cd(to: Optional[Union[Path, str]] = None) -> Iterator[None]:
        """
        changes the current working directory to @to, or to the git root if @to is None
        :param to: target directory
        :return:
        """
        if not to:
            try:
                to = Shell.get_output_or_raise("git rev-parse --show-toplevel")
            except Exception:
                pass
        if not to:
            if Path(_Settings.DOCKER_WD).is_dir():
                to = _Settings.DOCKER_WD
        if not to:
            assert False, "FIX IT"
assert to
old_pwd = os.getcwd()
os.chdir(to)
try:
yield
finally:
os.chdir(old_pwd)
class Shell:
@classmethod
def get_output_or_raise(cls, command, verbose=False):
return cls.get_output(command, verbose=verbose, strict=True).strip()
@classmethod
def get_output(cls, command, strict=False, verbose=False):
if verbose:
print(f"Run command [{command}]")
res = subprocess.run(
command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
if res.stderr:
print(f"WARNING: stderr: {res.stderr.strip()}")
if strict and res.returncode != 0:
raise RuntimeError(f"command failed with {res.returncode}")
return res.stdout.strip()
@classmethod
def get_res_stdout_stderr(cls, command, verbose=True):
if verbose:
print(f"Run command [{command}]")
res = subprocess.run(
command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
)
return res.returncode, res.stdout.strip(), res.stderr.strip()
@classmethod
def check(
cls,
command,
log_file=None,
strict=False,
verbose=False,
dry_run=False,
stdin_str=None,
timeout=None,
retries=0,
**kwargs,
):
return (
cls.run(
command,
log_file,
strict,
verbose,
dry_run,
stdin_str,
retries=retries,
timeout=timeout,
**kwargs,
)
== 0
)
@classmethod
def run(
cls,
command,
log_file=None,
strict=False,
verbose=False,
dry_run=False,
stdin_str=None,
timeout=None,
retries=0,
**kwargs,
):
def _check_timeout(timeout, process) -> None:
if not timeout:
return
time.sleep(timeout)
print(
f"WARNING: Timeout exceeded [{timeout}], sending SIGTERM to process group [{process.pid}]"
)
try:
os.killpg(process.pid, signal.SIGTERM)
except ProcessLookupError:
print("Process already terminated.")
return
time_wait = 0
wait_interval = 5
# Wait for process to terminate
while process.poll() is None and time_wait < 100:
print("Waiting for process to exit...")
time.sleep(wait_interval)
time_wait += wait_interval
# Force kill if still running
if process.poll() is None:
print(f"WARNING: Process still running after SIGTERM, sending SIGKILL")
try:
os.killpg(process.pid, signal.SIGKILL)
except ProcessLookupError:
print("Process already terminated.")
# Dry-run
if dry_run:
print(f"Dry-run. Would run command [{command}]")
return 0 # Return success for dry-run
if verbose:
print(f"Run command: [{command}]")
log_file = log_file or "/dev/null"
proc = None
for retry in range(retries + 1):
try:
with open(log_file, "w") as log_fp:
proc = subprocess.Popen(
command,
shell=True,
stderr=subprocess.STDOUT,
stdout=subprocess.PIPE,
stdin=subprocess.PIPE if stdin_str else None,
universal_newlines=True,
start_new_session=True, # Start a new process group for signal handling
bufsize=1, # Line-buffered
errors="backslashreplace",
**kwargs,
)
# Start the timeout thread if specified
if timeout:
t = Thread(target=_check_timeout, args=(timeout, proc))
t.daemon = True
t.start()
# Write stdin if provided
if stdin_str:
proc.stdin.write(stdin_str)
proc.stdin.close()
# Process output in real-time
if proc.stdout:
for line in proc.stdout:
sys.stdout.write(line)
log_fp.write(line)
proc.wait() # Wait for the process to finish
if proc.returncode == 0:
break # Exit retry loop if success
else:
if verbose:
print(
f"ERROR: command [{command}] failed, exit code: {proc.returncode}, retry: {retry}/{retries}"
)
except Exception as e:
if verbose:
print(
f"ERROR: command failed, exception: {e}, retry: {retry}/{retries}"
)
if proc:
proc.kill()
        # Handle strict mode (ensure process success or fail)
        if strict:
            assert (
                proc and proc.returncode == 0
            ), f"Command failed with return code {proc.returncode if proc else 'N/A'}"
        return proc.returncode if proc else 1  # Return 1 if process never started
@classmethod
def run_async(
cls,
command,
stdin_str=None,
verbose=False,
suppress_output=False,
**kwargs,
):
if verbose:
print(f"Run command in background [{command}]")
proc = subprocess.Popen(
command,
shell=True,
stderr=subprocess.STDOUT if not suppress_output else subprocess.DEVNULL,
stdout=subprocess.PIPE if not suppress_output else subprocess.DEVNULL,
stdin=subprocess.PIPE if stdin_str else None,
universal_newlines=True,
start_new_session=True,
bufsize=1,
errors="backslashreplace",
**kwargs,
)
if proc.stdout:
for line in proc.stdout:
print(line, end="")
return proc
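    # Usage sketches (the commands below are illustrative):
    #
    #   ok = Shell.check("ls ci/", verbose=True)  # True iff exit code is 0
    #   branch = Shell.get_output("git rev-parse --abbrev-ref HEAD")
    #   rc = Shell.run("make build", log_file="/tmp/build.log", retries=1, timeout=3600)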
class Utils:
@staticmethod
def terminate_process_group(pid, force=False):
if not force:
os.killpg(os.getpgid(pid), signal.SIGTERM)
else:
os.killpg(os.getpgid(pid), signal.SIGKILL)
@staticmethod
def set_env(key, val):
os.environ[key] = val
@staticmethod
def print_formatted_error(error_message, stdout="", stderr=""):
stdout_lines = stdout.splitlines() if stdout else []
stderr_lines = stderr.splitlines() if stderr else []
print(f"ERROR: {error_message}")
if stdout_lines:
print(" Out:")
for line in stdout_lines:
print(f" | {line}")
if stderr_lines:
print(" Err:")
for line in stderr_lines:
print(f" | {line}")
@staticmethod
def sleep(seconds):
time.sleep(seconds)
@staticmethod
def cwd():
return Path.cwd()
@staticmethod
def cpu_count():
return multiprocessing.cpu_count()
@staticmethod
def raise_with_error(error_message, stdout="", stderr="", ex=None):
Utils.print_formatted_error(error_message, stdout, stderr)
raise ex or RuntimeError()
@staticmethod
def timestamp():
return datetime.utcnow().timestamp()
@staticmethod
def timestamp_to_str(timestamp):
return datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")
@staticmethod
def get_failed_tests_number(description: str) -> Optional[int]:
description = description.lower()
pattern = r"fail:\s*(\d+)\s*(?=,|$)"
match = re.search(pattern, description)
if match:
return int(match.group(1))
return None
@staticmethod
def is_killed_with_oom():
if Shell.check(
"sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'"
):
return True
return False
@staticmethod
def clear_dmesg():
Shell.check("sudo dmesg --clear", verbose=True)
@staticmethod
def to_base64(value):
assert isinstance(value, str), f"TODO: not supported for {type(value)}"
string_bytes = value.encode("utf-8")
base64_bytes = base64.b64encode(string_bytes)
base64_string = base64_bytes.decode("utf-8")
return base64_string
@staticmethod
def is_hex(s):
try:
int(s, 16)
return True
except ValueError:
return False
@staticmethod
def normalize_string(string: str) -> str:
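        # maps arbitrary names to file/url-safe identifiers,
        # e.g. "Stress test (tsan)" -> "stress_test_tsan"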
res = string.lower()
for r in (
(" ", "_"),
("(", ""),
(")", ""),
("{", ""),
("}", ""),
("'", ""),
("[", ""),
("]", ""),
(",", ""),
("/", "_"),
("-", "_"),
(":", ""),
('"', ""),
):
res = res.replace(*r)
return res
@staticmethod
def traverse_path(path, file_suffixes=None, sorted=False, not_exists_ok=False):
res = []
def is_valid_file(file):
if file_suffixes is None:
return True
return any(file.endswith(suffix) for suffix in file_suffixes)
if os.path.isfile(path):
if is_valid_file(path):
res.append(path)
elif os.path.isdir(path):
for root, dirs, files in os.walk(path):
for file in files:
full_path = os.path.join(root, file)
if is_valid_file(full_path):
res.append(full_path)
elif "*" in str(path):
res.extend(
[
f
for f in glob.glob(path, recursive=True)
if os.path.isfile(f) and is_valid_file(f)
]
)
        else:
            assert not_exists_ok, f"File does not exist or is not valid [{path}]"
if sorted:
res.sort(reverse=True)
return res
@classmethod
def traverse_paths(
cls,
include_paths,
exclude_paths,
file_suffixes=None,
sorted=False,
not_exists_ok=False,
) -> List["str"]:
included_files_ = set()
for path in include_paths:
included_files_.update(cls.traverse_path(path, file_suffixes=file_suffixes))
excluded_files = set()
for path in exclude_paths:
res = cls.traverse_path(path, not_exists_ok=not_exists_ok)
if not res:
print(
f"WARNING: Utils.traverse_paths excluded 0 files by path [{path}] in exclude_paths"
)
else:
excluded_files.update(res)
res = [f for f in included_files_ if f not in excluded_files]
if sorted:
res.sort(reverse=True)
return res
@classmethod
def add_to_PATH(cls, path):
path_cur = os.getenv("PATH", "")
if path_cur:
path += ":" + path_cur
os.environ["PATH"] = path
class Stopwatch:
def __init__(self):
self.start_time = datetime.utcnow().timestamp()
@property
def duration(self) -> float:
return datetime.utcnow().timestamp() - self.start_time
class TeePopen:
def __init__(
self,
command: str,
log_file: Union[str, Path] = "",
env: Optional[dict] = None,
timeout: Optional[int] = None,
):
self.command = command
self.log_file_name = log_file
self.log_file = None
self.env = env or os.environ.copy()
self.process = None # type: Optional[subprocess.Popen]
self.timeout = timeout
self.timeout_exceeded = False
self.terminated_by_sigterm = False
self.terminated_by_sigkill = False
def _check_timeout(self) -> None:
if self.timeout is None:
return
time.sleep(self.timeout)
print(
f"WARNING: Timeout exceeded [{self.timeout}], send SIGTERM to [{self.process.pid}] and give a chance for graceful termination"
)
self.send_signal(signal.SIGTERM)
time_wait = 0
self.terminated_by_sigterm = True
self.timeout_exceeded = True
while self.process.poll() is None and time_wait < 100:
print("wait...")
wait = 5
time.sleep(wait)
time_wait += wait
while self.process.poll() is None:
print(f"WARNING: Still running, send SIGKILL to [{self.process.pid}]")
self.send_signal(signal.SIGKILL)
self.terminated_by_sigkill = True
time.sleep(2)
def __enter__(self) -> "TeePopen":
if self.log_file_name:
self.log_file = open(self.log_file_name, "w", encoding="utf-8")
self.process = subprocess.Popen(
self.command,
shell=True,
universal_newlines=True,
env=self.env,
            start_new_session=True,  # signal will be sent to all children
stderr=subprocess.STDOUT,
stdout=subprocess.PIPE,
bufsize=1,
errors="backslashreplace",
)
time.sleep(1)
print(f"Subprocess started, pid [{self.process.pid}]")
if self.timeout is not None and self.timeout > 0:
t = Thread(target=self._check_timeout)
t.daemon = True # does not block the program from exit
t.start()
return self
def __exit__(self, exc_type, exc_value, traceback):
self.wait()
if self.log_file:
self.log_file.close()
def wait(self) -> int:
if self.process.stdout is not None:
for line in self.process.stdout:
sys.stdout.write(line)
if self.log_file:
self.log_file.write(line)
return self.process.wait()
def poll(self):
return self.process.poll()
def send_signal(self, signal_num):
os.killpg(self.process.pid, signal_num)
if __name__ == "__main__":
@dataclasses.dataclass
class Test(MetaClasses.Serializable):
name: str
@staticmethod
def file_name_static(name):
return f"/tmp/{Utils.normalize_string(name)}.json"
Test(name="dsada").dump()
t = Test.from_fs("dsada")
print(t)
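    # a short TeePopen demo: output is streamed to stdout and to the log file,
    # and the whole process group is SIGTERM'd (then SIGKILL'd) on timeout
    with TeePopen("echo praktika", log_file="/tmp/praktika_demo.log", timeout=10) as tp:
        assert tp.wait() == 0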

208
ci/praktika/validator.py Normal file

@ -0,0 +1,208 @@
import glob
import sys
from itertools import chain
from pathlib import Path
from praktika import Workflow
from praktika._settings import GHRunners
from praktika.mangle import _get_workflows
from praktika.settings import Settings
from praktika.utils import ContextManager
class Validator:
@classmethod
def validate(cls):
print("---Start validating Pipeline and settings---")
workflows = _get_workflows()
for workflow in workflows:
print(f"Validating workflow [{workflow.name}]")
cls.validate_file_paths_in_run_command(workflow)
cls.validate_file_paths_in_digest_configs(workflow)
cls.validate_requirements_txt_files(workflow)
cls.validate_dockers(workflow)
if workflow.artifacts:
for artifact in workflow.artifacts:
if artifact.is_s3_artifact():
assert (
Settings.S3_ARTIFACT_PATH
), "Provide S3_ARTIFACT_PATH setting in any .py file in ./ci/settings/* to be able to use s3 for artifacts"
            for job in workflow.jobs:
                if job.requires and workflow.artifacts:
                    artifacts_by_name = {a.name: a for a in workflow.artifacts}
                    for require in job.requires:
                        if (
                            require in artifacts_by_name
                            and artifacts_by_name[require].is_s3_artifact()
                        ):
                            assert not any(
                                [r in GHRunners for r in job.runs_on]
                            ), f"GH runners [{job.name}:{job.runs_on}] must not be used with S3 as artifact storage"
if job.allow_merge_on_failure:
assert (
workflow.enable_merge_ready_status
), f"Job property allow_merge_on_failure must be used only with enabled workflow.enable_merge_ready_status, workflow [{workflow.name}], job [{job.name}]"
            if workflow.enable_cache:
                assert (
                    Settings.CI_CONFIG_RUNS_ON
                ), f"Runner label to run workflow config job must be provided via CI_CONFIG_RUNS_ON setting if enable_cache=True, workflow [{workflow.name}]"
assert (
Settings.CACHE_S3_PATH
), f"CACHE_S3_PATH Setting must be defined if enable_cache=True, workflow [{workflow.name}]"
if workflow.dockers:
cls.evaluate_check(
Settings.DOCKER_BUILD_RUNS_ON,
f"DOCKER_BUILD_RUNS_ON settings must be defined if workflow has dockers",
workflow_name=workflow.name,
)
            if workflow.enable_report:
                assert (
                    Settings.HTML_S3_PATH
                ), f"HTML_S3_PATH Setting must be defined if enable_report=True, workflow [{workflow.name}]"
                assert (
                    Settings.S3_BUCKET_TO_HTTP_ENDPOINT
                ), f"S3_BUCKET_TO_HTTP_ENDPOINT Setting must be defined if enable_report=True, workflow [{workflow.name}]"
                assert (
                    Settings.HTML_S3_PATH.split("/")[0]
                    in Settings.S3_BUCKET_TO_HTTP_ENDPOINT
                ), f"S3_BUCKET_TO_HTTP_ENDPOINT Setting must include bucket name [{Settings.HTML_S3_PATH}] from HTML_S3_PATH, workflow [{workflow.name}]"
            if workflow.enable_cache:
                for artifact in workflow.artifacts or []:
                    assert (
                        artifact.is_s3_artifact()
                    ), f"All artifacts must be of S3 type if enable_cache=True, artifact [{artifact.name}], type [{artifact.type}], workflow [{workflow.name}]"
if workflow.dockers:
assert (
Settings.DOCKERHUB_USERNAME
), f"Settings.DOCKERHUB_USERNAME must be provided if workflow has dockers, workflow [{workflow.name}]"
assert (
Settings.DOCKERHUB_SECRET
), f"Settings.DOCKERHUB_SECRET must be provided if workflow has dockers, workflow [{workflow.name}]"
assert workflow.get_secret(
Settings.DOCKERHUB_SECRET
), f"Secret [{Settings.DOCKERHUB_SECRET}] must have configuration in workflow.secrets, workflow [{workflow.name}]"
if (
workflow.enable_cache
or workflow.enable_report
or workflow.enable_merge_ready_status
):
                for job in workflow.jobs:
                    assert not any(
                        runner in ("ubuntu-latest",) for runner in job.runs_on
                    ), f"GitHub Runners must not be used for workflow with enabled: workflow.enable_cache, workflow.enable_report or workflow.enable_merge_ready_status as s3 access is required, workflow [{workflow.name}], job [{job.name}]"
            if workflow.enable_cidb:
                assert (
                    Settings.SECRET_CI_DB_URL
                ), f"Settings.SECRET_CI_DB_URL must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
                assert (
                    Settings.SECRET_CI_DB_PASSWORD
                ), f"Settings.SECRET_CI_DB_PASSWORD must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
assert (
Settings.CI_DB_DB_NAME
), f"Settings.CI_DB_DB_NAME must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
assert (
Settings.CI_DB_TABLE_NAME
), f"Settings.CI_DB_TABLE_NAME must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
@classmethod
def validate_file_paths_in_run_command(cls, workflow: Workflow.Config) -> None:
if not Settings.VALIDATE_FILE_PATHS:
return
with ContextManager.cd():
for job in workflow.jobs:
run_command = job.command
command_parts = run_command.split(" ")
for part in command_parts:
if ">" in part:
return
if "/" in part:
assert (
Path(part).is_file() or Path(part).is_dir()
), f"Apparently run command [{run_command}] for job [{job}] has invalid path [{part}]. Setting to disable check: VALIDATE_FILE_PATHS"
@classmethod
def validate_file_paths_in_digest_configs(cls, workflow: Workflow.Config) -> None:
if not Settings.VALIDATE_FILE_PATHS:
return
with ContextManager.cd():
for job in workflow.jobs:
if not job.digest_config:
continue
for include_path in chain(
job.digest_config.include_paths, job.digest_config.exclude_paths
):
if "*" in include_path:
assert glob.glob(
include_path, recursive=True
), f"Apparently file glob [{include_path}] in job [{job.name}] digest_config [{job.digest_config}] invalid, workflow [{workflow.name}]. Setting to disable check: VALIDATE_FILE_PATHS"
else:
assert (
Path(include_path).is_file() or Path(include_path).is_dir()
), f"Apparently file path [{include_path}] in job [{job.name}] digest_config [{job.digest_config}] invalid, workflow [{workflow.name}]. Setting to disable check: VALIDATE_FILE_PATHS"
@classmethod
def validate_requirements_txt_files(cls, workflow: Workflow.Config) -> None:
with ContextManager.cd():
for job in workflow.jobs:
if job.job_requirements:
if job.job_requirements.python_requirements_txt:
path = Path(job.job_requirements.python_requirements_txt)
message = f"File with py requirement [{path}] does not exist"
if job.name in (
Settings.DOCKER_BUILD_JOB_NAME,
Settings.CI_CONFIG_JOB_NAME,
Settings.FINISH_WORKFLOW_JOB_NAME,
):
                            message += '\n If all requirements are already installed on your runners - add setting INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS=""'
                        message += "\n If requirements need to be installed - add a requirements file (Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS):"
message += "\n echo jwt==1.3.1 > ./ci/requirements.txt"
message += (
"\n echo requests==2.32.3 >> ./ci/requirements.txt"
)
message += "\n echo https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl >> ./ci/requirements.txt"
                        cls.evaluate_check(
                            path.is_file(),
                            message,
                            workflow_name=workflow.name,
                            job_name=job.name,
                        )
@classmethod
def validate_dockers(cls, workflow: Workflow.Config):
names = []
for docker in workflow.dockers:
cls.evaluate_check(
docker.name not in names,
f"Non uniq docker name [{docker.name}]",
workflow_name=workflow.name,
)
names.append(docker.name)
for docker in workflow.dockers:
for docker_dep in docker.depends_on:
cls.evaluate_check(
docker_dep in names,
f"Docker [{docker.name}] has invalid dependency [{docker_dep}]",
workflow_name=workflow.name,
)
@classmethod
def evaluate_check(cls, check_ok, message, workflow_name, job_name=""):
message = message.split("\n")
messages = [message] if not isinstance(message, list) else message
if check_ok:
return
else:
print(
f"ERROR: Config validation failed: workflow [{workflow_name}], job [{job_name}]:"
)
for message in messages:
print(" || " + message)
sys.exit(1)

1
ci/praktika/version.py Normal file

@ -0,0 +1 @@
VERSION = 1

68
ci/praktika/workflow.py Normal file

@ -0,0 +1,68 @@
from dataclasses import dataclass, field
from typing import List, Optional
from praktika import Artifact, Job
from praktika.docker import Docker
from praktika.secret import Secret
from praktika.utils import Utils
class Workflow:
class Event:
PULL_REQUEST = "pull_request"
PUSH = "push"
@dataclass
class Config:
"""
branches - List of branch names or patterns, for push trigger only
base_branches - List of base branches (target branch), for pull_request trigger only
"""
name: str
event: str
jobs: List[Job.Config]
branches: List[str] = field(default_factory=list)
base_branches: List[str] = field(default_factory=list)
artifacts: List[Artifact.Config] = field(default_factory=list)
dockers: List[Docker.Config] = field(default_factory=list)
secrets: List[Secret.Config] = field(default_factory=list)
enable_cache: bool = False
enable_report: bool = False
enable_merge_ready_status: bool = False
enable_cidb: bool = False
def is_event_pull_request(self):
return self.event == Workflow.Event.PULL_REQUEST
def is_event_push(self):
return self.event == Workflow.Event.PUSH
def get_job(self, name):
job = self.find_job(name)
if not job:
Utils.raise_with_error(
f"Failed to find job [{name}], workflow [{self.name}]"
)
return job
def find_job(self, name, lazy=False):
name = str(name)
for job in self.jobs:
if lazy:
if name.lower() in job.name.lower():
return job
else:
if job.name == name:
return job
return None
def get_secret(self, name) -> Optional[Secret.Config]:
name = str(name)
names = []
for secret in self.secrets:
if secret.name == name:
return secret
names.append(secret.name)
print(f"ERROR: Failed to find secret [{name}], workflow secrets [{names}]")
raise
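A usage sketch for Workflow.Config (the job and secret names here are illustrative):

from praktika import Job, Secret, Workflow

wf = Workflow.Config(
    name="PR",
    event=Workflow.Event.PULL_REQUEST,
    jobs=[Job.Config(name="Style Check", runs_on=["ci_services"], command="echo ok")],
    secrets=[
        Secret.Config(name="dockerhub_robot_password", type=Secret.Type.AWS_SSM_VAR)
    ],
)

assert wf.is_event_pull_request() and not wf.is_event_push()
job = wf.find_job("style", lazy=True)  # lazy: case-insensitive substring match
job = wf.get_job("Style Check")        # raises via Utils if the name is unknown
secret = wf.get_secret("dockerhub_robot_password")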

ci/praktika/yaml_generator.py Normal file
@@ -0,0 +1,350 @@
import dataclasses
from typing import List
from praktika import Artifact, Job, Workflow
from praktika.mangle import _get_workflows
from praktika.parser import WorkflowConfigParser
from praktika.runtime import RunConfig
from praktika.settings import Settings
from praktika.utils import ContextManager, Shell, Utils
class YamlGenerator:
class Templates:
TEMPLATE_PULL_REQUEST_0 = """\
# generated by praktika
name: {NAME}
on:
{EVENT}:
branches: [{BRANCHES}]
# Cancel the previous wf run in PRs.
concurrency:
group: ${{{{{{{{ github.workflow }}}}}}}}-${{{{{{{{ github.ref }}}}}}}}
cancel-in-progress: true
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
GH_TOKEN: ${{{{{{{{ github.token }}}}}}}}
# Allow updating GH commit statuses and PR comments to post actual job report links
permissions: write-all
jobs:
{JOBS}\
"""
TEMPLATE_CALLABLE_WORKFLOW = """\
# generated by praktika
name: {NAME}
on:
workflow_call:
inputs:
config:
type: string
required: false
default: ''
secrets:
{SECRETS}
env:
PYTHONUNBUFFERED: 1
jobs:
{JOBS}\
"""
TEMPLATE_SECRET_CONFIG = """\
{SECRET_NAME}:
required: true
"""
TEMPLATE_MATRIX = """
strategy:
fail-fast: false
matrix:
params: {PARAMS_LIST}\
"""
TEMPLATE_JOB_0 = """
{JOB_NAME_NORMALIZED}:
runs-on: [{RUNS_ON}]
needs: [{NEEDS}]{IF_EXPRESSION}
name: "{JOB_NAME_GH}"
outputs:
data: ${{{{ steps.run.outputs.DATA }}}}
steps:
- name: Checkout code
uses: actions/checkout@v4
{JOB_ADDONS}
- name: Prepare env script
run: |
cat > {ENV_SETUP_SCRIPT} << 'ENV_SETUP_SCRIPT_EOF'
export PYTHONPATH=./ci:.
{SETUP_ENVS}
cat > {WORKFLOW_CONFIG_FILE} << 'EOF'
${{{{ needs.{WORKFLOW_CONFIG_JOB_NAME}.outputs.data }}}}
EOF
cat > {WORKFLOW_STATUS_FILE} << 'EOF'
${{{{ toJson(needs) }}}}
EOF
ENV_SETUP_SCRIPT_EOF
rm -rf {INPUT_DIR} {OUTPUT_DIR} {TEMP_DIR}
mkdir -p {TEMP_DIR} {INPUT_DIR} {OUTPUT_DIR}
{DOWNLOADS_GITHUB}
- name: Run
id: run
run: |
. /tmp/praktika_setup_env.sh
set -o pipefail
{PYTHON} -m praktika run --job '''{JOB_NAME}''' --workflow "{WORKFLOW_NAME}" --ci |& tee {RUN_LOG}
{UPLOADS_GITHUB}\
"""
TEMPLATE_SETUP_ENV_SECRETS = """\
export {SECRET_NAME}=$(cat<<'EOF'
${{{{ secrets.{SECRET_NAME} }}}}
EOF
)\
"""
TEMPLATE_PY_INSTALL = """
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: {PYTHON_VERSION}
"""
TEMPLATE_PY_WITH_REQUIREMENTS = """
- name: Install dependencies
run: |
sudo apt-get update && sudo apt-get install -y python3-pip
# TODO: --break-system-packages? otherwise ubuntu's apt/apt-get complains
{PYTHON} -m pip install --upgrade pip --break-system-packages
{PIP} install -r {REQUIREMENT_PATH} --break-system-packages
"""
TEMPLATE_GH_UPLOAD = """
- name: Upload artifact {NAME}
uses: actions/upload-artifact@v4
with:
name: {NAME}
path: {PATH}
"""
TEMPLATE_GH_DOWNLOAD = """
- name: Download artifact {NAME}
uses: actions/download-artifact@v4
with:
name: {NAME}
path: {PATH}
"""
TEMPLATE_IF_EXPRESSION = """
if: ${{{{ !failure() && !cancelled() && !contains(fromJson(needs.{WORKFLOW_CONFIG_JOB_NAME}.outputs.data).cache_success_base64, '{JOB_NAME_BASE64}') }}}}\
"""
TEMPLATE_IF_EXPRESSION_SKIPPED_OR_SUCCESS = """
if: ${{ !failure() && !cancelled() }}\
"""
TEMPLATE_IF_EXPRESSION_NOT_CANCELLED = """
if: ${{ !cancelled() }}\
"""
def __init__(self):
self.py_workflows = [] # type: List[Workflow.Config]
@classmethod
def _get_workflow_file_name(cls, workflow_name):
return f"{Settings.WORKFLOW_PATH_PREFIX}/{Utils.normalize_string(workflow_name)}.yaml"
def generate(self, workflow_file="", workflow_config=None):
print("---Start generating yaml pipelines---")
if workflow_config:
self.py_workflows = [workflow_config]
else:
self.py_workflows = _get_workflows(file=workflow_file)
assert self.py_workflows
for workflow_config in self.py_workflows:
print(f"Generate workflow [{workflow_config.name}]")
parser = WorkflowConfigParser(workflow_config).parse()
if (
workflow_config.is_event_pull_request()
or workflow_config.is_event_push()
):
yaml_workflow_str = PullRequestPushYamlGen(parser).generate()
else:
assert (
False
), f"Workflow event not yet supported [{workflow_config.event}]"
with ContextManager.cd():
with open(self._get_workflow_file_name(workflow_config.name), "w") as f:
f.write(yaml_workflow_str)
with ContextManager.cd():
Shell.check("git add ./.github/workflows/*.yaml")
class PullRequestPushYamlGen:
def __init__(self, parser: WorkflowConfigParser):
self.workflow_config = parser.workflow_yaml_config
self.parser = parser
def generate(self):
job_items = []
for i, job in enumerate(self.workflow_config.jobs):
job_name_normalized = Utils.normalize_string(job.name)
needs = ", ".join(map(Utils.normalize_string, job.needs))
job_name = job.name
job_addons = []
for addon in job.addons:
if addon.install_python:
job_addons.append(
YamlGenerator.Templates.TEMPLATE_PY_INSTALL.format(
PYTHON_VERSION=Settings.PYTHON_VERSION
)
)
if addon.requirements_txt_path:
job_addons.append(
YamlGenerator.Templates.TEMPLATE_PY_WITH_REQUIREMENTS.format(
PYTHON=Settings.PYTHON_INTERPRETER,
PIP=Settings.PYTHON_PACKET_MANAGER,
PYTHON_VERSION=Settings.PYTHON_VERSION,
REQUIREMENT_PATH=addon.requirements_txt_path,
)
)
uploads_github = []
for artifact in job.artifacts_gh_provides:
uploads_github.append(
YamlGenerator.Templates.TEMPLATE_GH_UPLOAD.format(
NAME=artifact.name, PATH=artifact.path
)
)
downloads_github = []
for artifact in job.artifacts_gh_requires:
downloads_github.append(
YamlGenerator.Templates.TEMPLATE_GH_DOWNLOAD.format(
NAME=artifact.name, PATH=Settings.INPUT_DIR
)
)
config_job_name_normalized = Utils.normalize_string(
Settings.CI_CONFIG_JOB_NAME
)
if_expression = ""
if (
self.workflow_config.enable_cache
and job_name_normalized != config_job_name_normalized
):
if_expression = YamlGenerator.Templates.TEMPLATE_IF_EXPRESSION.format(
WORKFLOW_CONFIG_JOB_NAME=config_job_name_normalized,
JOB_NAME_BASE64=Utils.to_base64(job_name),
)
if job.run_unless_cancelled:
if_expression = (
YamlGenerator.Templates.TEMPLATE_IF_EXPRESSION_NOT_CANCELLED
)
secrets_envs = []
for secret in self.workflow_config.secret_names_gh:
secrets_envs.append(
YamlGenerator.Templates.TEMPLATE_SETUP_ENV_SECRETS.format(
SECRET_NAME=secret
)
)
job_item = YamlGenerator.Templates.TEMPLATE_JOB_0.format(
JOB_NAME_NORMALIZED=job_name_normalized,
WORKFLOW_CONFIG_JOB_NAME=config_job_name_normalized,
IF_EXPRESSION=if_expression,
RUNS_ON=", ".join(job.runs_on),
NEEDS=needs,
JOB_NAME_GH=job_name.replace('"', '\\"'),
JOB_NAME=job_name.replace(
"'", "'\\''"
), # ' must be escaped so that yaml commands are properly parsed
WORKFLOW_NAME=self.workflow_config.name,
ENV_SETUP_SCRIPT=Settings.ENV_SETUP_SCRIPT,
SETUP_ENVS="\n".join(secrets_envs),
WORKFLOW_CONFIG_FILE=RunConfig.file_name_static(
self.workflow_config.name
),
JOB_ADDONS="".join(job_addons),
DOWNLOADS_GITHUB="\n".join(downloads_github),
UPLOADS_GITHUB="\n".join(uploads_github),
RUN_LOG=Settings.RUN_LOG,
PYTHON=Settings.PYTHON_INTERPRETER,
WORKFLOW_STATUS_FILE=Settings.WORKFLOW_STATUS_FILE,
TEMP_DIR=Settings.TEMP_DIR,
INPUT_DIR=Settings.INPUT_DIR,
OUTPUT_DIR=Settings.OUTPUT_DIR,
)
job_items.append(job_item)
base_template = YamlGenerator.Templates.TEMPLATE_PULL_REQUEST_0
template_1 = base_template.strip().format(
NAME=self.workflow_config.name,
BRANCHES=", ".join(
[f"'{branch}'" for branch in self.workflow_config.branches]
),
EVENT=self.workflow_config.event,
JOBS="{}" * len(job_items),
)
res = template_1.format(*job_items)
return res
@dataclasses.dataclass
class AuxConfig:
# defines aux step to install dependencies
addon: Job.Requirements
# defines aux step(s) to upload GH artifacts
uploads_gh: List[Artifact.Config]
# defines aux step(s) to download GH artifacts
downloads_gh: List[Artifact.Config]
def get_aux_workflow_name(self):
suffix = ""
if self.addon.python_requirements_txt:
suffix += "_py"
for _ in self.uploads_gh:
suffix += "_uplgh"
for _ in self.downloads_gh:
suffix += "_dnlgh"
return f"{Settings.WORKFLOW_PATH_PREFIX}/aux_job{suffix}.yaml"
def get_aux_workflow_input(self):
res = ""
if self.addon.python_requirements_txt:
res += f" requirements_txt: {self.addon.python_requirements_txt}"
return res
if __name__ == "__main__":
WFS = [
Workflow.Config(
name="PR",
event=Workflow.Event.PULL_REQUEST,
jobs=[
Job.Config(
name="Hello World",
runs_on=["foo"],
command="bar",
job_requirements=Job.Requirements(
python_requirements_txt="./requirement.txt"
),
)
],
enable_cache=True,
)
]
YamlGenerator().generate(workflow_config=WFS)
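A note on the brace stacking in TEMPLATE_PULL_REQUEST_0: the template is formatted twice - once for workflow metadata (with JOBS set to a run of "{}" placeholders) and once to splice in the rendered jobs - so literal GitHub Actions expressions need their braces doubled per formatting pass. A standalone sketch of the mechanism:

# Two-pass str.format: 8 braces -> 4 after pass one -> 2 in the final yaml.
template = """\
name: {NAME}
concurrency:
  group: ${{{{{{{{ github.workflow }}}}}}}}-${{{{{{{{ github.ref }}}}}}}}
jobs:
{JOBS}\
"""

jobs_yaml = ["  style_check:\n    runs-on: [ci_services]"]
pass_one = template.format(NAME="PR", JOBS="{}" * len(jobs_yaml))
final = pass_one.format(*jobs_yaml)
assert "group: ${{ github.workflow }}-${{ github.ref }}" in final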

ci/settings/definitions.py Normal file
@@ -0,0 +1,233 @@
from praktika import Docker, Secret
S3_BUCKET_NAME = "clickhouse-builds"
S3_BUCKET_HTTP_ENDPOINT = "clickhouse-builds.s3.amazonaws.com"
class RunnerLabels:
CI_SERVICES = "ci_services"
CI_SERVICES_EBS = "ci_services_ebs"
BUILDER = "builder"
BASE_BRANCH = "master"
SECRETS = [
Secret.Config(
name="dockerhub_robot_password",
type=Secret.Type.AWS_SSM_VAR,
),
Secret.Config(
name="woolenwolf_gh_app.clickhouse-app-id",
type=Secret.Type.AWS_SSM_SECRET,
),
Secret.Config(
name="woolenwolf_gh_app.clickhouse-app-key",
type=Secret.Type.AWS_SSM_SECRET,
),
]
DOCKERS = [
# Docker.Config(
# name="clickhouse/binary-builder",
# path="./ci/docker/packager/binary-builder",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
# Docker.Config(
# name="clickhouse/cctools",
# path="./ci/docker/packager/cctools",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
# Docker.Config(
# name="clickhouse/test-old-centos",
# path="./ci/docker/test/compatibility/centos",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
# Docker.Config(
# name="clickhouse/test-old-ubuntu",
# path="./ci/docker/test/compatibility/ubuntu",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
# Docker.Config(
# name="clickhouse/test-util",
# path="./ci/docker/test/util",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
# Docker.Config(
# name="clickhouse/integration-test",
# path="./ci/docker/test/integration/base",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/fuzzer",
# path="./ci/docker/test/fuzzer",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/performance-comparison",
# path="./ci/docker/test/performance-comparison",
# platforms=Docker.Platforms.arm_amd,
# depends_on=[],
# ),
Docker.Config(
name="clickhouse/fasttest",
path="./ci/docker/fasttest",
platforms=Docker.Platforms.arm_amd,
depends_on=[],
),
# Docker.Config(
# name="clickhouse/test-base",
# path="./ci/docker/test/base",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-util"],
# ),
# Docker.Config(
# name="clickhouse/clickbench",
# path="./ci/docker/test/clickbench",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/keeper-jepsen-test",
# path="./ci/docker/test/keeper-jepsen",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/server-jepsen-test",
# path="./ci/docker/test/server-jepsen",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/sqllogic-test",
# path="./ci/docker/test/sqllogic",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/sqltest",
# path="./ci/docker/test/sqltest",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/stateless-test",
# path="./ci/docker/test/stateless",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/stateful-test",
# path="./ci/docker/test/stateful",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/stateless-test"],
# ),
# Docker.Config(
# name="clickhouse/stress-test",
# path="./ci/docker/test/stress",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/stateful-test"],
# ),
# Docker.Config(
# name="clickhouse/unit-test",
# path="./ci/docker/test/unit",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
# Docker.Config(
# name="clickhouse/integration-tests-runner",
# path="./ci/docker/test/integration/runner",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
Docker.Config(
name="clickhouse/style-test",
path="./ci/docker/style-test",
platforms=Docker.Platforms.arm_amd,
depends_on=[],
),
# Docker.Config(
# name="clickhouse/docs-builder",
# path="./ci/docker/docs/builder",
# platforms=Docker.Platforms.arm_amd,
# depends_on=["clickhouse/test-base"],
# ),
]
# TODO:
# "docker/test/integration/s3_proxy": {
# "name": "clickhouse/s3-proxy",
# "dependent": []
# },
# "docker/test/integration/resolver": {
# "name": "clickhouse/python-bottle",
# "dependent": []
# },
# "docker/test/integration/helper_container": {
# "name": "clickhouse/integration-helper",
# "dependent": []
# },
# "docker/test/integration/mysql_golang_client": {
# "name": "clickhouse/mysql-golang-client",
# "dependent": []
# },
# "docker/test/integration/dotnet_client": {
# "name": "clickhouse/dotnet-client",
# "dependent": []
# },
# "docker/test/integration/mysql_java_client": {
# "name": "clickhouse/mysql-java-client",
# "dependent": []
# },
# "docker/test/integration/mysql_js_client": {
# "name": "clickhouse/mysql-js-client",
# "dependent": []
# },
# "docker/test/integration/mysql_php_client": {
# "name": "clickhouse/mysql-php-client",
# "dependent": []
# },
# "docker/test/integration/postgresql_java_client": {
# "name": "clickhouse/postgresql-java-client",
# "dependent": []
# },
# "docker/test/integration/kerberos_kdc": {
# "only_amd64": true,
# "name": "clickhouse/kerberos-kdc",
# "dependent": []
# },
# "docker/test/integration/kerberized_hadoop": {
# "only_amd64": true,
# "name": "clickhouse/kerberized-hadoop",
# "dependent": []
# },
# "docker/test/sqlancer": {
# "name": "clickhouse/sqlancer-test",
# "dependent": []
# },
# "docker/test/install/deb": {
# "name": "clickhouse/install-deb-test",
# "dependent": []
# },
# "docker/test/install/rpm": {
# "name": "clickhouse/install-rpm-test",
# "dependent": []
# },
# "docker/test/integration/nginx_dav": {
# "name": "clickhouse/nginx-dav",
# "dependent": []
# }
class JobNames:
STYLE_CHECK = "Style Check"
FAST_TEST = "Fast test"
BUILD_AMD_DEBUG = "Build amd64 debug"
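Since validate_dockers (seen earlier in this diff) only checks that every depends_on entry names a known image, deriving an actual build order is left to the runner. A standalone sketch of one way to do it over the DOCKERS list above (requires Python 3.9+ for graphlib):

from graphlib import TopologicalSorter

def docker_build_order(dockers):
    # Build each image only after all of its depends_on images.
    graph = {d.name: set(d.depends_on) for d in dockers}
    return list(TopologicalSorter(graph).static_order())

# With only the two uncommented images above, either order is valid:
# docker_build_order(DOCKERS) -> ['clickhouse/fasttest', 'clickhouse/style-test']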

ci/settings/settings.py Normal file
@@ -0,0 +1,20 @@
from ci.settings.definitions import (
S3_BUCKET_HTTP_ENDPOINT,
S3_BUCKET_NAME,
RunnerLabels,
)
S3_ARTIFACT_PATH = f"{S3_BUCKET_NAME}/artifacts"
CI_CONFIG_RUNS_ON = [RunnerLabels.CI_SERVICES]
DOCKER_BUILD_RUNS_ON = [RunnerLabels.CI_SERVICES_EBS]
CACHE_S3_PATH = f"{S3_BUCKET_NAME}/ci_ch_cache"
HTML_S3_PATH = f"{S3_BUCKET_NAME}/reports"
S3_BUCKET_TO_HTTP_ENDPOINT = {S3_BUCKET_NAME: S3_BUCKET_HTTP_ENDPOINT}
DOCKERHUB_USERNAME = "robotclickhouse"
DOCKERHUB_SECRET = "dockerhub_robot_password"
CI_DB_DB_NAME = "default"
CI_DB_TABLE_NAME = "checks"
INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS = ""
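One way these constants compose, as a hypothetical helper (not part of this diff): mapping an S3 object path to a public report URL via S3_BUCKET_TO_HTTP_ENDPOINT.

def s3_path_to_url(s3_path: str) -> str:
    # Split "bucket/key..." on the first slash and swap in the HTTP endpoint.
    bucket, _, key = s3_path.partition("/")
    return f"https://{S3_BUCKET_TO_HTTP_ENDPOINT[bucket]}/{key}"

# s3_path_to_url(f"{HTML_S3_PATH}/pr/123/report.html")
# -> 'https://clickhouse-builds.s3.amazonaws.com/reports/pr/123/report.html'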

@@ -0,0 +1,94 @@
from typing import List
from praktika import Artifact, Job, Workflow
from praktika.settings import Settings
from ci.settings.definitions import (
BASE_BRANCH,
DOCKERS,
SECRETS,
JobNames,
RunnerLabels,
)
class ArtifactNames:
ch_debug_binary = "clickhouse_debug_binary"
style_check_job = Job.Config(
name=JobNames.STYLE_CHECK,
runs_on=[RunnerLabels.CI_SERVICES],
command="python3 ./ci/jobs/check_style.py",
run_in_docker="clickhouse/style-test",
)
fast_test_job = Job.Config(
name=JobNames.FAST_TEST,
runs_on=[RunnerLabels.BUILDER],
command="python3 ./ci/jobs/fast_test.py",
run_in_docker="clickhouse/fasttest",
digest_config=Job.CacheDigestConfig(
include_paths=[
"./ci/jobs/fast_test.py",
"./tests/queries/0_stateless/",
"./src",
],
),
)
job_build_amd_debug = Job.Config(
name=JobNames.BUILD_AMD_DEBUG,
runs_on=[RunnerLabels.BUILDER],
command="python3 ./ci/jobs/build_clickhouse.py amd_debug",
run_in_docker="clickhouse/fasttest",
digest_config=Job.CacheDigestConfig(
include_paths=[
"./src",
"./contrib/",
"./CMakeLists.txt",
"./PreLoad.cmake",
"./cmake",
"./base",
"./programs",
"./docker/packager/packager",
"./rust",
"./tests/ci/version_helper.py",
],
),
provides=[ArtifactNames.ch_debug_binary],
)
workflow = Workflow.Config(
name="PR",
event=Workflow.Event.PULL_REQUEST,
base_branches=[BASE_BRANCH],
jobs=[
style_check_job,
fast_test_job,
job_build_amd_debug,
],
artifacts=[
Artifact.Config(
name=ArtifactNames.ch_debug_binary,
type=Artifact.Type.S3,
path=f"{Settings.TEMP_DIR}/build/programs/clickhouse",
)
],
dockers=DOCKERS,
secrets=SECRETS,
enable_cache=True,
enable_report=True,
enable_merge_ready_status=True,
)
WORKFLOWS = [
workflow,
] # type: List[Workflow.Config]
if __name__ == "__main__":
# local job test inside praktika environment
from praktika.runner import Runner
Runner().run(workflow, fast_test_job, docker="fasttest", dummy_env=True)
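The digest_config above is what feeds the cache check in the generated if: expression. A standalone sketch of hashing include_paths into a short digest (praktika's real hashing scheme may differ):

import hashlib
from pathlib import Path

def digest_paths(include_paths):
    h = hashlib.sha256()
    for root in sorted(include_paths):
        root = Path(root)
        files = sorted(root.rglob("*")) if root.is_dir() else [root]
        for p in files:
            if p.is_file():
                h.update(str(p).encode())  # the path affects the digest
                h.update(p.read_bytes())   # and so does the file content
    return h.hexdigest()[:12]

# digest_paths(["./ci/jobs/fast_test.py", "./src"]) -> e.g. '3f9c0a1b2d4e'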

cmake/autogenerated_versions.txt
@@ -2,11 +2,11 @@
 # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION,
 # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54490)
+SET(VERSION_REVISION 54492)
 SET(VERSION_MAJOR 24)
-SET(VERSION_MINOR 9)
+SET(VERSION_MINOR 11)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH e02b434d2fc0c4fbee29ca675deab7474d274608)
+SET(VERSION_GITHASH c82cf25b3e5864bcc153cbe45adb8c6527e1ec6e)
-SET(VERSION_DESCRIBE v24.9.1.1-testing)
+SET(VERSION_DESCRIBE v24.11.1.1-testing)
-SET(VERSION_STRING 24.9.1.1)
+SET(VERSION_STRING 24.11.1.1)
 # end of autochange

cmake/cpu_features.cmake
@@ -11,6 +11,38 @@ option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries n
 if (ARCH_NATIVE)
     set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
+
+    # Populate the ENABLE_ option flags. This is required for the build of some third-party dependencies, specifically snappy, which
+    # (somewhat weirdly) expects the relative SNAPPY_HAVE_ preprocessor variables to be populated, in addition to the microarchitecture
+    # feature flags being enabled in the compiler. This fixes the ARCH_NATIVE flag by automatically populating the ENABLE_ option flags
+    # according to the current CPU's capabilities, detected using clang.
+    if (ARCH_AMD64)
+        execute_process(
+            COMMAND sh -c "clang -E - -march=native -###"
+            INPUT_FILE /dev/null
+            OUTPUT_QUIET
+            ERROR_VARIABLE TEST_FEATURE_RESULT)
+
+        macro(TEST_AMD64_FEATURE TEST_FEATURE_RESULT feat flag)
+            if (${TEST_FEATURE_RESULT} MATCHES "\"\\+${feat}\"")
+                set(${flag} ON)
+            else ()
+                set(${flag} OFF)
+            endif ()
+        endmacro()
+
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} ssse3 ENABLE_SSSE3)
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} sse4.1 ENABLE_SSE41)
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} sse4.2 ENABLE_SSE42)
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} vpclmulqdq ENABLE_PCLMULQDQ)
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} popcnt ENABLE_POPCNT)
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} avx ENABLE_AVX)
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} avx2 ENABLE_AVX2)
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} avx512f ENABLE_AVX512)
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} avx512vbmi ENABLE_AVX512_VBMI)
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} bmi ENABLE_BMI)
+        TEST_AMD64_FEATURE (${TEST_FEATURE_RESULT} bmi2 ENABLE_BMI2)
+    endif ()
 elseif (ARCH_AARCH64)
     # ARM publishes almost every year a new revision of it's ISA [1]. Each version comes with new mandatory and optional features from
     # which CPU vendors can pick and choose. This creates a lot of variability ... We provide two build "profiles", one for maximum
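The CMake block above shells out to clang and pattern-matches the driver's -### output for quoted "+feature" tokens. The same detection in a standalone Python sketch (assumes a clang binary on PATH; illustrative only):

import re
import subprocess

def native_cpu_features() -> set:
    # clang's driver prints the cc1 invocation, including -target-feature
    # arguments like "+avx2", to stderr when given -###.
    out = subprocess.run(
        ["clang", "-E", "-", "-march=native", "-###"],
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
    ).stderr
    return set(re.findall(r'"\+([\w.]+)"', out))

feats = native_cpu_features()
ENABLE_AVX2 = "avx2" in feats
ENABLE_SSE42 = "sse4.2" in feats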

@@ -1,4 +1,21 @@
-set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
+if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
+    # Enable libcxx debug mode: https://releases.llvm.org/15.0.0/projects/libcxx/docs/DesignDocs/DebugMode.html
+    # The docs say the debug mode violates complexity guarantees, so do this only for Debug builds.
+    # set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_ENABLE_DEBUG_MODE=1")
+    # ^^ Crashes the database upon startup, needs investigation.
+    # Besides that, the implementation looks like a poor man's MSAN specific to libcxx. Since CI tests MSAN
+    # anyways, we can keep the debug mode disabled.
+
+    # Libcxx also provides extra assertions:
+    # --> https://releases.llvm.org/15.0.0/projects/libcxx/docs/UsingLibcxx.html#assertions-mode
+    # These look orthogonal to the debug mode but the debug mode enables them implicitly:
+    # --> https://github.com/llvm/llvm-project/blob/release/15.x/libcxx/include/__assert#L29
+    # They are cheap and straightforward, so enable them in debug builds:
+    set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_ENABLE_ASSERTIONS=1")
+
+    # TODO Once we upgrade to LLVM 18+, reconsider all of the above as they introduced "hardening modes":
+    # https://libcxx.llvm.org/Hardening.html
+endif ()
 
 add_subdirectory(contrib/libcxxabi-cmake)
 add_subdirectory(contrib/libcxx-cmake)

Some files were not shown because too many files have changed in this diff.