diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 950e672272a..3e0131a388a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -85,4 +85,4 @@ At a minimum, the following information should be added (but add more as needed) - [ ] 3 - [ ] 4 -
+
diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 2a98722414b..b0380b939bb 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -9,6 +9,12 @@ on: # yamllint disable-line rule:truthy push: branches: - 'backport/**' + +# Cancel the previous wf run in PRs. +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: RunConfig: runs-on: [self-hosted, style-checker-aarch64] diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml deleted file mode 100644 index 3c2be767ad2..00000000000 --- a/.github/workflows/cancel.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Cancel - -env: - # Force the stdout and stderr streams to be unbuffered - PYTHONUNBUFFERED: 1 - -on: # yamllint disable-line rule:truthy - workflow_run: - workflows: ["PullRequestCI", "ReleaseBranchCI", "DocsCheck", "BackportPR"] - types: - - requested -jobs: - cancel: - runs-on: [self-hosted, style-checker] - steps: - - uses: styfle/cancel-workflow-action@0.9.1 - with: - all_but_latest: true - workflow_id: ${{ github.event.workflow.id }} diff --git a/.github/workflows/debug.yml b/.github/workflows/debug.yml deleted file mode 100644 index 5abed268ecd..00000000000 --- a/.github/workflows/debug.yml +++ /dev/null @@ -1,11 +0,0 @@ -# The CI for each commit, prints envs and content of GITHUB_EVENT_PATH -name: Debug - -'on': - [push, pull_request, pull_request_review, release, workflow_dispatch, workflow_call] - -jobs: - DebugInfo: - runs-on: ubuntu-latest - steps: - - uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 515236bb826..3e1c5576e7d 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -10,14 +10,13 @@ env: workflow_dispatch: jobs: - Debug: - # The task for having a preserved ENV and event.json for later investigation - uses: ./.github/workflows/debug.yml RunConfig: runs-on: [self-hosted, style-checker-aarch64] outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: + - name: DebugInfo + uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 - name: Check out repository code uses: ClickHouse/checkout@v1 with: diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index a6e369ae0e6..9f16e32707e 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -14,6 +14,11 @@ on: # yamllint disable-line rule:truthy branches: - master +# Cancel the previous wf run in PRs. 
+concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: RunConfig: runs-on: [self-hosted, style-checker-aarch64] diff --git a/.github/workflows/pull_request_approved.yml b/.github/workflows/pull_request_approved.yml deleted file mode 100644 index 3de4978ad68..00000000000 --- a/.github/workflows/pull_request_approved.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: PullRequestApprovedCI - -env: - # Force the stdout and stderr streams to be unbuffered - PYTHONUNBUFFERED: 1 - -on: # yamllint disable-line rule:truthy - pull_request_review: - types: - - submitted - -jobs: - MergeOnApproval: - runs-on: [self-hosted, style-checker] - steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - - name: Merge approved PR - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 merge_pr.py --check-approved diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index d25e18bd397..a137eb2bdf2 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -75,7 +75,7 @@ The supported formats are: | [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | | [ORC](#data-format-orc) | ✔ | ✔ | | [One](#data-format-one) | ✔ | ✗ | -| [Npy](#data-format-npy) | ✔ | ✗ | +| [Npy](#data-format-npy) | ✔ | ✔ | | [RowBinary](#rowbinary) | ✔ | ✔ | | [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | @@ -2466,23 +2466,22 @@ Result: ## Npy {#data-format-npy} -This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. Supported Npy data types and their corresponding type in ClickHouse: -| Npy type | ClickHouse type | -|:--------:|:---------------:| -| b1 | UInt8 | -| i1 | Int8 | -| i2 | Int16 | -| i4 | Int32 | -| i8 | Int64 | -| u1 | UInt8 | -| u2 | UInt16 | -| u4 | UInt32 | -| u8 | UInt64 | -| f2 | Float32 | -| f4 | Float32 | -| f8 | Float64 | -| S | String | -| U | String | +This format is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats the top-level dimension as an array of rows with a single column. 
Supported Npy data types and their corresponding type in ClickHouse: + +| Npy data type (`INSERT`) | ClickHouse data type | Npy data type (`SELECT`) | +|--------------------------|-----------------------------------------------------------------|--------------------------| +| `i1` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `i1` | +| `i2` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `i2` | +| `i4` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `i4` | +| `i8` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `i8` | +| `u1`, `b1` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `u1` | +| `u2` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `u2` | +| `u4` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `u4` | +| `u8` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `u8` | +| `f2`, `f4` | [Float32](/docs/en/sql-reference/data-types/float.md) | `f4` | +| `f8` | [Float64](/docs/en/sql-reference/data-types/float.md) | `f8` | +| `S`, `U` | [String](/docs/en/sql-reference/data-types/string.md) | `S` | +| | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `S` | **Example of saving an array in .npy format using Python** @@ -2509,6 +2508,14 @@ Result: └───────────────┘ ``` +**Selecting Data** + +You can select data from a ClickHouse table and save it to a file in the Npy format with the following command: + +```bash +$ clickhouse-client --query="SELECT {column} FROM {some_table} FORMAT Npy" > {filename.npy} +``` + ## LineAsString {#lineasstring} In this format, every line of input data is interpreted as a single string value. This format can only be parsed for table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted. diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index 9d495126d28..18c7816f811 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -7,6 +7,7 @@ sidebar_label: Map(K, V) # Map(K, V) `Map(K, V)` data type stores `key:value` pairs. +The Map data type is implemented as `Array(Tuple(key T1, value T2))`, which means that the order of keys in each map does not change, i.e., this data type maintains insertion order. **Parameters** diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index eb4e98961f1..564186fd8db 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -234,3 +234,34 @@ SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3)) :::note The type returned is a DateTime64 as the DataTime32 must be promoted to 64 bit for the comparison. ::: + +## clamp + +Constrains the return value between `min` and `max`. + +**Syntax** + +``` sql +clamp(value, min, max) +``` + +**Arguments** + +- `value` – Input value. +- `min` – The lower bound. +- `max` – The upper bound. + +**Returned values** + +If the value is less than the minimum, the minimum is returned; if it is greater than the maximum, the maximum is returned; otherwise, the value itself is returned. 
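+ +The result is equivalent to `least(greatest(value, min), max)`; note that `clamp` throws an error if `min` is greater than `max`. 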
+ +**Examples** + +```sql +SELECT clamp(1, 2, 3) result, toTypeName(result) type; +``` +```response +┌─result─┬─type────┐ +│ 2 │ Float64 │ +└────────┴─────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 4ef407a4d13..34c6016235a 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -151,6 +151,14 @@ Result: Query with `INNER` type of a join and conditions with `OR` and `AND`: +:::note + +By default, non-equal conditions are supported as long as each of them uses columns from only one table. +For example, `t1.a = t2.key AND t1.b > 0 AND t2.b > t2.c` is supported, because `t1.b > 0` uses columns only from `t1` and `t2.b > t2.c` uses columns only from `t2`. +However, you can try experimental support for conditions like `t1.a = t2.key AND t1.b > t2.key`; see the section below for more details. + +::: + ``` sql SELECT a, b, val FROM t1 INNER JOIN t2 ON t1.a = t2.key OR t1.b = t2.key AND t2.val > 3; ``` @@ -165,7 +173,7 @@ Result: └───┴────┴─────┘ ``` -## [experimental] Join with inequality conditions +## [experimental] Join with inequality conditions for columns from different tables :::note This feature is experimental. To use it, set `allow_experimental_join_condition` to 1 in your configuration files or by using the `SET` command: diff --git a/src/Analyzer/FunctionNode.h b/src/Analyzer/FunctionNode.h index 8d14b7eeb0d..8abffcfc8ee 100644 --- a/src/Analyzer/FunctionNode.h +++ b/src/Analyzer/FunctionNode.h @@ -201,8 +201,11 @@ public: void convertToNullable() override { - chassert(kind == FunctionKind::ORDINARY); - wrap_with_nullable = true; + /// Ignore other function kinds. + /// We might try to convert an aggregate/window function for an invalid query + /// before validation has happened. 
+ if (kind == FunctionKind::ORDINARY) + wrap_with_nullable = true; } void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 0d2cd5c5537..d6e1ceb243a 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -475,7 +475,7 @@ struct TableExpressionData class ExpressionsStack { public: - void pushNode(const QueryTreeNodePtr & node) + void push(const QueryTreeNodePtr & node) { if (node->hasAlias()) { @@ -492,7 +492,7 @@ public: expressions.emplace_back(node); } - void popNode() + void pop() { const auto & top_expression = expressions.back(); const auto & top_expression_alias = top_expression->getAlias(); @@ -730,6 +730,8 @@ struct IdentifierResolveScope join_use_nulls = context->getSettingsRef().join_use_nulls; else if (parent_scope) join_use_nulls = parent_scope->join_use_nulls; + + alias_name_to_expression_node = &alias_name_to_expression_node_before_group_by; } QueryTreeNodePtr scope_node; @@ -745,7 +747,10 @@ struct IdentifierResolveScope std::unordered_map expression_argument_name_to_node; /// Alias name to query expression node - std::unordered_map alias_name_to_expression_node; + std::unordered_map alias_name_to_expression_node_before_group_by; + std::unordered_map alias_name_to_expression_node_after_group_by; + + std::unordered_map * alias_name_to_expression_node = nullptr; /// Alias name to lambda node std::unordered_map alias_name_to_lambda_node; @@ -878,6 +883,22 @@ struct IdentifierResolveScope return it->second; } + void pushExpressionNode(const QueryTreeNodePtr & node) + { + bool had_aggregate_function = expressions_in_resolve_process_stack.hasAggregateFunction(); + expressions_in_resolve_process_stack.push(node); + if (group_by_use_nulls && had_aggregate_function != expressions_in_resolve_process_stack.hasAggregateFunction()) + alias_name_to_expression_node = &alias_name_to_expression_node_before_group_by; + } + + void popExpressionNode() + { + bool had_aggregate_function = expressions_in_resolve_process_stack.hasAggregateFunction(); + expressions_in_resolve_process_stack.pop(); + if (group_by_use_nulls && had_aggregate_function != expressions_in_resolve_process_stack.hasAggregateFunction()) + alias_name_to_expression_node = &alias_name_to_expression_node_after_group_by; + } + /// Dump identifier resolve scope [[maybe_unused]] void dump(WriteBuffer & buffer) const { @@ -894,8 +915,8 @@ struct IdentifierResolveScope for (const auto & [alias_name, node] : expression_argument_name_to_node) buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n'; - buffer << "Alias name to expression node table size " << alias_name_to_expression_node.size() << '\n'; - for (const auto & [alias_name, node] : alias_name_to_expression_node) + buffer << "Alias name to expression node table size " << alias_name_to_expression_node->size() << '\n'; + for (const auto & [alias_name, node] : *alias_name_to_expression_node) buffer << "Alias name " << alias_name << " expression node " << node->dumpTree() << '\n'; buffer << "Alias name to function node table size " << alias_name_to_lambda_node.size() << '\n'; @@ -1023,7 +1044,7 @@ private: if (is_lambda_node) { - if (scope.alias_name_to_expression_node.contains(alias)) + if (scope.alias_name_to_expression_node->contains(alias)) scope.nodes_with_duplicated_aliases.insert(node->clone()); auto [_, inserted] = 
scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); @@ -1036,7 +1057,7 @@ private: if (scope.alias_name_to_lambda_node.contains(alias)) scope.nodes_with_duplicated_aliases.insert(node->clone()); - auto [_, inserted] = scope.alias_name_to_expression_node.insert(std::make_pair(alias, node)); + auto [_, inserted] = scope.alias_name_to_expression_node->insert(std::make_pair(alias, node)); if (!inserted) scope.nodes_with_duplicated_aliases.insert(node->clone()); @@ -1838,7 +1859,7 @@ void QueryAnalyzer::collectScopeValidIdentifiersForTypoCorrection( if (allow_expression_identifiers) { - for (const auto & [name, expression] : scope.alias_name_to_expression_node) + for (const auto & [name, expression] : *scope.alias_name_to_expression_node) { assert(expression); auto expression_identifier = Identifier(name); @@ -1868,7 +1889,7 @@ void QueryAnalyzer::collectScopeValidIdentifiersForTypoCorrection( { if (allow_function_identifiers) { - for (const auto & [name, _] : scope.alias_name_to_expression_node) + for (const auto & [name, _] : *scope.alias_name_to_expression_node) valid_identifiers_result.insert(Identifier(name)); } @@ -2768,7 +2789,7 @@ bool QueryAnalyzer::tryBindIdentifierToAliases(const IdentifierLookup & identifi auto get_alias_name_to_node_map = [&]() -> const std::unordered_map & { if (identifier_lookup.isExpressionLookup()) - return scope.alias_name_to_expression_node; + return *scope.alias_name_to_expression_node; else if (identifier_lookup.isFunctionLookup()) return scope.alias_name_to_lambda_node; @@ -2830,7 +2851,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier auto get_alias_name_to_node_map = [&]() -> std::unordered_map & { if (identifier_lookup.isExpressionLookup()) - return scope.alias_name_to_expression_node; + return *scope.alias_name_to_expression_node; else if (identifier_lookup.isFunctionLookup()) return scope.alias_name_to_lambda_node; @@ -2868,7 +2889,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier /// Resolve expression if necessary if (node_type == QueryTreeNodeType::IDENTIFIER) { - scope.expressions_in_resolve_process_stack.pushNode(it->second); + scope.pushExpressionNode(it->second); auto & alias_identifier_node = it->second->as(); auto identifier = alias_identifier_node.getIdentifier(); @@ -2899,9 +2920,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier if (identifier_lookup.isExpressionLookup()) scope.alias_name_to_lambda_node.erase(identifier_bind_part); else if (identifier_lookup.isFunctionLookup()) - scope.alias_name_to_expression_node.erase(identifier_bind_part); + scope.alias_name_to_expression_node->erase(identifier_bind_part); - scope.expressions_in_resolve_process_stack.popNode(); + scope.popExpressionNode(); } else if (node_type == QueryTreeNodeType::FUNCTION) { @@ -4098,8 +4119,8 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook * SELECT id FROM ( SELECT ... ) AS subquery ARRAY JOIN [0] AS id INNER JOIN second_table USING (id) * In the example, identifier `id` should be resolved into one from USING (id) column. 
*/ - auto alias_it = scope.alias_name_to_expression_node.find(identifier_lookup.identifier.getFullName()); - if (alias_it != scope.alias_name_to_expression_node.end() && alias_it->second->getNodeType() == QueryTreeNodeType::COLUMN) + auto alias_it = scope.alias_name_to_expression_node->find(identifier_lookup.identifier.getFullName()); + if (alias_it != scope.alias_name_to_expression_node->end() && alias_it->second->getNodeType() == QueryTreeNodeType::COLUMN) { const auto & column_node = alias_it->second->as(); if (column_node.getColumnSource()->getNodeType() == QueryTreeNodeType::ARRAY_JOIN) @@ -4814,6 +4835,19 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I } } + if (!scope.expressions_in_resolve_process_stack.hasAggregateFunction()) + { + for (auto & [node, _] : matched_expression_nodes_with_names) + { + auto it = scope.nullable_group_by_keys.find(node); + if (it != scope.nullable_group_by_keys.end()) + { + node = it->node->clone(); + node->convertToNullable(); + } + } + } + std::unordered_map> strict_transformer_to_used_column_names; for (const auto & transformer : matcher_node_typed.getColumnTransformers().getNodes()) { @@ -5007,7 +5041,10 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I scope.scope_node->formatASTForErrorMessage()); } + auto original_ast = matcher_node->getOriginalAST(); matcher_node = std::move(list); + if (original_ast) + matcher_node->setOriginalAST(original_ast); return result_projection_names; } @@ -5203,10 +5240,14 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod for (size_t i = 0; i < lambda_arguments_nodes_size; ++i) { auto & lambda_argument_node = lambda_arguments_nodes[i]; - auto & lambda_argument_node_typed = lambda_argument_node->as(); - const auto & lambda_argument_name = lambda_argument_node_typed.getIdentifier().getFullName(); + const auto * lambda_argument_identifier = lambda_argument_node->as(); + const auto * lambda_argument_column = lambda_argument_node->as(); + if (!lambda_argument_identifier && !lambda_argument_column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected IDENTIFIER or COLUMN as lambda argument, got {}", lambda_node->dumpTree()); + const auto & lambda_argument_name = lambda_argument_identifier ? lambda_argument_identifier->getIdentifier().getFullName() + : lambda_argument_column->getColumnName(); - bool has_expression_node = scope.alias_name_to_expression_node.contains(lambda_argument_name); + bool has_expression_node = scope.alias_name_to_expression_node->contains(lambda_argument_name); bool has_alias_node = scope.alias_name_to_lambda_node.contains(lambda_argument_name); if (has_expression_node || has_alias_node) @@ -5214,7 +5255,7 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod throw Exception(ErrorCodes::BAD_ARGUMENTS, "Alias name '{}' inside lambda {} cannot have same name as lambda argument. In scope {}", lambda_argument_name, - lambda_argument_node_typed.formatASTForErrorMessage(), + lambda_argument_node->formatASTForErrorMessage(), scope.scope_node->formatASTForErrorMessage()); } @@ -6233,8 +6274,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * * To resolve b we need to resolve a. 
*/ - auto it = scope.alias_name_to_expression_node.find(node_alias); - if (it != scope.alias_name_to_expression_node.end()) + auto it = scope.alias_name_to_expression_node->find(node_alias); + if (it != scope.alias_name_to_expression_node->end()) node = it->second; if (allow_lambda_expression) @@ -6245,7 +6286,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id } } - scope.expressions_in_resolve_process_stack.pushNode(node); + scope.pushExpressionNode(node); auto node_type = node->getNodeType(); @@ -6274,7 +6315,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id resolved_identifier_node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::FUNCTION}, scope).resolved_identifier; if (resolved_identifier_node && !node_alias.empty()) - scope.alias_name_to_expression_node.erase(node_alias); + scope.alias_name_to_expression_node->erase(node_alias); } if (!resolved_identifier_node && allow_table_expression) @@ -6490,13 +6531,23 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id validateTreeSize(node, scope.context->getSettingsRef().max_expanded_ast_elements, node_to_tree_size); - if (!scope.expressions_in_resolve_process_stack.hasAggregateFunction()) + /// A lambda can be inside an aggregate function, so we should check parent scopes. + /// Most likely only the root scope can have an aggregate function, but let's check all just in case. + bool in_aggregate_function_scope = false; + for (const auto * scope_ptr = &scope; scope_ptr; scope_ptr = scope_ptr->parent_scope) + in_aggregate_function_scope = in_aggregate_function_scope || scope_ptr->expressions_in_resolve_process_stack.hasAggregateFunction(); + + if (!in_aggregate_function_scope) { - auto it = scope.nullable_group_by_keys.find(node); - if (it != scope.nullable_group_by_keys.end()) + for (const auto * scope_ptr = &scope; scope_ptr; scope_ptr = scope_ptr->parent_scope) { - node = it->node->clone(); - node->convertToNullable(); + auto it = scope_ptr->nullable_group_by_keys.find(node); + if (it != scope_ptr->nullable_group_by_keys.end()) + { + node = it->node->clone(); + node->convertToNullable(); + break; + } } } @@ -6505,8 +6556,8 @@ */ if (!node_alias.empty() && use_alias_table && !scope.group_by_use_nulls) { - auto it = scope.alias_name_to_expression_node.find(node_alias); - if (it != scope.alias_name_to_expression_node.end()) + auto it = scope.alias_name_to_expression_node->find(node_alias); + if (it != scope.alias_name_to_expression_node->end()) it->second = node; if (allow_lambda_expression) @@ -6519,7 +6570,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id resolved_expressions.emplace(node, result_projection_names); - scope.expressions_in_resolve_process_stack.popNode(); + scope.popExpressionNode(); bool expression_was_root = scope.expressions_in_resolve_process_stack.empty(); if (expression_was_root) scope.non_cached_identifier_lookups_during_expression_resolve.clear(); @@ -6863,11 +6914,11 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod */ resolve_settings.allow_to_resolve_subquery_during_identifier_resolution = false; - scope.expressions_in_resolve_process_stack.pushNode(current_join_tree_node); + scope.pushExpressionNode(current_join_tree_node); auto table_identifier_resolve_result = tryResolveIdentifier(table_identifier_lookup, scope, resolve_settings); - 
scope.expressions_in_resolve_process_stack.popNode(); + scope.popExpressionNode(); bool expression_was_root = scope.expressions_in_resolve_process_stack.empty(); if (expression_was_root) scope.non_cached_identifier_lookups_during_expression_resolve.clear(); @@ -7453,7 +7504,7 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif for (auto & array_join_expression : array_join_nodes) { auto array_join_expression_alias = array_join_expression->getAlias(); - if (!array_join_expression_alias.empty() && scope.alias_name_to_expression_node.contains(array_join_expression_alias)) + if (!array_join_expression_alias.empty() && scope.alias_name_to_expression_node->contains(array_join_expression_alias)) throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", array_join_expression->formatASTForErrorMessage(), @@ -7547,8 +7598,8 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif array_join_nodes = std::move(array_join_column_expressions); for (auto & array_join_column_expression : array_join_nodes) { - auto it = scope.alias_name_to_expression_node.find(array_join_column_expression->getAlias()); - if (it != scope.alias_name_to_expression_node.end()) + auto it = scope.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); + if (it != scope.alias_name_to_expression_node->end()) { auto & array_join_column_expression_typed = array_join_column_expression->as(); auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), @@ -8008,7 +8059,12 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier window_node_typed.setParentWindowName({}); } - scope.window_name_to_window_node.emplace(window_node_typed.getAlias(), window_node); + auto [_, inserted] = scope.window_name_to_window_node.emplace(window_node_typed.getAlias(), window_node); + if (!inserted) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Window '{}' is already defined. In scope {}", + window_node_typed.getAlias(), + scope.scope_node->formatASTForErrorMessage()); } /** Disable identifier cache during JOIN TREE resolve. @@ -8077,8 +8133,10 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier /// Clone is needed cause aliases share subtrees. 
/// If not clone, the same (shared) subtree could be resolved again with different (Nullable) type /// See 03023_group_by_use_nulls_analyzer_crashes - for (auto & [_, node] : scope.alias_name_to_expression_node) - node = node->clone(); + for (auto & [key, node] : scope.alias_name_to_expression_node_before_group_by) + scope.alias_name_to_expression_node_after_group_by[key] = node->clone(); + + scope.alias_name_to_expression_node = &scope.alias_name_to_expression_node_after_group_by; } if (query_node_typed.hasHaving()) @@ -8162,8 +8220,8 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier bool has_node_in_alias_table = false; - auto it = scope.alias_name_to_expression_node.find(node_alias); - if (it != scope.alias_name_to_expression_node.end()) + auto it = scope.alias_name_to_expression_node->find(node_alias); + if (it != scope.alias_name_to_expression_node->end()) { has_node_in_alias_table = true; @@ -8222,7 +8280,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier /// Remove aliases from expression and lambda nodes - for (auto & [_, node] : scope.alias_name_to_expression_node) + for (auto & [_, node] : *scope.alias_name_to_expression_node) node->removeAlias(); for (auto & [_, node] : scope.alias_name_to_lambda_node) diff --git a/src/Analyzer/ValidationUtils.cpp b/src/Analyzer/ValidationUtils.cpp index e17639367eb..9e977964755 100644 --- a/src/Analyzer/ValidationUtils.cpp +++ b/src/Analyzer/ValidationUtils.cpp @@ -26,6 +26,10 @@ namespace void validateFilter(const QueryTreeNodePtr & filter_node, std::string_view exception_place_message, const QueryTreeNodePtr & query_node) { + if (filter_node->getNodeType() == QueryTreeNodeType::LIST) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Unsupported expression '{}' in filter", filter_node->formatASTForErrorMessage()); + auto filter_node_result_type = filter_node->getResultType(); if (!filter_node_result_type->canBeUsedInBooleanContext()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 2d1d69ced73..cbda5466303 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -12,6 +11,8 @@ #include #include +#include + class Collator; @@ -42,7 +43,11 @@ private: size_t ALWAYS_INLINE offsetAt(ssize_t i) const { return offsets[i - 1]; } /// Size of i-th element, including terminating zero. 
- size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; } + size_t ALWAYS_INLINE sizeAt(ssize_t i) const + { + chassert(offsets[i] > offsets[i - 1]); + return offsets[i] - offsets[i - 1]; + } struct ComparatorBase; @@ -79,7 +84,7 @@ public: size_t byteSizeAt(size_t n) const override { - assert(n < size()); + chassert(n < size()); return sizeAt(n) + sizeof(offsets[0]); } @@ -94,25 +99,25 @@ public: Field operator[](size_t n) const override { - assert(n < size()); + chassert(n < size()); return Field(&chars[offsetAt(n)], sizeAt(n) - 1); } void get(size_t n, Field & res) const override { - assert(n < size()); + chassert(n < size()); res = std::string_view{reinterpret_cast(&chars[offsetAt(n)]), sizeAt(n) - 1}; } StringRef getDataAt(size_t n) const override { - assert(n < size()); + chassert(n < size()); return StringRef(&chars[offsetAt(n)], sizeAt(n) - 1); } bool isDefaultAt(size_t n) const override { - assert(n < size()); + chassert(n < size()); return sizeAt(n) == 1; } diff --git a/src/Columns/ColumnUnique.cpp b/src/Columns/ColumnUnique.cpp index edfee69a752..54f45204c00 100644 --- a/src/Columns/ColumnUnique.cpp +++ b/src/Columns/ColumnUnique.cpp @@ -21,5 +21,8 @@ template class ColumnUnique; template class ColumnUnique; template class ColumnUnique; template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; +template class ColumnUnique; } diff --git a/src/Common/CaresPTRResolver.cpp b/src/Common/CaresPTRResolver.cpp index 0261f4a130f..df456c9cfbd 100644 --- a/src/Common/CaresPTRResolver.cpp +++ b/src/Common/CaresPTRResolver.cpp @@ -173,11 +173,6 @@ namespace DB return true; } - void CaresPTRResolver::cancel_requests(ares_channel channel) - { - ares_cancel(channel); - } - std::span CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd, ares_channel channel) { int sockets_bitmask = ares_getsock(channel, sockets, ARES_GETSOCK_MAXNUM); diff --git a/src/Common/CaresPTRResolver.h b/src/Common/CaresPTRResolver.h index 24a5e422ca8..95194e0d5ce 100644 --- a/src/Common/CaresPTRResolver.h +++ b/src/Common/CaresPTRResolver.h @@ -44,8 +44,6 @@ namespace DB private: bool wait_and_process(ares_channel channel); - void cancel_requests(ares_channel channel); - void resolve(const std::string & ip, std::unordered_set & response, ares_channel channel); void resolve_v6(const std::string & ip, std::unordered_set & response, ares_channel channel); diff --git a/src/Common/PageCache.cpp b/src/Common/PageCache.cpp index d4598d4683b..56bd8c1a339 100644 --- a/src/Common/PageCache.cpp +++ b/src/Common/PageCache.cpp @@ -198,12 +198,18 @@ size_t PageCache::getPinnedSize() const PageCache::MemoryStats PageCache::getResidentSetSize() const { MemoryStats stats; + #ifdef OS_LINUX if (use_madv_free) { std::unordered_set cache_mmap_addrs; { std::lock_guard lock(global_mutex); + + /// Don't spend time on reading smaps if page cache is not used. 
+ if (mmaps.empty()) + return stats; + for (const auto & m : mmaps) cache_mmap_addrs.insert(reinterpret_cast(m.ptr)); } @@ -258,7 +264,7 @@ PageCache::MemoryStats PageCache::getResidentSetSize() const UInt64 addr = unhexUInt(s.c_str()); current_range_is_cache = cache_mmap_addrs.contains(addr); } - else if (s == "Rss:" || s == "LazyFree") + else if (s == "Rss:" || s == "LazyFree:") { skip_whitespace(); size_t val; diff --git a/src/Common/Scheduler/Nodes/FifoQueue.h b/src/Common/Scheduler/Nodes/FifoQueue.h index 45ed32343ff..9ec997c06d2 100644 --- a/src/Common/Scheduler/Nodes/FifoQueue.h +++ b/src/Common/Scheduler/Nodes/FifoQueue.h @@ -6,7 +6,8 @@ #include -#include +#include + #include @@ -15,6 +16,7 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int INVALID_SCHEDULER_NODE; } @@ -42,7 +44,7 @@ public: std::lock_guard lock(mutex); queue_cost += request->cost; bool was_empty = requests.empty(); - requests.push_back(request); + requests.push_back(*request); if (was_empty) scheduleActivation(); } @@ -52,7 +54,7 @@ public: std::lock_guard lock(mutex); if (requests.empty()) return {nullptr, false}; - ResourceRequest * result = requests.front(); + ResourceRequest * result = &requests.front(); requests.pop_front(); if (requests.empty()) busy_periods++; @@ -65,19 +67,24 @@ public: bool cancelRequest(ResourceRequest * request) override { std::lock_guard lock(mutex); - // TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N) - for (auto i = requests.begin(), e = requests.end(); i != e; ++i) + if (request->is_linked()) { - if (*i == request) - { - requests.erase(i); - if (requests.empty()) - busy_periods++; - queue_cost -= request->cost; - canceled_requests++; - canceled_cost += request->cost; - return true; - } + // It's impossible to check that `request` is indeed inserted to this queue and not another queue. + // It's up to caller to make sure this is the case. Otherwise, list sizes will be corrupted. + // Not tracking list sizes is not an option, because another problem appears: removing from list w/o locking. + // Another possible solution - keep track if request `is_cancelable` guarded by `mutex` + // Simple check for list size corruption + if (requests.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "trying to cancel request (linked into another queue) from empty queue: {}", getPath()); + + requests.erase(requests.iterator_to(*request)); + + if (requests.empty()) + busy_periods++; + queue_cost -= request->cost; + canceled_requests++; + canceled_cost += request->cost; + return true; } return false; } @@ -124,7 +131,7 @@ public: private: std::mutex mutex; Int64 queue_cost = 0; - std::deque requests; // TODO(serxa): reimplement it using intrusive list to avoid allocations/deallocations and O(N) during cancel + boost::intrusive::list requests; }; } diff --git a/src/Common/Scheduler/ResourceRequest.h b/src/Common/Scheduler/ResourceRequest.h index f3153ad382c..d64f624cec5 100644 --- a/src/Common/Scheduler/ResourceRequest.h +++ b/src/Common/Scheduler/ResourceRequest.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -41,7 +42,7 @@ constexpr ResourceCost ResourceCostMax = std::numeric_limits::max(); * Returning true means successful cancel and therefore steps (4) and (5) are not going to happen * and step (6) MUST be omitted. */ -class ResourceRequest +class ResourceRequest : public boost::intrusive::list_base_hook<> { public: /// Cost of request execution; should be filled before request enqueueing. 
@@ -62,6 +63,7 @@ public: { cost = cost_; constraint = nullptr; + // Note that list_base_hook should be reset independently (by intrusive list) } virtual ~ResourceRequest() = default; diff --git a/src/Common/UTF8Helpers.cpp b/src/Common/UTF8Helpers.cpp index be1f222dc96..b8f5c000e75 100644 --- a/src/Common/UTF8Helpers.cpp +++ b/src/Common/UTF8Helpers.cpp @@ -1,9 +1,13 @@ -#include #include +#include +#include #include #include +#if USE_MULTITARGET_CODE +#include +#endif namespace DB { @@ -215,5 +219,71 @@ size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, s return computeWidthImpl(data, size, prefix, limit); } + +DECLARE_DEFAULT_CODE( +bool isAllASCII(const UInt8 * data, size_t size) +{ + UInt8 mask = 0; + for (size_t i = 0; i < size; ++i) + mask |= data[i]; + + return !(mask & 0x80); +}) + +DECLARE_SSE42_SPECIFIC_CODE( +/// Copy from https://github.com/lemire/fastvalidate-utf-8/blob/master/include/simdasciicheck.h +bool isAllASCII(const UInt8 * data, size_t size) +{ + __m128i masks = _mm_setzero_si128(); + + size_t i = 0; + for (; i + 16 <= size; i += 16) + { + __m128i bytes = _mm_loadu_si128(reinterpret_cast(data + i)); + masks = _mm_or_si128(masks, bytes); + } + int mask = _mm_movemask_epi8(masks); + + UInt8 tail_mask = 0; + for (; i < size; i++) + tail_mask |= data[i]; + + mask |= (tail_mask & 0x80); + return !mask; +}) + +DECLARE_AVX2_SPECIFIC_CODE( +bool isAllASCII(const UInt8 * data, size_t size) +{ + __m256i masks = _mm256_setzero_si256(); + + size_t i = 0; + for (; i + 32 <= size; i += 32) + { + __m256i bytes = _mm256_loadu_si256(reinterpret_cast(data + i)); + masks = _mm256_or_si256(masks, bytes); + } + int mask = _mm256_movemask_epi8(masks); + + UInt8 tail_mask = 0; + for (; i < size; i++) + tail_mask |= data[i]; + + mask |= (tail_mask & 0x80); + return !mask; +}) + +bool isAllASCII(const UInt8* data, size_t size) +{ +#if USE_MULTITARGET_CODE + if (isArchSupported(TargetArch::AVX2)) + return TargetSpecific::AVX2::isAllASCII(data, size); + if (isArchSupported(TargetArch::SSE42)) + return TargetSpecific::SSE42::isAllASCII(data, size); +#endif + return TargetSpecific::Default::isAllASCII(data, size); +} + + } } diff --git a/src/Common/UTF8Helpers.h b/src/Common/UTF8Helpers.h index a4dd88921b7..933b62c7b63 100644 --- a/src/Common/UTF8Helpers.h +++ b/src/Common/UTF8Helpers.h @@ -136,7 +136,10 @@ size_t computeWidth(const UInt8 * data, size_t size, size_t prefix = 0) noexcept */ size_t computeBytesBeforeWidth(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept; -} +/// If all the characters in the string are ASCII, return true. 
+bool isAllASCII(const UInt8* data, size_t size); + +} } diff --git a/src/Formats/NumpyDataTypes.h b/src/Formats/NumpyDataTypes.h index cb40c67cd19..062f743c0ea 100644 --- a/src/Formats/NumpyDataTypes.h +++ b/src/Formats/NumpyDataTypes.h @@ -1,10 +1,12 @@ #pragma once #include #include +#include namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } enum class NumpyDataTypeIndex : uint8_t @@ -29,9 +31,9 @@ class NumpyDataType public: enum Endianness { - LITTLE, - BIG, - NONE, + LITTLE = '<', + BIG = '>', + NONE = '|', }; NumpyDataTypeIndex type_index; @@ -41,15 +43,18 @@ public: Endianness getEndianness() const { return endianness; } virtual NumpyDataTypeIndex getTypeIndex() const = 0; + virtual size_t getSize() const { throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Function getSize() is not implemented"); } + virtual void setSize(size_t) { throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Function setSize() is not implemented"); } + virtual String str() const { throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Function str() is not implemented"); } -private: +protected: Endianness endianness; }; class NumpyDataTypeInt : public NumpyDataType { public: - NumpyDataTypeInt(Endianness endianness, size_t size_, bool is_signed_) : NumpyDataType(endianness), size(size_), is_signed(is_signed_) + NumpyDataTypeInt(Endianness endianness_, size_t size_, bool is_signed_) : NumpyDataType(endianness_), size(size_), is_signed(is_signed_) { switch (size) { @@ -67,6 +72,14 @@ public: return type_index; } bool isSigned() const { return is_signed; } + String str() const override + { + DB::WriteBufferFromOwnString buf; + writeChar(static_cast(endianness), buf); + writeChar(is_signed ? 'i' : 'u', buf); + writeIntText(size, buf); + return buf.str(); + } private: size_t size; @@ -76,7 +89,7 @@ private: class NumpyDataTypeFloat : public NumpyDataType { public: - NumpyDataTypeFloat(Endianness endianness, size_t size_) : NumpyDataType(endianness), size(size_) + NumpyDataTypeFloat(Endianness endianness_, size_t size_) : NumpyDataType(endianness_), size(size_) { switch (size) { @@ -92,6 +105,14 @@ public: { return type_index; } + String str() const override + { + DB::WriteBufferFromOwnString buf; + writeChar(static_cast(endianness), buf); + writeChar('f', buf); + writeIntText(size, buf); + return buf.str(); + } private: size_t size; }; @@ -99,13 +120,22 @@ private: class NumpyDataTypeString : public NumpyDataType { public: - NumpyDataTypeString(Endianness endianness, size_t size_) : NumpyDataType(endianness), size(size_) + NumpyDataTypeString(Endianness endianness_, size_t size_) : NumpyDataType(endianness_), size(size_) { type_index = NumpyDataTypeIndex::String; } NumpyDataTypeIndex getTypeIndex() const override { return type_index; } - size_t getSize() const { return size; } + size_t getSize() const override { return size; } + void setSize(size_t size_) override { size = size_; } + String str() const override + { + DB::WriteBufferFromOwnString buf; + writeChar(static_cast(endianness), buf); + writeChar('S', buf); + writeIntText(size, buf); + return buf.str(); + } private: size_t size; }; @@ -113,13 +143,13 @@ private: class NumpyDataTypeUnicode : public NumpyDataType { public: - NumpyDataTypeUnicode(Endianness endianness, size_t size_) : NumpyDataType(endianness), size(size_) + NumpyDataTypeUnicode(Endianness endianness_, size_t size_) : NumpyDataType(endianness_), size(size_) { type_index = NumpyDataTypeIndex::Unicode; } NumpyDataTypeIndex getTypeIndex() const 
override { return type_index; } - size_t getSize() const { return size * 4; } + size_t getSize() const override { return size * 4; } private: size_t size; }; diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 1f851da850a..57ca1bb49c8 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -76,6 +76,8 @@ void registerInputFormatCustomSeparated(FormatFactory & factory); void registerOutputFormatCustomSeparated(FormatFactory & factory); void registerInputFormatCapnProto(FormatFactory & factory); void registerOutputFormatCapnProto(FormatFactory & factory); +void registerInputFormatNpy(FormatFactory & factory); +void registerOutputFormatNpy(FormatFactory & factory); void registerInputFormatForm(FormatFactory & factory); /// Output only (presentational) formats. @@ -104,7 +106,6 @@ void registerInputFormatMySQLDump(FormatFactory & factory); void registerInputFormatParquetMetadata(FormatFactory & factory); void registerInputFormatDWARF(FormatFactory & factory); void registerInputFormatOne(FormatFactory & factory); -void registerInputFormatNpy(FormatFactory & factory); #if USE_HIVE void registerInputFormatHiveText(FormatFactory & factory); @@ -224,6 +225,8 @@ void registerFormats() registerOutputFormatAvro(factory); registerInputFormatArrow(factory); registerOutputFormatArrow(factory); + registerInputFormatNpy(factory); + registerOutputFormatNpy(factory); registerOutputFormatPretty(factory); registerOutputFormatPrettyCompact(factory); @@ -254,7 +257,6 @@ void registerFormats() registerInputFormatParquetMetadata(factory); registerInputFormatDWARF(factory); registerInputFormatOne(factory); - registerInputFormatNpy(factory); registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(factory); registerNonTrivialPrefixAndSuffixCheckerJSONAsString(factory); diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 048a601de81..d85bb0e7060 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -80,7 +80,7 @@ ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col) return ColumnWithTypeAndName{ nullable_res, nested_type, col.name }; } else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for DataTypeNullable"); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} for DataTypeNullable", col.dumpStructure()); } return col; } diff --git a/src/Functions/GatherUtils/Sinks.h b/src/Functions/GatherUtils/Sinks.h index a8054da1159..2aa7c147136 100644 --- a/src/Functions/GatherUtils/Sinks.h +++ b/src/Functions/GatherUtils/Sinks.h @@ -48,7 +48,7 @@ struct NumericArraySink : public ArraySinkImpl> NumericArraySink(IColumn & elements_, ColumnArray::Offsets & offsets_, size_t column_size) : elements(assert_cast(elements_).getData()), offsets(offsets_) { - offsets.resize(column_size); + offsets.resize_exact(column_size); } void next() @@ -69,7 +69,7 @@ struct NumericArraySink : public ArraySinkImpl> void reserve(size_t num_elements) { - elements.reserve(num_elements); + elements.reserve_exact(num_elements); } }; @@ -85,7 +85,7 @@ struct StringSink StringSink(ColumnString & col, size_t column_size) : elements(col.getChars()), offsets(col.getOffsets()) { - offsets.resize(column_size); + offsets.resize_exact(column_size); } void ALWAYS_INLINE next() @@ -108,7 +108,7 @@ struct StringSink void reserve(size_t num_elements) { - elements.reserve(num_elements); + elements.reserve_exact(num_elements); } }; @@ -125,7 +125,7 @@ struct FixedStringSink 
FixedStringSink(ColumnFixedString & col, size_t column_size) : elements(col.getChars()), string_size(col.getN()), total_rows(column_size) { - elements.resize(column_size * string_size); + elements.resize_exact(column_size * string_size); } void next() @@ -146,7 +146,7 @@ struct FixedStringSink void reserve(size_t num_elements) { - elements.reserve(num_elements); + elements.reserve_exact(num_elements); } }; @@ -165,7 +165,7 @@ struct GenericArraySink : public ArraySinkImpl GenericArraySink(IColumn & elements_, ColumnArray::Offsets & offsets_, size_t column_size) : elements(elements_), offsets(offsets_) { - offsets.resize(column_size); + offsets.resize_exact(column_size); } void next() @@ -210,7 +210,7 @@ struct NullableArraySink : public ArraySink void reserve(size_t num_elements) { ArraySink::reserve(num_elements); - null_map.reserve(num_elements); + null_map.reserve_exact(num_elements); } }; diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index 4e3009a695d..e5e3451fe4c 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -323,6 +323,8 @@ struct StringSource return {&elements[prev_offset], length + elem_size > offset ? std::min(elem_size, length + elem_size - offset) : 0}; return {&elements[prev_offset + elem_size - offset], std::min(length, offset)}; } + + const ColumnString::Chars & getElements() const { return elements; } }; /// Treats Enum values as Strings, modeled after StringSource @@ -517,11 +519,12 @@ struct FixedStringSource const UInt8 * pos; const UInt8 * end; size_t string_size; + const typename ColumnString::Chars & elements; + size_t row_num = 0; size_t column_size = 0; - explicit FixedStringSource(const ColumnFixedString & col) - : string_size(col.getN()) + explicit FixedStringSource(const ColumnFixedString & col) : string_size(col.getN()), elements(col.getChars()) { const auto & chars = col.getChars(); pos = chars.data(); @@ -592,6 +595,8 @@ struct FixedStringSource return {pos, length + string_size > offset ? 
std::min(string_size, length + string_size - offset) : 0}; return {pos + string_size - offset, std::min(length, offset)}; } + + const ColumnString::Chars & getElements() const { return elements; } }; diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h index f093e00f7ab..72b3ce1ca34 100644 --- a/src/Functions/LowerUpperImpl.h +++ b/src/Functions/LowerUpperImpl.h @@ -13,14 +13,14 @@ struct LowerUpperImpl ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { - res_data.resize(data.size()); + res_data.resize_exact(data.size()); res_offsets.assign(offsets); array(data.data(), data.data() + data.size(), res_data.data()); } static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data) { - res_data.resize(data.size()); + res_data.resize_exact(data.size()); array(data.data(), data.data() + data.size(), res_data.data()); } diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index 7ca98166576..bb794a0f8ed 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -1,8 +1,9 @@ #pragma once #include +#include +#include #include #include -#include #ifdef __SSE2__ #include @@ -92,7 +93,15 @@ struct LowerUpperUTF8Impl { if (data.empty()) return; - res_data.resize(data.size()); + + bool all_ascii = UTF8::isAllASCII(data.data(), data.size()); + if (all_ascii) + { + LowerUpperImpl::vector(data, offsets, res_data, res_offsets); + return; + } + + res_data.resize_exact(data.size()); res_offsets.assign(offsets); array(data.data(), data.data() + data.size(), offsets, res_data.data()); } diff --git a/src/Functions/clamp.cpp b/src/Functions/clamp.cpp new file mode 100644 index 00000000000..bb347a575e4 --- /dev/null +++ b/src/Functions/clamp.cpp @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + + +class FunctionClamp : public IFunction +{ + +public: + static constexpr auto name = "clamp"; + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 3; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + DataTypePtr getReturnTypeImpl(const DataTypes & types) const override + { + if (types.size() != 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires 3 arguments", getName()); + + return getLeastSupertype(types); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + size_t arg_size = arguments.size(); + Columns converted_columns(arg_size); + for (size_t arg = 0; arg < arg_size; ++arg) + converted_columns[arg] = castColumn(arguments[arg], result_type)->convertToFullColumnIfConst(); + + auto result_column = result_type->createColumn(); + for (size_t row_num = 0; row_num < input_rows_count; ++row_num) + { + if (converted_columns[1]->compareAt(row_num, row_num, *converted_columns[2], 1) > 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The minimum value cannot be greater than the maximum value for function {}", getName()); + + size_t best_arg = 0; + if (converted_columns[1]->compareAt(row_num, row_num, 
*converted_columns[best_arg], 1) > 0) + best_arg = 1; + else if (converted_columns[2]->compareAt(row_num, row_num, *converted_columns[best_arg], 1) < 0) + best_arg = 2; + + result_column->insertFrom(*converted_columns[best_arg], row_num); + } + + return result_column; + } + +}; + +REGISTER_FUNCTION(Clamp) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/padString.cpp b/src/Functions/padString.cpp index ccef87d83e7..0922e0ddb8a 100644 --- a/src/Functions/padString.cpp +++ b/src/Functions/padString.cpp @@ -210,19 +210,18 @@ namespace pad_string = column_pad_const->getValue(); } - PaddingChars padding_chars{pad_string}; auto col_res = ColumnString::create(); StringSink res_sink{*col_res, input_rows_count}; if (const ColumnString * col = checkAndGetColumn(column_string.get())) - executeForSource(StringSource{*col}, column_length, padding_chars, res_sink); + executeForSource(StringSource{*col}, column_length, pad_string, res_sink); else if (const ColumnFixedString * col_fixed = checkAndGetColumn(column_string.get())) - executeForSource(FixedStringSource{*col_fixed}, column_length, padding_chars, res_sink); + executeForSource(FixedStringSource{*col_fixed}, column_length, pad_string, res_sink); else if (const ColumnConst * col_const = checkAndGetColumnConst(column_string.get())) - executeForSource(ConstSource{*col_const}, column_length, padding_chars, res_sink); + executeForSource(ConstSource{*col_const}, column_length, pad_string, res_sink); else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst(column_string.get())) - executeForSource(ConstSource{*col_const_fixed}, column_length, padding_chars, res_sink); + executeForSource(ConstSource{*col_const_fixed}, column_length, pad_string, res_sink); else throw Exception( ErrorCodes::ILLEGAL_COLUMN, @@ -235,23 +234,40 @@ namespace private: template - void executeForSource( - SourceStrings && strings, - const ColumnPtr & column_length, - const PaddingChars & padding_chars, - StringSink & res_sink) const + void executeForSource(SourceStrings && strings, const ColumnPtr & column_length, const String & pad_string, StringSink & res_sink) const { - if (const auto * col_const = checkAndGetColumn(column_length.get())) - executeForSourceAndLength(std::forward(strings), ConstSource{*col_const}, padding_chars, res_sink); + const auto & chars = strings.getElements(); + bool all_ascii = UTF8::isAllASCII(reinterpret_cast(pad_string.data()), pad_string.size()) + && UTF8::isAllASCII(chars.data(), chars.size()); + bool is_actually_utf8 = is_utf8 && !all_ascii; + + if (!is_actually_utf8) + { + PaddingChars padding_chars{pad_string}; + if (const auto * col_const = checkAndGetColumn(column_length.get())) + executeForSourceAndLength( + std::forward(strings), ConstSource{*col_const}, padding_chars, res_sink); + else + executeForSourceAndLength( + std::forward(strings), GenericValueSource{*column_length}, padding_chars, res_sink); + } else - executeForSourceAndLength(std::forward(strings), GenericValueSource{*column_length}, padding_chars, res_sink); + { + PaddingChars padding_chars{pad_string}; + if (const auto * col_const = checkAndGetColumn(column_length.get())) + executeForSourceAndLength( + std::forward(strings), ConstSource{*col_const}, padding_chars, res_sink); + else + executeForSourceAndLength( + std::forward(strings), GenericValueSource{*column_length}, padding_chars, res_sink); + } } - template + template void executeForSourceAndLength( SourceStrings && strings, SourceLengths && lengths, - const PaddingChars & padding_chars, + const 
PaddingChars & padding_chars, StringSink & res_sink) const { bool is_const_new_length = lengths.isConst(); @@ -263,7 +279,7 @@ namespace for (; !res_sink.isEnd(); res_sink.next(), strings.next(), lengths.next()) { auto str = strings.getWhole(); - ssize_t current_length = getLengthOfSlice(str); + ssize_t current_length = getLengthOfSlice(str); if (!res_sink.rowNum() || !is_const_new_length) { @@ -293,7 +309,7 @@ namespace } else if (new_length < current_length) { - str = removeSuffixFromSlice(str, current_length - new_length); + str = removeSuffixFromSlice(str, current_length - new_length); writeSlice(str, res_sink); } else if (new_length > current_length) diff --git a/src/Functions/reverse.cpp b/src/Functions/reverse.cpp index 32b998523c7..39608b77997 100644 --- a/src/Functions/reverse.cpp +++ b/src/Functions/reverse.cpp @@ -1,10 +1,10 @@ #include -#include #include #include #include #include #include +#include "reverse.h" namespace DB @@ -17,42 +17,6 @@ namespace ErrorCodes namespace { - -/** Reverse the string as a sequence of bytes. - */ -struct ReverseImpl -{ - static void vector(const ColumnString::Chars & data, - const ColumnString::Offsets & offsets, - ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) - { - res_data.resize(data.size()); - res_offsets.assign(offsets); - size_t size = offsets.size(); - - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) - { - for (size_t j = prev_offset; j < offsets[i] - 1; ++j) - res_data[j] = data[offsets[i] + prev_offset - 2 - j]; - res_data[offsets[i] - 1] = 0; - prev_offset = offsets[i]; - } - } - - static void vectorFixed(const ColumnString::Chars & data, size_t n, ColumnString::Chars & res_data) - { - res_data.resize(data.size()); - size_t size = data.size() / n; - - for (size_t i = 0; i < size; ++i) - for (size_t j = i * n; j < (i + 1) * n; ++j) - res_data[j] = data[(i * 2 + 1) * n - j - 1]; - } -}; - - class FunctionReverse : public IFunction { public: diff --git a/src/Functions/reverse.h b/src/Functions/reverse.h new file mode 100644 index 00000000000..5f999af4297 --- /dev/null +++ b/src/Functions/reverse.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +namespace DB +{ + +/** Reverse the string as a sequence of bytes. 
+ */ +struct ReverseImpl +{ + static void vector(const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + res_data.resize_exact(data.size()); + res_offsets.assign(offsets); + size_t size = offsets.size(); + + ColumnString::Offset prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + for (size_t j = prev_offset; j < offsets[i] - 1; ++j) + res_data[j] = data[offsets[i] + prev_offset - 2 - j]; + res_data[offsets[i] - 1] = 0; + prev_offset = offsets[i]; + } + } + + static void vectorFixed(const ColumnString::Chars & data, size_t n, ColumnString::Chars & res_data) + { + res_data.resize_exact(data.size()); + size_t size = data.size() / n; + + for (size_t i = 0; i < size; ++i) + for (size_t j = i * n; j < (i + 1) * n; ++j) + res_data[j] = data[(i * 2 + 1) * n - j - 1]; + } +}; + +} diff --git a/src/Functions/reverseUTF8.cpp b/src/Functions/reverseUTF8.cpp index 8a76af05d86..4ea861919a1 100644 --- a/src/Functions/reverseUTF8.cpp +++ b/src/Functions/reverseUTF8.cpp @@ -1,7 +1,9 @@ -#include #include +#include #include #include +#include +#include "reverse.h" namespace DB @@ -25,10 +27,18 @@ struct ReverseUTF8Impl ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { + bool all_ascii = UTF8::isAllASCII(data.data(), data.size()); + if (all_ascii) + { + ReverseImpl::vector(data, offsets, res_data, res_offsets); + return; + } + res_data.resize(data.size()); res_offsets.assign(offsets); size_t size = offsets.size(); + ColumnString::Offset prev_offset = 0; for (size_t i = 0; i < size; ++i) { diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp index e809914f5f0..122f83d758b 100644 --- a/src/Functions/substring.cpp +++ b/src/Functions/substring.cpp @@ -148,9 +148,23 @@ public: if constexpr (is_utf8) { if (const ColumnString * col = checkAndGetColumn(column_string.get())) - return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, UTF8StringSource(*col), input_rows_count); + { + bool all_ascii = UTF8::isAllASCII(col->getChars().data(), col->getChars().size()); + if (all_ascii) + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, StringSource(*col), input_rows_count); + else + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, UTF8StringSource(*col), input_rows_count); + } + if (const ColumnConst * col_const = checkAndGetColumnConst(column_string.get())) - return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const), input_rows_count); + { + StringRef str_ref = col_const->getDataAt(0); + bool all_ascii = UTF8::isAllASCII(reinterpret_cast(str_ref.data), str_ref.size); + if (all_ascii) + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const), input_rows_count); + else + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const), input_rows_count); + } throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); } else diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 5f3f054b624..74474cb4b23 100644 --- a/src/Functions/substringIndex.cpp +++ 
diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 5f3f054b624..74474cb4b23 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -129,8 +129,10 @@ namespace res_data.reserve(str_column->getChars().size() / 2); res_offsets.reserve(rows); + bool all_ascii = UTF8::isAllASCII(str_column->getChars().data(), str_column->getChars().size()) + && UTF8::isAllASCII(reinterpret_cast<const UInt8 *>(delim.data()), delim.size()); std::unique_ptr<PositionCaseSensitiveUTF8StringSearcher> searcher - = !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8StringSearcher>(delim.data(), delim.size()); + = !is_utf8 || all_ascii ? nullptr : std::make_unique<PositionCaseSensitiveUTF8StringSearcher>(delim.data(), delim.size()); for (size_t i = 0; i < rows; ++i) { @@ -140,10 +142,12 @@ StringRef res_ref; if constexpr (!is_utf8) res_ref = substringIndex(str_ref, delim[0], count); + else if (all_ascii) + res_ref = substringIndex(str_ref, delim[0], count); else res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); - appendToResultColumn(res_ref, res_data, res_offsets); + appendToResultColumn<true>(res_ref, res_data, res_offsets); } } @@ -158,8 +162,10 @@ res_data.reserve(str_column->getChars().size() / 2); res_offsets.reserve(rows); + bool all_ascii = UTF8::isAllASCII(str_column->getChars().data(), str_column->getChars().size()) + && UTF8::isAllASCII(reinterpret_cast<const UInt8 *>(delim.data()), delim.size()); std::unique_ptr<PositionCaseSensitiveUTF8StringSearcher> searcher - = !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8StringSearcher>(delim.data(), delim.size()); + = !is_utf8 || all_ascii ? nullptr : std::make_unique<PositionCaseSensitiveUTF8StringSearcher>(delim.data(), delim.size()); for (size_t i = 0; i < rows; ++i) { @@ -168,10 +174,12 @@ StringRef res_ref; if constexpr (!is_utf8) res_ref = substringIndex(str_ref, delim[0], count); + else if (all_ascii) + res_ref = substringIndex(str_ref, delim[0], count); else res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); - appendToResultColumn(res_ref, res_data, res_offsets); + appendToResultColumn<true>(res_ref, res_data, res_offsets); } } @@ -186,8 +194,10 @@ namespace res_data.reserve(str.size() * rows / 2); res_offsets.reserve(rows); + bool all_ascii = UTF8::isAllASCII(reinterpret_cast<const UInt8 *>(str.data()), str.size()) + && UTF8::isAllASCII(reinterpret_cast<const UInt8 *>(delim.data()), delim.size()); std::unique_ptr<PositionCaseSensitiveUTF8StringSearcher> searcher - = !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8StringSearcher>(delim.data(), delim.size()); + = !is_utf8 || all_ascii ? nullptr : std::make_unique<PositionCaseSensitiveUTF8StringSearcher>(delim.data(), delim.size());
StringRef str_ref{str.data(), str.size()}; for (size_t i = 0; i < rows; ++i) @@ -197,18 +207,26 @@ namespace StringRef res_ref; if constexpr (!is_utf8) res_ref = substringIndex(str_ref, delim[0], count); + else if (all_ascii) + res_ref = substringIndex(str_ref, delim[0], count); else res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count); - appendToResultColumn(res_ref, res_data, res_offsets); + appendToResultColumn<false>(res_ref, res_data, res_offsets); } } + template <bool padded> static void appendToResultColumn(const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { size_t res_offset = res_data.size(); res_data.resize(res_offset + res_ref.size + 1); - memcpy(&res_data[res_offset], res_ref.data, res_ref.size); + + if constexpr (padded) + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size); + else + memcpy(&res_data[res_offset], res_ref.data, res_ref.size); + res_offset += res_ref.size; res_data[res_offset] = 0; ++res_offset; diff --git a/src/Functions/trim.cpp b/src/Functions/trim.cpp index dd51c606ff7..1f0011b8e99 100644 --- a/src/Functions/trim.cpp +++ b/src/Functions/trim.cpp @@ -46,8 +46,8 @@ public: ColumnString::Offsets & res_offsets) { size_t size = offsets.size(); - res_offsets.resize(size); - res_data.reserve(data.size()); + res_offsets.resize_exact(size); + res_data.reserve_exact(data.size()); size_t prev_offset = 0; size_t res_offset = 0; diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 813546aa052..8823af55936 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -191,10 +191,14 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons result = sendRequest(attempt, range_begin, range_begin + n - 1); std::istream & istr = result->GetBody(); - copyFromIStreamWithProgressCallback(istr, to, n, progress_callback, &bytes_copied); + bool cancelled = false; + copyFromIStreamWithProgressCallback(istr, to, n, progress_callback, &bytes_copied, &cancelled); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Bytes, bytes_copied); + if (cancelled) + return initial_n - n + bytes_copied; + if (read_settings.remote_throttler) read_settings.remote_throttler->add(bytes_copied, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 0db998c14fc..5f438a7e5f9 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -35,10 +35,17 @@ #include #include #include +#include "Core/Joins.h" +#include "Interpreters/TemporaryDataOnDisk.h" #include #include +namespace CurrentMetrics +{ + extern const Metric TemporaryFilesForJoin; +} + namespace DB { @@ -63,6 +70,7 @@ struct NotProcessedCrossJoin : public ExtraBlock { size_t left_position; size_t right_block; + std::unique_ptr<TemporaryFileStream::Reader> reader; }; @@ -249,6 +257,10 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s , instance_id(instance_id_) , asof_inequality(table_join->getAsofInequality()) , data(std::make_shared<RightTableData>()) + , tmp_data( + table_join_->getTempDataOnDisk() + ? std::make_unique<TemporaryDataOnDisk>(table_join_->getTempDataOnDisk(), CurrentMetrics::TemporaryFilesForJoin) + : nullptr) , right_sample_block(right_sample_block_) , max_joined_block_rows(table_join->maxJoinedBlockRows()) , instance_log_id(!instance_id_.empty() ? 
"(" + instance_id_ + ") " : "") @@ -827,6 +839,21 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) if (shrink_blocks) block_to_save = block_to_save.shrinkToFit(); + size_t max_bytes_in_join = table_join->sizeLimits().max_bytes; + size_t max_rows_in_join = table_join->sizeLimits().max_rows; + + if (kind == JoinKind::Cross && tmp_data + && (tmp_stream || (max_bytes_in_join && getTotalByteCount() + block_to_save.allocatedBytes() >= max_bytes_in_join) + || (max_rows_in_join && getTotalRowCount() + block_to_save.rows() >= max_rows_in_join))) + { + if (tmp_stream == nullptr) + { + tmp_stream = &tmp_data->createStream(right_sample_block); + } + tmp_stream->write(block_to_save); + return true; + } + size_t total_rows = 0; size_t total_bytes = 0; { @@ -944,7 +971,6 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) shrinkStoredBlocksToFit(total_bytes); - return table_join->sizeLimits().check(total_rows, total_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); } @@ -2238,11 +2264,13 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) { size_t start_left_row = 0; size_t start_right_block = 0; + std::unique_ptr reader = nullptr; if (not_processed) { auto & continuation = static_cast(*not_processed); start_left_row = continuation.left_position; start_right_block = continuation.right_block; + reader = std::move(continuation.reader); not_processed.reset(); } @@ -2271,18 +2299,12 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) size_t rows_left = block.rows(); size_t rows_added = 0; - for (size_t left_row = start_left_row; left_row < rows_left; ++left_row) { size_t block_number = 0; - for (const Block & compressed_block_right : data->blocks) + + auto process_right_block = [&](const Block & block_right) { - ++block_number; - if (block_number < start_right_block) - continue; - - auto block_right = compressed_block_right.decompress(); - size_t rows_right = block_right.rows(); rows_added += rows_right; @@ -2294,6 +2316,44 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const IColumn & column_right = *block_right.getByPosition(col_num).column; dst_columns[num_existing_columns + col_num]->insertRangeFrom(column_right, 0, rows_right); } + }; + + for (const Block & compressed_block_right : data->blocks) + { + ++block_number; + if (block_number < start_right_block) + continue; + + auto block_right = compressed_block_right.decompress(); + process_right_block(block_right); + if (rows_added > max_joined_block_rows) + { + break; + } + } + + if (tmp_stream && rows_added <= max_joined_block_rows) + { + if (reader == nullptr) + { + tmp_stream->finishWritingAsyncSafe(); + reader = tmp_stream->getReadStream(); + } + while (auto block_right = reader->read()) + { + ++block_number; + process_right_block(block_right); + if (rows_added > max_joined_block_rows) + { + break; + } + } + + /// It means, that reader->read() returned {} + if (rows_added <= max_joined_block_rows) + { + reader.reset(); + } } start_right_block = 0; @@ -2301,7 +2361,7 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) if (rows_added > max_joined_block_rows) { not_processed = std::make_shared( - NotProcessedCrossJoin{{block.cloneEmpty()}, left_row, block_number + 1}); + NotProcessedCrossJoin{{block.cloneEmpty()}, left_row, block_number + 1, std::move(reader)}); not_processed->block.swap(block); break; } diff --git a/src/Interpreters/HashJoin.h 
b/src/Interpreters/HashJoin.h index 454f38ce08b..86db8943926 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -26,6 +26,7 @@ #include #include +#include namespace DB { @@ -442,6 +443,10 @@ private: RightTableDataPtr data; std::vector key_sizes; + /// Needed to do external cross join + TemporaryDataOnDiskPtr tmp_data; + TemporaryFileStream* tmp_stream{nullptr}; + /// Block with columns from the right-side table. Block right_sample_block; /// Block with columns from the right-side table except key columns. diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 539d7a59f6f..d4af111eec0 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include @@ -38,22 +40,47 @@ namespace ErrorCodes namespace { -ASTPtr normalizeAndValidateQuery(const ASTPtr & query) +ASTPtr normalizeAndValidateQuery(const ASTPtr & query, const Names & column_names) { + ASTPtr result_query; + if (query->as() || query->as()) - { - return query; - } + result_query = query; else if (auto * subquery = query->as()) - { - return subquery->children[0]; - } + result_query = subquery->children[0]; else - { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected ASTSelectWithUnionQuery or ASTSelectQuery. Actual {}", query->formatForErrorMessage()); - } + + if (column_names.empty()) + return result_query; + + /// The initial query the VIEW references to is wrapped here with another SELECT query to allow reading only necessary columns. + auto select_query = std::make_shared(); + + auto result_table_expression_ast = std::make_shared(); + result_table_expression_ast->children.push_back(std::make_shared(std::move(result_query))); + result_table_expression_ast->subquery = result_table_expression_ast->children.back(); + + auto tables_in_select_query_element_ast = std::make_shared(); + tables_in_select_query_element_ast->children.push_back(std::move(result_table_expression_ast)); + tables_in_select_query_element_ast->table_expression = tables_in_select_query_element_ast->children.back(); + + ASTPtr tables_in_select_query_ast = std::make_shared(); + tables_in_select_query_ast->children.push_back(std::move(tables_in_select_query_element_ast)); + + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables_in_select_query_ast)); + + auto projection_expression_list_ast = std::make_shared(); + projection_expression_list_ast->children.reserve(column_names.size()); + + for (const auto & column_name : column_names) + projection_expression_list_ast->children.push_back(std::make_shared(column_name)); + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(projection_expression_list_ast)); + + return select_query; } ContextMutablePtr buildContext(const ContextPtr & context, const SelectQueryOptions & select_query_options) @@ -125,8 +152,9 @@ QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( const ASTPtr & query_, const ContextPtr & context_, - const SelectQueryOptions & select_query_options_) - : query(normalizeAndValidateQuery(query_)) + const SelectQueryOptions & select_query_options_, + const Names & column_names) + : query(normalizeAndValidateQuery(query_, column_names)) , context(buildContext(context_, select_query_options_)) , select_query_options(select_query_options_) , 
query_tree(buildQueryTreeAndRunPasses(query, select_query_options, context, nullptr /*storage*/)) @@ -138,8 +166,9 @@ InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( const ASTPtr & query_, const ContextPtr & context_, const StoragePtr & storage_, - const SelectQueryOptions & select_query_options_) - : query(normalizeAndValidateQuery(query_)) + const SelectQueryOptions & select_query_options_, + const Names & column_names) + : query(normalizeAndValidateQuery(query_, column_names)) , context(buildContext(context_, select_query_options_)) , select_query_options(select_query_options_) , query_tree(buildQueryTreeAndRunPasses(query, select_query_options, context, storage_)) diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.h b/src/Interpreters/InterpreterSelectQueryAnalyzer.h index 2ad7e6a50f3..73c524cbe28 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.h +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.h @@ -16,7 +16,8 @@ public: /// Initialize interpreter with query AST InterpreterSelectQueryAnalyzer(const ASTPtr & query_, const ContextPtr & context_, - const SelectQueryOptions & select_query_options_); + const SelectQueryOptions & select_query_options_, + const Names & column_names = {}); /** Initialize interpreter with query AST and storage. * After query tree is built left most table expression is replaced with table node that @@ -25,7 +26,8 @@ public: InterpreterSelectQueryAnalyzer(const ASTPtr & query_, const ContextPtr & context_, const StoragePtr & storage_, - const SelectQueryOptions & select_query_options_); + const SelectQueryOptions & select_query_options_, + const Names & column_names = {}); /** Initialize interpreter with query tree. * No query tree passes are applied. diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 5b549a19083..457ed3ef4a6 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -310,7 +310,7 @@ std::shared_ptr JoinedTables::makeTableJoin(const ASTSelectQuery & se auto settings = context->getSettingsRef(); MultiEnum join_algorithm = settings.join_algorithm; bool try_use_direct_join = join_algorithm.isSet(JoinAlgorithm::DIRECT) || join_algorithm.isSet(JoinAlgorithm::DEFAULT); - auto table_join = std::make_shared(settings, context->getGlobalTemporaryVolume()); + auto table_join = std::make_shared(settings, context->getGlobalTemporaryVolume(), context->getTempDataOnDisk()); const ASTTablesInSelectQueryElement * ast_join = select_query_.join(); const auto & table_to_join = ast_join->table_expression->as(); diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 48d59dd3b24..1ee8ca14b2f 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -103,7 +103,7 @@ bool forAllKeys(OnExpr & expressions, Func callback) } -TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_) +TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_, TemporaryDataOnDiskScopePtr tmp_data_) : size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode}) , default_max_bytes(settings.default_max_bytes_in_join) , join_use_nulls(settings.join_use_nulls) @@ -117,6 +117,7 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_) , temporary_files_codec(settings.temporary_files_codec) , max_memory_usage(settings.max_memory_usage) , tmp_volume(tmp_volume_) + , tmp_data(tmp_data_) { } diff --git a/src/Interpreters/TableJoin.h 
b/src/Interpreters/TableJoin.h index 88905edd3e8..8e83233e54c 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -188,6 +189,8 @@ private: VolumePtr tmp_volume; + TemporaryDataOnDiskScopePtr tmp_data; + std::shared_ptr right_storage_join; std::shared_ptr right_kv_storage; @@ -233,7 +236,7 @@ private: public: TableJoin() = default; - TableJoin(const Settings & settings, VolumePtr tmp_volume_); + TableJoin(const Settings & settings, VolumePtr tmp_volume_, TemporaryDataOnDiskScopePtr tmp_data_); /// for StorageJoin TableJoin(SizeLimits limits, bool use_nulls, JoinKind kind, JoinStrictness strictness, @@ -259,6 +262,8 @@ public: VolumePtr getGlobalTemporaryVolume() { return tmp_volume; } + TemporaryDataOnDiskScopePtr getTempDataOnDisk() { return tmp_data; } + ActionsDAGPtr createJoinedBlockActions(ContextPtr context) const; const std::vector & getEnabledJoinAlgorithms() const { return join_algorithm; } diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 9a237738b3e..a74b5bba2b9 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -1,12 +1,11 @@ +#include +#include #include #include -#include #include -#include #include #include -#include #include #include #include @@ -14,6 +13,7 @@ #include #include +#include "Common/Exception.h" namespace ProfileEvents { @@ -224,33 +224,26 @@ struct TemporaryFileStream::OutputWriter bool finalized = false; }; -struct TemporaryFileStream::InputReader +TemporaryFileStream::Reader::Reader(const String & path, const Block & header_, size_t size) + : in_file_buf(path, size ? std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) + , in_compressed_buf(in_file_buf) + , in_reader(in_compressed_buf, header_, DBMS_TCP_PROTOCOL_VERSION) { - InputReader(const String & path, const Block & header_, size_t size = 0) - : in_file_buf(path, size ? std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) - , in_compressed_buf(in_file_buf) - , in_reader(in_compressed_buf, header_, DBMS_TCP_PROTOCOL_VERSION) - { - LOG_TEST(getLogger("TemporaryFileStream"), "Reading {} from {}", header_.dumpStructure(), path); - } + LOG_TEST(getLogger("TemporaryFileStream"), "Reading {} from {}", header_.dumpStructure(), path); +} - explicit InputReader(const String & path, size_t size = 0) - : in_file_buf(path, size ? std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) - , in_compressed_buf(in_file_buf) - , in_reader(in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION) - { - LOG_TEST(getLogger("TemporaryFileStream"), "Reading from {}", path); - } +TemporaryFileStream::Reader::Reader(const String & path, size_t size) + : in_file_buf(path, size ? 
std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) + , in_compressed_buf(in_file_buf) + , in_reader(in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION) +{ + LOG_TEST(getLogger("TemporaryFileStream"), "Reading from {}", path); +} - Block read() - { - return in_reader.read(); - } - - ReadBufferFromFile in_file_buf; - CompressedReadBuffer in_compressed_buf; - NativeReader in_reader; -}; +Block TemporaryFileStream::Reader::read() +{ + return in_reader.read(); +} TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_) : parent(parent_) @@ -310,6 +303,12 @@ TemporaryFileStream::Stat TemporaryFileStream::finishWriting() return stat; } +TemporaryFileStream::Stat TemporaryFileStream::finishWritingAsyncSafe() +{ + std::call_once(finish_writing, [this]{ finishWriting(); }); + return stat; +} + bool TemporaryFileStream::isWriteFinished() const { assert(in_reader == nullptr || out_writer == nullptr); @@ -326,7 +325,7 @@ Block TemporaryFileStream::read() if (!in_reader) { - in_reader = std::make_unique<InputReader>(getPath(), header, getSize()); + in_reader = std::make_unique<Reader>(getPath(), header, getSize()); } Block block = in_reader->read(); @@ -338,6 +337,17 @@ return block; } +std::unique_ptr<TemporaryFileStream::Reader> TemporaryFileStream::getReadStream() +{ + if (!isWriteFinished()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing has not been finished"); + + if (isEof()) + return nullptr; + + return std::make_unique<Reader>(getPath(), header, getSize()); +} + void TemporaryFileStream::updateAllocAndCheck() { assert(out_writer); diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index 40100a62b44..488eed70da9 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -1,7 +1,12 @@ #pragma once +#include +#include #include +#include +#include +#include #include #include #include @@ -132,12 +137,25 @@ private: /* * Data can be written into this stream and then read. - * After finish writing, call `finishWriting` and then `read` to read the data. + * After writing is finished, call `finishWriting` and then either `read` or `getReadStream` (only one of the two) to read the data. * Account amount of data written to disk in parent scope. */ class TemporaryFileStream : boost::noncopyable { public: + struct Reader + { + Reader(const String & path, const Block & header_, size_t size = 0); + + explicit Reader(const String & path, size_t size = 0); + + Block read(); + + ReadBufferFromFile in_file_buf; + CompressedReadBuffer in_compressed_buf; + NativeReader in_reader; + }; + struct Stat { /// Statistics for file @@ -154,8 +172,11 @@ public: void flush(); Stat finishWriting(); + Stat finishWritingAsyncSafe(); bool isWriteFinished() const; + std::unique_ptr<Reader> getReadStream(); + Block read(); String getPath() const; @@ -184,11 +205,12 @@ private: Stat stat; + std::once_flag finish_writing; + struct OutputWriter; std::unique_ptr<OutputWriter> out_writer; - struct InputReader; - std::unique_ptr<InputReader> in_reader; + std::unique_ptr<Reader> in_reader; }; }
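For orientation, the intended calling sequence of the new stream API, as the cross-join spilling in HashJoin.cpp uses it. A condensed sketch; spillAndRescan and processBlock are placeholders and error handling is omitted:

#include <Interpreters/TemporaryDataOnDisk.h>

void spillAndRescan(TemporaryDataOnDisk & tmp_data, const Block & header, const Blocks & blocks_to_spill)
{
    TemporaryFileStream & stream = tmp_data.createStream(header);
    for (const auto & block : blocks_to_spill)
        stream.write(block);

    /// Idempotent: guarded by std::call_once, safe if two callers race to finish.
    stream.finishWritingAsyncSafe();

    /// Unlike read(), getReadStream() hands out an independent cursor over the
    /// spilled file, so the blocks can be re-scanned, e.g. once per left block.
    auto reader = stream.getReadStream();
    while (auto block = reader->read())
        processBlock(block);
}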
diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index d62ad83c6b2..52a0d748d63 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -36,6 +36,12 @@ public: void visitImpl(const QueryTreeNodePtr & node) { + if (const auto * constant_node = node->as<ConstantNode>()) + /// Collect sets from source expression as well. + /// Most likely we will not build them, but those sets could be requested during analysis. + if (constant_node->hasSourceExpression()) + collectSets(constant_node->getSourceExpression(), planner_context); + auto * function_node = node->as<FunctionNode>(); if (!function_node || !isNameOfInFunction(function_node->getFunctionName())) return; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index c9c57233fb0..1b2a55a50b0 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1207,7 +1207,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ } } - auto table_join = std::make_shared<TableJoin>(settings, query_context->getGlobalTemporaryVolume()); + auto table_join = std::make_shared<TableJoin>(settings, query_context->getGlobalTemporaryVolume(), query_context->getTempDataOnDisk()); table_join->getTableJoin() = join_node.toASTTableJoin()->as(); if (join_constant) diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index bd8940b96d8..c410b04f209 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -328,7 +328,7 @@ void buildJoinClause( { throw Exception( ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "JOIN {} join expression contains column from left and right table", + "JOIN {} join expression contains column from left and right table, you may try experimental support of this feature by `SET allow_experimental_join_condition = 1`", join_node.formatASTForErrorMessage()); } } @@ -363,7 +363,7 @@ void buildJoinClause( { throw Exception( ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "JOIN {} join expression contains column from left and right table", + "JOIN {} join expression contains column from left and right table, you may try experimental support of this feature by `SET allow_experimental_join_condition = 1`", join_node.formatASTForErrorMessage()); } } diff --git a/src/Processors/Formats/Impl/NpyOutputFormat.cpp b/src/Processors/Formats/Impl/NpyOutputFormat.cpp new file mode 100644 index 00000000000..e02787b4f70 --- /dev/null +++ b/src/Processors/Formats/Impl/NpyOutputFormat.cpp @@ -0,0 +1,269 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TOO_MANY_COLUMNS; + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +template <typename ColumnType, typename ValueType> +void writeNumpyNumbers(const ColumnPtr & column, WriteBuffer & buf) +{ + const auto * number_column = assert_cast<const ColumnType *>(column.get()); + for (size_t i = 0; i < number_column->size(); ++i) + writeBinaryLittleEndian(ValueType(number_column->getElement(i)), buf); +} + +template <typename ColumnType> +void writeNumpyStrings(const ColumnPtr & column, size_t length, WriteBuffer & buf) +{ + const auto * string_column = assert_cast<const ColumnType *>(column.get()); + for (size_t i = 0; i < string_column->size(); ++i) + { + auto data = string_column->getDataAt(i); + buf.write(data.data, data.size); + writeChar(0, length - data.size, buf); + } +} + +} + +String NpyOutputFormat::shapeStr() const +{ + WriteBufferFromOwnString shape; + writeIntText(num_rows, shape); + writeChar(',', shape); + for (UInt64 dim : numpy_shape) + { + writeIntText(dim, shape); + writeChar(',', shape); + } + + return shape.str(); +}
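+/// Example: for two rows of Array(Array(Int8)) with per-row shape [2][1],
+/// num_rows = 2 and numpy_shape = {2, 1}, so shapeStr() returns "2,2,1," and the
+/// header will carry a dict like {'descr':'<i1','fortran_order':False,'shape':(2,2,1,),}
+/// (NumPy accepts the trailing comma inside the shape tuple).
+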
+NpyOutputFormat::NpyOutputFormat(WriteBuffer & out_, const Block & header_) : IOutputFormat(header_, out_) +{ + const auto & header = getPort(PortKind::Main).getHeader(); + auto data_types = header.getDataTypes(); + if (data_types.size() > 1) + throw Exception(ErrorCodes::TOO_MANY_COLUMNS, "Expected single column for Npy output format, got {}", data_types.size()); + data_type = data_types[0]; + + if (!getNumpyDataType(data_type)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type {} is not supported for Npy output format", nested_data_type->getName()); +} + +bool NpyOutputFormat::getNumpyDataType(const DataTypePtr & type) +{ + switch (type->getTypeId()) + { + case TypeIndex::Int8: + numpy_data_type = std::make_shared<NumpyDataTypeInt>(NumpyDataType::Endianness::LITTLE, sizeof(Int8), true); + break; + case TypeIndex::Int16: + numpy_data_type = std::make_shared<NumpyDataTypeInt>(NumpyDataType::Endianness::LITTLE, sizeof(Int16), true); + break; + case TypeIndex::Int32: + numpy_data_type = std::make_shared<NumpyDataTypeInt>(NumpyDataType::Endianness::LITTLE, sizeof(Int32), true); + break; + case TypeIndex::Int64: + numpy_data_type = std::make_shared<NumpyDataTypeInt>(NumpyDataType::Endianness::LITTLE, sizeof(Int64), true); + break; + case TypeIndex::UInt8: + numpy_data_type = std::make_shared<NumpyDataTypeInt>(NumpyDataType::Endianness::LITTLE, sizeof(UInt8), false); + break; + case TypeIndex::UInt16: + numpy_data_type = std::make_shared<NumpyDataTypeInt>(NumpyDataType::Endianness::LITTLE, sizeof(UInt16), false); + break; + case TypeIndex::UInt32: + numpy_data_type = std::make_shared<NumpyDataTypeInt>(NumpyDataType::Endianness::LITTLE, sizeof(UInt32), false); + break; + case TypeIndex::UInt64: + numpy_data_type = std::make_shared<NumpyDataTypeInt>(NumpyDataType::Endianness::LITTLE, sizeof(UInt64), false); + break; + case TypeIndex::Float32: + numpy_data_type = std::make_shared<NumpyDataTypeFloat>(NumpyDataType::Endianness::LITTLE, sizeof(Float32)); + break; + case TypeIndex::Float64: + numpy_data_type = std::make_shared<NumpyDataTypeFloat>(NumpyDataType::Endianness::LITTLE, sizeof(Float64)); + break; + case TypeIndex::FixedString: + numpy_data_type = std::make_shared<NumpyDataTypeString>( + NumpyDataType::Endianness::NONE, assert_cast<const DataTypeFixedString *>(type.get())->getN()); + break; + case TypeIndex::String: + numpy_data_type = std::make_shared<NumpyDataTypeString>(NumpyDataType::Endianness::NONE, 0); + break; + case TypeIndex::Array: + return getNumpyDataType(assert_cast<const DataTypeArray *>(type.get())->getNestedType()); + default: + nested_data_type = type; + return false; + } + + nested_data_type = type; + return true; +} + +void NpyOutputFormat::consume(Chunk chunk) +{ + if (!invalid_shape) + { + num_rows += chunk.getNumRows(); + const auto & column = chunk.getColumns()[0]; + + if (!is_initialized) + { + initShape(column); + is_initialized = true; + } + + ColumnPtr nested_column = column; + checkShape(nested_column); + updateSizeIfTypeString(nested_column); + columns.push_back(nested_column); + } +} + +void NpyOutputFormat::initShape(const ColumnPtr & column) +{ + ColumnPtr nested_column = column; + while (const auto * array_column = typeid_cast<const ColumnArray *>(nested_column.get())) + { + auto dim = array_column->getOffsets()[0]; + invalid_shape = dim == 0; + numpy_shape.push_back(dim); + nested_column = array_column->getDataPtr(); + } + + if (invalid_shape) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Shape ({}) is invalid, as dimension size cannot be 0", shapeStr()); +} + +void NpyOutputFormat::checkShape(ColumnPtr & column) +{ + int dim = 0; + while (const auto * array_column = typeid_cast<const ColumnArray *>(column.get())) + { + const auto & array_offset = array_column->getOffsets(); + + for (size_t i = 0; i < array_offset.size(); ++i) + if (array_offset[i] - array_offset[i - 1] != numpy_shape[dim]) + { + invalid_shape = true; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ClickHouse doesn't support object types, cannot format ragged nested sequences (which is a list of arrays with different shapes)"); + } + + column = array_column->getDataPtr(); + dim += 1; + } +}
+void NpyOutputFormat::updateSizeIfTypeString(const ColumnPtr & column) +{ + if (nested_data_type->getTypeId() == TypeIndex::String) + { + const auto & string_offsets = assert_cast<const ColumnString *>(column.get())->getOffsets(); + for (size_t i = 0; i < string_offsets.size(); ++i) + { + size_t string_length = static_cast<size_t>(string_offsets[i] - 1 - string_offsets[i - 1]); + if (numpy_data_type->getSize() < string_length) + numpy_data_type->setSize(string_length); + } + } +} + +void NpyOutputFormat::finalizeImpl() +{ + if (!invalid_shape) + { + writeHeader(); + writeColumns(); + } +} + +void NpyOutputFormat::writeHeader() +{ + String dict = "{'descr':'" + numpy_data_type->str() + "','fortran_order':False,'shape':(" + shapeStr() + "),}"; + String padding = "\n"; + + /// Pad the header so that its total length is divisible by 64. + size_t dict_length = dict.length() + 1; + size_t header_length = STATIC_HEADER_LENGTH + sizeof(UInt32) + dict_length; + if (header_length % 64) + { + header_length = ((header_length / 64) + 1) * 64; + dict_length = header_length - STATIC_HEADER_LENGTH - sizeof(UInt32); + padding = std::string(dict_length - dict.length(), '\x20'); + padding.back() = '\n'; + } + + out.write(STATIC_HEADER, STATIC_HEADER_LENGTH); + writeBinaryLittleEndian(static_cast<UInt32>(dict_length), out); + out.write(dict.data(), dict.length()); + out.write(padding.data(), padding.length()); +} + +void NpyOutputFormat::writeColumns() +{ + for (const auto & column : columns) + { + switch (nested_data_type->getTypeId()) + { + case TypeIndex::Int8: writeNumpyNumbers<ColumnInt8, Int8>(column, out); break; + case TypeIndex::Int16: writeNumpyNumbers<ColumnInt16, Int16>(column, out); break; + case TypeIndex::Int32: writeNumpyNumbers<ColumnInt32, Int32>(column, out); break; + case TypeIndex::Int64: writeNumpyNumbers<ColumnInt64, Int64>(column, out); break; + case TypeIndex::UInt8: writeNumpyNumbers<ColumnUInt8, UInt8>(column, out); break; + case TypeIndex::UInt16: writeNumpyNumbers<ColumnUInt16, UInt16>(column, out); break; + case TypeIndex::UInt32: writeNumpyNumbers<ColumnUInt32, UInt32>(column, out); break; + case TypeIndex::UInt64: writeNumpyNumbers<ColumnUInt64, UInt64>(column, out); break; + case TypeIndex::Float32: writeNumpyNumbers<ColumnFloat32, Float32>(column, out); break; + case TypeIndex::Float64: writeNumpyNumbers<ColumnFloat64, Float64>(column, out); break; + case TypeIndex::FixedString: + writeNumpyStrings<ColumnFixedString>(column, numpy_data_type->getSize(), out); + break; + case TypeIndex::String: + writeNumpyStrings<ColumnString>(column, numpy_data_type->getSize(), out); + break; + default: + break; + } + } +} + +void registerOutputFormatNpy(FormatFactory & factory) +{ + factory.registerOutputFormat("Npy", []( + WriteBuffer & buf, + const Block & sample, + const FormatSettings &) + { + return std::make_shared<NpyOutputFormat>(buf, sample); + }); + factory.markFormatHasNoAppendSupport("Npy"); +} + +}
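To make the header arithmetic in writeHeader() above concrete: the full header is the 8-byte magic ("\x93NUMPY" plus version 3.0), a 4-byte little-endian dict length, the dict text, and padding, with the total rounded up to a multiple of 64. A standalone sketch of the same rule (the function name is illustrative, not part of the PR):

#include <cstddef>

size_t alignedDictLength(size_t dict_text_length)
{
    const size_t static_header_length = 8;              /// magic + version
    size_t dict_length = dict_text_length + 1;          /// +1 for the trailing '\n'
    size_t header_length = static_header_length + 4 + dict_length;
    if (header_length % 64)
        header_length = (header_length / 64 + 1) * 64;  /// round up to 64
    return header_length - static_header_length - 4;    /// dict + padding bytes
}

For example, a 46-character dict gives 8 + 4 + 47 = 59 bytes, rounded up to 64, so the dict is padded with spaces to 52 bytes, the last of which is '\n'.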
diff --git a/src/Processors/Formats/Impl/NpyOutputFormat.h b/src/Processors/Formats/Impl/NpyOutputFormat.h new file mode 100644 index 00000000000..5dd6552ac0c --- /dev/null +++ b/src/Processors/Formats/Impl/NpyOutputFormat.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ + +/** Stream for output data in Npy format. + * https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html + */ +class NpyOutputFormat : public IOutputFormat +{ +public: + NpyOutputFormat(WriteBuffer & out_, const Block & header_); + + String getName() const override { return "NpyOutputFormat"; } + + String getContentType() const override { return "application/octet-stream"; } + +private: + String shapeStr() const; + + bool getNumpyDataType(const DataTypePtr & type); + + void consume(Chunk) override; + void initShape(const ColumnPtr & column); + void checkShape(ColumnPtr & column); + void updateSizeIfTypeString(const ColumnPtr & column); + + void finalizeImpl() override; + void writeHeader(); + void writeColumns(); + + bool is_initialized = false; + bool invalid_shape = false; + + DataTypePtr data_type; + DataTypePtr nested_data_type; + std::shared_ptr<NumpyDataType> numpy_data_type; + UInt64 num_rows = 0; + std::vector<UInt64> numpy_shape; + Columns columns; + + /// static header (version 3.0) + constexpr static auto STATIC_HEADER = "\x93NUMPY\x03\x00"; + constexpr static size_t STATIC_HEADER_LENGTH = 8; +}; + +} diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index f446ecec846..ae43295024a 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -59,6 +59,7 @@ public: const Aggregator::Params & getParams() const { return params; } const auto & getGroupingSetsParamsList() const { return grouping_sets_params; } + bool isGroupByUseNulls() const { return group_by_use_nulls; } bool inOrder() const { return !sort_description_for_merging.empty(); } bool explicitSortingRequired() const { return explicit_sorting_required_for_aggregation_in_order; } diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 7dd526cbe95..5b3bcfc4468 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -428,6 +428,9 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes /// of the grouping sets, we could not push the filter down. if (aggregating->isGroupingSets()) { + /// Cannot push down filter if type has been changed.
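+ /// (With group_by_use_nulls = 1 the GROUPING SETS key columns become Nullable,
+ /// so a predicate typed against the original non-Nullable key cannot be
+ /// evaluated below the aggregation step.)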
+ if (aggregating->isGroupByUseNulls()) + return 0; const auto & actions = filter->getExpression(); const auto & filter_node = actions->findInOutputs(filter->getFilterColumnName()); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 64111602458..30ff9970790 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -579,8 +579,6 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & auto candidates = getAggregateProjectionCandidates(node, *aggregating, *reading, max_added_blocks, allow_implicit_projections); - const auto & parts = reading->getParts(); - const auto & alter_conversions = reading->getAlterConvertionsForParts(); const auto & query_info = reading->getQueryInfo(); const auto metadata = reading->getStorageMetadata(); ContextPtr context = reading->getContext(); @@ -592,7 +590,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & } else if (!candidates.real.empty()) { - auto ordinary_reading_select_result = reading->selectRangesToRead(parts, alter_conversions); + auto ordinary_reading_select_result = reading->selectRangesToRead(); size_t ordinary_reading_marks = ordinary_reading_select_result->selected_marks; /// Nothing to read. Ignore projections. diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index cac172a856f..13c6c6b0821 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -136,12 +136,10 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) NormalProjectionCandidate * best_candidate = nullptr; const Names & required_columns = reading->getAllColumnNames(); - const auto & parts = reading->getParts(); - const auto & alter_conversions = reading->getAlterConvertionsForParts(); const auto & query_info = reading->getQueryInfo(); MergeTreeDataSelectExecutor reader(reading->getMergeTreeData()); - auto ordinary_reading_select_result = reading->selectRangesToRead(parts, alter_conversions); + auto ordinary_reading_select_result = reading->selectRangesToRead(); size_t ordinary_reading_marks = ordinary_reading_select_result->selected_marks; /// Nothing to read. Ignore projections. diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 68b145f259d..e523a2c243c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1364,11 +1364,27 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( return merging_pipes.empty() ? 
Pipe::unitePipes(std::move(no_merging_pipes)) : Pipe::unitePipes(std::move(merging_pipes)); } +ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead() const +{ + return selectRangesToReadImpl( + prepared_parts, + alter_conversions_for_parts, + metadata_for_reading, + query_info, + context, + requested_num_streams, + max_block_numbers_to_read, + data, + all_column_names, + log, + indexes); +} + ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( MergeTreeData::DataPartsVector parts, std::vector alter_conversions) const { - return selectRangesToRead( + return selectRangesToReadImpl( std::move(parts), std::move(alter_conversions), metadata_for_reading, @@ -1855,10 +1871,7 @@ bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const { - auto result_ptr = analyzed_result_ptr - ? analyzed_result_ptr - : selectRangesToRead(prepared_parts, alter_conversions_for_parts); - + auto result_ptr = analyzed_result_ptr ? analyzed_result_ptr : selectRangesToRead(); return *result_ptr; } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index f512ba9e0ae..5d7879e8dee 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -167,6 +167,8 @@ public: MergeTreeData::DataPartsVector parts, std::vector alter_conversions) const; + AnalysisResultPtr selectRangesToRead() const; + StorageMetadataPtr getStorageMetadata() const { return metadata_for_reading; } /// Returns `false` if requested reading cannot be performed. diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index f36d5d89825..626d4e9e689 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -2,7 +2,6 @@ #include #include -#include #include #include diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 2a4ff74714d..84eadf951a2 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -981,10 +981,7 @@ void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(Init std::lock_guard lock(mutex); if (!pimpl) - { - mode = announcement.mode; - initialize(); - } + initialize(announcement.mode); pimpl->handleInitialAllRangesAnnouncement(std::move(announcement)); } @@ -996,10 +993,7 @@ ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelR std::lock_guard lock(mutex); if (!pimpl) - { - mode = request.mode; - initialize(); - } + initialize(request.mode); const auto replica_num = request.replica_num; auto response = pimpl->handleRequest(std::move(request)); @@ -1024,7 +1018,7 @@ void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica pimpl->markReplicaAsUnavailable(replica_number); } -void ParallelReplicasReadingCoordinator::initialize() +void ParallelReplicasReadingCoordinator::initialize(CoordinationMode mode) { switch (mode) { diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index 9cba7d8e8c2..60343988f03 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -31,12 +31,11 @@ public: void setProgressCallback(ProgressCallback callback); 
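+ /// The coordination mode is carried by the first announcement/request and is
+ /// consumed directly by initialize(); it is intentionally not cached in a member,
+ /// so an uninitialized mode can never be observed.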
private: - void initialize(); + void initialize(CoordinationMode mode); std::mutex mutex; size_t replicas_count{0}; size_t mark_segment_size{0}; - CoordinationMode mode{CoordinationMode::Default}; std::unique_ptr pimpl; ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation std::set replicas_used; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index db568a1d5ab..016de94c17c 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -164,7 +164,7 @@ void StorageView::read( if (context->getSettingsRef().allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(current_inner_query, getViewContext(context, storage_snapshot), options); + InterpreterSelectQueryAnalyzer interpreter(current_inner_query, getViewContext(context, storage_snapshot), options, column_names); interpreter.addStorageLimits(*query_info.storage_limits); query_plan = std::move(interpreter).extractQueryPlan(); } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index e3cbff5f01b..cec55cefda2 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -219,7 +219,7 @@ void addRequestedPathFileAndSizeVirtualsToChunk( } } -static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allowed_inputs) +static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block * allowed_inputs) { std::stack nodes; nodes.push(node); @@ -228,7 +228,10 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo const auto * cur = nodes.top(); nodes.pop(); - if (cur->type == ActionsDAG::ActionType::INPUT && !allowed_inputs.has(cur->result_name)) + if (cur->type == ActionsDAG::ActionType::ARRAY_JOIN) + return false; + + if (cur->type == ActionsDAG::ActionType::INPUT && allowed_inputs && !allowed_inputs->has(cur->result_name)) return false; for (const auto * child : cur->children) @@ -336,7 +339,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( } } - if (allowed_inputs && !canEvaluateSubtree(node, *allowed_inputs)) + if (!canEvaluateSubtree(node, allowed_inputs)) return nullptr; return node; diff --git a/tests/clickhouse-test b/tests/clickhouse-test index b1ad84b71d4..cbf0a6a577f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -751,6 +751,7 @@ class SettingsRandomizer: "max_read_buffer_size": lambda: random.randint(500000, 1048576), "prefer_localhost_replica": lambda: random.randint(0, 1), "max_block_size": lambda: random.randint(8000, 100000), + "max_joined_block_size_rows": lambda: random.randint(8000, 100000), "max_threads": lambda: random.randint(1, 64), "optimize_append_index": lambda: random.randint(0, 1), "optimize_if_chain_to_multiif": lambda: random.randint(0, 1), diff --git a/tests/performance/ascii.xml b/tests/performance/ascii.xml new file mode 100644 index 00000000000..83440437d2c --- /dev/null +++ b/tests/performance/ascii.xml @@ -0,0 +1,22 @@ + + select substringUTF8(materialize('hello world'), 2, 5) from numbers(10000000) + select substringUTF8(materialize('hello 世界'), 2, 5) from numbers(10000000) + + select substringIndexUTF8(materialize('www.clickhouse.com'), '.', 2) from numbers(10000000) + select substringIndexUTF8(materialize('官网www.clickhouse.com'), '.', 2) from numbers(10000000) + + select reverseUTF8(materialize('hello world')) from numbers(10000000) + select reverseUTF8(materialize('hello 世界')) from numbers(10000000) + + select lowerUTF8(materialize('hello 
world')) from numbers(10000000) + select lowerUTF8(materialize('hello 世界')) from numbers(10000000) + + select upperUTF8(materialize('hello world')) from numbers(10000000) + select upperUTF8(materialize('hello 世界')) from numbers(10000000) + + select leftPadUTF8(materialize('hello '), 10, ',') from numbers(10000000) + select leftPadUTF8(materialize('hello '), 10, '世界') from numbers(10000000) + + select rightPadUTF8(materialize('hello '), 10, ',') from numbers(10000000) + select rightPadUTF8(materialize('hello '), 10, '世界') from numbers(10000000) + diff --git a/tests/queries/0_stateless/01115_prewhere_array_join.reference b/tests/queries/0_stateless/01115_prewhere_array_join.reference index e69de29bb2d..573541ac970 100644 --- a/tests/queries/0_stateless/01115_prewhere_array_join.reference +++ b/tests/queries/0_stateless/01115_prewhere_array_join.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01115_prewhere_array_join.sql b/tests/queries/0_stateless/01115_prewhere_array_join.sql index e614bdf402b..6ff86636d1d 100644 --- a/tests/queries/0_stateless/01115_prewhere_array_join.sql +++ b/tests/queries/0_stateless/01115_prewhere_array_join.sql @@ -5,3 +5,9 @@ INSERT INTO prewhere SELECT 0, randomPrintableASCII(10000) FROM numbers(10000); SELECT arrayJoin([light]) != 0 AS cond, length(heavy) FROM prewhere WHERE light != 0 AND cond != 0; DROP TABLE prewhere; + +DROP TABLE IF EXISTS testtable; +CREATE TABLE testtable (DT Datetime, Label1 String, Value UInt64) ENGINE = MergeTree() PARTITION BY DT ORDER BY Label1; +INSERT INTO testtable (*) Values (now(), 'app', 1); +SELECT arrayJoin([0, 1]) AS arrayIdx FROM testtable WHERE arrayIdx = 0; +DROP TABLE testtable; diff --git a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference index 212dd348edb..5acefdb365e 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference +++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference @@ -77,7 +77,7 @@ Expression (Project names) Distinct (DISTINCT) Union Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))))) ReadFromSystemNumbers ReadFromRemote (Read from remote replica) explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized @@ -85,7 +85,7 @@ Union Expression (Project names) Distinct (DISTINCT) Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + 
(Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))))) ReadFromSystemNumbers ReadFromRemote (Read from remote replica) explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized @@ -94,7 +94,7 @@ Expression (Project names) Union Expression (Before LIMIT BY) LimitBy - Expression ((Before LIMIT BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + Expression ((Before LIMIT BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))))))) ReadFromSystemNumbers Expression ReadFromRemote (Read from remote replica) @@ -102,7 +102,7 @@ explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, Union Expression (Project names) LimitBy - Expression ((Before LIMIT BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + Expression ((Before LIMIT BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))))))) ReadFromSystemNumbers ReadFromRemote (Read from remote replica) explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized @@ -113,7 +113,7 @@ Expression (Project names) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))))) ReadFromSystemNumbers ReadFromRemote (Read from remote replica) explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized @@ -124,7 +124,7 @@ Expression (Project names) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) Distinct (Preliminary DISTINCT) - Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table 
structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))))) ReadFromSystemNumbers ReadFromRemote (Read from remote replica) explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized @@ -134,9 +134,9 @@ Expression (Project names) Sorting (Merge sorted streams for ORDER BY, without aggregation) Union LimitBy - Expression ((Before LIMIT BY + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) [lifted up part])) + Expression ((Before LIMIT BY + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))))) [lifted up part])) Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))))))) ReadFromSystemNumbers ReadFromRemote (Read from remote replica) explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized @@ -144,8 +144,8 @@ Expression (Project names) Sorting (Merge sorted streams after aggregation stage for ORDER BY) Union LimitBy - Expression ((Before LIMIT BY + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) [lifted up part])) + Expression ((Before LIMIT BY + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))))) [lifted up part])) Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project 
names + (Projection + Change column names to column identifiers))))))))))) ReadFromSystemNumbers ReadFromRemote (Read from remote replica) diff --git a/tests/queries/0_stateless/02187_async_inserts_all_formats.python b/tests/queries/0_stateless/02187_async_inserts_all_formats.python index fa555c78f8b..943e32d4cf2 100644 --- a/tests/queries/0_stateless/02187_async_inserts_all_formats.python +++ b/tests/queries/0_stateless/02187_async_inserts_all_formats.python @@ -38,7 +38,7 @@ def run_test(data_format, gen_data_template, settings): formats = ( client.query( "SELECT name FROM system.formats WHERE is_input AND is_output \ - AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList') ORDER BY name" + AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList', 'Npy') ORDER BY name" ) .strip() .split("\n") diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 3ddf165dec0..a152066a460 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -201,6 +201,7 @@ cbrt ceil char cityHash64 +clamp coalesce concat concatAssumeInjective diff --git a/tests/queries/0_stateless/02479_analyzer_join_with_constants.reference b/tests/queries/0_stateless/02479_analyzer_join_with_constants.reference index 3a23cce46b4..72393057dbf 100644 --- a/tests/queries/0_stateless/02479_analyzer_join_with_constants.reference +++ b/tests/queries/0_stateless/02479_analyzer_join_with_constants.reference @@ -8,3 +8,5 @@ -- 1 1 0 0 -- +-- +1 1 0 0 diff --git a/tests/queries/0_stateless/02479_analyzer_join_with_constants.sql b/tests/queries/0_stateless/02479_analyzer_join_with_constants.sql index 50248665bc9..bf081bed228 100644 --- a/tests/queries/0_stateless/02479_analyzer_join_with_constants.sql +++ b/tests/queries/0_stateless/02479_analyzer_join_with_constants.sql @@ -25,3 +25,8 @@ SELECT * FROM (SELECT 1 AS id, 1 AS value) AS t1 ASOF LEFT JOIN (SELECT 1 AS id, SELECT '--'; SELECT b.dt FROM (SELECT NULL > NULL AS pk, 1 AS dt FROM numbers(5)) AS a ASOF LEFT JOIN (SELECT NULL AS pk, 1 AS dt) AS b ON (a.pk = b.pk) AND 1 != 1 AND (a.dt >= b.dt); -- { serverError 403, NOT_FOUND_COLUMN_IN_BLOCK } + +SELECT '--'; + +-- Fuzzed +SELECT * FROM (SELECT 1 AS id, 1 AS value) AS t1 ASOF LEFT JOIN (SELECT 1 AS id, 1 AS value) AS t2 ON (t1.id = t2.id) AND (toUInt256(1) IN (SELECT materialize(1))) AND (1 != 1) AND (t1.value >= t2.value); diff --git a/tests/queries/0_stateless/02895_npy_output_format.reference b/tests/queries/0_stateless/02895_npy_output_format.reference new file mode 100644 index 00000000000..ad3366c2a51 --- /dev/null +++ b/tests/queries/0_stateless/02895_npy_output_format.reference @@ -0,0 +1,48 @@ +-- test data types -- +-1 +1 +-1 +1 +-1 +1 +-1 +1 +0 +1 +0 +1 +0 +1 +0 +1 +0.2 +0.1 +0.02 +0.01 +npy +npy +npynpy +npy +array Int8 +array Int16 +array Int32 +array Int64 +array UInt8 +array UInt16 +array UInt32 +array UInt64 +array Float32 +array Float64 +array String +array String +-- test nested data types -- +[[[1],[2]],[[3],[4]]] +[[[1],[2]],[[3],[4]]] +[[0.1],[0.2]] +[[0.1],[0.2]] +['a','bb'] +['ccc','dddd'] +array Array(Array(Array(Int8))) +array Array(Array(Float64)) +array Array(String) +-- test exceptions -- diff --git a/tests/queries/0_stateless/02895_npy_output_format.sh 
b/tests/queries/0_stateless/02895_npy_output_format.sh new file mode 100755 index 00000000000..934c80830c5 --- /dev/null +++ b/tests/queries/0_stateless/02895_npy_output_format.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +user_files_path=$($CLICKHOUSE_CLIENT_BINARY -q "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +chmod 777 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ + +${CLICKHOUSE_CLIENT} -n -q --ignore-error " + DROP DATABASE IF EXISTS npy_output_02895; + CREATE DATABASE IF NOT EXISTS npy_output_02895; + + SELECT '-- test data types --'; + CREATE TABLE IF NOT EXISTS npy_output_02895.data_types + ( + i1 Int8, + i2 Int16, + i4 Int32, + i8 Int64, + u1 UInt8, + u2 UInt16, + u4 UInt32, + u8 UInt64, + f4 Float32, + f8 Float64, + fs FixedString(10), + s String + ) Engine = MergeTree ORDER BY i1; + + INSERT INTO npy_output_02895.data_types VALUES (1, 1, 1, 1, 1, 1, 1, 1, 0.1, 0.01, 'npy', 'npy'), (-1, -1, -1, -1, 0, 0, 0, 0, 0.2, 0.02, 'npy', 'npynpy'); + + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy') SELECT i1 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy') SELECT i2 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy') SELECT i4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy') SELECT i8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy') SELECT u1 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy') SELECT u2 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy') SELECT u4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy') SELECT u8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy') SELECT f4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy') SELECT f8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy') SELECT fs FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy') SELECT s FROM npy_output_02895.data_types; + + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); + SELECT * FROM 
file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); + + SELECT '-- test nested data types --'; + CREATE TABLE IF NOT EXISTS npy_output_02895.nested_data_types + ( + i4 Array(Array(Array(Int8))), + f8 Array(Array(Float64)), + s Array(String), + ) Engine = MergeTree ORDER BY i4; + + INSERT INTO npy_output_02895.nested_data_types VALUES ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], ['a', 'bb']), ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], ['ccc', 'dddd']); + + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy') SELECT i4 FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy') SELECT f8 FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy') SELECT s FROM npy_output_02895.nested_data_types; + + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); + SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); + DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); + + SELECT '-- test exceptions --'; + CREATE TABLE IF NOT EXISTS npy_output_02895.exceptions + ( + 
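-- Each column below is expected to make the Npy output format fail with a distinct error (unsupported type or irregular array shape) +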
unsupported_u UInt256, + unsupported_date Date, + unsupported_tuple Tuple(Int16, Int16), + unsupported_nested_i Array(Int128), + ragged_dimension Array(Int16), + zero_dimension Array(Int16) + ) Engine = MergeTree ORDER BY unsupported_u; + + INSERT INTO npy_output_02895.exceptions VALUES (1, '2019-01-01', (1, 1), [1, 1], [1, 1], []), (0, '2019-01-01', (0, 0), [0, 0], [0], [0]); + + SELECT * FROM npy_output_02895.exceptions FORMAT Npy; -- { clientError TOO_MANY_COLUMNS } + SELECT unsupported_u FROM npy_output_02895.exceptions FORMAT Npy; -- { clientError BAD_ARGUMENTS } + SELECT unsupported_date FROM npy_output_02895.exceptions FORMAT Npy; -- { clientError BAD_ARGUMENTS } + SELECT unsupported_tuple FROM npy_output_02895.exceptions FORMAT Npy; -- { clientError BAD_ARGUMENTS } + SELECT unsupported_nested_i FROM npy_output_02895.exceptions FORMAT Npy; -- { clientError BAD_ARGUMENTS } + SELECT ragged_dimension FROM npy_output_02895.exceptions FORMAT Npy; -- { clientError ILLEGAL_COLUMN } + SELECT zero_dimension FROM npy_output_02895.exceptions FORMAT Npy; -- { clientError ILLEGAL_COLUMN } + + DROP DATABASE IF EXISTS npy_output_02895;" + +rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02941_variant_type_1.sh b/tests/queries/0_stateless/02941_variant_type_1.sh index 4fb76532a05..22ca909a26e 100755 --- a/tests/queries/0_stateless/02941_variant_type_1.sh +++ b/tests/queries/0_stateless/02941_variant_type_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1 --index_granularity_bytes=10485760 --index_granularity=8192" function test1_insert() { diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index 995b622b6bf..91ba0285bd8 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1 --index_granularity_bytes=10485760 --index_granularity=8192" function test4_insert() { diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index 9fbdf6de8a7..8a039a02d6d 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh .
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1 --index_granularity_bytes=10485760 --index_granularity=8192 " function test5_insert() { diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index f6eaf2fcc9a..b003bcdcef1 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -7,7 +7,8 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1 --index_granularity_bytes=10485760 --index_granularity=8192 " function test6_insert() { diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference index 17a17484a0c..02ea01eb2e6 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference @@ -66,3 +66,61 @@ a a a a a a +0 0 +0 \N +1 2 +1 \N +2 4 +2 \N +\N 0 +\N 2 +\N 4 +\N \N +0 0 nan +2 4 nan +1 2 nan +2 \N nan +0 \N nan +1 \N nan +\N 2 nan +\N 0 nan +\N 4 nan +\N \N nan +[] +['.'] +['.','.'] +['.','.','.'] +['.','.','.','.'] +['.','.','.','.','.'] +['.','.','.','.','.','.'] +['.','.','.','.','.','.','.'] +['.','.','.','.','.','.','.','.'] +['.','.','.','.','.','.','.','.','.'] +[] +[] +[] +[] +[] +[] +[] +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +10 diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql index 68710137542..b8c173520a9 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -21,3 +21,39 @@ SELECT tuple(number + 1) AS x FROM numbers(10) GROUP BY number + 1, toString(x) SELECT tuple(tuple(number)) AS x FROM numbers(10) WHERE toString(toUUID(tuple(number), NULL), x) GROUP BY number, (toString(x), number) WITH CUBE SETTINGS group_by_use_nulls = 1 FORMAT Null; SELECT materialize('a'), 'a' AS key GROUP BY key WITH CUBE WITH TOTALS SETTINGS group_by_use_nulls = 1; + +EXPLAIN QUERY TREE +SELECT a, b +FROM numbers(3) +GROUP BY number as a, (number + number) as b WITH CUBE +ORDER BY a, b format Null; + +SELECT a, b +FROM numbers(3) +GROUP BY number as a, (number + number) as b WITH CUBE +ORDER BY a, b; + +SELECT + a, + b, + cramersVBiasCorrected(a, b) +FROM numbers(3) +GROUP BY + number AS a, + number + number AS b + WITH CUBE +SETTINGS group_by_use_nulls = 1; + +SELECT arrayMap(x -> '.', range(number % 10)) AS k FROM remote('127.0.0.{2,3}', numbers(10)) GROUP BY GROUPING SETS ((k)) ORDER BY k settings group_by_use_nulls=1; + +SELECT count('Lambda as function parameter') AS c FROM (SELECT ignore(ignore('Lambda as function parameter', 28, 28, 28, 28, 28, 28), 28), materialize('Lambda as function parameter'), 28, 28, 'world', 5 FROM system.numbers WHERE ignore(materialize('Lambda as function parameter'), materialize(toLowCardinality(28)), 28, 28, 28, 28, toUInt128(28)) LIMIT 2) GROUP BY GROUPING 
SETS ((toLowCardinality(0)), (toLowCardinality(toNullable(28))), (1)) HAVING nullIf(c, 10) < 50 ORDER BY c ASC NULLS FIRST settings group_by_use_nulls=1; -- { serverError ILLEGAL_AGGREGATION } + +SELECT arraySplit(x -> 0, []) WHERE materialize(1) GROUP BY (0, ignore('a')) WITH ROLLUP SETTINGS group_by_use_nulls = 1; + +SELECT arraySplit(x -> toUInt8(number), []) from numbers(1) GROUP BY toUInt8(number) WITH ROLLUP SETTINGS group_by_use_nulls = 1; + +SELECT arraySplit(number -> toUInt8(number), []) from numbers(1) GROUP BY toUInt8(number) WITH ROLLUP SETTINGS group_by_use_nulls = 1; + +SELECT count(arraySplit(number -> toUInt8(number), [arraySplit(x -> toUInt8(number), [])])) FROM numbers(10) GROUP BY number, [number] WITH ROLLUP settings group_by_use_nulls=1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT count(arraySplit(x -> toUInt8(number), [])) FROM numbers(10) GROUP BY number, [number] WITH ROLLUP settings group_by_use_nulls=1; \ No newline at end of file diff --git a/tests/queries/0_stateless/03036_clamp.reference b/tests/queries/0_stateless/03036_clamp.reference new file mode 100644 index 00000000000..b866caf2261 --- /dev/null +++ b/tests/queries/0_stateless/03036_clamp.reference @@ -0,0 +1,14 @@ +10 +20 +15 +b +0 +['hello'] +-1 +234 +\N +\N +5 +0 +1 +2 diff --git a/tests/queries/0_stateless/03036_clamp.sql b/tests/queries/0_stateless/03036_clamp.sql new file mode 100644 index 00000000000..9973265c13b --- /dev/null +++ b/tests/queries/0_stateless/03036_clamp.sql @@ -0,0 +1,15 @@ +SELECT clamp(1, 10, 20); +SELECT clamp(30, 10, 20); +SELECT clamp(15, 10, 20); +SELECT clamp('a', 'b', 'c'); +SELECT clamp(today(), yesterday() - 10, yesterday() + 10) - today(); +SELECT clamp([], ['hello'], ['world']); +SELECT clamp(-1., -1000., 18446744073709551615.); +SELECT clamp(toNullable(123), 234, 456); +select clamp(1, null, 5); +select clamp(1, 6, null); +select clamp(1, 5, nan); +select clamp(toInt64(number), toInt64(number-1), toInt64(number+1)) from numbers(3); +select clamp(number, number-1, number+1) from numbers(3); -- { serverError NO_COMMON_TYPE } +select clamp(1, 3, 2); -- { serverError BAD_ARGUMENTS } +select clamp(1, data[1], data[2]) from (select arrayJoin([[1, 2], [2,3], [3,2], [4, 4]]) as data); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03101_analyzer_identifiers_3.sql b/tests/queries/0_stateless/03101_analyzer_identifiers_3.sql index 8cb477ea6bf..77a0f040e88 100644 --- a/tests/queries/0_stateless/03101_analyzer_identifiers_3.sql +++ b/tests/queries/0_stateless/03101_analyzer_identifiers_3.sql @@ -39,7 +39,7 @@ SELECT * GROUP BY *; -- not ok as every component of ORDER BY may contain ASC/DESC and COLLATE; though can be supported in some sense -- but it works SELECT * ORDER BY *; -SELECT * WHERE *; -- { serverError UNSUPPORTED_METHOD } +SELECT * WHERE *; -- { serverError BAD_ARGUMENTS } SELECT '---'; diff --git a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh index b542c9fff9a..7df2118ad0c 100755 --- a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh +++ b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh @@ -39,6 +39,7 @@ $CLICKHOUSE_CLIENT -nm -q " set send_logs_level='fatal'; drop table tp_1; restore table tp_1 from Disk('backups', '$backup_id'); +system stop merges tp_1; " | grep -o "RESTORED" $CLICKHOUSE_CLIENT -q "select count() from tp_1;" diff --git a/tests/queries/0_stateless/03149_analyzer_window_redefinition.reference
b/tests/queries/0_stateless/03149_analyzer_window_redefinition.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03149_analyzer_window_redefinition.sql b/tests/queries/0_stateless/03149_analyzer_window_redefinition.sql new file mode 100644 index 00000000000..7bc5ec7579c --- /dev/null +++ b/tests/queries/0_stateless/03149_analyzer_window_redefinition.sql @@ -0,0 +1,8 @@ +CREATE TABLE users (uid Int16, name String, age Int16) ENGINE=MergeTree ORDER BY tuple(); + +INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users VALUES (6666, 'Ksenia', 48); +INSERT INTO users VALUES (8888, 'Alice', 50); + +SELECT count(*) OVER w +FROM users WINDOW w AS (ORDER BY uid), w AS (ORDER BY name); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.reference b/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.reference new file mode 100644 index 00000000000..209c455b6f5 --- /dev/null +++ b/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.reference @@ -0,0 +1,14 @@ +2023-01-05 hello, world +2023-01-05 \N +2023-01-05 hello, world +2023-01-05 \N +2023-01-05 +2023-01-05 +2023-01-05 hello, world +2023-01-05 \N +2023-01-05 hello, world +2023-01-05 \N +2023-01-05 10 +2023-01-05 10 +2023-01-05 hello, world +2023-01-05 \N diff --git a/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.sql b/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.sql new file mode 100644 index 00000000000..a0bd1381351 --- /dev/null +++ b/tests/queries/0_stateless/03150_grouping_sets_use_nulls_pushdown.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS test_grouping_sets_predicate; + +CREATE TABLE test_grouping_sets_predicate ( day_ Date, type_1 String ) ENGINE=MergeTree ORDER BY day_; + +INSERT INTO test_grouping_sets_predicate SELECT toDate('2023-01-05') AS day_, 'hello, world' FROM numbers(10); + +SET group_by_use_nulls = true; + +SELECT * +FROM ( SELECT day_, type_1 FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) +WHERE day_ = '2023-01-05' +ORDER BY ALL; + + +SELECT * +FROM ( SELECT * FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) +WHERE day_ = '2023-01-05' +ORDER BY ALL; + +SELECT * +FROM ( SELECT day_ FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) +WHERE day_ = '2023-01-05' +ORDER BY * +SETTINGS allow_experimental_analyzer=1; + +SELECT * +FROM ( SELECT * FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) +WHERE day_ = '2023-01-05' +GROUP BY * +ORDER BY ALL +SETTINGS allow_experimental_analyzer=1; + +SELECT * +FROM ( SELECT * FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (*), (day_) ) ) +WHERE day_ = '2023-01-05' +GROUP BY GROUPING SETS (*) +ORDER BY type_1 +SETTINGS allow_experimental_analyzer=1; + +SELECT * +FROM ( SELECT day_, COUNT(*) FROM test_grouping_sets_predicate GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) +WHERE day_ = '2023-01-05' +ORDER BY ALL; + + +SELECT t2.* +FROM ( SELECT t1.* FROM test_grouping_sets_predicate t1 GROUP BY GROUPING SETS ( (day_, type_1), (day_) ) ) t2 +WHERE day_ = '2023-01-05' +ORDER BY ALL; diff --git a/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.reference b/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.reference new file mode 100644 index 00000000000..0ec4e34ebfe --- /dev/null +++
b/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.reference @@ -0,0 +1,8 @@ +Expression ((Project names + Projection)) +Header: sum(id) UInt64 + Aggregating + Header: sum(__table1.id) UInt64 + Expression ((Before GROUP BY + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))))) + Header: __table1.id UInt64 + ReadFromPreparedSource (Read from NullSource) + Header: id UInt64 diff --git a/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.sql b/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.sql new file mode 100644 index 00000000000..40204b5cd03 --- /dev/null +++ b/tests/queries/0_stateless/03151_analyzer_view_read_only_necessary_columns.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +DROP VIEW IF EXISTS test_view; +CREATE VIEW test_view AS SELECT id, value FROM test_table; + +EXPLAIN header = 1 SELECT sum(id) FROM test_view settings allow_experimental_analyzer=1; + +DROP VIEW test_view; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/03151_external_cross_join.reference b/tests/queries/0_stateless/03151_external_cross_join.reference new file mode 100644 index 00000000000..057eadec0e4 --- /dev/null +++ b/tests/queries/0_stateless/03151_external_cross_join.reference @@ -0,0 +1,6 @@ +1 1 2 +10 55 11 +100 5050 101 +1000 500500 1001 +10000 50005000 10001 +100000 5000050000 100001 diff --git a/tests/queries/0_stateless/03151_external_cross_join.sql b/tests/queries/0_stateless/03151_external_cross_join.sql new file mode 100644 index 00000000000..e0e05a10e1e --- /dev/null +++ b/tests/queries/0_stateless/03151_external_cross_join.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 (x Int32) ENGINE = Memory; + +-- insert several blocks with 1 or 2 rows: +INSERT INTO t1 VALUES (1); +INSERT INTO t1 VALUES (10),(100); +INSERT INTO t1 VALUES (1000); +INSERT INTO t1 VALUES (10000),(100000); + +SET max_rows_in_join = 111; + +SELECT x, sum(number), count() FROM ( + SELECT t1.x, t2.number + FROM t1 + CROSS JOIN numbers_mt(10_000_000) t2 + WHERE number <= x +) +GROUP BY ALL +ORDER BY x +; \ No newline at end of file diff --git a/utils/security-generator/generate_security.py b/utils/security-generator/generate_security.py index ccf9a82067e..2b37e28257a 100755 --- a/utils/security-generator/generate_security.py +++ b/utils/security-generator/generate_security.py @@ -10,21 +10,22 @@ HEADER = """ -# Security Policy +# ClickHouse Security Vulnerability Response Policy -## Security Announcements -Security fixes will be announced by posting them in the [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). +## Security Change Log and Support -## Scope and Supported Versions +Details regarding security fixes are publicly reported in our [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). A summary of known security vulnerabilities is shown at the bottom of this page.
-The following versions of ClickHouse server are currently being supported with security updates: +Pre-release vulnerability notifications, including during embargo periods, are available to open source users and support customers who are registered for vulnerability alerts. Refer to our [Embargo Policy](#embargo-policy) below. + +The following versions of ClickHouse server are currently supported with security updates: """ FOOTER = """## Reporting a Vulnerability We're extremely grateful to security researchers and users who report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers. -To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement. +To report a potential vulnerability in ClickHouse, please submit the details through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded as per the program scope and rules of engagement. ### When Should I Report a Vulnerability? @@ -45,6 +46,24 @@ As the security issue moves from triage, to identified fix, to release planning ## Public Disclosure Timing A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the time from report to disclosure to be on the order of 7 days. + +## Embargo Policy + +Open source users and support customers may subscribe to receive alerts during the embargo period by visiting [https://trust.clickhouse.com/?product=clickhouseoss](https://trust.clickhouse.com/?product=clickhouseoss), requesting access, and subscribing to alerts. Subscribers agree not to make these notifications public, issue communications, share this information with others, or issue public patches before the disclosure date. Accidental disclosures must be reported immediately to trust@clickhouse.com. Failure to follow this policy or repeated leaks may result in removal from the subscriber list. + +Participation criteria: +1. Be a current open source user or support customer with a valid corporate email domain (no @gmail.com, @azure.com, etc.). +1. Sign up to the ClickHouse OSS Trust Center at [https://trust.clickhouse.com](https://trust.clickhouse.com). +1. Accept the ClickHouse Security Vulnerability Response Policy as outlined above. +1. Subscribe to ClickHouse OSS Trust Center alerts. + +Removal criteria: +1. Members may be removed for failure to follow this policy or repeated leaks. +1. Members may be removed for bounced messages (mail delivery failure). +1. Members may unsubscribe at any time. + +Notification process: +ClickHouse will post notifications within our OSS Trust Center and notify subscribers. Subscribers must log in to the Trust Center to download the notification.
The notification will include the timeframe for public disclosure. """