Merge branch 'master' into fix-30975

2024-11-24 00:22:29 +00:00 · 2022-12-27 23:21:10 +03:00 · 2022-12-27 23:21:10 +03:00 · 14d7266e70
commit 14d7266e70
parent c27f90c8ed 2892d447d4
33 changed files with 220 additions and 40 deletions
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
--- a/src/AggregateFunctions/AggregateFunctionFactory.cpp
+++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp
@ -72,9 +72,12 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
 {
    auto types_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types);

-    /// If one of the types is Nullable, we apply aggregate function combinator "Null".
-
-    if (std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
+    /// If one of the types is Nullable, we apply the aggregate function combinator "Null", unless it's a window function.
+    /// Window functions are not real aggregate functions. Applying combinators doesn't make sense for them;
+    /// they must handle nullability themselves.
+    auto properties = tryGetPropertiesImpl(name);
+    bool is_window_function = properties.has_value() && properties->is_window_function;
+    if (!is_window_function && std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
        [](const auto & type) { return type->isNullable(); }))
    {
        AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix("Null");
--- a/src/AggregateFunctions/AggregateFunctionIf.cpp
+++ b/src/AggregateFunctions/AggregateFunctionIf.cpp
@ -23,7 +23,7 @@ public:
            throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

-        if (!isUInt8(arguments.back()))
+        if (!isUInt8(arguments.back()) && !arguments.back()->onlyNull())
            throw Exception("Illegal type " + arguments.back()->getName() + " of last argument for aggregate function with " + getName() + " suffix",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

@ -52,6 +52,7 @@ class AggregateFunctionIfNullUnary final
 private:
    size_t num_arguments;
    bool filter_is_nullable = false;
+    bool filter_is_only_null = false;

    /// The name of the nested function, including combinators (i.e. *If)
    ///
@ -84,10 +85,8 @@ private:

            return assert_cast<const ColumnUInt8 &>(*filter_column).getData()[row_num] && !filter_null_map[row_num];
        }
-        else
-        {
-            return assert_cast<const ColumnUInt8 &>(*filter_column).getData()[row_num];
-        }
+
+        return assert_cast<const ColumnUInt8 &>(*filter_column).getData()[row_num];
    }

 public:
@ -106,10 +105,14 @@ public:
                "Aggregate function {} require at least one argument", getName());

        filter_is_nullable = arguments[num_arguments - 1]->isNullable();
+        filter_is_only_null = arguments[num_arguments - 1]->onlyNull();
    }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
+        if (filter_is_only_null)
+            return;
+
        const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
        const IColumn * nested_column = &column->getNestedColumn();
        if (!column->isNullAt(row_num) && singleFilter(columns, row_num))
@ -127,6 +130,9 @@ public:
        Arena * arena,
        ssize_t) const override
    {
+        if (filter_is_only_null)
+            return;
+
        const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
        const UInt8 * null_map = column->getNullMapData().data();
        const IColumn * columns_param[] = {&column->getNestedColumn()};
@ -177,6 +183,11 @@ public:

 #if USE_EMBEDDED_COMPILER

+    bool isCompilable() const override
+    {
+        return canBeNativeType(*this->argument_types.back()) && this->nested_function->isCompilable();
+    }
+
    void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
    {
        llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
@ -224,6 +235,9 @@ class AggregateFunctionIfNullVariadic final : public AggregateFunctionNullBase<
                                                  serialize_flag,
                                                  AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag>>
 {
+private:
+    bool filter_is_only_null = false;
+
 public:

    String getName() const override
@ -243,6 +257,8 @@ public:

        for (size_t i = 0; i < number_of_arguments; ++i)
            is_nullable[i] = arguments[i]->isNullable();
+
+        filter_is_only_null = arguments.back()->onlyNull();
    }

    static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments)
@ -282,6 +298,9 @@ public:
    void addBatchSinglePlace(
        size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t) const final
    {
+        if (filter_is_only_null)
+            return;
+
        std::unique_ptr<UInt8[]> final_null_flags = std::make_unique<UInt8[]>(row_end);
        const size_t filter_column_num = number_of_arguments - 1;

@ -346,6 +365,11 @@ public:

 #if USE_EMBEDDED_COMPILER

+    bool isCompilable() const override
+    {
+        return canBeNativeType(*this->argument_types.back()) && this->nested_function->isCompilable();
+    }
+
    void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
    {
        /// TODO: Check
--- a/src/AggregateFunctions/AggregateFunctionIf.h
+++ b/src/AggregateFunctions/AggregateFunctionIf.h
@ -42,7 +42,7 @@ public:
        if (num_arguments == 0)
            throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

-        if (!isUInt8(types.back()))
+        if (!isUInt8(types.back()) && !types.back()->onlyNull())
            throw Exception("Last argument for aggregate function " + getName() + " must be UInt8", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

@ -199,12 +199,16 @@ public:

    AggregateFunctionPtr getNestedFunction() const override { return nested_func; }

+    std::unordered_set<size_t> getArgumentsThatCanBeOnlyNull() const override
+    {
+        return {num_arguments - 1};
+    }

 #if USE_EMBEDDED_COMPILER

    bool isCompilable() const override
    {
-        return nested_func->isCompilable();
+        return canBeNativeType(*this->argument_types.back()) && nested_func->isCompilable();
    }

    void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
--- a/src/AggregateFunctions/AggregateFunctionNull.cpp
+++ b/src/AggregateFunctions/AggregateFunctionNull.cpp
@ -29,7 +29,13 @@ public:
        size_t size = arguments.size();
        DataTypes res(size);
        for (size_t i = 0; i < size; ++i)
-            res[i] = removeNullable(arguments[i]);
+        {
+            /// Nullable(Nothing) is processed separately, don't convert it to Nothing.
+            if (arguments[i]->onlyNull())
+                res[i] = arguments[i];
+            else
+                res[i] = removeNullable(arguments[i]);
+        }
        return res;
    }

@ -41,12 +47,16 @@ public:
    {
        bool has_nullable_types = false;
        bool has_null_types = false;
-        for (const auto & arg_type : arguments)
+        std::unordered_set<size_t> arguments_that_can_be_only_null;
+        if (nested_function)
+            arguments_that_can_be_only_null = nested_function->getArgumentsThatCanBeOnlyNull();
+
+        for (size_t i = 0; i < arguments.size(); ++i)
        {
-            if (arg_type->isNullable())
+            if (arguments[i]->isNullable())
            {
                has_nullable_types = true;
-                if (arg_type->onlyNull())
+                if (arguments[i]->onlyNull() && !arguments_that_can_be_only_null.contains(i))
                {
                    has_null_types = true;
                    break;
--- a/src/AggregateFunctions/IAggregateFunction.h
+++ b/src/AggregateFunctions/IAggregateFunction.h
@ -345,6 +345,14 @@ public:
        return nullptr;
    }

+    /// For most functions, if one of the arguments is always NULL, we return NULL (this is implemented in combinator Null),
+    /// but some functions may want to process such an argument themselves (for example, the condition argument in the If combinator).
+    /// This method returns the set of argument indexes that can be always NULL; they will be skipped in combinator Null.
+    virtual std::unordered_set<size_t> getArgumentsThatCanBeOnlyNull() const
+    {
+        return {};
+    }
+
    /** Return the nested function if this is an Aggregate Function Combinator.
      * Otherwise return nullptr.
      */
@ -828,6 +836,9 @@ struct AggregateFunctionProperties
      * Some may also name this property as "non-commutative".
      */
    bool is_order_dependent = false;
+
+    /// Indicates whether this is actually a window function.
+    bool is_window_function = false;
 };


--- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp
+++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp
@ -81,6 +81,7 @@ public:
        if (nested_if_function_arguments_nodes.size() != 3)
            return;

+        auto & cond_argument = nested_if_function_arguments_nodes[0];
        const auto * if_true_condition_constant_node = nested_if_function_arguments_nodes[1]->as<ConstantNode>();
        const auto * if_false_condition_constant_node = nested_if_function_arguments_nodes[2]->as<ConstantNode>();

@ -107,8 +108,8 @@ public:
            return;
        }

-        /// Rewrite `sum(if(cond, 0, 1))` into `countIf(not(cond))`.
-        if (if_true_condition_value == 0 && if_false_condition_value == 1)
+        /// Rewrite `sum(if(cond, 0, 1))` into `countIf(not(cond))` if the condition is not Nullable (otherwise the result can differ).
+        if (if_true_condition_value == 0 && if_false_condition_value == 1 && !cond_argument->getResultType()->isNullable())
        {
            DataTypePtr not_function_result_type = std::make_shared<DataTypeUInt8>();

--- a/src/Formats/EscapingRuleUtils.cpp
+++ b/src/Formats/EscapingRuleUtils.cpp
@ -318,6 +318,12 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
            if (auto date_type = tryInferDateOrDateTimeFromString(field, format_settings))
                return date_type;

+            /// Special case: the field is a number that starts with 0. In TSV we don't parse such numbers,
+            /// see readIntTextUnsafe in ReadHelpers.h. If the data starts with 0, we infer it
+            /// as a String, so parsing won't fail.
+            if (field[0] == '0' && field.size() != 1)
+                return std::make_shared<DataTypeString>();
+
            auto type = tryInferDataTypeForSingleField(field, format_settings);
            if (!type)
                return std::make_shared<DataTypeString>();
--- a/src/Formats/SchemaInferenceUtils.cpp
+++ b/src/Formats/SchemaInferenceUtils.cpp
@ -250,7 +250,7 @@ namespace
        {
            if (isArray(type))
                nested_types.push_back(assert_cast<const DataTypeArray &>(*type).getNestedType());
-            else
+            else if (isTuple(type))
            {
                const auto & elements = assert_cast<const DataTypeTuple &>(*type).getElements();
                for (const auto & element : elements)
@ -262,7 +262,10 @@ namespace
        if (checkIfTypesAreEqual(nested_types))
        {
            for (auto & type : data_types)
-                type = std::make_shared<DataTypeArray>(nested_types.back());
+            {
+                if (isArray(type) || isTuple(type))
+                    type = std::make_shared<DataTypeArray>(nested_types.back());
+            }
        }
    }

@ -826,14 +829,40 @@ void transformInferredJSONTypesIfNeeded(

 void transformJSONTupleToArrayIfPossible(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info)
 {
-    if (!data_type || !isTuple(data_type))
+    if (!data_type)
        return;

-    const auto * tuple_type = assert_cast<const DataTypeTuple *>(data_type.get());
-    auto nested_types = tuple_type->getElements();
-    transformInferredTypesIfNeededImpl<true>(nested_types, settings, json_info);
-    if (checkIfTypesAreEqual(nested_types))
-        data_type = std::make_shared<DataTypeArray>(nested_types.back());
+    if (const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get()))
+    {
+        auto nested_type = array_type->getNestedType();
+        transformJSONTupleToArrayIfPossible(nested_type, settings, json_info);
+        data_type = std::make_shared<DataTypeArray>(nested_type);
+        return;
+    }
+
+    if (const auto * map_type = typeid_cast<const DataTypeMap *>(data_type.get()))
+    {
+        auto value_type = map_type->getValueType();
+        transformJSONTupleToArrayIfPossible(value_type, settings, json_info);
+        data_type = std::make_shared<DataTypeMap>(map_type->getKeyType(), value_type);
+        return;
+    }
+
+    if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(data_type.get()))
+    {
+        auto nested_types = tuple_type->getElements();
+        for (auto & nested_type : nested_types)
+            transformJSONTupleToArrayIfPossible(nested_type, settings, json_info);
+
+        auto nested_types_copy = nested_types;
+        transformInferredTypesIfNeededImpl<true>(nested_types_copy, settings, json_info);
+        if (checkIfTypesAreEqual(nested_types_copy))
+            data_type = std::make_shared<DataTypeArray>(nested_types_copy.back());
+        else
+            data_type = std::make_shared<DataTypeTuple>(nested_types);
+
+        return;
+    }
 }

 DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings)
--- a/src/Functions/FunctionsLogical.h
+++ b/src/Functions/FunctionsLogical.h
@ -176,6 +176,7 @@ public:
    ColumnPtr executeShortCircuit(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const;
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
    size_t getNumberOfArguments() const override { return 0; }
+    bool canBeExecutedOnLowCardinalityDictionary() const override { return false; }

    bool useDefaultImplementationForNulls() const override { return !Impl::specialImplementationForNulls(); }

--- a/src/Functions/if.cpp
+++ b/src/Functions/if.cpp
@ -1026,6 +1026,7 @@ public:
    }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
    ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const override { return {0}; }
+    bool canBeExecutedOnLowCardinalityDictionary() const override { return false; }

    /// Get result types by argument types. If the function does not apply to these arguments, throw an exception.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
--- a/src/Functions/multiIf.cpp
+++ b/src/Functions/multiIf.cpp
@ -51,6 +51,7 @@ public:
    size_t getNumberOfArguments() const override { return 0; }
    bool useDefaultImplementationForNulls() const override { return false; }
    bool useDefaultImplementationForNothing() const override { return false; }
+    bool canBeExecutedOnLowCardinalityDictionary() const override { return false; }

    ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const override
    {
--- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
@ -71,7 +71,7 @@ Chunk ArrowBlockInputFormat::generate()

    ++record_batch_current;

-    arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result);
+    arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result, (*table_result)->num_rows());

    /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields.
    /// Otherwise fill the missing columns with zero values of its type.
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@ -69,7 +69,6 @@ namespace ErrorCodes
    extern const int DUPLICATE_COLUMN;
    extern const int THERE_IS_NO_COLUMN;
    extern const int UNKNOWN_EXCEPTION;
-    extern const int INCORRECT_NUMBER_OF_COLUMNS;
    extern const int INCORRECT_DATA;
 }

@ -810,7 +809,7 @@ ArrowColumnToCHColumn::ArrowColumnToCHColumn(
 {
 }

-void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
+void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table, size_t num_rows)
 {
    NameToColumnPtr name_to_column_ptr;
    for (auto column_name : table->ColumnNames())
@ -824,16 +823,12 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arr
        name_to_column_ptr[std::move(column_name)] = arrow_column;
    }

-    arrowColumnsToCHChunk(res, name_to_column_ptr);
+    arrowColumnsToCHChunk(res, name_to_column_ptr, num_rows);
 }

-void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr)
+void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows)
 {
-    if (unlikely(name_to_column_ptr.empty()))
-        throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Columns is empty");
-
    Columns columns_list;
-    UInt64 num_rows = name_to_column_ptr.begin()->second->length();
    columns_list.reserve(header.columns());
    std::unordered_map<String, std::pair<BlockPtr, std::shared_ptr<NestedColumnExtractHelper>>> nested_tables;
    bool skipped = false;
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
@ -28,9 +28,9 @@ public:
        bool allow_missing_columns_,
        bool case_insensitive_matching_ = false);

-    void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
+    void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table, size_t num_rows);

-    void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr);
+    void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows);

    /// Get missing columns that exists in header but not in arrow::Schema
    std::vector<size_t> getMissingColumns(const arrow::Schema & schema) const;
--- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
@ -54,14 +54,19 @@ Chunk ORCBlockInputFormat::generate()
        throw ParsingException(
            ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading batch of ORC data: {}", table_result.status().ToString());

+    /// We should extract the number of rows directly from the stripe, because when the
+    /// record batch contains 0 columns (for example, if we requested only columns that
+    /// are not present in the data), the number of rows in the record batch will be 0.
+    size_t num_rows = file_reader->GetRawORCReader()->getStripe(stripe_current)->getNumberOfRows();
+
    auto table = table_result.ValueOrDie();
-    if (!table || !table->num_rows())
+    if (!table || !num_rows)
        return {};

    ++stripe_current;

    Chunk res;
-    arrow_column_to_ch_column->arrowTableToCHChunk(res, table);
+    arrow_column_to_ch_column->arrowTableToCHChunk(res, table, num_rows);
    /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields.
    /// Otherwise fill the missing columns with zero values of its type.
    if (format_settings.defaults_for_omitted_fields)
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@ -70,7 +70,7 @@ Chunk ParquetBlockInputFormat::generate()

    ++row_group_current;

-    arrow_column_to_ch_column->arrowTableToCHChunk(res, table);
+    arrow_column_to_ch_column->arrowTableToCHChunk(res, table, table->num_rows());

    /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields.
    /// Otherwise fill the missing columns with zero values of its type.
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@ -2313,7 +2313,8 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
        .returns_default_when_only_null = true,
        // This probably doesn't make any difference for window functions because
        // it is an Aggregator-specific setting.
-        .is_order_dependent = true };
+        .is_order_dependent = true,
+        .is_window_function = true};

    factory.registerFunction("rank", {[](const std::string & name,
            const DataTypes & argument_types, const Array & parameters, const Settings *)
--- a/tests/ci/workflow_approve_rerun_lambda/app.py
+++ b/tests/ci/workflow_approve_rerun_lambda/app.py
@ -25,7 +25,7 @@ MAX_RETRY = 5

 # Number of times a check can re-run as a whole.
 # It is needed, because we are using AWS "spot" instances, that are terminated often
-MAX_WORKFLOW_RERUN = 20
+MAX_WORKFLOW_RERUN = 30

 WorkflowDescription = namedtuple(
    "WorkflowDescription",
--- a/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference
+++ b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.reference
@ -0,0 +1,3 @@
+x	Array(Array(Nullable(Int64)))					
+x	Tuple(Array(Array(Nullable(Int64))), Nullable(Int64))					
+x	Map(String, Array(Nullable(Int64)))					
--- a/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.sql
+++ b/tests/queries/0_stateless/02416_json_tuple_to_array_schema_inference.sql
@ -0,0 +1,4 @@
+desc format(JSONEachRow, '{"x" : [[42, null], [24, null]]}');
+desc format(JSONEachRow, '{"x" : [[[42, null], []], 24]}');
+desc format(JSONEachRow, '{"x" : {"key" : [42, null]}}');
+
--- a/tests/queries/0_stateless/02494_combinators_with_null_argument.reference
+++ b/tests/queries/0_stateless/02494_combinators_with_null_argument.reference
@ -0,0 +1,18 @@
+-- { echoOn }
+
+select sumIf(1, NULL);
+0
+select sumIf(NULL, 1);
+\N
+select sumIf(NULL, NULL);
+\N
+select countIf(1, NULL);
+0
+select countIf(NULL, 1);
+0
+select countIf(1, NULL);
+0
+select sumArray([NULL, NULL]);
+\N
+select countArray([NULL, NULL]);
+0
--- a/tests/queries/0_stateless/02494_combinators_with_null_argument.sql
+++ b/tests/queries/0_stateless/02494_combinators_with_null_argument.sql
@ -0,0 +1,11 @@
+-- { echoOn }
+
+select sumIf(1, NULL);
+select sumIf(NULL, 1);
+select sumIf(NULL, NULL);
+select countIf(1, NULL);
+select countIf(NULL, 1);
+select countIf(1, NULL);
+select sumArray([NULL, NULL]);
+select countArray([NULL, NULL]);
+
--- a/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.reference
+++ b/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.reference
@ -0,0 +1,3 @@
+1024
+0
+1024
--- a/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.sql
+++ b/tests/queries/0_stateless/02495_sum_if_to_count_if_bug.sql
@ -0,0 +1,4 @@
+select sum(if((number % NULL) = 2, 0, 1)) FROM numbers(1024) settings optimize_rewrite_sum_if_to_count_if=0;
+select sum(if((number % NULL) = 2, 0, 1)) FROM numbers(1024) settings optimize_rewrite_sum_if_to_count_if=1, allow_experimental_analyzer=0;
+select sum(if((number % NULL) = 2, 0, 1)) FROM numbers(1024) settings optimize_rewrite_sum_if_to_count_if=1, allow_experimental_analyzer=1;
+
--- a/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference
+++ b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference
@ -0,0 +1,8 @@
+Hello
+Hello
+Hello
+6	6
+Hello
+Hello
+Hello
+6	6
--- a/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh
+++ b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh
@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+#Tags: no-fasttest, no-parallel
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+$CLICKHOUSE_LOCAL -q "select number as x from numbers(3) format Parquet" > 02511_data1.parquet
+$CLICKHOUSE_LOCAL -q "select y from file(02511_data1.parquet, auto, 'x UInt64, y String default \'Hello\'') settings input_format_parquet_allow_missing_columns=1"
+$CLICKHOUSE_LOCAL -q "select number as x, 'Hello' as y from numbers(3) format Parquet" > 02511_data2.parquet
+$CLICKHOUSE_LOCAL -q "select count(*), count(y) from file('02511_data*.parquet', auto, 'x UInt64, y String') settings input_format_parquet_allow_missing_columns=1"
+
+$CLICKHOUSE_LOCAL -q "select number as x from numbers(3) format ORC" > 02511_data1.orc
+$CLICKHOUSE_LOCAL -q "select y from file(02511_data1.orc, auto, 'x UInt64, y String default \'Hello\'') settings input_format_orc_allow_missing_columns=1"
+$CLICKHOUSE_LOCAL -q "select number as x, 'Hello' as y from numbers(3) format ORC" > 02511_data2.orc
+$CLICKHOUSE_LOCAL -q "select count(*), count(y) from file('02511_data*.orc', auto, 'x UInt64, y String') settings input_format_orc_allow_missing_columns=1"
+
+rm 02511_data*
+
--- a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.reference
+++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.reference
@ -0,0 +1 @@
+1
--- a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql
+++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql
@ -0,0 +1,5 @@
+create table if not exists t (`arr.key` Array(LowCardinality(String)), `arr.value` Array(LowCardinality(String))) engine = Memory;
+insert into t (`arr.key`, `arr.value`) values (['a'], ['b']);
+select if(true, if(lowerUTF8(arr.key) = 'a', 1, 2), 3) as x from t left array join arr;
+drop table t;
+
--- a/tests/queries/0_stateless/02514_tsv_zero_started_number.reference
+++ b/tests/queries/0_stateless/02514_tsv_zero_started_number.reference
@ -0,0 +1 @@
+Nullable(String)	0123
--- a/tests/queries/0_stateless/02514_tsv_zero_started_number.sql
+++ b/tests/queries/0_stateless/02514_tsv_zero_started_number.sql
@ -0,0 +1,2 @@
+select toTypeName(*), * from format(TSV, '0123');
+
--- a/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.reference
+++ b/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.reference
@ -0,0 +1,4 @@
+UInt8
+UInt8
+UInt8
+UInt8
--- a/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.sql
+++ b/tests/queries/0_stateless/02515_and_or_if_multiif_not_return_lc.sql
@ -0,0 +1,5 @@
+select toTypeName(if(toLowCardinality(number % 2), 1, 2)) from numbers(1);
+select toTypeName(multiIf(toLowCardinality(number % 2), 1, 1, 2, 3)) from numbers(1);
+select toTypeName(toLowCardinality(number % 2) and 2) from numbers(1);
+select toTypeName(toLowCardinality(number % 2) or 2) from numbers(1);
+
				`@ -0,0 +1,2 @@`
				`select toTypeName(*), * from format(TSV, '0123');`