Merge branch 'master' into zookeeper_client_fault_injection

2024-11-28 02:21:59 +00:00 · 2022-08-19 11:12:40 +03:00 · 2022-08-19 11:12:40 +03:00 · a90d2ad35e
commit a90d2ad35e
parent 3d253ec51b 24615059fb
26 changed files with 428 additions and 176 deletions
--- a/.github/workflows/tags_stable.yml
+++ b/.github/workflows/tags_stable.yml
@ -35,8 +35,9 @@ jobs:
        GID=$(id -g "${UID}")
        docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \
            --volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \
-                /ClickHouse/utils/changelog/changelog.py -vv --gh-user-or-token="$GITHUB_TOKEN" \
-                --output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" --jobs=5 "${GITHUB_TAG}"
+                /ClickHouse/utils/changelog/changelog.py -v --debug-helpers \
+                --gh-user-or-token="$GITHUB_TOKEN" --jobs=5 \
+                --output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" "${GITHUB_TAG}"
        git add "./docs/changelogs/${GITHUB_TAG}.md"
        git diff HEAD
    - name: Create Pull Request
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@ -1822,10 +1822,13 @@ Result:
 Evaluate external model.
 Accepts a model name and model arguments. Returns Float64.

-## throwIf(x\[, custom_message\])
+## throwIf(x\[, message\[, error_code\]\])

 Throw an exception if the argument is non zero.
-custom_message - is an optional parameter: a constant string, provides an error message
+`message` - is an optional parameter: a constant string providing a custom error message
+`error_code` - is an optional parameter: a constant integer providing a custom error code
+
+To use the `error_code` argument, configuration parameter `allow_custom_error_code_in_throwif` must be enabled.

 ``` sql
 SELECT throwIf(number = 3, 'Too many') FROM numbers(10);
--- a/docs/ru/sql-reference/functions/other-functions.md
+++ b/docs/ru/sql-reference/functions/other-functions.md
@ -1727,10 +1727,13 @@ SELECT joinGet(db_test.id_val,'val',toUInt32(number)) from numbers(4) SETTINGS j

 Принимает на вход имя и аргументы модели. Возвращает Float64.

-## throwIf(x\[, custom_message\]) {#throwifx-custom-message}
+## throwIf(x\[, message\[, error_code\]\]) {#throwifx-custom-message}

 Бросает исключение, если аргумент не равен нулю.
-custom_message - необязательный параметр, константная строка, задает текст сообщения об ошибке.
+`custom_message` - необязательный параметр, константная строка, задает текст сообщения об ошибке.
+`error_code` - необязательный параметр, константное число, задает код ошибки.
+
+Чтобы использовать аргумент `error_code`, должен быть включен параметр конфигурации `allow_custom_error_code_in_throwif`.

 ``` sql
 SELECT throwIf(number = 3, 'Too many') FROM numbers(10);
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -408,6 +408,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
    M(UInt64, low_cardinality_max_dictionary_size, 8192, "Maximum size (in rows) of shared global dictionary for LowCardinality type.", 0) \
    M(Bool, low_cardinality_use_single_dictionary_for_part, false, "LowCardinality type serialization setting. If is true, than will use additional keys when global dictionary overflows. Otherwise, will create several shared dictionaries.", 0) \
    M(Bool, decimal_check_overflow, true, "Check overflow of decimal arithmetic/comparison operations", 0) \
+    M(Bool, allow_custom_error_code_in_throwif, false, "Enable custom error code in function throwIf(). If true, thrown exceptions may have unexpected error codes.", 0) \
    \
    M(Bool, prefer_localhost_replica, true, "If it's true then queries will be always sent to local replica (if it exists). If it's false then replica to send a query will be chosen between local and remote ones according to load_balancing", 0) \
    M(UInt64, max_fetch_partition_retries_count, 5, "Amount of retries while fetching partition from another host.", 0) \
@ -713,7 +714,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
    M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format ORC", 0) \
    M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Arrow", 0) \
    M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
-    M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
+    M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
    M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
    M(Bool, input_format_json_try_infer_numbers_from_strings, true, "Try to infer numbers from string fields while schema inference", 0) \
    M(Bool, input_format_try_infer_integers, true, "Try to infer numbers from string fields while schema inference in text formats", 0) \
--- a/src/Functions/FunctionDateOrDateTimeAddInterval.h
+++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h
@ -611,7 +611,7 @@ public:
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        if (!isNativeNumber(arguments[1].type))
-            throw Exception("Second argument for function " + getName() + " (delta) must be number",
+            throw Exception("Second argument for function " + getName() + " (delta) must be a number",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        if (arguments.size() == 2)
@ -627,7 +627,7 @@ public:
            {
                throw Exception(
                    "Function " + getName() + " supports 2 or 3 arguments. The 1st argument "
-                    "must be of type Date or DateTime. The 2nd argument must be number. "
+                    "must be of type Date or DateTime. The 2nd argument must be a number. "
                    "The 3rd argument (optional) must be "
                    "a constant string with timezone name. The timezone argument is allowed "
                    "only when the 1st argument has the type DateTime",
--- a/src/Functions/FunctionNumericPredicate.h
+++ b/src/Functions/FunctionNumericPredicate.h
@ -46,7 +46,7 @@ public:
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (!isNativeNumber(arguments.front()))
-            throw Exception{"Argument for function " + getName() + " must be number", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
+            throw Exception{"Argument for function " + getName() + " must be a number", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        return std::make_shared<DataTypeUInt8>();
    }
--- a/src/Functions/makeDate.cpp
+++ b/src/Functions/makeDate.cpp
@ -62,7 +62,7 @@ protected:
            DataTypePtr argument_type = arguments[i].type;
            if (!isNumber(argument_type))
                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    "Argument '{}' for function {} must be number", std::string(argument_names[i]), getName());
+                    "Argument '{}' for function {} must be a number", std::string(argument_names[i]), getName());
        }
    }

@ -322,7 +322,7 @@ public:
            const auto& fraction_argument = arguments[argument_names.size()];
            if (!isNumber(fraction_argument.type))
                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                    "Argument 'fraction' for function {} must be number", getName());
+                    "Argument 'fraction' for function {} must be a number", getName());
        }

        /// Optional precision argument
--- a/src/Functions/throwIf.cpp
+++ b/src/Functions/throwIf.cpp
@ -4,9 +4,10 @@
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnsNumber.h>
 #include <Columns/ColumnsCommon.h>
+#include <Common/ErrorCodes.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <IO/WriteHelpers.h>
-
+#include <Interpreters/Context.h>

 namespace DB
 {
@ -21,55 +22,56 @@ namespace ErrorCodes
 namespace
 {

+/// The regex-based code style check script in CI complains when it sees "ErrorCodes:: ErrorCode" (space added to avoid another match).
+/// Because this expression is only used in this file, don't add some suppression mechanism to the already complex style checker, instead
+/// work around by creating a namespace alias.
+namespace ErrorCodeAlias = ErrorCodes;
+
 /// Throw an exception if the argument is non zero.
 class FunctionThrowIf : public IFunction
 {
 public:
    static constexpr auto name = "throwIf";
-    static FunctionPtr create(ContextPtr)
-    {
-        return std::make_shared<FunctionThrowIf>();
-    }

-    String getName() const override
-    {
-        return name;
-    }
+    static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionThrowIf>(context); }

+    explicit FunctionThrowIf(ContextPtr context_) : allow_custom_error_code_argument(context_->getSettingsRef().allow_custom_error_code_in_throwif) {}
+    String getName() const override { return name; }
    bool isVariadic() const override { return true; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-    size_t getNumberOfArguments() const override
-    {
-        return 0;
-    }
+    size_t getNumberOfArguments() const override { return 0; }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        const size_t number_of_arguments = arguments.size();

-        if (number_of_arguments < 1 || number_of_arguments > 2)
+        if (number_of_arguments < 1 || number_of_arguments > (allow_custom_error_code_argument ? 3 : 2))
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-                "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2",
-                getName(),
-                toString(number_of_arguments));
+                "Number of arguments for function {} doesn't match: passed {}, should be {}",
+                getName(), toString(number_of_arguments), allow_custom_error_code_argument ? "1 or 2 or 3" : "1 or 2");

        if (!isNativeNumber(arguments[0]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "Argument for function {} must be number",
-                getName());
+                "First argument of function {} must be a number (passed: {})", getName(), arguments[0]->getName());

        if (number_of_arguments > 1 && !isString(arguments[1]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "Illegal type {} of argument of function {}",
-                arguments[1]->getName(),
-                getName());
+                "Second argument of function {} must be a string (passed: {})", getName(), arguments[1]->getName());
+
+        if (allow_custom_error_code_argument && number_of_arguments > 2)
+        {
+            WhichDataType which(arguments[2]);
+            if (!(which.isInt8() || which.isInt16() || which.isInt32()))
+                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Third argument of function {} must be Int8, Int16 or Int32 (passed: {})", getName(), arguments[2]->getName());
+        }


        return std::make_shared<DataTypeUInt8>();
    }

    bool useDefaultImplementationForConstants() const override { return false; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }

    /** Prevent constant folding for FunctionThrowIf because for short circuit evaluation
      * it is unsafe to evaluate this function during DAG analysis.
@ -86,36 +88,44 @@ public:
        {
            const auto * message_column = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
            if (!message_column)
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN,
-                    "Second argument for function {} must be constant String",
-                    getName());
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant String", getName());

            custom_message = message_column->getValue<String>();
        }

+        std::optional<ErrorCodeAlias::ErrorCode> custom_error_code;
+        if (allow_custom_error_code_argument && arguments.size() == 3)
+        {
+            if (!isColumnConst(*(arguments[2].column)))
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Third argument for function {} must be constant number", getName());
+
+            custom_error_code = arguments[2].column->getInt(0);
+        }
+
        auto first_argument_column = arguments.front().column;
        const auto * in = first_argument_column.get();

        ColumnPtr res;
-        if (!((res = execute<UInt8>(in, custom_message))
-            || (res = execute<UInt16>(in, custom_message))
-            || (res = execute<UInt32>(in, custom_message))
-            || (res = execute<UInt64>(in, custom_message))
-            || (res = execute<Int8>(in, custom_message))
-            || (res = execute<Int16>(in, custom_message))
-            || (res = execute<Int32>(in, custom_message))
-            || (res = execute<Int64>(in, custom_message))
-            || (res = execute<Float32>(in, custom_message))
-            || (res = execute<Float64>(in, custom_message))))
+        if (!((res = execute<UInt8>(in, custom_message, custom_error_code))
+            || (res = execute<UInt16>(in, custom_message, custom_error_code))
+            || (res = execute<UInt32>(in, custom_message, custom_error_code))
+            || (res = execute<UInt64>(in, custom_message, custom_error_code))
+            || (res = execute<Int8>(in, custom_message, custom_error_code))
+            || (res = execute<Int16>(in, custom_message, custom_error_code))
+            || (res = execute<Int32>(in, custom_message, custom_error_code))
+            || (res = execute<Int64>(in, custom_message, custom_error_code))
+            || (res = execute<Float32>(in, custom_message, custom_error_code))
+            || (res = execute<Float64>(in, custom_message, custom_error_code))))
        {
-            throw Exception{"Illegal column " + in->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName());
        }

        return res;
    }

+private:
    template <typename T>
-    ColumnPtr execute(const IColumn * in_untyped, const std::optional<String> & message) const
+    ColumnPtr execute(const IColumn * in_untyped, const std::optional<String> & message, const std::optional<ErrorCodeAlias::ErrorCode> & error_code) const
    {
        const auto * in = checkAndGetColumn<ColumnVector<T>>(in_untyped);

@ -127,8 +137,9 @@ public:
            const auto & in_data = in->getData();
            if (!memoryIsZero(in_data.data(), 0, in_data.size() * sizeof(in_data[0])))
            {
-                throw Exception(ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO,
-                    message.value_or("Value passed to '" + getName() + "' function is non zero"));
+                throw Exception(
+                    error_code.value_or(ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO),
+                    message.value_or("Value passed to '" + getName() + "' function is non-zero"));
            }

            size_t result_size = in_untyped->size();
@ -139,6 +150,8 @@ public:

        return nullptr;
    }
+
+    bool allow_custom_error_code_argument;
 };

 }
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@ -70,6 +70,7 @@ namespace ErrorCodes
    extern const int THERE_IS_NO_COLUMN;
    extern const int UNKNOWN_EXCEPTION;
    extern const int INCORRECT_NUMBER_OF_COLUMNS;
+    extern const int INCORRECT_DATA;
 }

 /// Inserts numeric data right into internal column data to reduce an overhead
@ -266,6 +267,9 @@ static ColumnWithTypeAndName readColumnWithDecimalData(std::shared_ptr<arrow::Ch
 /// Creates a null bytemap from arrow's null bitmap
 static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr<arrow::ChunkedArray> & arrow_column)
 {
+    if (!arrow_column->null_count())
+        return ColumnUInt8::create(arrow_column->length(), 0);
+
    auto nullmap_column = ColumnUInt8::create();
    PaddedPODArray<UInt8> & bytemap_data = assert_cast<ColumnVector<UInt8> &>(*nullmap_column).getData();
    bytemap_data.reserve(arrow_column->length());
@ -298,14 +302,121 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr<arrow::ChunkedAr
    return offsets_column;
 }

-static ColumnPtr readColumnWithIndexesData(std::shared_ptr<arrow::ChunkedArray> & arrow_column)
+/*
+ * Arrow Dictionary and ClickHouse LowCardinality types are a bit different.
+ * Dictionary(Nullable(X)) in ArrowColumn format is composed of a nullmap, dictionary and an index.
+ * It doesn't have the concept of null or default values.
+ * An empty string is just a regular value appended at any position of the dictionary.
+ * Null values have an index of 0, but it should be ignored since the nullmap will return null.
+ * In ClickHouse LowCardinality, it's different. The dictionary contains null (if dictionary type is Nullable)
+ * and default values at the beginning. [default, ...] when default values have index of 0 or [null, default, ...]
+ * when null values have an index of 0 and default values have an index of 1.
+ * So, we should remap indexes while converting Arrow Dictionary to ClickHouse LowCardinality
+ * */
+template <typename NumericType, typename VectorType = ColumnVector<NumericType>>
+static ColumnWithTypeAndName readColumnWithIndexesDataImpl(std::shared_ptr<arrow::ChunkedArray> & arrow_column, const String & column_name, Int64 default_value_index, NumericType dict_size, bool is_nullable)
+{
+    auto internal_type = std::make_shared<DataTypeNumber<NumericType>>();
+    auto internal_column = internal_type->createColumn();
+    auto & column_data = static_cast<VectorType &>(*internal_column).getData();
+    column_data.reserve(arrow_column->length());
+    NumericType shift = is_nullable ? 2 : 1;
+
+    for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
+    {
+        std::shared_ptr<arrow::Array> chunk = arrow_column->chunk(chunk_i);
+        if (chunk->length() == 0)
+            continue;
+
+        /// buffers[0] is a null bitmap and buffers[1] are actual values
+        std::shared_ptr<arrow::Buffer> buffer = chunk->data()->buffers[1];
+        const auto * data = reinterpret_cast<const NumericType *>(buffer->data());
+
+        /// Check that indexes are correct (protection against corrupted files)
+        for (int64_t i = 0; i != chunk->length(); ++i)
+        {
+            if (data[i] < 0 || data[i] >= dict_size)
+                throw Exception(ErrorCodes::INCORRECT_DATA, "Index {} in Dictionary column is out of bounds, dictionary size is {}", Int64(data[i]), UInt64(dict_size));
+        }
+
+        /// If dictionary type is not nullable and arrow dictionary contains default type
+        /// at 0 index, we don't need to remap anything (it's the case when this data
+        /// was generated by ClickHouse)
+        if (!is_nullable && default_value_index == 0)
+        {
+            column_data.insert_assume_reserved(data, data + chunk->length());
+        }
+        /// If dictionary don't contain default value, we should move all indexes
+        /// to the right one or two (if dictionary is Nullable) positions
+        /// Example:
+        /// Dictionary:
+        ///     dict: ["one", "two"]
+        ///     indexes: [0, 1, 0]
+        /// LowCardinality:
+        ///     dict: ["", "one", "two"]
+        ///     indexes: [1, 2, 1]
+        /// LowCardinality(Nullable):
+        ///     dict: [null, "", "one", "two"]
+        ///     indexes: [2, 3, 2]
+        else if (default_value_index == -1)
+        {
+            for (int64_t i = 0; i != chunk->length(); ++i)
+            {
+                if (chunk->IsNull(i))
+                    column_data.push_back(0);
+                else
+                    column_data.push_back(data[i] + shift);
+            }
+        }
+        /// If dictionary contains default value, we change all indexes of it to
+        /// 0 or 1 (if dictionary type is Nullable) and move all indexes
+        /// that are less then default value index to the right one or two
+        /// (if dictionary is Nullable) position and all indexes that are
+        /// greater then default value index zero or one (if dictionary is Nullable)
+        /// positions.
+        /// Example:
+        /// Dictionary:
+        ///     dict: ["one", "two", "", "three"]
+        ///     indexes: [0, 1, 2, 3, 0]
+        /// LowCardinality :
+        ///     dict: ["", "one", "two", "three"]
+        ///     indexes: [1, 2, 0, 3, 1]
+        /// LowCardinality(Nullable):
+        ///     dict: [null, "", "one", "two", "three"]
+        ///     indexes: [2, 3, 1, 4, 2]
+        else
+        {
+            NumericType new_default_index = is_nullable ? 1 : 0;
+            NumericType default_index = NumericType(default_value_index);
+            for (int64_t i = 0; i != chunk->length(); ++i)
+            {
+                if (chunk->IsNull(i))
+                    column_data.push_back(0);
+                else
+                {
+                    NumericType value = data[i];
+                    if (value == default_index)
+                        value = new_default_index;
+                    else if (value < default_index)
+                        value += shift;
+                    else
+                        value += shift - 1;
+                    column_data.push_back(value);
+                }
+            }
+        }
+    }
+    return {std::move(internal_column), std::move(internal_type), column_name};
+}
+
+static ColumnPtr readColumnWithIndexesData(std::shared_ptr<arrow::ChunkedArray> & arrow_column, Int64 default_value_index, UInt64 dict_size, bool is_nullable)
 {
    switch (arrow_column->type()->id())
    {
 #    define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \
            case ARROW_NUMERIC_TYPE: \
            { \
-                    return readColumnWithNumericData<CPP_NUMERIC_TYPE>(arrow_column, "").column; \
+                    return readColumnWithIndexesDataImpl<CPP_NUMERIC_TYPE>(arrow_column, "", default_value_index, dict_size, is_nullable).column; \
            }
        FOR_ARROW_INDEXES_TYPES(DISPATCH)
 #    undef DISPATCH
@ -327,85 +438,25 @@ static std::shared_ptr<arrow::ChunkedArray> getNestedArrowColumn(std::shared_ptr
    return std::make_shared<arrow::ChunkedArray>(array_vector);
 }

-static ColumnWithTypeAndName createLCColumnFromArrowDictionaryValues(
-    const std::shared_ptr<ColumnWithTypeAndName> & dict_values,
-    const ColumnPtr & indexes_column,
-    const String & column_name
-)
-{
-    auto lc_type = std::make_shared<DataTypeLowCardinality>(dict_values->type);
-
-    auto lc_column = lc_type->createColumn();
-
-    for (auto i = 0u; i < indexes_column->size(); i++)
-    {
-        Field f;
-        dict_values->column->get(indexes_column->getUInt(i), f);
-        lc_column->insert(f);
-    }
-
-    return {std::move(lc_column), std::move(lc_type), column_name};
-}
-
-/*
- * Dictionary(Nullable(X)) in ArrowColumn format is composed of a nullmap, dictionary and an index.
- * It doesn't have the concept of null or default values.
- * An empty string is just a regular value appended at any position of the dictionary.
- * Null values have an index of 0, but it should be ignored since the nullmap will return null.
- * In ClickHouse LowCardinality, it's different. The dictionary contains null and default values at the beginning.
- * [null, default, ...]. Therefore, null values have an index of 0 and default values have an index of 1.
- * No nullmap is used.
- * */
-static ColumnWithTypeAndName createLCOfNullableColumnFromArrowDictionaryValues(
-    const std::shared_ptr<ColumnWithTypeAndName> & dict_values,
-    const ColumnPtr & indexes_column,
-    const ColumnPtr & nullmap_column,
-    const String & column_name
-)
-{
-    /*
-     * ArrowColumn format handles nulls by maintaining a nullmap column, there is no nullable type.
-     * Therefore, dict_values->type is the actual data type/ non-nullable. It needs to be transformed into nullable
-     * so LC column is created from nullable type and a null value at the beginning of the collection
-     * is automatically added.
-     * */
-    auto lc_type = std::make_shared<DataTypeLowCardinality>(makeNullable(dict_values->type));
-
-    auto lc_column = lc_type->createColumn();
-
-    for (auto i = 0u; i < indexes_column->size(); i++)
-    {
-        if (nullmap_column && nullmap_column->getBool(i))
-        {
-            lc_column->insertDefault();
-        }
-        else
-        {
-            Field f;
-            dict_values->column->get(indexes_column->getUInt(i), f);
-            lc_column->insert(f);
-        }
-    }
-
-    return {std::move(lc_column), std::move(lc_type), column_name};
-}
-
 static ColumnWithTypeAndName readColumnFromArrowColumn(
    std::shared_ptr<arrow::ChunkedArray> & arrow_column,
    const std::string & column_name,
    const std::string & format_name,
    bool is_nullable,
-    std::unordered_map<String, std::shared_ptr<ColumnWithTypeAndName>> & dictionary_values,
-    bool read_ints_as_dates,
+    std::unordered_map<String, ArrowColumnToCHColumn::DictionaryInfo> & dictionary_infos,
    bool allow_null_type,
    bool skip_columns_with_unsupported_types,
-    bool & skipped)
+    bool & skipped,
+    DataTypePtr type_hint = nullptr)
 {
-    if (!is_nullable && arrow_column->null_count() && arrow_column->type()->id() != arrow::Type::LIST
+    if (!is_nullable && (arrow_column->null_count() || (type_hint && type_hint->isNullable())) && arrow_column->type()->id() != arrow::Type::LIST
        && arrow_column->type()->id() != arrow::Type::MAP && arrow_column->type()->id() != arrow::Type::STRUCT &&
        arrow_column->type()->id() != arrow::Type::DICTIONARY)
    {
-        auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped);
+        DataTypePtr nested_type_hint;
+        if (type_hint)
+            nested_type_hint = removeNullable(type_hint);
+        auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint);
        if (skipped)
            return {};
        auto nullmap_column = readByteMapFromArrowColumn(arrow_column);
@ -435,14 +486,14 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
        case arrow::Type::UINT16:
        {
            auto column = readColumnWithNumericData<UInt16>(arrow_column, column_name);
-            if (read_ints_as_dates)
+            if (type_hint && (isDateOrDate32(type_hint) || isDateTime(type_hint) || isDateTime64(type_hint)))
                column.type = std::make_shared<DataTypeDate>();
            return column;
        }
        case arrow::Type::UINT32:
        {
            auto column = readColumnWithNumericData<UInt32>(arrow_column, column_name);
-            if (read_ints_as_dates)
+            if (type_hint && (isDateOrDate32(type_hint) || isDateTime(type_hint) || isDateTime64(type_hint)))
                column.type = std::make_shared<DataTypeDateTime>();
            return column;
        }
@ -454,8 +505,15 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
            return readColumnWithDecimalData<arrow::Decimal256Array>(arrow_column, column_name);
        case arrow::Type::MAP:
        {
+            DataTypePtr nested_type_hint;
+            if (type_hint)
+            {
+                const auto * map_type_hint = typeid_cast<const DataTypeMap *>(type_hint.get());
+                if (map_type_hint)
+                    nested_type_hint = assert_cast<const DataTypeArray *>(map_type_hint->getNestedType().get())->getNestedType();
+            }
            auto arrow_nested_column = getNestedArrowColumn(arrow_column);
-            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped);
+            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint);
            if (skipped)
                return {};

@ -469,8 +527,15 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
        }
        case arrow::Type::LIST:
        {
+            DataTypePtr nested_type_hint;
+            if (type_hint)
+            {
+                const auto * array_type_hint = typeid_cast<const DataTypeArray *>(type_hint.get());
+                if (array_type_hint)
+                    nested_type_hint = array_type_hint->getNestedType();
+            }
            auto arrow_nested_column = getNestedArrowColumn(arrow_column);
-            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped);
+            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint);
            if (skipped)
                return {};
            auto offsets_column = readOffsetsFromArrowListColumn(arrow_column);
@ -493,11 +558,25 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
            Columns tuple_elements;
            DataTypes tuple_types;
            std::vector<String> tuple_names;
+            const auto * tuple_type_hint = type_hint ? typeid_cast<const DataTypeTuple *>(type_hint.get()) : nullptr;

            for (int i = 0; i != arrow_struct_type->num_fields(); ++i)
            {
+                auto field_name = arrow_struct_type->field(i)->name();
+                DataTypePtr nested_type_hint;
+                if (tuple_type_hint)
+                {
+                    if (tuple_type_hint->haveExplicitNames())
+                    {
+                        auto pos = tuple_type_hint->tryGetPositionByName(field_name);
+                        if (pos)
+                            nested_type_hint = tuple_type_hint->getElement(*pos);
+                    }
+                    else if (size_t(i) < tuple_type_hint->getElements().size())
+                        nested_type_hint = tuple_type_hint->getElement(i);
+                }
                auto nested_arrow_column = std::make_shared<arrow::ChunkedArray>(nested_arrow_columns[i]);
-                auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped);
+                auto element = readColumnFromArrowColumn(nested_arrow_column, field_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint);
                if (skipped)
                    return {};
                tuple_elements.emplace_back(std::move(element.column));
@ -511,9 +590,11 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
        }
        case arrow::Type::DICTIONARY:
        {
-            auto & dict_values = dictionary_values[column_name];
+            auto & dict_info = dictionary_infos[column_name];
+            const auto is_lc_nullable = arrow_column->null_count() > 0 || (type_hint && type_hint->isLowCardinalityNullable());
+
            /// Load dictionary values only once and reuse it.
-            if (!dict_values)
+            if (!dict_info.values)
            {
                arrow::ArrayVector dict_array;
                for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i)
@ -522,8 +603,22 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
                    dict_array.emplace_back(dict_chunk.dictionary());
                }
                auto arrow_dict_column = std::make_shared<arrow::ChunkedArray>(dict_array);
-                auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped);
-                dict_values = std::make_shared<ColumnWithTypeAndName>(std::move(dict_column));
+                auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped);
+                for (size_t i = 0; i != dict_column.column->size(); ++i)
+                {
+                    if (dict_column.column->isDefaultAt(i))
+                    {
+                        dict_info.default_value_index = i;
+                        break;
+                    }
+                }
+                auto lc_type = std::make_shared<DataTypeLowCardinality>(is_lc_nullable ? makeNullable(dict_column.type) : dict_column.type);
+                auto tmp_lc_column = lc_type->createColumn();
+                auto tmp_dict_column = IColumn::mutate(assert_cast<ColumnLowCardinality *>(tmp_lc_column.get())->getDictionaryPtr());
+                dynamic_cast<IColumnUnique *>(tmp_dict_column.get())->uniqueInsertRangeFrom(*dict_column.column, 0, dict_column.column->size());
+                dict_column.column = std::move(tmp_dict_column);
+                dict_info.values = std::make_shared<ColumnWithTypeAndName>(std::move(dict_column));
+                dict_info.dictionary_size = arrow_dict_column->length();
            }

            arrow::ArrayVector indexes_array;
@ -534,20 +629,10 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
            }

            auto arrow_indexes_column = std::make_shared<arrow::ChunkedArray>(indexes_array);
-            auto indexes_column = readColumnWithIndexesData(arrow_indexes_column);
-
-            const auto contains_null = arrow_column->null_count() > 0;
-
-            if (contains_null)
-            {
-                auto nullmap_column = readByteMapFromArrowColumn(arrow_column);
-
-                return createLCOfNullableColumnFromArrowDictionaryValues(dict_values, indexes_column, nullmap_column, column_name);
-            }
-            else
-            {
-                return createLCColumnFromArrowDictionaryValues(dict_values, indexes_column, column_name);
-            }
+            auto indexes_column = readColumnWithIndexesData(arrow_indexes_column, dict_info.default_value_index, dict_info.dictionary_size, is_lc_nullable);
+            auto lc_column = ColumnLowCardinality::create(dict_info.values->column, indexes_column);
+            auto lc_type = std::make_shared<DataTypeLowCardinality>(is_lc_nullable ? makeNullable(dict_info.values->type) : dict_info.values->type);
+            return {std::move(lc_column), std::move(lc_type), column_name};
        }
 #    define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \
        case ARROW_NUMERIC_TYPE: \
@ -623,13 +708,13 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(

        arrow::ArrayVector array_vector = {arrow_array};
        auto arrow_column = std::make_shared<arrow::ChunkedArray>(array_vector);
-        std::unordered_map<std::string, std::shared_ptr<ColumnWithTypeAndName>> dict_values;
+        std::unordered_map<std::string, DictionaryInfo> dict_infos;
        bool skipped = false;
        bool allow_null_type = false;
        if (hint_header && hint_header->has(field->name()) && hint_header->getByName(field->name()).type->isNullable())
            allow_null_type = true;
        ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(
-            arrow_column, field->name(), format_name, false, dict_values, false, allow_null_type, skip_columns_with_unsupported_types, skipped);
+            arrow_column, field->name(), format_name, false, dict_infos, allow_null_type, skip_columns_with_unsupported_types, skipped);
        if (!skipped)
            sample_columns.emplace_back(std::move(sample_column));
    }
@ -700,9 +785,17 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
                {
                    if (!nested_tables.contains(search_nested_table_name))
                    {
+                        NamesAndTypesList nested_columns;
+                        for (const auto & name_and_type : header.getNamesAndTypesList())
+                        {
+                            if (name_and_type.name.starts_with(nested_table_name + "."))
+                                nested_columns.push_back(name_and_type);
+                        }
+                        auto nested_table_type = Nested::collect(nested_columns).front().type;
+
                        std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[search_nested_table_name];
                        ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(
-                            arrow_column, nested_table_name, format_name, false, dictionary_values, true, true, false, skipped)};
+                            arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, skipped, nested_table_type)};
                        BlockPtr block_ptr = std::make_shared<Block>(cols);
                        auto column_extractor = std::make_shared<NestedColumnExtractHelper>(*block_ptr, case_insensitive_matching);
                        nested_tables[search_nested_table_name] = {block_ptr, column_extractor};
@ -735,7 +828,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
        {
            auto arrow_column = name_to_column_ptr[search_column_name];
            column = readColumnFromArrowColumn(
-                arrow_column, header_column.name, format_name, false, dictionary_values, true, true, false, skipped);
+                arrow_column, header_column.name, format_name, false, dictionary_infos, true, false, skipped, header_column.type);
        }

        try
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
@ -44,6 +44,14 @@ public:
        const Block * hint_header = nullptr,
        bool ignore_case = false);

+    struct DictionaryInfo
+    {
+        std::shared_ptr<ColumnWithTypeAndName> values;
+        Int64 default_value_index = -1;
+        UInt64 dictionary_size;
+    };
+
+
 private:
    const Block & header;
    const std::string format_name;
@ -55,7 +63,7 @@ private:
    /// Map {column name : dictionary column}.
    /// To avoid converting dictionary from Arrow Dictionary
    /// to LowCardinality every chunk we save it and reuse.
-    std::unordered_map<std::string, std::shared_ptr<ColumnWithTypeAndName>> dictionary_values;
+    std::unordered_map<std::string, DictionaryInfo> dictionary_infos;
 };

 }
--- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
+++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp
@ -235,27 +235,30 @@ namespace DB
    }

    template<typename T>
-    static PaddedPODArray<Int64> extractIndexesImpl(ColumnPtr column, size_t start, size_t end)
+    static PaddedPODArray<Int64> extractIndexesImpl(ColumnPtr column, size_t start, size_t end, bool shift)
    {
        const PaddedPODArray<T> & data = assert_cast<const ColumnVector<T> *>(column.get())->getData();
        PaddedPODArray<Int64> result;
        result.reserve(end - start);
-        std::transform(data.begin() + start, data.begin() + end, std::back_inserter(result), [](T value) { return Int64(value); });
+        if (shift)
+            std::transform(data.begin() + start, data.begin() + end, std::back_inserter(result), [](T value) { return Int64(value) - 1; });
+        else
+            std::transform(data.begin() + start, data.begin() + end, std::back_inserter(result), [](T value) { return Int64(value); });
        return result;
    }

-    static PaddedPODArray<Int64> extractIndexesImpl(ColumnPtr column, size_t start, size_t end)
+    static PaddedPODArray<Int64> extractIndexesImpl(ColumnPtr column, size_t start, size_t end, bool shift)
    {
        switch (column->getDataType())
        {
            case TypeIndex::UInt8:
-                return extractIndexesImpl<UInt8>(column, start, end);
+                return extractIndexesImpl<UInt8>(column, start, end, shift);
            case TypeIndex::UInt16:
-                return extractIndexesImpl<UInt16>(column, start, end);
+                return extractIndexesImpl<UInt16>(column, start, end, shift);
            case TypeIndex::UInt32:
-                return extractIndexesImpl<UInt32>(column, start, end);
+                return extractIndexesImpl<UInt32>(column, start, end, shift);
            case TypeIndex::UInt64:
-                return extractIndexesImpl<UInt64>(column, start, end);
+                return extractIndexesImpl<UInt64>(column, start, end, shift);
            default:
                throw Exception(fmt::format("Indexes column must be ColumnUInt, got {}.", column->getName()),
                                ErrorCodes::LOGICAL_ERROR);
@ -267,7 +270,7 @@ namespace DB
        const String & column_name,
        ColumnPtr & column,
        const std::shared_ptr<const IDataType> & column_type,
-        const PaddedPODArray<UInt8> * null_bytemap,
+        const PaddedPODArray<UInt8> *,
        arrow::ArrayBuilder * array_builder,
        String format_name,
        size_t start,
@ -278,6 +281,7 @@ namespace DB
        const auto * column_lc = assert_cast<const ColumnLowCardinality *>(column.get());
        arrow::DictionaryBuilder<ValueType> * builder = assert_cast<arrow::DictionaryBuilder<ValueType> *>(array_builder);
        auto & dict_values = dictionary_values[column_name];
+        bool is_nullable = column_type->isLowCardinalityNullable();

        /// Convert dictionary from LowCardinality to Arrow dictionary only once and then reuse it.
        if (!dict_values)
@ -288,9 +292,9 @@ namespace DB
            arrow::Status status = MakeBuilder(pool, value_type, &values_builder);
            checkStatus(status, column->getName(), format_name);

-            auto dict_column = column_lc->getDictionary().getNestedColumn();
-            const auto & dict_type = assert_cast<const DataTypeLowCardinality *>(column_type.get())->getDictionaryType();
-            fillArrowArray(column_name, dict_column, dict_type, nullptr, values_builder.get(), format_name, 0, dict_column->size(), output_string_as_string, dictionary_values);
+            auto dict_column = column_lc->getDictionary().getNestedNotNullableColumn();
+            const auto & dict_type = removeNullable(assert_cast<const DataTypeLowCardinality *>(column_type.get())->getDictionaryType());
+            fillArrowArray(column_name, dict_column, dict_type, nullptr, values_builder.get(), format_name, is_nullable, dict_column->size(), output_string_as_string, dictionary_values);
            status = values_builder->Finish(&dict_values);
            checkStatus(status, column->getName(), format_name);
        }
@ -300,15 +304,14 @@ namespace DB

        /// AppendIndices in DictionaryBuilder works only with int64_t data, so we cannot use
        /// fillArrowArray here and should copy all indexes to int64_t container.
-        auto indexes = extractIndexesImpl(column_lc->getIndexesPtr(), start, end);
+        auto indexes = extractIndexesImpl(column_lc->getIndexesPtr(), start, end, is_nullable);
        const uint8_t * arrow_null_bytemap_raw_ptr = nullptr;
        PaddedPODArray<uint8_t> arrow_null_bytemap;
-        if (null_bytemap)
+        if (column_type->isLowCardinalityNullable())
        {
-            /// Invert values since Arrow interprets 1 as a non-null value, while CH as a null
            arrow_null_bytemap.reserve(end - start);
            for (size_t i = start; i < end; ++i)
-                arrow_null_bytemap.emplace_back(!(*null_bytemap)[i]);
+                arrow_null_bytemap.emplace_back(!column_lc->isNullAt(i));

            arrow_null_bytemap_raw_ptr = arrow_null_bytemap.data();
        }
@ -680,7 +683,7 @@ namespace DB
        {
            auto nested_type = assert_cast<const DataTypeLowCardinality *>(column_type.get())->getDictionaryType();
            const auto * lc_column = assert_cast<const ColumnLowCardinality *>(column.get());
-            const auto & nested_column = lc_column->getDictionaryPtr();
+            const auto & nested_column = lc_column->getDictionary().getNestedColumn();
            const auto & indexes_column = lc_column->getIndexesPtr();
            return arrow::dictionary(
                getArrowTypeForLowCardinalityIndexes(indexes_column),
--- a/tests/ci/cherry_pick.py
+++ b/tests/ci/cherry_pick.py
@ -211,7 +211,7 @@ Merge it only if you intend to backport changes to the target branch, otherwise
                "Assing to assignees of the original PR: %s",
                ", ".join(user.login for user in self.pr.assignees),
            )
-            self.cherrypick_pr.add_to_assignees(self.pr.assignees)
+            self.cherrypick_pr.add_to_assignees(*self.pr.assignees)
        logging.info("Assign to the author of the original PR: %s", self.pr.user.login)
        self.cherrypick_pr.add_to_assignees(self.pr.user)

@ -249,7 +249,7 @@ Merge it only if you intend to backport changes to the target branch, otherwise
                "Assing to assignees of the original PR: %s",
                ", ".join(user.login for user in self.pr.assignees),
            )
-            self.cherrypick_pr.add_to_assignees(self.pr.assignees)
+            self.cherrypick_pr.add_to_assignees(*self.pr.assignees)
        logging.info("Assign to the author of the original PR: %s", self.pr.user.login)
        self.backport_pr.add_to_assignees(self.pr.user)

--- a/tests/ci/push_to_artifactory.py
+++ b/tests/ci/push_to_artifactory.py
@ -98,7 +98,7 @@ class Packages:

 class S3:
    template = (
-        f"{S3_DOWNLOAD}"
+        f"{S3_DOWNLOAD}/"
        # "clickhouse-builds/"
        f"{S3_BUILDS_BUCKET}/"
        # "33333/" or "21.11/" from --release, if pull request is omitted
--- a/tests/performance/arrow_format.xml
+++ b/tests/performance/arrow_format.xml
@ -0,0 +1,10 @@
+<test>
+    <create_query>CREATE TABLE test (uint32 UInt32, n_uint32 Nullable(UInt32), lc LowCardinality(String)) ENGINE=File(Arrow) SETTINGS output_format_arrow_low_cardinality_as_dictionary=1</create_query>
+    <fill_query>insert into test select number, number, toString(number % 10000) from numbers(10000000)</fill_query>
+
+    <query>SELECT uint32 from test format Null</query>
+    <query>SELECT n_uint32 from test format Null</query>
+    <query>SELECT lc from test format Null</query>
+
+    <drop_query>DROP TABLE IF EXISTS test</drop_query>
+</test>
--- a/tests/queries/0_stateless/00602_throw_if.reference
+++ b/tests/queries/0_stateless/00602_throw_if.reference
@ -1,3 +1,13 @@
 1
 1
-1000000
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1000
--- a/tests/queries/0_stateless/00602_throw_if.sh
+++ b/tests/queries/0_stateless/00602_throw_if.sh
@ -4,9 +4,43 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh

-default_exception_message="Value passed to 'throwIf' function is non zero"
-custom_exception_message="Number equals 1000000"

-${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000000) FROM system.numbers" 2>&1 | grep -cF "$default_exception_message"
-${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000000, '$custom_exception_message') FROM system.numbers" 2>&1 | grep -v '^(query: ' | grep -cF "$custom_exception_message"
-${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT sum(x = 0) FROM (SELECT throwIf(number = 1000000) AS x FROM numbers(1000000))" 2>&1
+default_exception_message="Value passed to 'throwIf' function is non-zero"
+custom_exception_message="Number equals 1000"
+
+${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000) FROM system.numbers" 2>&1 \
+    | grep -cF "$default_exception_message"
+
+${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000, '$custom_exception_message') FROM system.numbers" 2>&1 \
+    | grep -v '^(query: ' | grep -cF "$custom_exception_message"
+
+
+# Custom error code arguments are not enabled via configuration.
+${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000, '$custom_exception_message', 1) FROM system.numbers" 2>&1 \
+    | grep -v '^(query: ' | grep -c "Number of arguments for function throwIf doesn't match: passed 3, should be 1 or 2"
+
+# Custom error code argument enabled but using the wrong type.
+${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000, '$custom_exception_message', 1) FROM system.numbers SETTINGS allow_custom_error_code_in_throwif=true" 2>&1 \
+    | grep -v '^(query: ' | grep -c "Third argument of function throwIf must be Int8, Int16 or Int32 (passed: UInt8)"
+
+
+# Normal error code + some weird ones.
+# Internal error codes use the upper half of 32-bit int.
+custom_error_codes=(
+    "42"
+    "0"       # OK
+    "101"     # UNEXPECTED_PACKET_FROM_CLIENT (interpreted by client)
+    "102"     # UNEXPECTED_PACKET_FROM_SERVER (interpreted by client)
+    "1001"    # STD_EXCEPTION
+    "1002"    # UNKNOWN_EXCEPTION
+    "999999"  # Unused error code.
+    "-1")     # Also unused. Weird but we should allow throwing negative errors.
+
+for ec in "${custom_error_codes[@]}"
+do
+    ${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT throwIf(number = 1000, '$custom_exception_message', toInt32($ec)) FROM system.numbers SETTINGS allow_custom_error_code_in_throwif=true" 2>&1 \
+        | grep -v '^(query: ' | grep -c "Code: $ec.*$custom_exception_message"
+done
+
+
+${CLICKHOUSE_CLIENT} --server_logs_file /dev/null --query="SELECT sum(x = 0) FROM (SELECT throwIf(number = 1000) AS x FROM numbers(1000))" 2>&1
--- a/tests/queries/0_stateless/00995_exception_while_insert.sh
+++ b/tests/queries/0_stateless/00995_exception_while_insert.sh
@ -10,6 +10,6 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;"

 $CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64, y UInt64 DEFAULT throwIf(x > 1500000)) ENGINE = Memory;"

-seq 1 2000000 | $CLICKHOUSE_CLIENT --query="INSERT INTO check(x) FORMAT TSV" 2>&1 | grep -q "Value passed to 'throwIf' function is non zero." && echo 'OK' || echo 'FAIL' ||:
+seq 1 2000000 | $CLICKHOUSE_CLIENT --query="INSERT INTO check(x) FORMAT TSV" 2>&1 | grep -q "Value passed to 'throwIf' function is non-zero." && echo 'OK' || echo 'FAIL' ||:

 $CLICKHOUSE_CLIENT --query="DROP TABLE check;"
--- a/tests/queries/0_stateless/02383_arrow_dict_special_cases.reference
+++ b/tests/queries/0_stateless/02383_arrow_dict_special_cases.reference
@ -0,0 +1,32 @@
+dict	LowCardinality(Nullable(String))					
+one
+two
+three
+one
+two
+dict	LowCardinality(Nullable(String))					
+
+one
+two
+three
+
+one
+three
+dict	LowCardinality(Nullable(String))					
+one
+two
+
+three
+one
+two
+three
+lc	LowCardinality(Nullable(String))					
+OK
+dict	LowCardinality(Nullable(String))					
+one
+two
+
+three
+one
+\N
+three
--- a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh
+++ b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh
@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+
+mkdir -p $USER_FILES_PATH/test_02383
+cp $CURDIR/data_arrow/dictionary*.arrow $USER_FILES_PATH/test_02383/
+cp $CURDIR/data_arrow/corrupted.arrow $USER_FILES_PATH/test_02383/
+cp $CURDIR/data_arrow/dict_with_nulls.arrow $USER_FILES_PATH/test_02383/
+
+$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary1.arrow')"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary1.arrow')"
+$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary2.arrow')"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary2.arrow')"
+$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary3.arrow')"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary3.arrow')"
+
+$CLICKHOUSE_CLIENT -q "desc file('test_02383/corrupted.arrow')"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02383/corrupted.arrow')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo OK || echo FAIL
+
+$CLICKHOUSE_CLIENT -q "desc file('test_02383/dict_with_nulls.arrow')"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dict_with_nulls.arrow')"
+
+
+rm -rf $USER_FILES_PATH/test_02383
--- a/tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.reference
+++ b/tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.reference
@ -0,0 +1,4 @@
+lc	LowCardinality(Nullable(String))					
+abc
+lc	LowCardinality(Nullable(String))					
+abc
--- a/tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.sql
+++ b/tests/queries/0_stateless/02384_nullable_low_cardinality_as_dict_in_arrow.sql
@ -0,0 +1,8 @@
+-- Tags: no-fasttest
+
+insert into function file(02384_data.arrow) select toLowCardinality(toNullable('abc')) as lc settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_string_as_string=0, engine_file_truncate_on_insert=1;
+desc file(02384_data.arrow);
+select * from file(02384_data.arrow);
+insert into function file(02384_data.arrow) select toLowCardinality(toNullable('abc')) as lc settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_string_as_string=1, engine_file_truncate_on_insert=1;
+desc file(02384_data.arrow);
+select * from file(02384_data.arrow);
--- a/tests/queries/0_stateless/data_arrow/corrupted.arrow
+++ b/tests/queries/0_stateless/data_arrow/corrupted.arrow
--- a/tests/queries/0_stateless/data_arrow/dict_with_nulls.arrow
+++ b/tests/queries/0_stateless/data_arrow/dict_with_nulls.arrow
--- a/tests/queries/0_stateless/data_arrow/dictionary1.arrow
+++ b/tests/queries/0_stateless/data_arrow/dictionary1.arrow
--- a/tests/queries/0_stateless/data_arrow/dictionary2.arrow
+++ b/tests/queries/0_stateless/data_arrow/dictionary2.arrow
--- a/tests/queries/0_stateless/data_arrow/dictionary3.arrow
+++ b/tests/queries/0_stateless/data_arrow/dictionary3.arrow