Merge pull request #35459 from ClickHouse/case-insensitive-column-matching

Support for case-insensitive column matching for ORC/Arrow/Parquet files
2024-11-21 23:21:59 +00:00 · 2022-03-25 08:09:29 +01:00 · 2022-03-25 08:09:29 +01:00 · f6439efcad
commit f6439efcad
parent 3f37a03e62 52099b23a1
26 changed files with 238 additions and 148 deletions
--- a/src/Core/Block.cpp
+++ b/src/Core/Block.cpp
@ -13,6 +13,7 @@

 #include <iterator>
 #include <base/sort.h>
+#include <boost/algorithm/string.hpp>


 namespace DB
@ -269,8 +270,18 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
 }


-const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
+const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const
 {
+    if (case_insensitive)
+    {
+        auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); });
+        if (found == data.end())
+        {
+            return nullptr;
+        }
+        return &*found;
+    }
+
    auto it = index_by_name.find(name);
    if (index_by_name.end() == it)
    {
@ -280,19 +291,23 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
 }


-const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
+const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool case_insensitive) const
 {
-    const auto * result = findByName(name);
+    const auto * result = findByName(name, case_insensitive);
    if (!result)
-        throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
-            , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
+        throw Exception(
+            "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

    return *result;
 }


-bool Block::has(const std::string & name) const
+bool Block::has(const std::string & name, bool case_insensitive) const
 {
+    if (case_insensitive)
+        return std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); })
+            != data.end();
+
    return index_by_name.end() != index_by_name.find(name);
 }

@ -301,8 +316,8 @@ size_t Block::getPositionByName(const std::string & name) const
 {
    auto it = index_by_name.find(name);
    if (index_by_name.end() == it)
-        throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
-            , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
+        throw Exception(
+            "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

    return it->second;
 }
--- a/src/Core/Block.h
+++ b/src/Core/Block.h
@ -60,21 +60,21 @@ public:
    ColumnWithTypeAndName & safeGetByPosition(size_t position);
    const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;

-    ColumnWithTypeAndName* findByName(const std::string & name)
+    ColumnWithTypeAndName* findByName(const std::string & name, bool case_insensitive = false)
    {
        return const_cast<ColumnWithTypeAndName *>(
-            const_cast<const Block *>(this)->findByName(name));
+            const_cast<const Block *>(this)->findByName(name, case_insensitive));
    }

-    const ColumnWithTypeAndName * findByName(const std::string & name) const;
+    const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const;

-    ColumnWithTypeAndName & getByName(const std::string & name)
+    ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false)
    {
        return const_cast<ColumnWithTypeAndName &>(
-            const_cast<const Block *>(this)->getByName(name));
+            const_cast<const Block *>(this)->getByName(name, case_insensitive));
    }

-    const ColumnWithTypeAndName & getByName(const std::string & name) const;
+    const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const;

    Container::iterator begin() { return data.begin(); }
    Container::iterator end() { return data.end(); }
@ -83,7 +83,7 @@ public:
    Container::const_iterator cbegin() const { return data.cbegin(); }
    Container::const_iterator cend() const { return data.cend(); }

-    bool has(const std::string & name) const;
+    bool has(const std::string & name, bool case_insensitive = false) const;

    size_t getPositionByName(const std::string & name) const;

--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -616,11 +616,13 @@ class IColumn;
    M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
    M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \
    M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \
-    M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \
    M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \
+    M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \
    M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \
    M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
+    M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \
    M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
+    M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
    M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
    M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
    M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \
--- a/src/DataTypes/NestedUtils.cpp
+++ b/src/DataTypes/NestedUtils.cpp
@ -15,6 +15,8 @@

 #include <Parsers/IAST.h>

+#include <boost/algorithm/string/case_conv.hpp>
+

 namespace DB
 {
@ -227,14 +229,17 @@ void validateArraySizes(const Block & block)
 }


-std::unordered_set<String> getAllTableNames(const Block & block)
+std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case)
 {
    std::unordered_set<String> nested_table_names;
-    for (auto & name : block.getNames())
+    for (const auto & name : block.getNames())
    {
        auto nested_table_name = Nested::extractTableName(name);
+        if (to_lower_case)
+            boost::to_lower(nested_table_name);
+
        if (!nested_table_name.empty())
-            nested_table_names.insert(nested_table_name);
+            nested_table_names.insert(std::move(nested_table_name));
    }
    return nested_table_names;
 }
--- a/src/DataTypes/NestedUtils.h
+++ b/src/DataTypes/NestedUtils.h
@ -32,7 +32,7 @@ namespace Nested
    void validateArraySizes(const Block & block);

    /// Get all nested tables names from a block.
-    std::unordered_set<String> getAllTableNames(const Block & block);
+    std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case = false);
 }

 }
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@ -89,10 +89,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
    format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
    format_settings.null_as_default = settings.input_format_null_as_default;
-    format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name;
    format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
    format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
    format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
+    format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
    format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
    format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
    format_settings.pretty.color = settings.output_format_pretty_color;
@ -123,9 +123,11 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
    format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
    format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns;
+    format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
    format_settings.orc.import_nested = settings.input_format_orc_import_nested;
    format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
    format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
+    format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
    format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
    format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
    format_settings.seekable_read = settings.input_format_allow_seeks;
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@ -32,7 +32,6 @@ struct FormatSettings
    bool null_as_default = true;
    bool decimal_trailing_zeros = false;
    bool defaults_for_omitted_fields = true;
-    bool use_lowercase_column_name = false;

    bool seekable_read = true;
    UInt64 max_rows_to_read_for_schema_inference = 100;
@ -75,6 +74,7 @@ struct FormatSettings
        bool low_cardinality_as_dictionary = false;
        bool import_nested = false;
        bool allow_missing_columns = false;
+        bool case_insensitive_column_matching = false;
    } arrow;

    struct
@ -137,6 +137,7 @@ struct FormatSettings
        UInt64 row_group_size = 1000000;
        bool import_nested = false;
        bool allow_missing_columns = false;
+        bool case_insensitive_column_matching = false;
    } parquet;

    struct Pretty
@ -217,6 +218,7 @@ struct FormatSettings
        bool import_nested = false;
        bool allow_missing_columns = false;
        int64_t row_batch_size = 100'000;
+        bool case_insensitive_column_matching = false;
    } orc;

    /// For capnProto format we should determine how to
--- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
@ -139,7 +139,11 @@ void ArrowBlockInputFormat::prepareReader()
    }

    arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
-        getPort().getHeader(), "Arrow", format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns);
+        getPort().getHeader(),
+        "Arrow",
+        format_settings.arrow.import_nested,
+        format_settings.arrow.allow_missing_columns,
+        format_settings.arrow.case_insensitive_column_matching);
    missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);

    if (stream)
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@ -31,6 +31,7 @@
 #include <algorithm>
 #include <arrow/builder.h>
 #include <arrow/array.h>
+#include <boost/algorithm/string/case_conv.hpp>

 /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn.
 #define FOR_ARROW_NUMERIC_TYPES(M) \
@ -484,19 +485,22 @@ static void checkStatus(const arrow::Status & status, const String & column_name
        throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()};
 }

-Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, const Block * hint_header)
+Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
+    const arrow::Schema & schema, const std::string & format_name, const Block * hint_header, bool ignore_case)
 {
    ColumnsWithTypeAndName sample_columns;
    std::unordered_set<String> nested_table_names;
    if (hint_header)
-        nested_table_names = Nested::getAllTableNames(*hint_header);
+        nested_table_names = Nested::getAllTableNames(*hint_header, ignore_case);
+
    for (const auto & field : schema.fields())
    {
-        if (hint_header && !hint_header->has(field->name()) && !nested_table_names.contains(field->name()))
+        if (hint_header && !hint_header->has(field->name(), ignore_case)
+            && !nested_table_names.contains(ignore_case ? boost::to_lower_copy(field->name()) : field->name()))
            continue;

        /// Create empty arrow column by it's type and convert it to ClickHouse column.
-        arrow::MemoryPool* pool = arrow::default_memory_pool();
+        arrow::MemoryPool * pool = arrow::default_memory_pool();
        std::unique_ptr<arrow::ArrayBuilder> array_builder;
        arrow::Status status = MakeBuilder(pool, field->type(), &array_builder);
        checkStatus(status, field->name(), format_name);
@ -516,20 +520,31 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema,
 }

 ArrowColumnToCHColumn::ArrowColumnToCHColumn(
-    const Block & header_, const std::string & format_name_, bool import_nested_, bool allow_missing_columns_)
-    : header(header_), format_name(format_name_), import_nested(import_nested_), allow_missing_columns(allow_missing_columns_)
+    const Block & header_,
+    const std::string & format_name_,
+    bool import_nested_,
+    bool allow_missing_columns_,
+    bool case_insensitive_matching_)
+    : header(header_)
+    , format_name(format_name_)
+    , import_nested(import_nested_)
+    , allow_missing_columns(allow_missing_columns_)
+    , case_insensitive_matching(case_insensitive_matching_)
 {
 }

 void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
 {
    NameToColumnPtr name_to_column_ptr;
-    for (const auto & column_name : table->ColumnNames())
+    for (auto column_name : table->ColumnNames())
    {
        std::shared_ptr<arrow::ChunkedArray> arrow_column = table->GetColumnByName(column_name);
        if (!arrow_column)
            throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' is duplicated", column_name);
-        name_to_column_ptr[column_name] = arrow_column;
+
+        if (case_insensitive_matching)
+            boost::to_lower(column_name);
+        name_to_column_ptr[std::move(column_name)] = arrow_column;
    }

    arrowColumnsToCHChunk(res, name_to_column_ptr);
@ -548,22 +563,31 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
    {
        const ColumnWithTypeAndName & header_column = header.getByPosition(column_i);

+        auto search_column_name = header_column.name;
+        if (case_insensitive_matching)
+            boost::to_lower(search_column_name);
+
        bool read_from_nested = false;
        String nested_table_name = Nested::extractTableName(header_column.name);
-        if (!name_to_column_ptr.contains(header_column.name))
+        String search_nested_table_name = nested_table_name;
+        if (case_insensitive_matching)
+            boost::to_lower(search_nested_table_name);
+
+        if (!name_to_column_ptr.contains(search_column_name))
        {
            /// Check if it's a column from nested table.
-            if (import_nested && name_to_column_ptr.contains(nested_table_name))
+            if (import_nested && name_to_column_ptr.contains(search_nested_table_name))
            {
-                if (!nested_tables.contains(nested_table_name))
+                if (!nested_tables.contains(search_nested_table_name))
                {
-                    std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[nested_table_name];
-                    ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)};
+                    std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[search_nested_table_name];
+                    ColumnsWithTypeAndName cols
+                        = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)};
                    Block block(cols);
-                    nested_tables[nested_table_name] = std::make_shared<Block>(Nested::flatten(block));
+                    nested_tables[search_nested_table_name] = std::make_shared<Block>(Nested::flatten(block));
                }

-                read_from_nested = nested_tables[nested_table_name]->has(header_column.name);
+                read_from_nested = nested_tables[search_nested_table_name]->has(header_column.name, case_insensitive_matching);
            }

            if (!read_from_nested)
@ -580,13 +604,19 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
            }
        }

-        std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[header_column.name];

        ColumnWithTypeAndName column;
        if (read_from_nested)
-            column = nested_tables[nested_table_name]->getByName(header_column.name);
+        {
+            column = nested_tables[search_nested_table_name]->getByName(header_column.name, case_insensitive_matching);
+            if (case_insensitive_matching)
+                column.name = header_column.name;
+        }
        else
+        {
+            auto arrow_column = name_to_column_ptr[search_column_name];
            column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true);
+        }

        try
        {
@ -594,8 +624,11 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
        }
        catch (Exception & e)
        {
-            e.addMessage(fmt::format("while converting column {} from type {} to type {}",
-                backQuote(header_column.name), column.type->getName(), header_column.type->getName()));
+            e.addMessage(fmt::format(
+                "while converting column {} from type {} to type {}",
+                backQuote(header_column.name),
+                column.type->getName(),
+                header_column.type->getName()));
            throw;
        }

@ -609,22 +642,23 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
 std::vector<size_t> ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const
 {
    std::vector<size_t> missing_columns;
-    auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header);
+    auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header, case_insensitive_matching);
    auto flatten_block_from_arrow = Nested::flatten(block_from_arrow);
+
    for (size_t i = 0, columns = header.columns(); i < columns; ++i)
    {
-        const auto & column = header.getByPosition(i);
+        const auto & header_column = header.getByPosition(i);
        bool read_from_nested = false;
-        String nested_table_name = Nested::extractTableName(column.name);
-        if (!block_from_arrow.has(column.name))
+        String nested_table_name = Nested::extractTableName(header_column.name);
+        if (!block_from_arrow.has(header_column.name, case_insensitive_matching))
        {
-            if (import_nested && block_from_arrow.has(nested_table_name))
-                read_from_nested = flatten_block_from_arrow.has(column.name);
+            if (import_nested && block_from_arrow.has(nested_table_name, case_insensitive_matching))
+                read_from_nested = flatten_block_from_arrow.has(header_column.name, case_insensitive_matching);

            if (!read_from_nested)
            {
                if (!allow_missing_columns)
-                    throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", column.name};
+                    throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name};

                missing_columns.push_back(i);
            }
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
@ -25,7 +25,8 @@ public:
        const Block & header_,
        const std::string & format_name_,
        bool import_nested_,
-        bool allow_missing_columns_);
+        bool allow_missing_columns_,
+        bool case_insensitive_matching_ = false);

    void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);

@ -36,7 +37,8 @@ public:

    /// Transform arrow schema to ClickHouse header. If hint_header is provided,
    /// we will skip columns in schema that are not in hint_header.
-    static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr);
+    static Block arrowSchemaToCHHeader(
+        const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr, bool ignore_case = false);

 private:
    const Block & header;
@ -44,6 +46,7 @@ private:
    bool import_nested;
    /// If false, throw exception if some columns in header not exists in arrow table.
    bool allow_missing_columns;
+    bool case_insensitive_matching;

    /// Map {column name : dictionary column}.
    /// To avoid converting dictionary from Arrow Dictionary
--- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
@ -53,9 +53,6 @@ Chunk ORCBlockInputFormat::generate()
    if (!table || !table->num_rows())
        return res;

-    if (format_settings.use_lowercase_column_name)
-        table = *table->RenameColumns(include_column_names);
-
    arrow_column_to_ch_column->arrowTableToCHChunk(res, table);
    /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields.
    /// Otherwise fill the missing columns with zero values of its type.
@ -73,7 +70,6 @@ void ORCBlockInputFormat::resetParser()

    file_reader.reset();
    include_indices.clear();
-    include_column_names.clear();
    block_missing_values.clear();
 }

@ -125,20 +121,6 @@ static void getFileReaderAndSchema(
    if (!read_schema_result.ok())
        throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS);
    schema = std::move(read_schema_result).ValueOrDie();
-
-    if (format_settings.use_lowercase_column_name)
-    {
-        std::vector<std::shared_ptr<::arrow::Field>> fields;
-        fields.reserve(schema->num_fields());
-        for (int i = 0; i < schema->num_fields(); ++i)
-        {
-            const auto& field = schema->field(i);
-            auto name = field->name();
-            boost::to_lower(name);
-            fields.push_back(field->WithName(name));
-        }
-        schema = arrow::schema(fields, schema->metadata());
-    }
 }

 void ORCBlockInputFormat::prepareReader()
@ -149,12 +131,17 @@ void ORCBlockInputFormat::prepareReader()
        return;

    arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
-        getPort().getHeader(), "ORC", format_settings.orc.import_nested, format_settings.orc.allow_missing_columns);
+        getPort().getHeader(),
+        "ORC",
+        format_settings.orc.import_nested,
+        format_settings.orc.allow_missing_columns,
+        format_settings.orc.case_insensitive_column_matching);
    missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);

+    const bool ignore_case = format_settings.orc.case_insensitive_column_matching;
    std::unordered_set<String> nested_table_names;
    if (format_settings.orc.import_nested)
-        nested_table_names = Nested::getAllTableNames(getPort().getHeader());
+        nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case);

    /// In ReadStripe column indices should be started from 1,
    /// because 0 indicates to select all columns.
@ -165,19 +152,18 @@ void ORCBlockInputFormat::prepareReader()
        /// so we should recursively count the number of indices we need for this type.
        int indexes_count = countIndicesForType(schema->field(i)->type());
        const auto & name = schema->field(i)->name();
-        if (getPort().getHeader().has(name) || nested_table_names.contains(name))
+        if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name))
        {
            for (int j = 0; j != indexes_count; ++j)
-            {
                include_indices.push_back(index + j);
-                include_column_names.push_back(name);
-            }
        }
+
        index += indexes_count;
    }
 }

-ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_)
+ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
+    : ISchemaReader(in_), format_settings(format_settings_)
 {
 }

--- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h
@ -47,7 +47,6 @@ private:

    // indices of columns to read from ORC file
    std::vector<int> include_indices;
-    std::vector<String> include_column_names;

    std::vector<size_t> missing_columns;
    BlockMissingValues block_missing_values;
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@ -53,11 +53,7 @@ Chunk ParquetBlockInputFormat::generate()
    std::shared_ptr<arrow::Table> table;
    arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, column_indices, &table);
    if (!read_status.ok())
-        throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(),
-                        ErrorCodes::CANNOT_READ_ALL_DATA};
-
-    if (format_settings.use_lowercase_column_name)
-        table = *table->RenameColumns(column_names);
+        throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA};

    ++row_group_current;

@ -78,7 +74,6 @@ void ParquetBlockInputFormat::resetParser()

    file_reader.reset();
    column_indices.clear();
-    column_names.clear();
    row_group_current = 0;
    block_missing_values.clear();
 }
@ -123,20 +118,6 @@ static void getFileReaderAndSchema(
        return;
    THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader));
    THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema));
-
-    if (format_settings.use_lowercase_column_name)
-    {
-        std::vector<std::shared_ptr<::arrow::Field>> fields;
-        fields.reserve(schema->num_fields());
-        for (int i = 0; i < schema->num_fields(); ++i)
-        {
-            const auto& field = schema->field(i);
-            auto name = field->name();
-            boost::to_lower(name);
-            fields.push_back(field->WithName(name));
-        }
-        schema = arrow::schema(fields, schema->metadata());
-    }
 }

 void ParquetBlockInputFormat::prepareReader()
@ -149,12 +130,18 @@ void ParquetBlockInputFormat::prepareReader()
    row_group_total = file_reader->num_row_groups();
    row_group_current = 0;

-    arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns);
+    arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
+        getPort().getHeader(),
+        "Parquet",
+        format_settings.parquet.import_nested,
+        format_settings.parquet.allow_missing_columns,
+        format_settings.parquet.case_insensitive_column_matching);
    missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);

+    const bool ignore_case = format_settings.parquet.case_insensitive_column_matching;
    std::unordered_set<String> nested_table_names;
    if (format_settings.parquet.import_nested)
-        nested_table_names = Nested::getAllTableNames(getPort().getHeader());
+        nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case);

    int index = 0;
    for (int i = 0; i < schema->num_fields(); ++i)
@ -164,19 +151,19 @@ void ParquetBlockInputFormat::prepareReader()
        /// count the number of indices we need for this type.
        int indexes_count = countIndicesForType(schema->field(i)->type());
        const auto & name = schema->field(i)->name();
-        if (getPort().getHeader().has(name) || nested_table_names.contains(name))
+
+        if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name))
        {
            for (int j = 0; j != indexes_count; ++j)
-            {
                column_indices.push_back(index + j);
-                column_names.push_back(name);
-            }
        }
+
        index += indexes_count;
    }
 }

-ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_)
+ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
+    : ISchemaReader(in_), format_settings(format_settings_)
 {
 }

--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
@ -40,7 +40,6 @@ private:
    int row_group_total = 0;
    // indices of columns to read from Parquet file
    std::vector<int> column_indices;
-    std::vector<String> column_names;
    std::unique_ptr<ArrowColumnToCHColumn> arrow_column_to_ch_column;
    int row_group_current = 0;
    std::vector<size_t> missing_columns;
--- a/tests/queries/0_stateless/00900_long_parquet_load.reference
+++ b/tests/queries/0_stateless/00900_long_parquet_load.reference
@ -88,6 +88,9 @@ idx10	['This','is','a','test']
 22
 23
 24
+=== Try load data from case_insensitive_column_matching.parquet
+123	1
+456	2
 === Try load data from datapage_v2.snappy.parquet
 Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unknown encoding type.: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin: (in query: INSERT INTO parquet_load FORMAT Parquet). (CANNOT_READ_ALL_DATA)

@ -339,9 +342,6 @@ Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unkno
 (NULL)
 === Try load data from single_nan.parquet
 \N
-=== Try load data from test_setting_input_format_use_lowercase_column_name.parquet
-123	1
-456	2
 === Try load data from userdata1.parquet
 1454486129	1	Amanda	Jordan	ajordan0@com.com	Female	1.197.201.2	6759521864920116	Indonesia	3/8/1971	49756.53	Internal Auditor	1E+02
 1454519043	2	Albert	Freeman	afreeman1@is.gd	Male	218.111.175.34		Canada	1/16/1968	150280.17	Accountant IV	
--- a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh
+++ b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh
@ -1,22 +0,0 @@
-#!/usr/bin/env bash
-# Tags: no-ubsan, no-fasttest
-
-CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CUR_DIR"/../shell_config.sh
-
-echo "Parquet"
-DATA_FILE=$CUR_DIR/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet
-${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load"
-${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (id String, score Int32) ENGINE = Memory"
-cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_use_lowercase_column_name=true"
-${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load"
-${CLICKHOUSE_CLIENT} --query="drop table parquet_load"
-
-echo "ORC"
-DATA_FILE=$CUR_DIR/data_orc/test_setting_input_format_use_lowercase_column_name.orc
-${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load"
-${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (id String, score Int32) ENGINE = Memory"
-cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_use_lowercase_column_name=true"
-${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load"
-${CLICKHOUSE_CLIENT} --query="drop table orc_load"
--- a/tests/queries/0_stateless/02241_parquet_bad_column.reference
+++ b/tests/queries/0_stateless/02241_parquet_bad_column.reference
@ -1 +1,2 @@
 10
+10
--- a/tests/queries/0_stateless/02241_parquet_bad_column.sh
+++ b/tests/queries/0_stateless/02241_parquet_bad_column.sh
@ -5,23 +5,25 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh

-$CLICKHOUSE_CLIENT -q "drop table if exists test_02241"
-$CLICKHOUSE_CLIENT -q "create table test_02241 (image_path Nullable(String),
-                                caption Nullable(String),
-                                NSFW Nullable(String),
-                                similarity Nullable(Float64),
-                                LICENSE Nullable(String),
-                                url Nullable(String),
-                                key Nullable(UInt64),
-                                shard_id Nullable(UInt64),
-                                status Nullable(String),
-                                width Nullable(UInt32),
-                                height Nullable(UInt32),
-                                exif Nullable(String),
-                                original_width Nullable(UInt32),
-                                original_height Nullable(UInt32)) engine=Memory"
+for case_insensitive in "true" "false"; do
+    $CLICKHOUSE_CLIENT -q "drop table if exists test_02241"
+    $CLICKHOUSE_CLIENT -q "create table test_02241 (image_path Nullable(String),
+                                    caption Nullable(String),
+                                    NSFW Nullable(String),
+                                    similarity Nullable(Float64),
+                                    LICENSE Nullable(String),
+                                    url Nullable(String),
+                                    key Nullable(UInt64),
+                                    shard_id Nullable(UInt64),
+                                    status Nullable(String),
+                                    width Nullable(UInt32),
+                                    height Nullable(UInt32),
+                                    exif Nullable(String),
+                                    original_width Nullable(UInt32),
+                                    original_height Nullable(UInt32)) engine=Memory"

-cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT -q "insert into test_02241 format Parquet"
+    cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT -q "insert into test_02241 format Parquet SETTINGS input_format_parquet_case_insensitive_column_matching=$case_insensitive"

-$CLICKHOUSE_CLIENT -q "select count() from test_02241"
-$CLICKHOUSE_CLIENT -q "drop table test_02241"
+    $CLICKHOUSE_CLIENT -q "select count() from test_02241"
+    $CLICKHOUSE_CLIENT -q "drop table test_02241"
+done
--- a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference
+++ b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference
@ -4,3 +4,6 @@ Parquet
 ORC
 123	1
 456	2
+Arrow
+123	1
+456	2
--- a/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh
+++ b/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh
@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Tags: no-ubsan, no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+echo "Parquet"
+DATA_FILE=$CUR_DIR/data_parquet/case_insensitive_column_matching.parquet
+${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load"
+${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (iD String, scOre Int32) ENGINE = Memory"
+cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_parquet_case_insensitive_column_matching=true"
+${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load"
+${CLICKHOUSE_CLIENT} --query="drop table parquet_load"
+
+echo "ORC"
+DATA_FILE=$CUR_DIR/data_orc/case_insensitive_column_matching.orc
+${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load"
+${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (iD String, sCorE Int32) ENGINE = Memory"
+cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_orc_case_insensitive_column_matching=true"
+${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load"
+${CLICKHOUSE_CLIENT} --query="drop table orc_load"
+
+echo "Arrow"
+DATA_FILE=$CUR_DIR/data_arrow/case_insensitive_column_matching.arrow
+${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_load"
+${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_load (iD String, sCorE Int32) ENGINE = Memory"
+cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO arrow_load FORMAT Arrow SETTINGS input_format_arrow_case_insensitive_column_matching=true"
+${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_load"
+${CLICKHOUSE_CLIENT} --query="drop table arrow_load"
--- a/tests/queries/0_stateless/02242_case_insensitive_nested.reference
+++ b/tests/queries/0_stateless/02242_case_insensitive_nested.reference
@ -0,0 +1,12 @@
+Arrow
+[1,2,3]	['123','456','789']	[9.8,10.12,11.14]
+[4,5,6]	['101112','131415','161718']	[123.8,10.2,11.414]
+[7,8,9]	['101','415','118']	[13.08,1.12,0.414]
+Parquet
+[1,2,3]	['123','456','789']	[9.8,10.12,11.14]
+[4,5,6]	['101112','131415','161718']	[123.8,10.2,11.414]
+[7,8,9]	['101','415','118']	[13.08,1.12,0.414]
+ORC
+[1,2,3]	['123','456','789']	[9.8,10.12,11.14]
+[4,5,6]	['101112','131415','161718']	[123.8,10.2,11.414]
+[7,8,9]	['101','415','118']	[13.08,1.12,0.414]
--- a/tests/queries/0_stateless/02242_case_insensitive_nested.sh
+++ b/tests/queries/0_stateless/02242_case_insensitive_nested.sh
@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS nested_table"
+${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS nested_nested_table"
+
+${CLICKHOUSE_CLIENT} --query="CREATE TABLE nested_table (table Nested(eLeM1 Int32, elEm2 String, ELEM3 Float32)) engine=Memory"
+
+formats=('Arrow' 'Parquet' 'ORC')
+format_files=('arrow' 'parquet' 'orc')
+
+for ((i = 0; i < 3; i++)) do
+    echo ${formats[i]}
+
+    ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE nested_table"
+    cat $CUR_DIR/data_orc_arrow_parquet_nested/nested_table.${format_files[i]} | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nested_table FORMAT ${formats[i]} SETTINGS input_format_${format_files[i]}_import_nested = 1, input_format_${format_files[i]}_case_insensitive_column_matching = true"
+
+    ${CLICKHOUSE_CLIENT} --query="SELECT * FROM nested_table"
+
+done
+
+${CLICKHOUSE_CLIENT} --query="DROP TABLE nested_table"
--- a/tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow
+++ b/tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow
--- a/tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc
+++ b/tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc
--- a/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet
+++ b/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet
--- a/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet.columns
+++ b/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet.columns