From 557edbd172aac7a97aca68d37b204ce2c3f8f749 Mon Sep 17 00:00:00 2001
From: avogar
Date: Thu, 24 Mar 2022 12:54:12 +0000
Subject: [PATCH 01/17] Add some improvements and fixes in schema inference

---
 src/Core/Settings.h                           |   6 +
 src/DataTypes/DataTypeMap.cpp                 |  36 ++-
 src/DataTypes/DataTypeMap.h                   |   2 +
 src/Formats/EscapingRuleUtils.cpp             | 294 +++++++++++++++---
 src/Formats/EscapingRuleUtils.h               |   5 +-
 src/Formats/FormatFactory.cpp                 |   8 +-
 src/Formats/FormatFactory.h                   |   2 +-
 src/Formats/FormatSettings.h                  |   9 +-
 src/Formats/ReadSchemaUtils.cpp               |  30 +-
 src/Formats/ReadSchemaUtils.h                 |   8 +-
 src/IO/ReadHelpers.cpp                        |  11 +
 src/Processors/Formats/ISchemaReader.cpp      |  22 +-
 src/Processors/Formats/ISchemaReader.h        |   9 +-
 .../Formats/Impl/ArrowBlockInputFormat.cpp    |  10 +-
 .../Formats/Impl/ArrowColumnToCHColumn.cpp    |  70 ++++-
 .../Formats/Impl/ArrowColumnToCHColumn.h      |   6 +-
 .../Formats/Impl/AvroRowInputFormat.cpp       |   4 +-
 .../Formats/Impl/BinaryRowInputFormat.cpp     |   4 +-
 .../Formats/Impl/CSVRowInputFormat.cpp        |  12 +-
 .../Formats/Impl/CSVRowInputFormat.h          |   3 +-
 .../Impl/CustomSeparatedRowInputFormat.cpp    |  11 +-
 .../Impl/CustomSeparatedRowInputFormat.h      |   3 +-
 .../Impl/JSONCompactEachRowRowInputFormat.cpp |   4 +-
 .../Impl/JSONEachRowRowInputFormat.cpp        |   4 +-
 .../Formats/Impl/MsgPackRowInputFormat.cpp    |   4 +-
 src/Processors/Formats/Impl/NativeFormat.cpp  |   2 +-
 .../Formats/Impl/ORCBlockInputFormat.cpp      |   8 +-
 .../Formats/Impl/ParquetBlockInputFormat.cpp  |   8 +-
 .../Formats/Impl/RegexpRowInputFormat.cpp     |  11 +-
 .../Formats/Impl/RegexpRowInputFormat.h       |   3 +-
 .../Formats/Impl/TSKVRowInputFormat.cpp       |   2 +-
 .../Impl/TabSeparatedRowInputFormat.cpp       |   4 +-
 .../Formats/Impl/TemplateRowInputFormat.cpp   |  12 +-
 .../Formats/Impl/TemplateRowInputFormat.h     |   4 +-
 .../Formats/Impl/ValuesBlockInputFormat.cpp   |  50 ++-
 .../Formats/Impl/ValuesBlockInputFormat.h     |   4 +-
 .../RowInputFormatWithNamesAndTypes.cpp       |   4 +-
 .../Formats/RowInputFormatWithNamesAndTypes.h |   2 +-
 .../02149_schema_inference.reference          |  32 +-
 ...ma_inference_formats_with_schema.reference | 168 +++++-----
 ...arquet_nullable_schema_inference.reference |  40 +++
 ...w_orc_parquet_nullable_schema_inference.sh |  21 ++
 ...ead_null_type_to_nullable_column.reference |   1 +
 ...arrow_read_null_type_to_nullable_column.sh |  28 ++
 ...column_names_in_shcmea_inference.reference |   8 +
 ...02244_column_names_in_shcmea_inference.sql |  12 +
 .../02245_parquet_skip_unknown_type.reference |  16 +
 .../02245_parquet_skip_unknown_type.sh        |  18 ++
 ...csv_best_effort_schema_inference.reference | 107 +++++++
 ...46_tsv_csv_best_effort_schema_inference.sh | 220 +++++++++++++
 50 files changed, 1062 insertions(+), 300 deletions(-)
 create mode 100644 tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference
 create mode 100755 tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh
 create mode 100644 tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.reference
 create mode 100755 tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.sh
 create mode 100644 tests/queries/0_stateless/02244_column_names_in_shcmea_inference.reference
 create mode 100644 tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql
 create mode 100644 tests/queries/0_stateless/02245_parquet_skip_unknown_type.reference
 create mode 100755 tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh
 create mode 100644 tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference
 create mode 100755 tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index ca2e9f12e66..86ea202fda7 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -631,6 +631,12 @@ class IColumn;
     M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
     M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \
     M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \
+    M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
+    M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
+    M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Parquet", 0) \
+    M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format ORC", 0) \
+    M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Arrow", 0) \
+    M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
     \
     M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \
     M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output.
Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index 41de17982aa..42ec739c33b 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -45,22 +45,7 @@ DataTypeMap::DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & valu void DataTypeMap::assertKeyType() const { - bool type_error = false; - if (key_type->getTypeId() == TypeIndex::LowCardinality) - { - const auto & low_cardinality_data_type = assert_cast(*key_type); - if (!isStringOrFixedString(*(low_cardinality_data_type.getDictionaryType()))) - type_error = true; - } - else if (!key_type->isValueRepresentedByInteger() - && !isStringOrFixedString(*key_type) - && !WhichDataType(key_type).isNothing() - && !WhichDataType(key_type).isUUID()) - { - type_error = true; - } - - if (type_error) + if (!checkKeyType(key_type)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type of Map key must be a type, that can be represented by integer or String or FixedString (possibly LowCardinality) or UUID," " but {} given", key_type->getName()); @@ -102,6 +87,25 @@ bool DataTypeMap::equals(const IDataType & rhs) const return nested->equals(*rhs_map.nested); } +bool DataTypeMap::checkKeyType(DataTypePtr key_type) +{ + if (key_type->getTypeId() == TypeIndex::LowCardinality) + { + const auto & low_cardinality_data_type = assert_cast(*key_type); + if (!isStringOrFixedString(*(low_cardinality_data_type.getDictionaryType()))) + return false; + } + else if (!key_type->isValueRepresentedByInteger() + && !isStringOrFixedString(*key_type) + && !WhichDataType(key_type).isNothing() + && !WhichDataType(key_type).isUUID()) + { + return false; + } + + return true; +} + static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.size() != 2) diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 65bdd93ca4d..479008031fe 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -48,6 +48,8 @@ public: SerializationPtr doGetDefaultSerialization() const override; + static bool checkKeyType(DataTypePtr key_type); + private: void assertKeyType() const; }; diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index b0ea10abdb6..08b34ebb0fc 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -5,12 +5,17 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include #include -#include +#include #include -#include -#include + namespace DB { @@ -138,7 +143,8 @@ bool deserializeFieldByEscapingRule( serialization->deserializeTextRaw(column, buf, format_settings); break; default: - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Escaping rule {} is not suitable for deserialization", escapingRuleToString(escaping_rule)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Escaping rule {} is not suitable for deserialization", escapingRuleToString(escaping_rule)); } return read; } @@ -176,7 +182,8 @@ void serializeFieldByEscapingRule( } } -void writeStringByEscapingRule(const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings) +void writeStringByEscapingRule( + const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings) { switch (escaping_rule) { @@ -249,85 +256,270 @@ String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule e return 
readByEscapingRule(buf, escaping_rule, format_settings); } -static bool evaluateConstantExpressionFromString(const StringRef & field, DataTypePtr & type, ContextPtr context) +static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf) { - if (!context) - throw Exception(ErrorCodes::LOGICAL_ERROR, "You must provide context to evaluate constant expression"); + if (buf.eof()) + return nullptr; - ParserExpression parser; - Expected expected; - Tokens tokens(field.data, field.data + field.size); - IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth); - ASTPtr ast; - - /// FIXME: Our parser cannot parse maps in the form of '{key : value}' that is used in text formats. - bool parsed = parser.parse(token_iterator, ast, expected); - if (!parsed || !token_iterator->isEnd()) - return false; - - try + /// Array + if (checkChar('[', buf)) { - std::pair result = evaluateConstantExpression(ast, context); - type = generalizeDataType(result.second); - return true; + skipWhitespaceIfAny(buf); + + DataTypes nested_types; + bool first = true; + while (!buf.eof() && *buf.position() != ']') + { + if (!first) + { + skipWhitespaceIfAny(buf); + if (!checkChar(',', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + } + else + first = false; + + auto nested_type = determineDataTypeForSingleFieldImpl(buf); + if (!nested_type) + return nullptr; + + nested_types.push_back(nested_type); + } + + if (buf.eof()) + return nullptr; + + ++buf.position(); + + if (nested_types.empty()) + return std::make_shared(std::make_shared()); + + auto least_supertype = tryGetLeastSupertype(nested_types); + if (!least_supertype) + return nullptr; + + return std::make_shared(least_supertype); } - catch (...) + + /// Tuple + if (checkChar('(', buf)) { - return false; + skipWhitespaceIfAny(buf); + + DataTypes nested_types; + bool first = true; + while (!buf.eof() && *buf.position() != ')') + { + if (!first) + { + skipWhitespaceIfAny(buf); + if (!checkChar(',', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + } + else + first = false; + + auto nested_type = determineDataTypeForSingleFieldImpl(buf); + if (!nested_type) + return nullptr; + + nested_types.push_back(nested_type); + } + + if (buf.eof() || nested_types.empty()) + return nullptr; + + ++buf.position(); + + return std::make_shared(nested_types); } + + /// Map + if (checkChar('{', buf)) + { + skipWhitespaceIfAny(buf); + + DataTypes key_types; + DataTypes value_types; + bool first = true; + while (!buf.eof() && *buf.position() != '}') + { + if (!first) + { + skipWhitespaceIfAny(buf); + if (!checkChar(',', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + } + else + first = false; + + auto key_type = determineDataTypeForSingleFieldImpl(buf); + if (!key_type) + return nullptr; + + key_types.push_back(key_type); + + skipWhitespaceIfAny(buf); + if (!checkChar(':', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + + auto value_type = determineDataTypeForSingleFieldImpl(buf); + if (!value_type) + return nullptr; + + value_types.push_back(value_type); + } + + if (buf.eof()) + return nullptr; + + ++buf.position(); + skipWhitespaceIfAny(buf); + + if (key_types.empty()) + return std::make_shared(std::make_shared(), std::make_shared()); + + auto key_least_supertype = tryGetLeastSupertype(key_types); + + auto value_least_supertype = tryGetLeastSupertype(value_types); + if (!key_least_supertype || !value_least_supertype) + return nullptr; + + if (!DataTypeMap::checkKeyType(key_least_supertype)) + return nullptr; + + return 
std::make_shared(key_least_supertype, value_least_supertype); + } + + /// String + if (*buf.position() == '\'') + { + ++buf.position(); + while (!buf.eof()) + { + char * next_pos = find_first_symbols<'\\', '\''>(buf.position(), buf.buffer().end()); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + if (*buf.position() == '\'') + break; + + if (*buf.position() == '\\') + ++buf.position(); + } + + if (buf.eof()) + return nullptr; + + ++buf.position(); + return std::make_shared(); + } + + /// Bool + if (checkStringCaseInsensitive("true", buf) || checkStringCaseInsensitive("false", buf)) + return DataTypeFactory::instance().get("Bool"); + + /// Null + if (checkStringCaseInsensitive("NULL", buf)) + return std::make_shared(); + + Float64 tmp; + if (tryReadFloatText(tmp, buf)) + return std::make_shared(); + + return nullptr; } -DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context) +static DataTypePtr determineDataTypeForSingleField(ReadBuffer & buf) +{ + return makeNullableRecursivelyAndCheckForNothing(determineDataTypeForSingleFieldImpl(buf)); +} + +DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule) { switch (escaping_rule) { case FormatSettings::EscapingRule::Quoted: { - DataTypePtr type; - bool parsed = evaluateConstantExpressionFromString(field, type, context); - return parsed ? type : nullptr; + ReadBufferFromString buf(field); + auto type = determineDataTypeForSingleField(buf); + return buf.eof() ? type : nullptr; } case FormatSettings::EscapingRule::JSON: return getDataTypeFromJSONField(field); case FormatSettings::EscapingRule::CSV: { + if (!format_settings.csv.input_format_use_best_effort_in_schema_inference) + return makeNullable(std::make_shared()); + if (field.empty() || field == format_settings.csv.null_representation) return nullptr; if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation) - return std::make_shared(); + return DataTypeFactory::instance().get("Nullable(Bool)"); - DataTypePtr type; - bool parsed; - if (field[0] == '\'' || field[0] == '"') + if (field.size() > 1 && ((field.front() == '\'' && field.back() == '\'') || (field.front() == '"' && field.back() == '"'))) { - /// Try to evaluate expression inside quotes. - parsed = evaluateConstantExpressionFromString(StringRef(field.data() + 1, field.size() - 2), type, context); - /// If it's a number in quotes we determine it as a string. - if (parsed && type && isNumber(removeNullable(type))) - return makeNullable(std::make_shared()); - } - else - parsed = evaluateConstantExpressionFromString(field, type, context); + auto s = std::string_view(field.data() + 1, field.size() - 2); - /// If we couldn't parse an expression, determine it as a string. - return parsed ? type : makeNullable(std::make_shared()); + ReadBufferFromString buf(std::string_view(field.data() + 1, field.size() - 2)); + /// Try to determine the type of value inside quotes + auto type = determineDataTypeForSingleField(buf); + + if (!type) + return nullptr; + + /// If it's a number or tuple in quotes or there is some unread data in buffer, we determine it as a string. + if (isNumber(removeNullable(type)) || isTuple(type) || !buf.eof()) + return makeNullable(std::make_shared()); + + return type; + } + + /// Case when CSV value is not in quotes. 
Check if it's a number, and if not, determine it's as a string. + ReadBufferFromString buf(field); + Float64 tmp; + if (tryReadFloatText(tmp, buf) && buf.eof()) + return makeNullable(std::make_shared()); + + return makeNullable(std::make_shared()); } case FormatSettings::EscapingRule::Raw: [[fallthrough]]; case FormatSettings::EscapingRule::Escaped: - /// TODO: Try to use some heuristics here to determine the type of data. - return field.empty() ? nullptr : makeNullable(std::make_shared()); + { + if (!format_settings.tsv.input_format_use_best_effort_in_schema_inference) + return makeNullable(std::make_shared()); + + if (field.empty() || field == format_settings.tsv.null_representation) + return nullptr; + + if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation) + return DataTypeFactory::instance().get("Nullable(Bool)"); + + ReadBufferFromString buf(field); + auto type = determineDataTypeForSingleField(buf); + if (!buf.eof()) + return makeNullable(std::make_shared()); + + return type; + } default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the type for value with {} escaping rule", escapingRuleToString(escaping_rule)); } } -DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context) +DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule) { DataTypes data_types; data_types.reserve(fields.size()); for (const auto & field : fields) - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, escaping_rule, context)); + data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, escaping_rule)); return data_types; } @@ -344,4 +536,12 @@ DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escap } } +DataTypes getDefaultDataTypeForEscapingRules(const std::vector & escaping_rules) +{ + DataTypes data_types; + for (const auto & rule : escaping_rules) + data_types.push_back(getDefaultDataTypeForEscapingRule(rule)); + return data_types; +} + } diff --git a/src/Formats/EscapingRuleUtils.h b/src/Formats/EscapingRuleUtils.h index 10147b29ad6..3c7c768c003 100644 --- a/src/Formats/EscapingRuleUtils.h +++ b/src/Formats/EscapingRuleUtils.h @@ -49,9 +49,10 @@ String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule es /// expression inside quotes as a constant expression, and if it fails or /// the result is a number (we don't parse numbers in quotes) we treat it as a String. 
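The CSV and TSV branches above are the user-visible core of this change: instead of always falling back to Nullable(String), the schema reader now tries to parse each field as a number, bool, array, tuple or map, and the new input_format_csv_use_best_effort_in_schema_inference / input_format_tsv_use_best_effort_in_schema_inference settings restore the old behaviour. A rough sketch of how this could be exercised with clickhouse-local, assuming a hypothetical data.csv whose rows look like 42,3.14,"hello","[1,2,3]",true (the file name, data and the exact inferred types are illustrative, not copied from the patch or its tests):

    DESC file('data.csv', 'CSV');
    -- with best-effort inference (the default) the columns come out roughly as
    -- Nullable(Float64), Nullable(Float64), Nullable(String), Array(Nullable(Float64)), Nullable(Bool);
    -- a number or tuple inside quotes is still treated as Nullable(String)

    SET input_format_csv_use_best_effort_in_schema_inference = 0;
    DESC file('data.csv', 'CSV');
    -- every column falls back to Nullable(String), as before this patch
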
/// - For TSV and TSVRaw we treat each field as a String (TODO: try to use some tweaks and heuristics here) -DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context = nullptr); -DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context = nullptr); +DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule); +DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule); DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule); +DataTypes getDefaultDataTypeForEscapingRules(const std::vector & escaping_rules); } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 08554cf7e07..f8636768d00 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -65,6 +65,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number; format_settings.csv.null_representation = settings.format_csv_null_representation; format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv; + format_settings.csv.input_format_use_best_effort_in_schema_inference = settings.input_format_csv_use_best_effort_in_schema_inference; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; @@ -94,6 +95,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; + format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? 
FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; @@ -114,6 +116,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default; format_settings.tsv.input_format_enum_as_number = settings.input_format_tsv_enum_as_number; format_settings.tsv.null_representation = settings.format_tsv_null_representation; + format_settings.tsv.input_format_use_best_effort_in_schema_inference = settings.input_format_tsv_use_best_effort_in_schema_inference; format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals; format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions; format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions; @@ -123,15 +126,18 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns; + format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; + format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.seekable_read = settings.input_format_allow_seeks; format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns; format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation; format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference; + format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) @@ -366,7 +372,7 @@ SchemaReaderPtr FormatFactory::getSchemaReader( throw Exception("FormatFactory: Format " + name + " doesn't support schema inference.", ErrorCodes::LOGICAL_ERROR); auto format_settings = _format_settings ? 
*_format_settings : getFormatSettings(context); - return schema_reader_creator(buf, format_settings, context); + return schema_reader_creator(buf, format_settings); } ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader( diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 344dabd3f4d..2f53da3bdff 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -97,7 +97,7 @@ private: /// The checker should return true if format support append. using AppendSupportChecker = std::function; - using SchemaReaderCreator = std::function; + using SchemaReaderCreator = std::function; using ExternalSchemaReaderCreator = std::function; struct Creators diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 4881c1a43c8..6ecd04536a6 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -37,6 +37,8 @@ struct FormatSettings bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 100; + String column_names_for_schema_inference = ""; + enum class DateTimeInputFormat { Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. @@ -75,6 +77,7 @@ struct FormatSettings bool low_cardinality_as_dictionary = false; bool import_nested = false; bool allow_missing_columns = false; + bool skip_columns_with_unsupported_types_in_schema_inference = false; } arrow; struct @@ -101,6 +104,7 @@ struct FormatSettings bool input_format_arrays_as_nested_csv = false; String null_representation = "\\N"; char tuple_delimiter = ','; + bool input_format_use_best_effort_in_schema_inference = true; } csv; struct HiveText @@ -108,7 +112,7 @@ struct FormatSettings char fields_delimiter = '\x01'; char collection_items_delimiter = '\x02'; char map_keys_delimiter = '\x03'; - Names input_field_names; + Names input_field_names = {""}; } hive_text; struct Custom @@ -137,6 +141,7 @@ struct FormatSettings UInt64 row_group_size = 1000000; bool import_nested = false; bool allow_missing_columns = false; + bool skip_columns_with_unsupported_types_in_schema_inference = false; } parquet; struct Pretty @@ -203,6 +208,7 @@ struct FormatSettings bool crlf_end_of_line = false; String null_representation = "\\N"; bool input_format_enum_as_number = false; + bool input_format_use_best_effort_in_schema_inference = true; } tsv; struct @@ -217,6 +223,7 @@ struct FormatSettings bool import_nested = false; bool allow_missing_columns = false; int64_t row_batch_size = 100'000; + bool skip_columns_with_unsupported_types_in_schema_inference = false; } orc; /// For capnProto format we should determine how to diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 559fac4cfaa..18080b0e896 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -65,8 +65,11 @@ ColumnsDescription readSchemaFromFormat(const String & format_name, const std::o return readSchemaFromFormat(format_name, format_settings, read_buffer_creator, context, buf_out); } -DataTypePtr generalizeDataType(DataTypePtr type) +DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type) { + if (!type) + return nullptr; + WhichDataType which(type); if (which.isNothing()) @@ -75,16 +78,13 @@ DataTypePtr generalizeDataType(DataTypePtr type) if (which.isNullable()) { const auto * nullable_type = assert_cast(type.get()); - return generalizeDataType(nullable_type->getNestedType()); + return 
makeNullableRecursivelyAndCheckForNothing(nullable_type->getNestedType()); } - if (isNumber(type)) - return makeNullable(std::make_shared()); - if (which.isArray()) { const auto * array_type = assert_cast(type.get()); - auto nested_type = generalizeDataType(array_type->getNestedType()); + auto nested_type = makeNullableRecursivelyAndCheckForNothing(array_type->getNestedType()); return nested_type ? std::make_shared(nested_type) : nullptr; } @@ -94,7 +94,7 @@ DataTypePtr generalizeDataType(DataTypePtr type) DataTypes nested_types; for (const auto & element : tuple_type->getElements()) { - auto nested_type = generalizeDataType(element); + auto nested_type = makeNullableRecursivelyAndCheckForNothing(element); if (!nested_type) return nullptr; nested_types.push_back(nested_type); @@ -105,19 +105,27 @@ DataTypePtr generalizeDataType(DataTypePtr type) if (which.isMap()) { const auto * map_type = assert_cast(type.get()); - auto key_type = removeNullable(generalizeDataType(map_type->getKeyType())); - auto value_type = generalizeDataType(map_type->getValueType()); - return key_type && value_type ? std::make_shared(key_type, value_type) : nullptr; + auto key_type = makeNullableRecursivelyAndCheckForNothing(map_type->getKeyType()); + auto value_type = makeNullableRecursivelyAndCheckForNothing(map_type->getValueType()); + return key_type && value_type ? std::make_shared(removeNullable(key_type), value_type) : nullptr; } if (which.isLowCarnality()) { const auto * lc_type = assert_cast(type.get()); - auto nested_type = generalizeDataType(lc_type->getDictionaryType()); + auto nested_type = makeNullableRecursivelyAndCheckForNothing(lc_type->getDictionaryType()); return nested_type ? std::make_shared(nested_type) : nullptr; } return makeNullable(type); } +NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header) +{ + NamesAndTypesList result; + for (auto & [name, type] : header.getNamesAndTypesList()) + result.emplace_back(name, makeNullableRecursivelyAndCheckForNothing(type)); + return result; +} + } diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h index 4446393a581..ea8ebbad4c0 100644 --- a/src/Formats/ReadSchemaUtils.h +++ b/src/Formats/ReadSchemaUtils.h @@ -29,14 +29,16 @@ ColumnsDescription readSchemaFromFormat( ContextPtr context, std::unique_ptr & buf_out); -/// Convert type to the most general type: -/// - IntN, UIntN, FloatN, Decimal -> Float64 +/// Make type Nullable recursively: /// - Type -> Nullable(type) /// - Array(Type) -> Array(Nullable(Type)) /// - Tuple(Type1, ..., TypeN) -> Tuple(Nullable(Type1), ..., Nullable(TypeN)) /// - Map(KeyType, ValueType) -> Map(KeyType, Nullable(ValueType)) /// - LowCardinality(Type) -> LowCardinality(Nullable(Type)) /// If type is Nothing or one of the nested types is Nothing, return nullptr. -DataTypePtr generalizeDataType(DataTypePtr type); +DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type); +/// Call makeNullableRecursivelyAndCheckForNothing for all types +/// in the block and return names and types. +NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header); } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index e086f16be54..98a33612aa8 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1366,6 +1366,7 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf) /// - Tuples: (...) /// - Maps: {...} /// - NULL + /// - Bool: true/false /// - Number: integer, float, decimal. 
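The true/false branch added in this hunk lets readQuotedFieldIntoString pass bool literals through to the Quoted-escaping-rule type inference, so formats that read fields this way (Values, and quoted literals in other text formats) can surface a Bool column instead of falling back to String. A hedged sketch, assuming a hypothetical data.values file containing a row such as (1, 'abc', true, NULL); the file name and values are invented, and the exact numeric type depends on the Values schema reader (also touched by this patch) rather than on this hunk alone:

    DESC file('data.values', 'Values');
    -- the third column can now be recognized as a (Nullable) Bool;
    -- the NULL-only fourth column still needs another row before it gets a concrete type
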
if (*buf.position() == '\'') @@ -1394,6 +1395,16 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf) s.append("NaN"); } } + else if (checkCharCaseInsensitive('t', buf)) + { + assertStringCaseInsensitive("rue", buf); + s.append("true"); + } + else if (checkCharCaseInsensitive('f', buf)) + { + assertStringCaseInsensitive("alse", buf); + s.append("false"); + } else { /// It's an integer, float or decimal. They all can be parsed as float. diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 096e39a2893..17db8865310 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -10,9 +11,16 @@ namespace ErrorCodes extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; } -IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_) - : ISchemaReader(in_), max_rows_to_read(max_rows_to_read_), default_type(default_type_) +IRowSchemaReader::IRowSchemaReader( + ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_, const DataTypes & default_types_) + : ISchemaReader(in_) + , max_rows_to_read(format_settings.max_rows_to_read_for_schema_inference), default_type(default_type_), default_types(default_types_) { + if (!format_settings.column_names_for_schema_inference.empty()) + { + /// column_names_for_schema_inference is a string in format 'column1,column2,column3,...' + boost::split(column_names, format_settings.column_names_for_schema_inference, boost::is_any_of(",")); + } } NamesAndTypesList IRowSchemaReader::readSchema() @@ -43,6 +51,8 @@ NamesAndTypesList IRowSchemaReader::readSchema() { if (default_type) data_types[i] = default_type; + else if (!default_types.empty() && i < default_types.size() && default_types[i]) + data_types[i] = default_types[i]; else throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, @@ -74,14 +84,16 @@ NamesAndTypesList IRowSchemaReader::readSchema() /// Check that we could determine the type of this column. if (!data_types[i]) { - if (!default_type) + if (default_type) + data_types[i] = default_type; + else if (!default_types.empty() && i < default_types.size() && default_types[i]) + data_types[i] = default_types[i]; + else throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot determine table structure by first {} rows of data, because some columns contain only Nulls. To increase the maximum " "number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference", max_rows_to_read); - - data_types[i] = default_type; } result.emplace_back(column_names[i], data_types[i]); } diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 2d35809e26a..cd0b552c021 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -27,12 +27,14 @@ protected: /// Base class for schema inference for formats that read data row by row. /// It reads data row by row (up to max_rows_to_read), determines types of columns /// for each row and compare them with types from the previous rows. If some column -/// contains values with different types in different rows, the default type will be -/// used for this column or the exception will be thrown (if default type is not set). 
+/// contains values with different types in different rows, the default type +/// (from argument default_type_) will be used for this column or the exception +/// will be thrown (if default type is not set). If different columns have different +/// default types, you can provide them by default_types_ argument. class IRowSchemaReader : public ISchemaReader { public: - IRowSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_ = nullptr); + IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_ = nullptr, const DataTypes & default_types_ = {}); NamesAndTypesList readSchema() override; protected: @@ -47,6 +49,7 @@ protected: private: size_t max_rows_to_read; DataTypePtr default_type; + DataTypes default_types; std::vector column_names; }; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index cf5cfa681a1..42c68e4654b 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -3,6 +3,7 @@ #if USE_ARROW #include +#include #include #include #include @@ -167,8 +168,9 @@ NamesAndTypesList ArrowSchemaReader::readSchema() schema = createFileReader(in, format_settings, is_stopped)->schema(); } - auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, stream ? "ArrowStream" : "Arrow"); - return header.getNamesAndTypesList(); + auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( + *schema, stream ? "ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference); + return getNamesAndRecursivelyNullableTypes(header); } void registerInputFormatArrow(FormatFactory & factory) @@ -198,13 +200,13 @@ void registerArrowSchemaReader(FormatFactory & factory) { factory.registerSchemaReader( "Arrow", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, false, settings); }); factory.registerSchemaReader( "ArrowStream", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, true, settings); });} diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 14c81a0d90d..8f5bb205bef 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -26,11 +27,13 @@ #include #include #include +#include #include #include #include #include #include +#include /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. 
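For the Arrow-family formats, the schema reader changes above have two user-visible effects: inferred column types are wrapped into Nullable recursively, and a column whose Arrow type cannot be mapped can be skipped instead of aborting inference, controlled by the new input_format_{arrow,parquet,orc}_skip_columns_with_unsupported_types_in_schema_inference settings. A sketch under the assumption of a hypothetical data.parquet file with one supported column and one column of an unsupported type (file name and layout are illustrative):

    DESC file('data.parquet', 'Parquet');
    -- fails with UNKNOWN_TYPE; the error text now points at the setting below

    SET input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference = 1;
    DESC file('data.parquet', 'Parquet');
    -- the unsupported column is dropped from the inferred schema,
    -- and the remaining columns come back as Nullable(...) types
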
#define FOR_ARROW_NUMERIC_TYPES(M) \ @@ -328,12 +331,17 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( const std::string & format_name, bool is_nullable, std::unordered_map> & dictionary_values, - bool read_ints_as_dates) + bool read_ints_as_dates, + bool allow_null_type, + bool skip_columns_with_unsupported_types, + bool & skipped) { if (!is_nullable && arrow_column->null_count() && arrow_column->type()->id() != arrow::Type::LIST && arrow_column->type()->id() != arrow::Type::MAP && arrow_column->type()->id() != arrow::Type::STRUCT) { - auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values, read_ints_as_dates); + auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; auto nullmap_column = readByteMapFromArrowColumn(arrow_column); auto nullable_type = std::make_shared(std::move(nested_column.type)); auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); @@ -378,7 +386,10 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( case arrow::Type::MAP: { auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; + auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); const auto * tuple_column = assert_cast(nested_column.column.get()); @@ -390,7 +401,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( case arrow::Type::LIST: { auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); auto array_column = ColumnArray::create(nested_column.column, offsets_column); auto array_type = std::make_shared(nested_column.type); @@ -415,7 +428,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( for (int i = 0; i != arrow_struct_type->num_fields(); ++i) { auto nested_arrow_column = std::make_shared(nested_arrow_columns[i]); - auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates); + auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; tuple_elements.emplace_back(std::move(element.column)); tuple_types.emplace_back(std::move(element.type)); tuple_names.emplace_back(std::move(element.name)); @@ -438,7 +453,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( dict_array.emplace_back(dict_chunk.dictionary()); } auto arrow_dict_column = std::make_shared(dict_array); - auto dict_column = 
readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped); /// We should convert read column to ColumnUnique. auto tmp_lc_column = DataTypeLowCardinality(dict_column.type).createColumn(); @@ -468,9 +483,33 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( # undef DISPATCH // TODO: read JSON as a string? // TODO: read UUID as a string? + case arrow::Type::NA: + { + if (allow_null_type) + { + auto type = std::make_shared(); + auto column = ColumnNothing::create(arrow_column->length()); + return {std::move(column), type, column_name}; + } + [[fallthrough]]; + } default: - throw Exception(ErrorCodes::UNKNOWN_TYPE, - "Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name); + { + if (skip_columns_with_unsupported_types) + { + skipped = true; + return {}; + } + + throw Exception( + ErrorCodes::UNKNOWN_TYPE, + "Unsupported {} type '{}' of an input column '{}'. If it happens during schema inference and you want to skip columns with " + "unsupported types, you can enable setting input_format_{}_skip_columns_with_unsupported_types_in_schema_inference", + format_name, + arrow_column->type()->name(), + column_name, + boost::algorithm::to_lower_copy(format_name)); + } } } @@ -484,7 +523,8 @@ static void checkStatus(const arrow::Status & status, const String & column_name throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()}; } -Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, const Block * hint_header) + +Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, bool skip_columns_with_unsupported_types, const Block * hint_header) { ColumnsWithTypeAndName sample_columns; std::unordered_set nested_table_names; @@ -508,9 +548,10 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, arrow::ArrayVector array_vector = {arrow_array}; auto arrow_column = std::make_shared(array_vector); std::unordered_map> dict_values; - ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false); - - sample_columns.emplace_back(std::move(sample_column)); + bool skipped = false; + ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false, false, skip_columns_with_unsupported_types, skipped); + if (!skipped) + sample_columns.emplace_back(std::move(sample_column)); } return Block(std::move(sample_columns)); } @@ -544,6 +585,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & UInt64 num_rows = name_to_column_ptr.begin()->second->length(); columns_list.reserve(header.rows()); std::unordered_map nested_tables; + bool skipped = false; for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) { const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); @@ -558,7 +600,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & if (!nested_tables.contains(nested_table_name)) { std::shared_ptr arrow_column = name_to_column_ptr[nested_table_name]; - 
ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)}; + ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true, true, false, skipped)}; Block block(cols); nested_tables[nested_table_name] = std::make_shared(Nested::flatten(block)); } @@ -586,7 +628,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & if (read_from_nested) column = nested_tables[nested_table_name]->getByName(header_column.name); else - column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true); + column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true, true, false, skipped); try { diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index cf4f6bb3ff3..cc5852691e0 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -36,7 +36,11 @@ public: /// Transform arrow schema to ClickHouse header. If hint_header is provided, /// we will skip columns in schema that are not in hint_header. - static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr); + static Block arrowSchemaToCHHeader( + const arrow::Schema & schema, + const std::string & format_name, + bool skip_columns_with_unsupported_types = false, + const Block * hint_header = nullptr); private: const Block & header; diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index a372df41344..29429650c19 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -924,12 +924,12 @@ void registerInputFormatAvro(FormatFactory & factory) void registerAvroSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Avro", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("Avro", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, false, settings); }); - factory.registerSchemaReader("AvroConfluent", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("AvroConfluent", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, true, settings); }); diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index 6918220feb4..d3de2fbf494 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -95,7 +95,7 @@ void BinaryFormatReader::skipField(size_t file_column) } BinaryWithNamesAndTypesSchemaReader::BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) - : FormatWithNamesAndTypesSchemaReader(in_, 0, true, true, &reader), reader(in_, format_settings_) + : FormatWithNamesAndTypesSchemaReader(in_, format_settings_, true, true, &reader), reader(in_, format_settings_) { } @@ -119,7 +119,7 @@ void registerInputFormatRowBinary(FormatFactory & factory) void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("RowBinaryWithNamesAndTypes", [](ReadBuffer & buf, const FormatSettings & 
settings, ContextPtr) + factory.registerSchemaReader("RowBinaryWithNamesAndTypes", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, settings); }); diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 216ec6b295a..f246d5c0a35 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include @@ -259,16 +258,15 @@ bool CSVFormatReader::readField( } -CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_, ContextPtr context_) +CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_) : FormatWithNamesAndTypesSchemaReader( in_, - format_setting_.max_rows_to_read_for_schema_inference, + format_setting_, with_names_, with_types_, &reader, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV)) , reader(in_, format_setting_) - , context(context_) { } @@ -279,7 +277,7 @@ DataTypes CSVSchemaReader::readRowAndGetDataTypes() return {}; auto fields = reader.readRow(); - return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV, context); + return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV); } @@ -382,9 +380,9 @@ void registerCSVSchemaReader(FormatFactory & factory) { auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - factory.registerSchemaReader(format_name, [with_names, with_types](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context) + factory.registerSchemaReader(format_name, [with_names, with_types](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, with_names, with_types, settings, context); + return std::make_shared(buf, with_names, with_types, settings); }); }; diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index ad9f6c4e492..ee45264d573 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -74,13 +74,12 @@ public: class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader { public: - CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_, ContextPtr context_); + CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_); private: DataTypes readRowAndGetDataTypes() override; CSVFormatReader reader; - ContextPtr context; }; } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index d2e0d6e21a9..74c5fb1945a 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -289,17 +289,16 @@ void CustomSeparatedFormatReader::setReadBuffer(ReadBuffer & in_) } CustomSeparatedSchemaReader::CustomSeparatedSchemaReader( - ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_, ContextPtr context_) + ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_) : FormatWithNamesAndTypesSchemaReader( buf, - 
format_setting_.max_rows_to_read_for_schema_inference, + format_setting_, with_names_, with_types_, &reader, getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule)) , buf(in_) , reader(buf, ignore_spaces_, updateFormatSettings(format_setting_)) - , context(context_) { } @@ -315,7 +314,7 @@ DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypes() first_row = false; auto fields = reader.readRow(); - return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), context); + return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); } void registerInputFormatCustomSeparated(FormatFactory & factory) @@ -343,9 +342,9 @@ void registerCustomSeparatedSchemaReader(FormatFactory & factory) { auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - factory.registerSchemaReader(format_name, [with_names, with_types, ignore_spaces](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context) + factory.registerSchemaReader(format_name, [with_names, with_types, ignore_spaces](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, with_names, with_types, ignore_spaces, settings, context); + return std::make_shared(buf, with_names, with_types, ignore_spaces, settings); }); }; diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index a2f4509d307..d9e62a1b8e9 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -92,14 +92,13 @@ private: class CustomSeparatedSchemaReader : public FormatWithNamesAndTypesSchemaReader { public: - CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_, ContextPtr context_); + CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_); private: DataTypes readRowAndGetDataTypes() override; PeekableReadBuffer buf; CustomSeparatedFormatReader reader; - ContextPtr context; bool first_row = true; }; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index c087749d8d8..15b31eec11a 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -182,7 +182,7 @@ bool JSONCompactEachRowFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & } JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_) - : FormatWithNamesAndTypesSchemaReader(in_, format_settings_.max_rows_to_read_for_schema_inference, with_names_, with_types_, &reader), reader(in_, yield_strings_, format_settings_) + : FormatWithNamesAndTypesSchemaReader(in_, format_settings_, with_names_, with_types_, &reader), reader(in_, yield_strings_, format_settings_) { } @@ -231,7 +231,7 @@ void registerJSONCompactEachRowSchemaReader(FormatFactory & factory) { auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - factory.registerSchemaReader(format_name, [=](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader(format_name, [=](ReadBuffer & 
buf, const FormatSettings & settings) { return std::make_shared(buf, with_names, with_types, json_strings, settings); }); diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 549fd7a6113..19a97f50984 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -382,12 +382,12 @@ void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory void registerJSONEachRowSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("JSONEachRow", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("JSONEachRow", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_unique(buf, false, settings); }); - factory.registerSchemaReader("JSONStringsEachRow", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("JSONStringsEachRow", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_unique(buf, true, settings); }); diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 607e6f36767..36e7a56ebea 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -414,7 +414,7 @@ void MsgPackRowInputFormat::setReadBuffer(ReadBuffer & in_) } MsgPackSchemaReader::MsgPackSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) - : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference), buf(in_), number_of_columns(format_settings_.msgpack.number_of_columns) + : IRowSchemaReader(buf, format_settings_), buf(in_), number_of_columns(format_settings_.msgpack.number_of_columns) { if (!number_of_columns) throw Exception(ErrorCodes::BAD_ARGUMENTS, "You must specify setting input_format_msgpack_number_of_columns to extract table schema from MsgPack data"); @@ -535,7 +535,7 @@ void registerInputFormatMsgPack(FormatFactory & factory) void registerMsgPackSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("MsgPack", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("MsgPack", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, settings); }); diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index bd95cfd6376..c1dc60022f5 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -133,7 +133,7 @@ void registerOutputFormatNative(FormatFactory & factory) void registerNativeSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings &, ContextPtr) + factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings &) { return std::make_shared(buf); }); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index aa9f7874ae8..e93897edfbe 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -3,6 +3,7 @@ #if USE_ORC #include +#include #include #include #include @@ -187,8 +188,9 @@ NamesAndTypesList ORCSchemaReader::readSchema() std::shared_ptr schema; std::atomic is_stopped = 0; getFileReaderAndSchema(in, file_reader, schema, 
format_settings, is_stopped); - auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, "ORC"); - return header.getNamesAndTypesList(); + auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( + *schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference); + return getNamesAndRecursivelyNullableTypes(header); } void registerInputFormatORC(FormatFactory & factory) @@ -209,7 +211,7 @@ void registerORCSchemaReader(FormatFactory & factory) { factory.registerSchemaReader( "ORC", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, settings); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 548bf0138f5..f3d81822297 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -4,6 +4,7 @@ #if USE_PARQUET #include +#include #include #include #include @@ -186,8 +187,9 @@ NamesAndTypesList ParquetSchemaReader::readSchema() std::shared_ptr schema; std::atomic is_stopped = 0; getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped); - auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, "Parquet"); - return header.getNamesAndTypesList(); + auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( + *schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference); + return getNamesAndRecursivelyNullableTypes(header); } void registerInputFormatParquet(FormatFactory & factory) @@ -208,7 +210,7 @@ void registerParquetSchemaReader(FormatFactory & factory) { factory.registerSchemaReader( "Parquet", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, settings); } diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index 4754b70d375..f18b6b0aaab 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -128,15 +128,14 @@ void RegexpRowInputFormat::setReadBuffer(ReadBuffer & in_) IInputFormat::setReadBuffer(*buf); } -RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, ContextPtr context_) +RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : IRowSchemaReader( buf, - format_settings_.max_rows_to_read_for_schema_inference, + format_settings_, getDefaultDataTypeForEscapingRule(format_settings_.regexp.escaping_rule)) , format_settings(format_settings_) , field_extractor(format_settings) , buf(in_) - , context(context_) { } @@ -152,7 +151,7 @@ DataTypes RegexpSchemaReader::readRowAndGetDataTypes() for (size_t i = 0; i != field_extractor.getMatchedFieldsSize(); ++i) { String field(field_extractor.getField(i)); - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule, context)); + data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule)); } return data_types; @@ -203,9 +202,9 @@ void registerFileSegmentationEngineRegexp(FormatFactory & factory) void registerRegexpSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Regexp", [](ReadBuffer & buf, const FormatSettings & settings, 
ContextPtr context) + factory.registerSchemaReader("Regexp", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, settings, context); + return std::make_shared(buf, settings); }); } diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index 04f24bbb3e4..3cc6a3192fd 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -76,7 +76,7 @@ private: class RegexpSchemaReader : public IRowSchemaReader { public: - RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, ContextPtr context_); + RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings); private: DataTypes readRowAndGetDataTypes() override; @@ -85,7 +85,6 @@ private: const FormatSettings format_settings; RegexpFieldExtractor field_extractor; PeekableReadBuffer buf; - ContextPtr context; }; } diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index f63d6fa9c46..4c50e4d9b03 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -280,7 +280,7 @@ void registerInputFormatTSKV(FormatFactory & factory) } void registerTSKVSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("TSKV", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("TSKV", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, settings); }); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index bb844ec68ea..b6c9438a57c 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -235,7 +235,7 @@ TabSeparatedSchemaReader::TabSeparatedSchemaReader( ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( in_, - format_settings_.max_rows_to_read_for_schema_inference, + format_settings_, with_names_, with_types_, &reader, @@ -280,7 +280,7 @@ void registerTSVSchemaReader(FormatFactory & factory) { auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - factory.registerSchemaReader(format_name, [with_names, with_types, is_raw](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader(format_name, [with_names, with_types, is_raw](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, with_names, with_types, is_raw, settings); }); diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 06d6ba06bcc..a1f70730b39 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -453,14 +453,12 @@ TemplateSchemaReader::TemplateSchemaReader( const ParsedTemplateFormatString & format_, const ParsedTemplateFormatString & row_format_, std::string row_between_delimiter, - const FormatSettings & format_settings_, - ContextPtr context_) - : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference) + const FormatSettings & format_settings_) + : IRowSchemaReader(buf, format_settings_, nullptr, getDefaultDataTypeForEscapingRules(row_format_.escaping_rules)) , buf(in_) 
, format(format_) , row_format(row_format_) , format_settings(format_settings_) - , context(context_) , format_reader(buf, ignore_spaces_, format, row_format, row_between_delimiter, format_settings) { setColumnNames(row_format.column_names); @@ -489,7 +487,7 @@ DataTypes TemplateSchemaReader::readRowAndGetDataTypes() format_settings.csv.delimiter = row_format.delimiters[i + 1].empty() ? format_settings.csv.delimiter : row_format.delimiters[i + 1].front(); field = readFieldByEscapingRule(buf, row_format.escaping_rules[i], format_settings); - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i], context)); + data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i])); } format_reader.skipRowEndDelimiter(); @@ -564,12 +562,12 @@ void registerTemplateSchemaReader(FormatFactory & factory) { for (bool ignore_spaces : {false, true}) { - factory.registerSchemaReader(ignore_spaces ? "TemplateIgnoreSpaces" : "Template", [ignore_spaces](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context) + factory.registerSchemaReader(ignore_spaces ? "TemplateIgnoreSpaces" : "Template", [ignore_spaces](ReadBuffer & buf, const FormatSettings & settings) { size_t index = 0; auto idx_getter = [&](const String &) -> std::optional { return index++; }; auto row_format = fillRowFormat(settings, idx_getter, false); - return std::make_shared(buf, ignore_spaces, fillResultSetFormat(settings), row_format, settings.template_settings.row_between_delimiter, settings, context); + return std::make_shared(buf, ignore_spaces, fillResultSetFormat(settings), row_format, settings.template_settings.row_between_delimiter, settings); }); } } diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index b5ced707ace..ab7043f057e 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -116,8 +116,7 @@ public: const ParsedTemplateFormatString & format_, const ParsedTemplateFormatString & row_format_, std::string row_between_delimiter, - const FormatSettings & format_settings_, - ContextPtr context_); + const FormatSettings & format_settings_); DataTypes readRowAndGetDataTypes() override; @@ -126,7 +125,6 @@ private: const ParsedTemplateFormatString format; const ParsedTemplateFormatString row_format; FormatSettings format_settings; - ContextPtr context; TemplateFormatReader format_reader; bool first_row = true; }; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index bf8feb077ed..e8b4c69bd19 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -571,8 +572,8 @@ void ValuesBlockInputFormat::setReadBuffer(ReadBuffer & in_) IInputFormat::setReadBuffer(*buf); } -ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, ContextPtr context_) - : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference), buf(in_), context(context_) +ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) + : IRowSchemaReader(buf, format_settings_), buf(in_), format_settings(format_settings_) { } @@ -589,38 +590,25 @@ DataTypes ValuesSchemaReader::readRowAndGetDataTypes() return {}; 
assertChar('(', buf); - PeekableReadBufferCheckpoint checkpoint(buf); - skipToNextRow(&buf, 0, 1); - buf.makeContinuousMemoryFromCheckpointToPos(); - buf.rollbackToCheckpoint(); - - Tokens tokens(buf.position(), buf.buffer().end()); - IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth); - + skipWhitespaceIfAny(buf); DataTypes data_types; - bool finish = false; - while (!finish) + String value; + while (!buf.eof() && *buf.position() != ')') { - Expected expected; - ASTPtr ast; + if (!data_types.empty()) + { + skipWhitespaceIfAny(buf); + assertChar(',', buf); + skipWhitespaceIfAny(buf); + } - bool parsed = parser.parse(token_iterator, ast, expected); - /// Consider delimiter after value (',' or ')') as part of expression - parsed &= token_iterator->type == TokenType::Comma || token_iterator->type == TokenType::ClosingRoundBracket; - - if (!parsed) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot parse expression here: {}, token: {}", - String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position())), String(token_iterator.get().begin, token_iterator.get().end)); - - std::pair result = evaluateConstantExpression(ast, context); - data_types.push_back(generalizeDataType(result.second)); - - if (token_iterator->type == TokenType::ClosingRoundBracket) - finish = true; - ++token_iterator; - buf.position() = const_cast(token_iterator->begin); + readQuotedFieldIntoString(value, buf); + auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); + data_types.push_back(std::move(type)); } + assertChar(')', buf); + skipWhitespaceIfAny(buf); if (!buf.eof() && *buf.position() == ',') ++buf.position(); @@ -642,9 +630,9 @@ void registerInputFormatValues(FormatFactory & factory) void registerValuesSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Values", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context) + factory.registerSchemaReader("Values", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, settings, context); + return std::make_shared(buf, settings); }); } diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index e1521955472..77967181566 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -97,13 +97,13 @@ private: class ValuesSchemaReader : public IRowSchemaReader { public: - ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, ContextPtr context_); + ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings); private: DataTypes readRowAndGetDataTypes() override; PeekableReadBuffer buf; - ContextPtr context; + const FormatSettings format_settings; ParserExpression parser; bool first_row = true; }; diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index 7720b01dc74..8ab8b55c9c2 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -295,12 +295,12 @@ void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_) FormatWithNamesAndTypesSchemaReader::FormatWithNamesAndTypesSchemaReader( ReadBuffer & in_, - size_t max_rows_to_read_, + const FormatSettings & format_settings, bool with_names_, bool with_types_, FormatWithNamesAndTypesReader * format_reader_, 
DataTypePtr default_type_) - : IRowSchemaReader(in_, max_rows_to_read_, default_type_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_) + : IRowSchemaReader(in_, format_settings, default_type_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_) { } diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index 25ffc8d6de2..8d24d23186b 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -124,7 +124,7 @@ class FormatWithNamesAndTypesSchemaReader : public IRowSchemaReader public: FormatWithNamesAndTypesSchemaReader( ReadBuffer & in, - size_t max_rows_to_read_, + const FormatSettings & format_settings, bool with_names_, bool with_types_, FormatWithNamesAndTypesReader * format_reader_, diff --git a/tests/queries/0_stateless/02149_schema_inference.reference b/tests/queries/0_stateless/02149_schema_inference.reference index f46e3bee101..8fca786df05 100644 --- a/tests/queries/0_stateless/02149_schema_inference.reference +++ b/tests/queries/0_stateless/02149_schema_inference.reference @@ -1,17 +1,17 @@ TSV -c1 Nullable(String) +c1 Nullable(Float64) c2 Nullable(String) -c3 Nullable(String) -c4 Nullable(String) -42 Some string [1, 2, 3, 4] (1, 2, 3) -42 abcd [] (4, 5, 6) +c3 Array(Nullable(Float64)) +c4 Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)) +42 Some string [1,2,3,4] (1,2,3) +42 abcd [] (4,5,6) TSVWithNames -number Nullable(String) +number Nullable(Float64) string Nullable(String) -array Nullable(String) -tuple Nullable(String) -42 Some string [1, 2, 3, 4] (1, 2, 3) -42 abcd [] (4, 5, 6) +array Array(Nullable(Float64)) +tuple Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)) +42 Some string [1,2,3,4] (1,2,3) +42 abcd [] (4,5,6) CSV c1 Nullable(Float64) c2 Nullable(String) @@ -74,12 +74,12 @@ s1 [] 1 \N [3] \N TSKV b Nullable(String) -c Nullable(String) -a Nullable(String) -s1 \N 1 +c Array(Nullable(Float64)) +a Nullable(Float64) +s1 [] 1 } [2] 2 -\N \N \N -\N \N \N +\N [] \N +\N [] \N \N [3] \N Values c1 Nullable(Float64) @@ -96,7 +96,7 @@ c5 Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(Strin 42.42 \N [1,NULL,3] (1,NULL) ([1,2],[(3,'4'),(5,'6')]) \N Some string [10] (1,2) ([],[]) Regexp -c1 Nullable(String) +c1 Nullable(Float64) c2 Nullable(String) c3 Nullable(String) 42 Some string 1 [([1, 2, 3], String 1), ([], String 1)] diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference index d3d2d86d696..b0ec4bef499 100644 --- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference +++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference @@ -1,137 +1,137 @@ Arrow -int8 Int8 -uint8 UInt8 -int16 Int16 -uint16 UInt16 -int32 Int32 -uint32 UInt32 -int64 Int64 -uint64 UInt64 +int8 Nullable(Int8) +uint8 Nullable(UInt8) +int16 Nullable(Int16) +uint16 Nullable(UInt16) +int32 Nullable(Int32) +uint32 Nullable(UInt32) +int64 Nullable(Int64) +uint64 Nullable(UInt64) 0 0 0 0 0 0 0 0 -1 1 -1 1 -1 1 -1 1 -float32 Float32 -float64 Float64 -decimal32 Decimal(9, 5) -decimal64 Decimal(18, 5) +float32 Nullable(Float32) +float64 Nullable(Float64) +decimal32 Nullable(Decimal(9, 5)) +decimal64 Nullable(Decimal(18, 5)) 0 0 0 0 1.2 0.7692307692307692 3.33333 333.33333 -date UInt16 -date32 
Date32 +date Nullable(UInt16) +date32 Nullable(Date32) 0 1970-01-01 1 1970-01-02 -str String -fixed_string String +str Nullable(String) +fixed_string Nullable(String) Str: 0 100 Str: 1 200 -array Array(UInt64) -tuple Tuple(`tuple.0` UInt64, `tuple.1` String) -map Map(String, UInt64) +array Array(Nullable(UInt64)) +tuple Tuple(Nullable(UInt64), Nullable(String)) +map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64))) -nested2 Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8) +nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) +nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) ArrowStream -int8 Int8 -uint8 UInt8 -int16 Int16 -uint16 UInt16 -int32 Int32 -uint32 UInt32 -int64 Int64 -uint64 UInt64 +int8 Nullable(Int8) +uint8 Nullable(UInt8) +int16 Nullable(Int16) +uint16 Nullable(UInt16) +int32 Nullable(Int32) +uint32 Nullable(UInt32) +int64 Nullable(Int64) +uint64 Nullable(UInt64) 0 0 0 0 0 0 0 0 -1 1 -1 1 -1 1 -1 1 -float32 Float32 -float64 Float64 -decimal32 Decimal(9, 5) -decimal64 Decimal(18, 5) +float32 Nullable(Float32) +float64 Nullable(Float64) +decimal32 Nullable(Decimal(9, 5)) +decimal64 Nullable(Decimal(18, 5)) 0 0 0 0 1.2 0.7692307692307692 3.33333 333.33333 -date UInt16 -date32 Date32 +date Nullable(UInt16) +date32 Nullable(Date32) 0 1970-01-01 1 1970-01-02 -str String -fixed_string String +str Nullable(String) +fixed_string Nullable(String) Str: 0 100 Str: 1 200 -array Array(UInt64) -tuple Tuple(`tuple.0` UInt64, `tuple.1` String) -map Map(String, UInt64) +array Array(Nullable(UInt64)) +tuple Tuple(Nullable(UInt64), Nullable(String)) +map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64))) -nested2 Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8) +nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) +nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) Parquet -int8 Int8 -uint8 UInt8 -int16 Int16 -uint16 UInt16 -int32 Int32 -uint32 Int64 -int64 Int64 -uint64 UInt64 +int8 Nullable(Int8) +uint8 Nullable(UInt8) +int16 Nullable(Int16) +uint16 Nullable(UInt16) +int32 Nullable(Int32) +uint32 Nullable(Int64) +int64 Nullable(Int64) +uint64 Nullable(UInt64) 0 0 0 0 0 0 0 0 -1 1 -1 1 -1 1 -1 1 -float32 Float32 -float64 Float64 -decimal32 Decimal(9, 5) -decimal64 Decimal(18, 5) +float32 Nullable(Float32) +float64 Nullable(Float64) +decimal32 Nullable(Decimal(9, 5)) +decimal64 Nullable(Decimal(18, 5)) 0 0 0 0 1.2 0.7692307692307692 3.33333 333.33333 -date UInt16 -date32 Date32 +date Nullable(UInt16) +date32 Nullable(Date32) 0 1970-01-01 1 1970-01-02 -str String -fixed_string String +str 
Nullable(String) +fixed_string Nullable(String) Str: 0 100 Str: 1 200 -array Array(UInt64) -tuple Tuple(`tuple.0` UInt64, `tuple.1` String) -map Map(String, UInt64) +array Array(Nullable(UInt64)) +tuple Tuple(Nullable(UInt64), Nullable(String)) +map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64))) -nested2 Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8) +nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) +nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) ORC -int8 Int8 -uint8 Int8 -int16 Int16 -uint16 Int16 -int32 Int32 -uint32 Int32 -int64 Int64 -uint64 Int64 +int8 Nullable(Int8) +uint8 Nullable(Int8) +int16 Nullable(Int16) +uint16 Nullable(Int16) +int32 Nullable(Int32) +uint32 Nullable(Int32) +int64 Nullable(Int64) +uint64 Nullable(Int64) 0 0 0 0 0 0 0 0 -1 1 -1 1 -1 1 -1 1 -float32 Float32 -float64 Float64 -decimal32 Decimal(9, 5) -decimal64 Decimal(18, 5) +float32 Nullable(Float32) +float64 Nullable(Float64) +decimal32 Nullable(Decimal(9, 5)) +decimal64 Nullable(Decimal(18, 5)) 0 0 0 0 1.2 0.7692307692307692 3.33333 333.33333 -date Date32 -date32 Date32 +date Nullable(Date32) +date32 Nullable(Date32) 1970-01-01 1970-01-01 1970-01-02 1970-01-02 -str String -fixed_string String +str Nullable(String) +fixed_string Nullable(String) Str: 0 100 Str: 1 200 -array Array(Int64) -tuple Tuple(`tuple.0` Int64, `tuple.1` String) -map Map(String, Int64) +array Array(Nullable(Int64)) +tuple Tuple(Nullable(Int64), Nullable(String)) +map Map(String, Nullable(Int64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(`nested1.0` Array(Int64), `nested1.1` Map(String, Int64))) -nested2 Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(Int64)), `nested2.0.1` Map(Int64, Array(Tuple(`nested2.0.1.0` Int64, `nested2.0.1.1` String)))), `nested2.1` Int8) +nested1 Array(Tuple(Array(Nullable(Int64)), Map(String, Nullable(Int64)))) +nested2 Tuple(Tuple(Array(Array(Nullable(Int64))), Map(Int64, Array(Tuple(Nullable(Int64), Nullable(String))))), Nullable(Int8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) Native diff --git a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference new file mode 100644 index 00000000000..debc5c58936 --- /dev/null +++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference @@ -0,0 +1,40 @@ +Arrow +x Nullable(UInt64) +arr1 Array(Nullable(UInt64)) +arr2 Array(Array(Nullable(String))) +arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] +\N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] +2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] +\N [NULL,4] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,3)] +4 [4,5] [[NULL,'String'],[NULL],[]] 
[(NULL,NULL),('String',NULL),(NULL,4)] +ArrowStream +x Nullable(UInt64) +arr1 Array(Nullable(UInt64)) +arr2 Array(Array(Nullable(String))) +arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] +\N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] +2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] +\N [NULL,4] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,3)] +4 [4,5] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,4)] +Parquet +x Nullable(UInt64) +arr1 Array(Nullable(UInt64)) +arr2 Array(Array(Nullable(String))) +arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] +\N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] +2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] +\N [NULL,4] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,3)] +4 [4,5] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,4)] +ORC +x Nullable(Int64) +arr1 Array(Nullable(Int64)) +arr2 Array(Array(Nullable(String))) +arr3 Array(Tuple(Nullable(String), Nullable(Int64))) +0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] +\N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] +2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] +\N [NULL,4] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,3)] +4 [4,5] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,4)] diff --git a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh new file mode 100755 index 00000000000..1b6999e3f09 --- /dev/null +++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME=test_02242.data +DATA_FILE=$USER_FILES_PATH/$FILE_NAME + +for format in Arrow ArrowStream Parquet ORC +do + echo $format + $CLICKHOUSE_CLIENT -q "select number % 2 ? NULL : number as x, [number % 2 ? 
NULL : number, number + 1] as arr1, [[NULL, 'String'], [NULL], []] as arr2, [(NULL, NULL), ('String', NULL), (NULL, number)] as arr3 from numbers(5) format $format" > $DATA_FILE + $CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', '$format')" + $CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', '$format')" +done + +rm $DATA_FILE diff --git a/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.reference b/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.reference new file mode 100644 index 00000000000..f599e28b8ab --- /dev/null +++ b/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.reference @@ -0,0 +1 @@ +10 diff --git a/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.sh b/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.sh new file mode 100755 index 00000000000..cc8db7fb316 --- /dev/null +++ b/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_02243" +$CLICKHOUSE_CLIENT -q "create table test_02243 (image_path Nullable(String), + caption Nullable(String), + NSFW Nullable(String), + similarity Nullable(Float64), + LICENSE Nullable(String), + url Nullable(String), + key Nullable(UInt64), + shard_id Nullable(UInt64), + status Nullable(String), + error_message Nullable(String), + width Nullable(UInt32), + height Nullable(UInt32), + exif Nullable(String), + original_width Nullable(UInt32), + original_height Nullable(UInt32)) engine=Memory" + +cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT --stacktrace -q "insert into test_02243 format Parquet" + +$CLICKHOUSE_CLIENT -q "select count() from test_02243" +$CLICKHOUSE_CLIENT -q "drop table test_02243" diff --git a/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.reference b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.reference new file mode 100644 index 00000000000..d237caf630f --- /dev/null +++ b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.reference @@ -0,0 +1,8 @@ +x Nullable(String) +y Nullable(Float64) +x Nullable(String) +y Nullable(Float64) +x Nullable(String) +y Nullable(Float64) +x Nullable(String) +y Nullable(Float64) diff --git a/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql new file mode 100644 index 00000000000..cf9f312ab0c --- /dev/null +++ b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql @@ -0,0 +1,12 @@ +insert into function file('test_02244', 'TSV', 'x String, y UInt32') select 'Hello, world!', 42 settings engine_file_truncate_on_insert=1; +desc file('test_02244', 'TSV') settings column_names_for_schema_inference='x,y'; + +insert into function file('test_02244', 'CSV', 'x String, y UInt32') select 'Hello, world!', 42 settings engine_file_truncate_on_insert=1; +desc file('test_02244', 'CSV') settings column_names_for_schema_inference='x,y'; + +insert into function file('test_02244', 'JSONCompactEachRow', 'x String, y UInt32') select 'Hello, world!', 42 settings engine_file_truncate_on_insert=1; +desc file('test_02244', 'JSONCompactEachRow') settings column_names_for_schema_inference='x,y'; + +insert into function file('test_02244', 'Values', 'x String, y UInt32') 
select 'Hello, world!', 42 settings engine_file_truncate_on_insert=1; +desc file('test_02244', 'Values') settings column_names_for_schema_inference='x,y'; + diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.reference b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.reference new file mode 100644 index 00000000000..4f9cde534f0 --- /dev/null +++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.reference @@ -0,0 +1,16 @@ +OK +image_path Nullable(String) +caption Nullable(String) +NSFW Nullable(String) +similarity Nullable(Float64) +LICENSE Nullable(String) +url Nullable(String) +key Nullable(Int64) +shard_id Nullable(Int64) +status Nullable(String) +width Nullable(Int64) +height Nullable(Int64) +exif Nullable(String) +original_width Nullable(Int64) +original_height Nullable(Int64) +10 diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh new file mode 100755 index 00000000000..005c089e434 --- /dev/null +++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME=test_02245.parquet +DATA_FILE=$USER_FILES_PATH/$FILE_NAME + +cp $CUR_DIR/data_parquet_bad_column/metadata_0.parquet $DATA_FILE + + +$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1" +$CLICKHOUSE_CLIENT -q "select count(*) from file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1" + diff --git a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference new file mode 100644 index 00000000000..c245f13fdbe --- /dev/null +++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference @@ -0,0 +1,107 @@ +TSV +c1 Nullable(Float64) +c2 Nullable(String) +c3 Array(Nullable(Float64)) +c4 Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)) +42 Some string [1,2,3,4] (1,2,3) +42 abcd [] (4,5,6) +c1 Nullable(String) +[({\'key\' : 42.42}, [\'String\', \'String2\'], 42.42), ({}, [], -42), ({\'key2\' : NULL}, [NULL], NULL)] +[] +[({}, [], 0)] +[({}, [NULL], NULL)] +[({}, [\'String3\'], NULL)] +[({\'key3\': NULL}, []), NULL] +c1 Array(Tuple(Map(String, Nullable(Float64)), Array(Nullable(String)), Nullable(Float64))) +[({'key':42.42},['String','String2'],42.42),({},[],-42),({'key2':NULL},[NULL],NULL)] +[] +[({},[],0)] +[({},[NULL],NULL)] +[({},['String3'],NULL)] +[({'key3':NULL},[],NULL)] +c1 Nullable(Bool) +true +false +\N +c1 Array(Nullable(Bool)) +[true,NULL] +[] +[NULL] +[false] +c1 Nullable(String) +[] +c1 Nullable(String) +{} +c1 Nullable(String) +() +c1 Nullable(String) +[1, 2, 3 +c1 Nullable(String) +[(1, 2, 3 4)] +c1 Nullable(String) +[1, 2, 3 + 4] +c1 Nullable(String) +(1, 2, +c1 Nullable(String) +[1, Some trash, 42.2] +c1 Nullable(String) +[1, \'String\', {\'key\' : 2}] +c1 Nullable(String) +{\'key\' : 1, [1] : 10} +c1 
Nullable(String) +{}{} +c1 Nullable(String) +[1, 2, 3 +c1 Nullable(String) +[abc, def] +c1 Array(Nullable(String)) +['abc','def'] +c1 Nullable(String) +[\'string] +c1 Nullable(String) +\'string +c1 Nullable(Float64) +42.42 +c1 Nullable(String) +42.42sometrash +c1 Nullable(String) +[42.42sometrash, 42.42] + +CSV +c1 Nullable(String) +c2 Nullable(String) +c3 Array(Nullable(Float64)) +c4 Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))) +42 Some string [1,2,3,4] [(1,2,3)] +42\\ abcd [] [(4,5,6)] +c1 Nullable(String) +[({\'key\' : 42.42}, [\'String\', \'String2\'], 42.42), ({}, [], -42), ({\'key2\' : NULL}, [NULL], NULL)] +[] +[({}, [], 0)] +[({}, [NULL], NULL)] +[({}, [\'String3\'], NULL)] +[({\'key3\': NULL}, []), NULL] +c1 Array(Tuple(Map(String, Nullable(Float64)), Array(Nullable(String)), Nullable(Float64))) +[({'key':42.42},['String','String2'],42.42),({},[],-42),({'key2':NULL},[NULL],NULL)] +[] +[({},[],0)] +[({},[NULL],NULL)] +[({},['String3'],NULL)] +[({'key3':NULL},[],NULL)] +c1 Nullable(Bool) +true +false +\N +c1 Array(Nullable(Bool)) +[true,NULL] +[] +[NULL] +[false] +c1 Nullable(String) +(1, 2, 3) +c1 Nullable(String) +123.123 +c1 Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))) +[(1,2,3)] +c1 Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))) +[(1,2,3)] diff --git a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh new file mode 100755 index 00000000000..6589765f739 --- /dev/null +++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh @@ -0,0 +1,220 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME=test_02149.data +DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME + +touch $DATA_FILE + +echo "TSV" + +echo -e "42\tSome string\t[1, 2, 3, 4]\t(1, 2, 3) +42\tabcd\t[]\t(4, 5, 6)" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)] +[] +[({}, [], 0)] +[({}, [NULL], NULL)] +[({}, ['String3'], NULL)] +[({'key3': NULL}, []), NULL]"> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV') settings input_format_tsv_use_best_effort_in_schema_inference=false" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV') settings input_format_tsv_use_best_effort_in_schema_inference=false" + + +echo -e "[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)] +[] +[({}, [], 0)] +[({}, [NULL], NULL)] +[({}, ['String3'], NULL)] +[({'key3': NULL}, [], NULL)]"> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "true +false +\N" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[true, NULL] +[] +[NULL] +[false]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "{}" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "()" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[1, 2, 3" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[(1, 2, 3 4)]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[1, 2, 3 + 4]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "(1, 2," > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[1, Some trash, 42.2]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[1, 'String', {'key' : 2}]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "{'key' : 1, [1] : 10}" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "{}{}" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[1, 2, 3" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc 
file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[abc, def]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "['abc', 'def']" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "['string]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "'string" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "42.42" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "42.42sometrash" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[42.42sometrash, 42.42]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + + +echo +echo "CSV" + +echo -e "42,Some string,'[1, 2, 3, 4]','[(1, 2, 3)]' +42\,abcd,'[]','[(4, 5, 6)]'" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]\" +'[]' +'[({}, [], 0)]' +'[({}, [NULL], NULL)]' +\"[({}, ['String3'], NULL)]\" +\"[({'key3': NULL}, []), NULL]\""> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" + +echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]\" +'[]' +'[({}, [], 0)]' +'[({}, [NULL], NULL)]' +\"[({}, ['String3'], NULL)]\" +\"[({'key3': NULL}, [], NULL)]\""> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "true +false +\N" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "'[true, NULL]' +'[]' +'[NULL]' +'[false]'" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + + +echo -e "'(1, 2, 3)'"> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "'123.123'"> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "'[(1, 2, 3)]'"> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "\"[(1, 2, 3)]\""> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + + From abc020a502962239cd3cf5f0585684a16d0f7fc5 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 24 Mar 2022 13:08:58 +0000 Subject: [PATCH 02/17] Clean up --- src/Formats/EscapingRuleUtils.cpp | 1 + 
src/Formats/EscapingRuleUtils.h | 17 +++++++++++------ src/Formats/FormatSettings.h | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 08b34ebb0fc..b3e36e9c14a 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -429,6 +429,7 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf) if (checkStringCaseInsensitive("NULL", buf)) return std::make_shared(); + /// Number Float64 tmp; if (tryReadFloatText(tmp, buf)) return std::make_shared(); diff --git a/src/Formats/EscapingRuleUtils.h b/src/Formats/EscapingRuleUtils.h index 3c7c768c003..b4f609cd9a6 100644 --- a/src/Formats/EscapingRuleUtils.h +++ b/src/Formats/EscapingRuleUtils.h @@ -43,12 +43,17 @@ String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule es /// - For JSON escaping rule we can use JSON parser to parse a single field /// and then convert JSON type of this field to ClickHouse type. /// - For CSV escaping rule we can do the next: -/// - If the field is an unquoted string, then we could try to evaluate it -/// as a constant expression, and if it fails, treat it as a String. -/// - If the field is a string in quotes, then we can try to evaluate -/// expression inside quotes as a constant expression, and if it fails or -/// the result is a number (we don't parse numbers in quotes) we treat it as a String. -/// - For TSV and TSVRaw we treat each field as a String (TODO: try to use some tweaks and heuristics here) +/// - If the field is an unquoted string, then we try to parse it as s number, +/// and if we cannot, treat it as a String. +/// - If the field is a string in quotes, then we try to use some +/// tweaks and heuristics to determine the type inside quotes, and if we can't or +/// the result is a number or tuple (we don't parse numbers in quotes and don't +/// support tuples in CSV) we treat it as a String. +/// - If input_format_csv_use_best_effort_in_schema_inference is disabled, we +/// treat everything as a string. +/// - For TSV and TSVRaw we try to use some tweaks and heuristics to determine the type +/// of value if setting input_format_tsv_use_best_effort_in_schema_inference is enabled, +/// otherwise we treat everything as a string. 
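+/// A few illustrative examples for TSV fields with best-effort inference enabled
+/// (these mirror cases from 02246_tsv_csv_best_effort_schema_inference and are not
+/// exhaustive; types are shown as DESC reports them):
+///   42.42            -> Nullable(Float64)
+///   [1, 2, 3, 4]     -> Array(Nullable(Float64))
+///   (1, 2, 3)        -> Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))
+///   [abc, def]       -> Nullable(String)  (array of unquoted strings falls back to String)
+///   42.42sometrash   -> Nullable(String)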
DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule); DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule); diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 6ecd04536a6..4b39d255110 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -112,7 +112,7 @@ struct FormatSettings char fields_delimiter = '\x01'; char collection_items_delimiter = '\x02'; char map_keys_delimiter = '\x03'; - Names input_field_names = {""}; + Names input_field_names; } hive_text; struct Custom From 3b801a4093322dd048f286d410ef7f1d952a972c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 24 Mar 2022 19:16:41 +0100 Subject: [PATCH 03/17] Update src/Processors/Formats/ISchemaReader.cpp Co-authored-by: Vladimir C --- src/Processors/Formats/ISchemaReader.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 17db8865310..a80c56a0449 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -20,6 +20,13 @@ IRowSchemaReader::IRowSchemaReader( { /// column_names_for_schema_inference is a string in format 'column1,column2,column3,...' boost::split(column_names, format_settings.column_names_for_schema_inference, boost::is_any_of(",")); + for (size_t i = 0 ; i < column_names.size() ; ++i) + { + std::string col_name_trimmed = column_names[i]; + boost::trim(col_name_trimmed); + if (!col_name.empty()) + column_names[i] = col_name_trimmed; + } } } From 6a9df9d471190ddf60ec59a9385d821f2678bee0 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 24 Mar 2022 19:16:47 +0100 Subject: [PATCH 04/17] Update src/Processors/Formats/ISchemaReader.cpp Co-authored-by: Vladimir C --- src/Processors/Formats/ISchemaReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index a80c56a0449..a74de0447cb 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -58,7 +58,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() { if (default_type) data_types[i] = default_type; - else if (!default_types.empty() && i < default_types.size() && default_types[i]) + else if (i < default_types.size() && default_types[i]) data_types[i] = default_types[i]; else throw Exception( From 287e1a6efc6d63dce9a111821079e87bc21321f8 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 24 Mar 2022 19:16:52 +0100 Subject: [PATCH 05/17] Update src/Processors/Formats/ISchemaReader.cpp Co-authored-by: Vladimir C --- src/Processors/Formats/ISchemaReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index a74de0447cb..e421692743d 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -93,7 +93,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() { if (default_type) data_types[i] = default_type; - else if (!default_types.empty() && i < default_types.size() && default_types[i]) + else if (i < default_types.size() && default_types[i]) data_types[i] = 
default_types[i]; else throw Exception( From 1823cac89d24d906c4f5894d9dd8814bbcd694dc Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 24 Mar 2022 19:19:32 +0100 Subject: [PATCH 06/17] Update src/Formats/EscapingRuleUtils.h Co-authored-by: Vladimir C --- src/Formats/EscapingRuleUtils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/EscapingRuleUtils.h b/src/Formats/EscapingRuleUtils.h index b4f609cd9a6..1ce04a8d1b7 100644 --- a/src/Formats/EscapingRuleUtils.h +++ b/src/Formats/EscapingRuleUtils.h @@ -43,7 +43,7 @@ String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule es /// - For JSON escaping rule we can use JSON parser to parse a single field /// and then convert JSON type of this field to ClickHouse type. /// - For CSV escaping rule we can do the next: -/// - If the field is an unquoted string, then we try to parse it as s number, +/// - If the field is an unquoted string, then we try to parse it as a number, /// and if we cannot, treat it as a String. /// - If the field is a string in quotes, then we try to use some /// tweaks and heuristics to determine the type inside quotes, and if we can't or From ae92963b15ec6b48408e994fd21bd2898935c209 Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Fri, 25 Mar 2022 11:30:25 +0100 Subject: [PATCH 07/17] Fix build error in Formats/ISchemaReader.cpp --- src/Processors/Formats/ISchemaReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index e421692743d..392b636e073 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -24,7 +24,7 @@ IRowSchemaReader::IRowSchemaReader( { std::string col_name_trimmed = column_names[i]; boost::trim(col_name_trimmed); - if (!col_name.empty()) + if (!col_name_trimmed.empty()) column_names[i] = col_name_trimmed; } } From 6fb3c3be043b7006e0ef63b62f22cb7620853145 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 25 Mar 2022 12:02:21 +0000 Subject: [PATCH 08/17] Fix comments and build --- src/Processors/Formats/ISchemaReader.cpp | 35 ++++++++++++++----- src/Processors/Formats/ISchemaReader.h | 5 ++- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 2 +- .../Formats/Impl/TemplateRowInputFormat.cpp | 2 +- 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 392b636e073..0709b8f4d75 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -12,24 +12,35 @@ namespace ErrorCodes } IRowSchemaReader::IRowSchemaReader( - ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_, const DataTypes & default_types_) + ReadBuffer & in_, const FormatSettings & format_settings) : ISchemaReader(in_) - , max_rows_to_read(format_settings.max_rows_to_read_for_schema_inference), default_type(default_type_), default_types(default_types_) + , max_rows_to_read(format_settings.max_rows_to_read_for_schema_inference) { if (!format_settings.column_names_for_schema_inference.empty()) { /// column_names_for_schema_inference is a string in format 'column1,column2,column3,...' 
boost::split(column_names, format_settings.column_names_for_schema_inference, boost::is_any_of(",")); - for (size_t i = 0 ; i < column_names.size() ; ++i) + for (auto & column_name : column_names) { - std::string col_name_trimmed = column_names[i]; - boost::trim(col_name_trimmed); + std::string col_name_trimmed = boost::trim_copy(column_name); if (!col_name_trimmed.empty()) - column_names[i] = col_name_trimmed; + column_name = col_name_trimmed; } } } +IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_) + : IRowSchemaReader(in_, format_settings) +{ + default_type = default_type_; +} + +IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, const DataTypes & default_types_) + : IRowSchemaReader(in_, format_settings) +{ + default_types = default_types_; +} + NamesAndTypesList IRowSchemaReader::readSchema() { DataTypes data_types = readRowAndGetDataTypes(); @@ -63,7 +74,11 @@ NamesAndTypesList IRowSchemaReader::readSchema() else throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Automatically defined type {} for column {} in row {} differs from type defined by previous rows: {}", new_data_types[i]->getName(), i + 1, row, data_types[i]->getName()); + "Automatically defined type {} for column {} in row {} differs from type defined by previous rows: {}", + new_data_types[i]->getName(), + i + 1, + row, + data_types[i]->getName()); } } } @@ -146,7 +161,11 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() else throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Automatically defined type {} for column {} in row {} differs from type defined by previous rows: {}", type->getName(), name, row, new_type->getName()); + "Automatically defined type {} for column {} in row {} differs from type defined by previous rows: {}", + type->getName(), + name, + row, + new_type->getName()); } } } diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index cd0b552c021..1716b78f1b4 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -34,7 +34,10 @@ protected: class IRowSchemaReader : public ISchemaReader { public: - IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_ = nullptr, const DataTypes & default_types_ = {}); + IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings); + IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_); + IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, const DataTypes & default_types_); + NamesAndTypesList readSchema() override; protected: diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index c17c86d51ea..bb58e851ff8 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -684,7 +684,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & std::vector ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const { std::vector missing_columns; - auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header, case_insensitive_matching); + auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, false, &header, case_insensitive_matching); auto flatten_block_from_arrow = Nested::flatten(block_from_arrow); for (size_t i = 
0, columns = header.columns(); i < columns; ++i) diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index a1f70730b39..df4d49b172c 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -454,7 +454,7 @@ TemplateSchemaReader::TemplateSchemaReader( const ParsedTemplateFormatString & row_format_, std::string row_between_delimiter, const FormatSettings & format_settings_) - : IRowSchemaReader(buf, format_settings_, nullptr, getDefaultDataTypeForEscapingRules(row_format_.escaping_rules)) + : IRowSchemaReader(buf, format_settings_, getDefaultDataTypeForEscapingRules(row_format_.escaping_rules)) , buf(in_) , format(format_) , row_format(row_format_) From fddeecdd699c049e07b60010e57a005a500eb884 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 28 Mar 2022 21:59:43 +0200 Subject: [PATCH 09/17] Fix fast test --- .../0_stateless/02244_column_names_in_shcmea_inference.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql index cf9f312ab0c..4733690b225 100644 --- a/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql +++ b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + insert into function file('test_02244', 'TSV', 'x String, y UInt32') select 'Hello, world!', 42 settings engine_file_truncate_on_insert=1; desc file('test_02244', 'TSV') settings column_names_for_schema_inference='x,y'; From 97f5033ea98d7f1e06a21210ad9460a121300653 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 29 Mar 2022 13:07:37 +0000 Subject: [PATCH 10/17] Fix tests --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 6 +++++- .../02166_arrow_dictionary_inference.reference | 2 +- .../0_stateless/02166_arrow_dictionary_inference.sh | 2 +- .../02211_shcema_inference_from_stdin.reference | 6 +++--- .../02240_tskv_schema_inference_bug.reference | 10 +++++----- .../02244_column_names_in_shcmea_inference.sql | 2 +- 6 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index bb58e851ff8..c792d828e44 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -553,7 +553,11 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( auto arrow_column = std::make_shared(array_vector); std::unordered_map> dict_values; bool skipped = false; - ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false, false, skip_columns_with_unsupported_types, skipped); + bool allow_null_type = false; + if (hint_header && hint_header->has(field->name()) && hint_header->getByName(field->name()).type->isNullable()) + allow_null_type = true; + ColumnWithTypeAndName sample_column = readColumnFromArrowColumn( + arrow_column, field->name(), format_name, false, dict_values, false, allow_null_type, skip_columns_with_unsupported_types, skipped); if (!skipped) sample_columns.emplace_back(std::move(sample_column)); } diff --git a/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference b/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference index 
46f448cfba7..20f3368e446 100644 --- a/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference +++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference @@ -1 +1 @@ -x LowCardinality(UInt64) +x LowCardinality(Nullable(UInt64)) diff --git a/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh index e560dc10d2c..7d313b571d9 100755 --- a/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh +++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "insert into table function file('arrow.dict', 'Arrow', 'x LowCardinality(UInt64)') select number from numbers(10) settings output_format_arrow_low_cardinality_as_dictionary=1" +$CLICKHOUSE_CLIENT -q "insert into table function file('arrow.dict', 'Arrow', 'x LowCardinality(UInt64)') select number from numbers(10) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "desc file('arrow.dict', 'Arrow')" diff --git a/tests/queries/0_stateless/02211_shcema_inference_from_stdin.reference b/tests/queries/0_stateless/02211_shcema_inference_from_stdin.reference index d176e0ee1ed..6920aa16198 100644 --- a/tests/queries/0_stateless/02211_shcema_inference_from_stdin.reference +++ b/tests/queries/0_stateless/02211_shcema_inference_from_stdin.reference @@ -9,7 +9,7 @@ x Nullable(Float64) 7 8 9 -c1 Nullable(String) -c2 Nullable(String) -c3 Nullable(String) +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Nullable(Float64) 1 2 3 diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference index a8abc33648e..7950770cfd4 100644 --- a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference @@ -1,8 +1,8 @@ b Nullable(String) -c Nullable(String) -a Nullable(String) -s1 \N 1 +c Array(Nullable(Float64)) +a Nullable(Float64) +s1 [] 1 } [2] 2 -\N \N \N -\N \N \N +\N [] \N +\N [] \N \N [3] \N diff --git a/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql index 4733690b225..af56856f0be 100644 --- a/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql +++ b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel insert into function file('test_02244', 'TSV', 'x String, y UInt32') select 'Hello, world!', 42 settings engine_file_truncate_on_insert=1; desc file('test_02244', 'TSV') settings column_names_for_schema_inference='x,y'; From a2fd09e0314f83d9e64ea758c973ea56f39d740c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 29 Mar 2022 16:34:07 +0200 Subject: [PATCH 11/17] Fix style --- src/Formats/EscapingRuleUtils.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index b3e36e9c14a..870202faf72 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -23,7 +23,6 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; } FormatSettings::EscapingRule stringToEscapingRule(const 
String & escaping_rule) From 666ef3586c5d200a277b2a806b289f9781360628 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 29 Mar 2022 16:42:01 +0200 Subject: [PATCH 12/17] Fix typos check --- utils/check-style/codespell-ignore-words.list | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/codespell-ignore-words.list b/utils/check-style/codespell-ignore-words.list index d3a7586647c..7aabaff17c5 100644 --- a/utils/check-style/codespell-ignore-words.list +++ b/utils/check-style/codespell-ignore-words.list @@ -10,3 +10,4 @@ ths offsett numer ue +alse From 265fbaaa5aadf4e65e3569e840550690b3025586 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 30 Mar 2022 13:23:43 +0000 Subject: [PATCH 13/17] Fix tests --- .../02245_s3_schema_desc.reference | 24 +++++++++---------- .../0_stateless/02245_s3_schema_desc.sql | 17 ++++++------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/tests/queries/0_stateless/02245_s3_schema_desc.reference b/tests/queries/0_stateless/02245_s3_schema_desc.reference index a5b0f81a2c7..e039680d933 100644 --- a/tests/queries/0_stateless/02245_s3_schema_desc.reference +++ b/tests/queries/0_stateless/02245_s3_schema_desc.reference @@ -1,21 +1,21 @@ -c1 Nullable(String) -c2 Nullable(String) -c3 Nullable(String) -c1 Nullable(String) -c2 Nullable(String) -c3 Nullable(String) +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Nullable(Float64) +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Nullable(Float64) c1 UInt64 c2 UInt64 c3 UInt64 -c1 Nullable(String) -c2 Nullable(String) -c3 Nullable(String) +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Nullable(Float64) c1 UInt64 c2 UInt64 c3 UInt64 -c1 Nullable(String) -c2 Nullable(String) -c3 Nullable(String) +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Nullable(Float64) c1 UInt64 c2 UInt64 c3 UInt64 diff --git a/tests/queries/0_stateless/02245_s3_schema_desc.sql b/tests/queries/0_stateless/02245_s3_schema_desc.sql index 4ab870e1379..72800f3ab38 100644 --- a/tests/queries/0_stateless/02245_s3_schema_desc.sql +++ b/tests/queries/0_stateless/02245_s3_schema_desc.sql @@ -1,13 +1,14 @@ -- Tags: no-fasttest -- Tag no-fasttest: Depends on AWS -desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv'); -desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV'); -desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); -desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest'); -desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); -desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV'); -desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); -desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); +desc file('data_minio/{a,b,c}.tsv'); +desc file('data_minio/{a,b,c}.tsv', 'TSV'); +desc file('data_minio/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); +desc file('data_minio/{a,b,c}.tsv'); +desc file('data_minio/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); +desc 
file('data_minio/{a,b,c}.tsv', 'TSV'); +desc file('data_minio/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); +desc file('data_minio/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); + SELECT * FROM s3(decodeURLComponent(NULL), [NULL]); --{serverError 170} From 564a77c462eda1b1e432008d758f09b8d82a1a26 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 31 Mar 2022 12:49:23 +0200 Subject: [PATCH 14/17] Fix build --- src/Formats/EscapingRuleUtils.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 870202faf72..e9d7e464cce 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -466,8 +466,6 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe if (field.size() > 1 && ((field.front() == '\'' && field.back() == '\'') || (field.front() == '"' && field.back() == '"'))) { - auto s = std::string_view(field.data() + 1, field.size() - 2); - ReadBufferFromString buf(std::string_view(field.data() + 1, field.size() - 2)); /// Try to determine the type of value inside quotes auto type = determineDataTypeForSingleField(buf); From 4db5043ed45fffe512d716bd056a3dc5ae76a1d2 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 31 Mar 2022 12:50:53 +0200 Subject: [PATCH 15/17] Fix test --- .../queries/0_stateless/02245_s3_schema_desc.sql | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02245_s3_schema_desc.sql b/tests/queries/0_stateless/02245_s3_schema_desc.sql index 72800f3ab38..2cd362ff233 100644 --- a/tests/queries/0_stateless/02245_s3_schema_desc.sql +++ b/tests/queries/0_stateless/02245_s3_schema_desc.sql @@ -1,14 +1,14 @@ -- Tags: no-fasttest -- Tag no-fasttest: Depends on AWS -desc file('data_minio/{a,b,c}.tsv'); -desc file('data_minio/{a,b,c}.tsv', 'TSV'); -desc file('data_minio/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); -desc file('data_minio/{a,b,c}.tsv'); -desc file('data_minio/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); -desc file('data_minio/{a,b,c}.tsv', 'TSV'); -desc file('data_minio/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); -desc file('data_minio/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); +desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv'); +desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV'); +desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); +desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest'); +desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); +desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV'); +desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); +desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); SELECT * FROM s3(decodeURLComponent(NULL), [NULL]); --{serverError 170} From 
a41c73fcf6d90e381691ab6f2bb60809196e6bf8 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 31 Mar 2022 11:25:52 +0000 Subject: [PATCH 16/17] Fix tests --- .../0_stateless/01801_s3_cluster.reference | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/queries/0_stateless/01801_s3_cluster.reference b/tests/queries/0_stateless/01801_s3_cluster.reference index 31c97f14fa3..0448ff3933b 100644 --- a/tests/queries/0_stateless/01801_s3_cluster.reference +++ b/tests/queries/0_stateless/01801_s3_cluster.reference @@ -2,30 +2,6 @@ 0 0 0 0 0 0 1 2 3 -10 11 12 -13 14 15 -16 17 18 -20 21 22 -23 24 25 -26 27 28 -4 5 6 -7 8 9 -0 0 0 -0 0 0 -0 0 0 -1 2 3 -10 11 12 -13 14 15 -16 17 18 -20 21 22 -23 24 25 -26 27 28 -4 5 6 -7 8 9 -0 0 0 -0 0 0 -0 0 0 -1 2 3 4 5 6 7 8 9 10 11 12 @@ -38,14 +14,26 @@ 0 0 0 0 0 0 1 2 3 +4 5 6 +7 8 9 10 11 12 13 14 15 16 17 18 20 21 22 23 24 25 26 27 28 +0 0 0 +0 0 0 +0 0 0 +1 2 3 4 5 6 7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 0 0 0 0 0 0 0 0 0 @@ -62,14 +50,26 @@ 0 0 0 0 0 0 1 2 3 +4 5 6 +7 8 9 10 11 12 13 14 15 16 17 18 20 21 22 23 24 25 26 27 28 +0 0 0 +0 0 0 +0 0 0 +1 2 3 4 5 6 7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 0 0 0 0 0 0 0 0 0 From 1c783ed88abbad3b144d942aa3a035faf5b27d6c Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 7 Apr 2022 12:17:48 +0000 Subject: [PATCH 17/17] Resolve conflicts --- .../Impl/JSONCompactEachRowRowInputFormat.cpp | 11 +++-------- .../02149_schema_inference.reference | 8 ++++---- .../02240_tskv_schema_inference_bug.reference | 8 ++++---- ...2247_names_order_in_json_and_tskv.reference | 18 +++++++++--------- 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 798cb69d759..854aefc7562 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -181,15 +181,10 @@ bool JSONCompactEachRowFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & return true; } -JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_) +JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader( + ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( - in_, - format_settings_ - with_names_, - with_types_, - &reader, - nullptr, - format_settings_.json.read_bools_as_numbers) + in_, format_settings_, with_names_, with_types_, &reader, nullptr, format_settings_.json.read_bools_as_numbers) , reader(in_, yield_strings_, format_settings_) { } diff --git a/tests/queries/0_stateless/02149_schema_inference.reference b/tests/queries/0_stateless/02149_schema_inference.reference index fe87552e776..2d7dd5caca7 100644 --- a/tests/queries/0_stateless/02149_schema_inference.reference +++ b/tests/queries/0_stateless/02149_schema_inference.reference @@ -75,11 +75,11 @@ c Array(Nullable(Float64)) TSKV a Nullable(Float64) b Nullable(String) -c Array(Nullable(Float64)) -1 s1 \N +c Array(Nullable(Float64)) +1 s1 [] 2 } [2] -\N \N \N -\N \N \N +\N \N [] +\N \N [] \N \N [3] Values c1 Nullable(Float64) diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference 
b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference index 5467b32ffb0..d0ced74f8f6 100644 --- a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference @@ -1,8 +1,8 @@ a Nullable(Float64) b Nullable(String) -c Array(Nullable(Float64)) -1 s1 \N +c Array(Nullable(Float64)) +1 s1 [] 2 } [2] -\N \N \N -\N \N \N +\N \N [] +\N \N [] \N \N [3] diff --git a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.reference b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.reference index 49a285dc11a..300846c17a0 100644 --- a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.reference +++ b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.reference @@ -1,15 +1,15 @@ -a Nullable(String) +a Nullable(Float64) b Nullable(String) -c Nullable(String) -1 s1 \N +c Array(Nullable(Float64)) +1 s1 [] 2 } [2] -\N \N \N -\N \N \N +\N \N [] +\N \N [] \N \N [3] -b Nullable(String) -a Nullable(String) -c Nullable(String) -e Nullable(String) +b Nullable(Float64) +a Nullable(Float64) +c Nullable(Float64) +e Nullable(Float64) 1 \N \N \N \N 2 3 \N \N \N \N \N