Merge pull request #35582 from Avogar/improve-schema-inference

Improve schema inference and add some fixes
2024-11-21 07:01:59 +00:00 · 2022-04-08 13:44:52 +02:00 · 2022-04-08 13:44:52 +02:00 · d7b88d7683
commit d7b88d7683
parent 44c80a388f 1c783ed88a
59 changed files with 1167 additions and 368 deletions
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -641,6 +641,12 @@ class IColumn;
    M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
    M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \
    M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \
+    M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
+    M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
+    M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Parquet", 0) \
+    M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format ORC", 0) \
+    M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Arrow", 0) \
+    M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
    M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
    \
    M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \
--- a/src/DataTypes/DataTypeMap.cpp
+++ b/src/DataTypes/DataTypeMap.cpp
@ -45,22 +45,7 @@ DataTypeMap::DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & valu

 void DataTypeMap::assertKeyType() const
 {
-    bool type_error = false;
-    if (key_type->getTypeId() == TypeIndex::LowCardinality)
-    {
-        const auto & low_cardinality_data_type = assert_cast<const DataTypeLowCardinality &>(*key_type);
-        if (!isStringOrFixedString(*(low_cardinality_data_type.getDictionaryType())))
-            type_error = true;
-    }
-    else if (!key_type->isValueRepresentedByInteger()
-        && !isStringOrFixedString(*key_type)
-        && !WhichDataType(key_type).isNothing()
-        && !WhichDataType(key_type).isUUID())
-    {
-        type_error = true;
-    }
-
-    if (type_error)
+    if (!checkKeyType(key_type))
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Type of Map key must be a type, that can be represented by integer or String or FixedString (possibly LowCardinality) or UUID,"
            " but {} given", key_type->getName());
@ -102,6 +87,25 @@ bool DataTypeMap::equals(const IDataType & rhs) const
    return nested->equals(*rhs_map.nested);
 }

+/// Tells whether key_type may be used as the key type of a Map (true) or not (false).
+bool DataTypeMap::checkKeyType(DataTypePtr key_type)
+{
+    /// A LowCardinality key is accepted only when its dictionary type is String or FixedString.
+    if (key_type->getTypeId() == TypeIndex::LowCardinality)
+    {
+        const auto & lc_type = assert_cast<const DataTypeLowCardinality &>(*key_type);
+        return isStringOrFixedString(*lc_type.getDictionaryType());
+    }
+
+    /// Any other key must be representable by an integer, or be String/FixedString, Nothing or UUID.
+    const WhichDataType which(key_type);
+    return key_type->isValueRepresentedByInteger()
+        || isStringOrFixedString(*key_type)
+        || which.isNothing()
+        || which.isUUID();
+}
+
 static DataTypePtr create(const ASTPtr & arguments)
 {
    if (!arguments || arguments->children.size() != 2)
--- a/src/DataTypes/DataTypeMap.h
+++ b/src/DataTypes/DataTypeMap.h
@ -48,6 +48,8 @@ public:

    SerializationPtr doGetDefaultSerialization() const override;

+    static bool checkKeyType(DataTypePtr key_type);
+
 private:
    void assertKeyType() const;
 };
--- a/src/Formats/EscapingRuleUtils.cpp
+++ b/src/Formats/EscapingRuleUtils.cpp
@ -5,12 +5,17 @@
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/getLeastSupertype.h>
+#include <DataTypes/DataTypeMap.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
-#include <Poco/JSON/Parser.h>
+#include <IO/ReadBufferFromString.h>
 #include <Parsers/TokenIterator.h>
-#include <Parsers/ExpressionListParsers.h>
-#include <Interpreters/evaluateConstantExpression.h>
+

 namespace DB
 {
@ -18,7 +23,6 @@ namespace DB
 namespace ErrorCodes
 {
    extern const int BAD_ARGUMENTS;
-    extern const int LOGICAL_ERROR;
 }

 FormatSettings::EscapingRule stringToEscapingRule(const String & escaping_rule)
@ -138,7 +142,8 @@ bool deserializeFieldByEscapingRule(
                serialization->deserializeTextRaw(column, buf, format_settings);
            break;
        default:
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Escaping rule {} is not suitable for deserialization", escapingRuleToString(escaping_rule));
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS, "Escaping rule {} is not suitable for deserialization", escapingRuleToString(escaping_rule));
    }
    return read;
 }
@ -176,7 +181,8 @@ void serializeFieldByEscapingRule(
    }
 }

-void writeStringByEscapingRule(const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
+void writeStringByEscapingRule(
+    const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
 {
    switch (escaping_rule)
    {
@ -249,85 +255,269 @@ String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule e
    return readByEscapingRule<true>(buf, escaping_rule, format_settings);
 }

-static bool evaluateConstantExpressionFromString(const StringRef & field, DataTypePtr & type, ContextPtr context)
+/// Tries to infer the type of a single text value from its syntax, consuming the value from buf.
+/// Recognized forms: [..] -> Array, (..) -> Tuple, {key : value, ..} -> Map, '..' -> String,
+/// true/false -> Bool, NULL -> Nothing, anything accepted by tryReadFloatText -> Float64.
+/// Returns nullptr when the value is not recognized or ends prematurely.
+/// Data that follows the value is left unread, so callers check buf.eof() themselves.
+static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
 {
-    if (!context)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "You must provide context to evaluate constant expression");
+    if (buf.eof())
+        return nullptr;

-    ParserExpression parser;
-    Expected expected;
-    Tokens tokens(field.data, field.data + field.size);
-    IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth);
-    ASTPtr ast;
-
-    /// FIXME: Our parser cannot parse maps in the form of '{key : value}' that is used in text formats.
-    bool parsed = parser.parse(token_iterator, ast, expected);
-    if (!parsed || !token_iterator->isEnd())
-        return false;
-
-    try
+    /// Array
+    if (checkChar('[', buf))
    {
-        std::pair<Field, DataTypePtr> result = evaluateConstantExpression(ast, context);
-        type = generalizeDataType(result.second);
-        return true;
+        skipWhitespaceIfAny(buf);
+
+        DataTypes nested_types;
+        bool first = true;
+        while (!buf.eof() && *buf.position() != ']')
+        {
+            if (!first)
+            {
+                skipWhitespaceIfAny(buf);
+                if (!checkChar(',', buf))
+                    return nullptr;
+                skipWhitespaceIfAny(buf);
+            }
+            else
+                first = false;
+
+            auto nested_type = determineDataTypeForSingleFieldImpl(buf);
+            if (!nested_type)
+                return nullptr;
+
+            nested_types.push_back(nested_type);
+        }
+
+        /// The closing ']' was never found.
+        if (buf.eof())
+            return nullptr;
+
+        ++buf.position();
+
+        if (nested_types.empty())
+            return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNothing>());
+
+        /// All elements must share a common supertype, otherwise the array type is unknown.
+        auto least_supertype = tryGetLeastSupertype(nested_types);
+        if (!least_supertype)
+            return nullptr;
+
+        return std::make_shared<DataTypeArray>(least_supertype);
    }
-    catch (...)
+
+    /// Tuple
+    if (checkChar('(', buf))
    {
-        return false;
+        skipWhitespaceIfAny(buf);
+
+        DataTypes nested_types;
+        bool first = true;
+        while (!buf.eof() && *buf.position() != ')')
+        {
+            if (!first)
+            {
+                skipWhitespaceIfAny(buf);
+                if (!checkChar(',', buf))
+                    return nullptr;
+                skipWhitespaceIfAny(buf);
+            }
+            else
+                first = false;
+
+            auto nested_type = determineDataTypeForSingleFieldImpl(buf);
+            if (!nested_type)
+                return nullptr;
+
+            nested_types.push_back(nested_type);
+        }
+
+        /// Unlike arrays, an empty tuple "()" is rejected.
+        if (buf.eof() || nested_types.empty())
+            return nullptr;
+
+        ++buf.position();
+
+        return std::make_shared<DataTypeTuple>(nested_types);
    }
+
+    /// Map
+    if (checkChar('{', buf))
+    {
+        skipWhitespaceIfAny(buf);
+
+        DataTypes key_types;
+        DataTypes value_types;
+        bool first = true;
+        while (!buf.eof() && *buf.position() != '}')
+        {
+            if (!first)
+            {
+                skipWhitespaceIfAny(buf);
+                if (!checkChar(',', buf))
+                    return nullptr;
+                skipWhitespaceIfAny(buf);
+            }
+            else
+                first = false;
+
+            auto key_type = determineDataTypeForSingleFieldImpl(buf);
+            if (!key_type)
+                return nullptr;
+
+            key_types.push_back(key_type);
+
+            skipWhitespaceIfAny(buf);
+            if (!checkChar(':', buf))
+                return nullptr;
+            skipWhitespaceIfAny(buf);
+
+            auto value_type = determineDataTypeForSingleFieldImpl(buf);
+            if (!value_type)
+                return nullptr;
+
+            value_types.push_back(value_type);
+        }
+
+        if (buf.eof())
+            return nullptr;
+
+        ++buf.position();
+        skipWhitespaceIfAny(buf);
+
+        if (key_types.empty())
+            return std::make_shared<DataTypeMap>(std::make_shared<DataTypeNothing>(), std::make_shared<DataTypeNothing>());
+
+        auto key_least_supertype = tryGetLeastSupertype(key_types);
+
+        auto value_least_supertype = tryGetLeastSupertype(value_types);
+        if (!key_least_supertype || !value_least_supertype)
+            return nullptr;
+
+        /// Checked here because the DataTypeMap constructor would throw on an unsupported key type.
+        if (!DataTypeMap::checkKeyType(key_least_supertype))
+            return nullptr;
+
+        return std::make_shared<DataTypeMap>(key_least_supertype, value_least_supertype);
+    }
+
+    /// String
+    if (*buf.position() == '\'')
+    {
+        ++buf.position();
+        while (!buf.eof())
+        {
+            char * next_pos = find_first_symbols<'\\', '\''>(buf.position(), buf.buffer().end());
+            buf.position() = next_pos;
+
+            /// Nothing interesting in the current chunk; buf.eof() in the loop condition moves on.
+            if (!buf.hasPendingData())
+                continue;
+
+            if (*buf.position() == '\'')
+                break;
+
+            if (*buf.position() == '\\')
+            {
+                /// Skip the backslash together with the character it escapes. Skipping only the
+                /// backslash would let the quote in \' be taken for the end of the string.
+                ++buf.position();
+                if (!buf.eof())
+                    ++buf.position();
+            }
+        }
+
+        /// The closing quote was never found.
+        if (buf.eof())
+            return nullptr;
+
+        ++buf.position();
+        return std::make_shared<DataTypeString>();
+    }
+
+    /// Bool
+    if (checkStringCaseInsensitive("true", buf) || checkStringCaseInsensitive("false", buf))
+        return DataTypeFactory::instance().get("Bool");
+
+    /// Null
+    if (checkStringCaseInsensitive("NULL", buf))
+        return std::make_shared<DataTypeNothing>();
+
+    /// Number
+    Float64 tmp;
+    if (tryReadFloatText(tmp, buf))
+        return std::make_shared<DataTypeFloat64>();
+
+    return nullptr;
 }

-DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context)
+/// Infers the type of a single value read from buf and makes the result Nullable recursively.
+/// Yields nullptr when no type could be inferred.
+static DataTypePtr determineDataTypeForSingleField(ReadBuffer & buf)
+{
+    DataTypePtr inferred_type = determineDataTypeForSingleFieldImpl(buf);
+    return makeNullableRecursivelyAndCheckForNothing(inferred_type);
+}
+
+/// Infers the data type of one already-extracted text field according to the escaping rule of the format.
+/// A nullptr result means that no type could be determined from this field.
+/// Throws BAD_ARGUMENTS for escaping rules that are not handled below.
+DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule)
 {
    switch (escaping_rule)
    {
        case FormatSettings::EscapingRule::Quoted:
        {
-            DataTypePtr type;
-            bool parsed = evaluateConstantExpressionFromString(field, type, context);
-            return parsed ? type : nullptr;
+            /// The whole field must be consumed, trailing data makes the type unknown.
+            ReadBufferFromString buf(field);
+            auto type = determineDataTypeForSingleField(buf);
+            return buf.eof() ? type : nullptr;
        }
        case FormatSettings::EscapingRule::JSON:
            return getDataTypeFromJSONField(field);
        case FormatSettings::EscapingRule::CSV:
        {
+            /// With heuristics disabled every CSV field is a Nullable(String).
+            if (!format_settings.csv.input_format_use_best_effort_in_schema_inference)
+                return makeNullable(std::make_shared<DataTypeString>());
+
            if (field.empty() || field == format_settings.csv.null_representation)
                return nullptr;

            if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation)
-                return std::make_shared<DataTypeUInt8>();
+                return DataTypeFactory::instance().get("Nullable(Bool)");

-            DataTypePtr type;
-            bool parsed;
-            if (field[0] == '\'' || field[0] == '"')
+            if (field.size() > 1 && ((field.front() == '\'' && field.back() == '\'') || (field.front() == '"' && field.back() == '"')))
            {
-                /// Try to evaluate expression inside quotes.
-                parsed = evaluateConstantExpressionFromString(StringRef(field.data() + 1, field.size() - 2), type, context);
-                /// If it's a number in quotes we determine it as a string.
-                if (parsed && type && isNumber(removeNullable(type)))
-                    return makeNullable(std::make_shared<DataTypeString>());
-            }
-            else
-                parsed = evaluateConstantExpressionFromString(field, type, context);
+                ReadBufferFromString buf(std::string_view(field.data() + 1, field.size() - 2));
+                /// Try to determine the type of value inside quotes
+                auto type = determineDataTypeForSingleField(buf);

-            /// If we couldn't parse an expression, determine it as a string.
-            return parsed ? type : makeNullable(std::make_shared<DataTypeString>());
+                /// NOTE(review): type is also null when the quoted text is not recognized at all,
+                /// so such a field is reported as "unknown" rather than String - confirm this is intended.
+                if (!type)
+                    return nullptr;
+
+                /// If it's a number or tuple in quotes or there is some unread data in buffer, we determine it as a string.
+                if (isNumber(removeNullable(type)) || isTuple(type) || !buf.eof())
+                    return makeNullable(std::make_shared<DataTypeString>());
+
+                return type;
+            }
+
+            /// Case when CSV value is not in quotes. Check if it's a number, and if not, determine it as a string.
+            ReadBufferFromString buf(field);
+            Float64 tmp;
+            if (tryReadFloatText(tmp, buf) && buf.eof())
+                return makeNullable(std::make_shared<DataTypeFloat64>());
+
+            return makeNullable(std::make_shared<DataTypeString>());
        }
        case FormatSettings::EscapingRule::Raw: [[fallthrough]];
        case FormatSettings::EscapingRule::Escaped:
-            /// TODO: Try to use some heuristics here to determine the type of data.
-            return field.empty() ? nullptr : makeNullable(std::make_shared<DataTypeString>());
+        {
+            /// With heuristics disabled every TSV field is a Nullable(String).
+            if (!format_settings.tsv.input_format_use_best_effort_in_schema_inference)
+                return makeNullable(std::make_shared<DataTypeString>());
+
+            if (field.empty() || field == format_settings.tsv.null_representation)
+                return nullptr;
+
+            if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation)
+                return DataTypeFactory::instance().get("Nullable(Bool)");
+
+            /// If the value was not consumed completely, fall back to String.
+            ReadBufferFromString buf(field);
+            auto type = determineDataTypeForSingleField(buf);
+            if (!buf.eof())
+                return makeNullable(std::make_shared<DataTypeString>());
+
+            return type;
+        }
        default:
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the type for value with {} escaping rule", escapingRuleToString(escaping_rule));
    }
 }

-DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context)
+/// Infers the type of every field independently; the result keeps the order of fields.
+DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule)
 {
-    DataTypes data_types;
-    data_types.reserve(fields.size());
-    for (const auto & field : fields)
-        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, escaping_rule, context));
-    return data_types;
+    DataTypes inferred_types;
+    inferred_types.reserve(fields.size());
+    for (const String & field_value : fields)
+        inferred_types.emplace_back(determineDataTypeByEscapingRule(field_value, format_settings, escaping_rule));
+    return inferred_types;
 }

@ -344,4 +534,12 @@ DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escap
    }
 }

+/// Returns the default data type for each escaping rule, in the same order as escaping_rules.
+DataTypes getDefaultDataTypeForEscapingRules(const std::vector<FormatSettings::EscapingRule> & escaping_rules)
+{
+    DataTypes data_types;
+    /// The final size is known up front, so allocate once (as determineDataTypesByEscapingRule does).
+    data_types.reserve(escaping_rules.size());
+    for (const auto & rule : escaping_rules)
+        data_types.push_back(getDefaultDataTypeForEscapingRule(rule));
+    return data_types;
+}
+
 }
--- a/src/Formats/EscapingRuleUtils.h
+++ b/src/Formats/EscapingRuleUtils.h
@ -43,15 +43,21 @@ String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule es
 /// - For JSON escaping rule we can use JSON parser to parse a single field
 ///   and then convert JSON type of this field to ClickHouse type.
 /// - For CSV escaping rule we can do the next:
-///    - If the field is an unquoted string, then we could try to evaluate it
-///      as a constant expression, and if it fails, treat it as a String.
-///    - If the field is a string in quotes, then we can try to evaluate
-///      expression inside quotes as a constant expression, and if it fails or
-///      the result is a number (we don't parse numbers in quotes) we treat it as a String.
-/// - For TSV and TSVRaw we treat each field as a String (TODO: try to use some tweaks and heuristics here)
-DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context = nullptr);
-DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context = nullptr);
+///    - If the field is an unquoted string, then we try to parse it as a number,
+///      and if we cannot, treat it as a String.
+///    - If the field is a string in quotes, then we try to use some
+///      tweaks and heuristics to determine the type inside quotes, and if we can't or
+///      the result is a number or tuple (we don't parse numbers in quotes and don't
+///      support tuples in CSV) we treat it as a String.
+///    - If input_format_csv_use_best_effort_in_schema_inference is disabled, we
+///      treat everything as a string.
+/// - For TSV and TSVRaw we try to use some tweaks and heuristics to determine the type
+///   of value if setting input_format_tsv_use_best_effort_in_schema_inference is enabled,
+///   otherwise we treat everything as a string.
+DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule);
+DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule);

 DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule);
+DataTypes getDefaultDataTypeForEscapingRules(const std::vector<FormatSettings::EscapingRule> & escaping_rules);

 }
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@ -65,6 +65,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number;
    format_settings.csv.null_representation = settings.format_csv_null_representation;
    format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv;
+    format_settings.csv.input_format_use_best_effort_in_schema_inference = settings.input_format_csv_use_best_effort_in_schema_inference;
    format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
    format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
    format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
@ -97,6 +98,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
    format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
    format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
+    format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference;
    format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
    format_settings.pretty.color = settings.output_format_pretty_color;
    format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
@ -117,6 +119,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default;
    format_settings.tsv.input_format_enum_as_number = settings.input_format_tsv_enum_as_number;
    format_settings.tsv.null_representation = settings.format_tsv_null_representation;
+    format_settings.tsv.input_format_use_best_effort_in_schema_inference = settings.input_format_tsv_use_best_effort_in_schema_inference;
    format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
    format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
    format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
@ -126,10 +129,17 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
    format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
    format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns;
+    format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference;
+    format_settings.orc.import_nested = settings.input_format_orc_import_nested;
+    format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
+    format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
+    format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference;
+    format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference;
    format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
    format_settings.orc.import_nested = settings.input_format_orc_import_nested;
    format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
    format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
+    format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference;
    format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
    format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
    format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
@ -137,6 +147,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns;
    format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation;
    format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference;
+    format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference;

    /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
    if (format_settings.schema.is_server)
@ -371,7 +382,7 @@ SchemaReaderPtr FormatFactory::getSchemaReader(
        throw Exception("FormatFactory: Format " + name + " doesn't support schema inference.", ErrorCodes::LOGICAL_ERROR);

    auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context);
-    return schema_reader_creator(buf, format_settings, context);
+    return schema_reader_creator(buf, format_settings);
 }

 ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader(
--- a/src/Formats/FormatFactory.h
+++ b/src/Formats/FormatFactory.h
@ -97,7 +97,7 @@ private:
    /// The checker should return true if format support append.
    using AppendSupportChecker = std::function<bool(const FormatSettings & settings)>;

-    using SchemaReaderCreator = std::function<SchemaReaderPtr(ReadBuffer & in, const FormatSettings & settings, ContextPtr context)>;
+    using SchemaReaderCreator = std::function<SchemaReaderPtr(ReadBuffer & in, const FormatSettings & settings)>;
    using ExternalSchemaReaderCreator = std::function<ExternalSchemaReaderPtr(const FormatSettings & settings)>;

    struct Creators
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@ -36,6 +36,8 @@ struct FormatSettings
    bool seekable_read = true;
    UInt64 max_rows_to_read_for_schema_inference = 100;

+    String column_names_for_schema_inference = "";
+
    enum class DateTimeInputFormat
    {
        Basic,        /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp.
@ -77,6 +79,7 @@ struct FormatSettings
        bool low_cardinality_as_dictionary = false;
        bool import_nested = false;
        bool allow_missing_columns = false;
+        bool skip_columns_with_unsupported_types_in_schema_inference = false;
        bool case_insensitive_column_matching = false;
    } arrow;

@ -104,6 +107,7 @@ struct FormatSettings
        bool input_format_arrays_as_nested_csv = false;
        String null_representation = "\\N";
        char tuple_delimiter = ',';
+        bool input_format_use_best_effort_in_schema_inference = true;
    } csv;

    struct HiveText
@ -141,6 +145,7 @@ struct FormatSettings
        UInt64 row_group_size = 1000000;
        bool import_nested = false;
        bool allow_missing_columns = false;
+        bool skip_columns_with_unsupported_types_in_schema_inference = false;
        bool case_insensitive_column_matching = false;
        std::unordered_set<int> skip_row_groups = {};
    } parquet;
@ -209,6 +214,7 @@ struct FormatSettings
        bool crlf_end_of_line = false;
        String null_representation = "\\N";
        bool input_format_enum_as_number = false;
+        bool input_format_use_best_effort_in_schema_inference = true;
    } tsv;

    struct
@ -223,6 +229,7 @@ struct FormatSettings
        bool import_nested = false;
        bool allow_missing_columns = false;
        int64_t row_batch_size = 100'000;
+        bool skip_columns_with_unsupported_types_in_schema_inference = false;
        bool case_insensitive_column_matching = false;
        std::unordered_set<int> skip_stripes = {};
    } orc;
--- a/src/Formats/ReadSchemaUtils.cpp
+++ b/src/Formats/ReadSchemaUtils.cpp
@ -105,8 +105,11 @@ ColumnsDescription readSchemaFromFormat(const String & format_name, const std::o
    return readSchemaFromFormat(format_name, format_settings, read_buffer_creator, context, buf_out);
 }

-DataTypePtr generalizeDataType(DataTypePtr type)
+DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type)
 {
+    if (!type)
+        return nullptr;
+
    WhichDataType which(type);

    if (which.isNothing())
@ -115,16 +118,13 @@ DataTypePtr generalizeDataType(DataTypePtr type)
    if (which.isNullable())
    {
        const auto * nullable_type = assert_cast<const DataTypeNullable *>(type.get());
-        return generalizeDataType(nullable_type->getNestedType());
+        return makeNullableRecursivelyAndCheckForNothing(nullable_type->getNestedType());
    }

-    if (isNumber(type))
-        return makeNullable(std::make_shared<DataTypeFloat64>());
-
    if (which.isArray())
    {
        const auto * array_type = assert_cast<const DataTypeArray *>(type.get());
-        auto nested_type = generalizeDataType(array_type->getNestedType());
+        auto nested_type = makeNullableRecursivelyAndCheckForNothing(array_type->getNestedType());
        return nested_type ? std::make_shared<DataTypeArray>(nested_type) : nullptr;
    }

@ -134,7 +134,7 @@ DataTypePtr generalizeDataType(DataTypePtr type)
        DataTypes nested_types;
        for (const auto & element : tuple_type->getElements())
        {
-            auto nested_type = generalizeDataType(element);
+            auto nested_type = makeNullableRecursivelyAndCheckForNothing(element);
            if (!nested_type)
                return nullptr;
            nested_types.push_back(nested_type);
@ -145,19 +145,27 @@ DataTypePtr generalizeDataType(DataTypePtr type)
    if (which.isMap())
    {
        const auto * map_type = assert_cast<const DataTypeMap *>(type.get());
-        auto key_type = removeNullable(generalizeDataType(map_type->getKeyType()));
-        auto value_type = generalizeDataType(map_type->getValueType());
-        return key_type && value_type ? std::make_shared<DataTypeMap>(key_type, value_type) : nullptr;
+        auto key_type = makeNullableRecursivelyAndCheckForNothing(map_type->getKeyType());
+        auto value_type = makeNullableRecursivelyAndCheckForNothing(map_type->getValueType());
+        return key_type && value_type ? std::make_shared<DataTypeMap>(removeNullable(key_type), value_type) : nullptr;
    }

    if (which.isLowCarnality())
    {
        const auto * lc_type = assert_cast<const DataTypeLowCardinality *>(type.get());
-        auto nested_type = generalizeDataType(lc_type->getDictionaryType());
+        auto nested_type = makeNullableRecursivelyAndCheckForNothing(lc_type->getDictionaryType());
        return nested_type ? std::make_shared<DataTypeLowCardinality>(nested_type) : nullptr;
    }

    return makeNullable(type);
 }

+NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header) /// Lists the header's columns, each type passed through makeNullableRecursivelyAndCheckForNothing.
+{
+    NamesAndTypesList result;
+    for (auto & [name, type] : header.getNamesAndTypesList())
+        result.emplace_back(name, makeNullableRecursivelyAndCheckForNothing(type)); /// NOTE(review): the helper returns nullptr for Nothing (or a nested Nothing); that null is appended unchecked - confirm callers cope.
+    return result;
+}
+
 }
--- a/src/Formats/ReadSchemaUtils.h
+++ b/src/Formats/ReadSchemaUtils.h
@ -29,14 +29,16 @@ ColumnsDescription readSchemaFromFormat(
    ContextPtr context,
    std::unique_ptr<ReadBuffer> & buf_out);

-/// Convert type to the most general type:
-/// - IntN, UIntN, FloatN, Decimal -> Float64
+/// Make type Nullable recursively:
 /// - Type -> Nullable(type)
 /// - Array(Type) -> Array(Nullable(Type))
 /// - Tuple(Type1, ..., TypeN) -> Tuple(Nullable(Type1), ..., Nullable(TypeN))
 /// - Map(KeyType, ValueType) -> Map(KeyType, Nullable(ValueType))
 /// - LowCardinality(Type) -> LowCardinality(Nullable(Type))
 /// If type is Nothing or one of the nested types is Nothing, return nullptr.
-DataTypePtr generalizeDataType(DataTypePtr type);
+DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type);

+/// Call makeNullableRecursivelyAndCheckForNothing for all types
+/// in the block and return names and types.
+NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header);
 }
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@ -1366,6 +1366,7 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf)
    /// - Tuples: (...)
    /// - Maps: {...}
    /// - NULL
+    /// - Bool: true/false
    /// - Number: integer, float, decimal.

    if (*buf.position() == '\'')
@ -1394,6 +1395,16 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf)
            s.append("NaN");
        }
    }
+    else if (checkCharCaseInsensitive('t', buf))
+    {
+        assertStringCaseInsensitive("rue", buf);
+        s.append("true");
+    }
+    else if (checkCharCaseInsensitive('f', buf))
+    {
+        assertStringCaseInsensitive("alse", buf);
+        s.append("false");
+    }
    else
    {
        /// It's an integer, float or decimal. They all can be parsed as float.
--- a/src/Processors/Formats/ISchemaReader.cpp
+++ b/src/Processors/Formats/ISchemaReader.cpp
@ -2,6 +2,7 @@
 #include <Formats/ReadSchemaUtils.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <boost/algorithm/string.hpp>

 namespace DB
 {
@ -66,9 +67,32 @@ static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, c
    result.emplace_back(name, type);
 }

-IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_, bool allow_bools_as_numbers_)
-    : ISchemaReader(in_), max_rows_to_read(max_rows_to_read_), default_type(default_type_), allow_bools_as_numbers(allow_bools_as_numbers_)
+IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, bool allow_bools_as_numbers_) /// Common constructor: takes the row limit and the optional column names from format_settings.
+    : ISchemaReader(in_), max_rows_to_read(format_settings.max_rows_to_read_for_schema_inference), allow_bools_as_numbers(allow_bools_as_numbers_)
+{
+    if (!format_settings.column_names_for_schema_inference.empty()) /// An empty setting leaves column_names untouched.
+    {
+        /// column_names_for_schema_inference is a string in format 'column1,column2,column3,...'
+        boost::split(column_names, format_settings.column_names_for_schema_inference, boost::is_any_of(","));
+        for (auto & column_name : column_names)
+        {
+            std::string col_name_trimmed = boost::trim_copy(column_name); /// Drop whitespace around the name, e.g. from 'a, b'.
+            if (!col_name_trimmed.empty()) /// An entry that is only whitespace (or empty) keeps its original, untrimmed text.
+                column_name = col_name_trimmed;
+        }
+    }
+}
+
+IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_, bool allow_bools_as_numbers_) /// Overload with one default type shared by every column.
+    : IRowSchemaReader(in_, format_settings, allow_bools_as_numbers_) /// The delegated constructor sets the row limit and parses the column names.
+{
+    default_type = default_type_; /// Assigned in the body: a delegating constructor cannot also list member initializers.
+}
+
+IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, const DataTypes & default_types_, bool allow_bools_as_numbers_) /// Overload with a separate default type per column position.
+    : IRowSchemaReader(in_, format_settings, allow_bools_as_numbers_) /// The delegated constructor sets the row limit and parses the column names.
+{
+    default_types = default_types_; /// Copied as given; getDefaultType() bounds-checks the index, so the list may be shorter than the column count.
 }

 NamesAndTypesList IRowSchemaReader::readSchema()
@ -90,7 +114,7 @@ NamesAndTypesList IRowSchemaReader::readSchema()
            if (!new_data_types[i])
                continue;

-            chooseResultType(data_types[i], new_data_types[i], allow_bools_as_numbers, default_type, std::to_string(i + 1), row);
+            chooseResultType(data_types[i], new_data_types[i], allow_bools_as_numbers, getDefaultType(i), std::to_string(i + 1), row);
        }
    }

@ -115,12 +139,21 @@ NamesAndTypesList IRowSchemaReader::readSchema()
    for (size_t i = 0; i != data_types.size(); ++i)
    {
        /// Check that we could determine the type of this column.
-        checkTypeAndAppend(result, data_types[i], column_names[i], default_type, max_rows_to_read);
+        checkTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), max_rows_to_read);
    }

    return result;
 }

+DataTypePtr IRowSchemaReader::getDefaultType(size_t column) const /// Returns the fallback type for the column at this index, or nullptr when none is configured.
+{
+    if (default_type) /// A single shared default, when set, wins over any per-column entry.
+        return default_type;
+    if (column < default_types.size() && default_types[column]) /// Per-column default: the index must be in range and the entry non-null.
+        return default_types[column];
+    return nullptr; /// No default available for this column.
+}
+
 IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_, bool allow_bools_as_numbers_)
    : ISchemaReader(in_), max_rows_to_read(max_rows_to_read_), default_type(default_type_), allow_bools_as_numbers(allow_bools_as_numbers_)
 {
--- a/src/Processors/Formats/ISchemaReader.h
+++ b/src/Processors/Formats/ISchemaReader.h
@ -31,12 +31,17 @@ protected:
 /// Base class for schema inference for formats that read data row by row.
 /// It reads data row by row (up to max_rows_to_read), determines types of columns
 /// for each row and compare them with types from the previous rows. If some column
-/// contains values with different types in different rows, the default type will be
-/// used for this column or the exception will be thrown (if default type is not set).
+/// contains values with different types in different rows, the default type
+/// (from argument default_type_) will be used for this column or the exception
+/// will be thrown (if default type is not set). If different columns have different
+/// default types, you can provide them by default_types_ argument.
 class IRowSchemaReader : public ISchemaReader
 {
 public:
-    IRowSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_ = nullptr, bool allow_bools_as_numbers_ = false);
+    IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, bool allow_bools_as_numbers_ = false);
+    IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_, bool allow_bools_as_numbers_ = false);
+    IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, const DataTypes & default_types_, bool allow_bools_as_numbers_ = false);
+
    NamesAndTypesList readSchema() override;

 protected:
@ -49,8 +54,11 @@ protected:
    void setColumnNames(const std::vector<String> & names) { column_names = names; }

 private:
+
+    DataTypePtr getDefaultType(size_t column) const;
    size_t max_rows_to_read;
    DataTypePtr default_type;
+    DataTypes default_types;
    bool allow_bools_as_numbers;
    std::vector<String> column_names;
 };
--- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
@ -3,6 +3,7 @@
 #if USE_ARROW

 #include <Formats/FormatFactory.h>
+#include <Formats/ReadSchemaUtils.h>
 #include <IO/ReadBufferFromMemory.h>
 #include <IO/WriteHelpers.h>
 #include <IO/copyData.h>
@ -171,8 +172,9 @@ NamesAndTypesList ArrowSchemaReader::readSchema()
        schema = createFileReader(in, format_settings, is_stopped)->schema();
    }

-    auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, stream ? "ArrowStream" : "Arrow");
-    return header.getNamesAndTypesList();
+    auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
+        *schema, stream ? "ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference);
+    return getNamesAndRecursivelyNullableTypes(header);
 }

 void registerInputFormatArrow(FormatFactory & factory)
@ -202,13 +204,13 @@ void registerArrowSchemaReader(FormatFactory & factory)
 {
    factory.registerSchemaReader(
        "Arrow",
-        [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+        [](ReadBuffer & buf, const FormatSettings & settings)
        {
            return std::make_shared<ArrowSchemaReader>(buf, false, settings);
        });
    factory.registerSchemaReader(
        "ArrowStream",
-        [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+        [](ReadBuffer & buf, const FormatSettings & settings)
        {
            return std::make_shared<ArrowSchemaReader>(buf, true, settings);
        });}
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@ -15,6 +15,7 @@
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/NestedUtils.h>
 #include <DataTypes/DataTypeDateTime64.h>
+#include <DataTypes/DataTypeNothing.h>
 #include <Common/DateLUTImpl.h>
 #include <base/types.h>
 #include <Processors/Chunk.h>
@ -26,11 +27,13 @@
 #include <Columns/ColumnUnique.h>
 #include <Columns/ColumnMap.h>
 #include <Columns/ColumnsNumber.h>
+#include <Columns/ColumnNothing.h>
 #include <Interpreters/castColumn.h>
 #include <Common/quoteString.h>
 #include <algorithm>
 #include <arrow/builder.h>
 #include <arrow/array.h>
+#include <boost/algorithm/string.hpp>
 #include <boost/algorithm/string/case_conv.hpp>

 /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn.
@ -329,12 +332,17 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
    const std::string & format_name,
    bool is_nullable,
    std::unordered_map<String, std::shared_ptr<ColumnWithTypeAndName>> & dictionary_values,
-    bool read_ints_as_dates)
+    bool read_ints_as_dates,
+    bool allow_null_type,
+    bool skip_columns_with_unsupported_types,
+    bool & skipped)
 {
    if (!is_nullable && arrow_column->null_count() && arrow_column->type()->id() != arrow::Type::LIST
        && arrow_column->type()->id() != arrow::Type::MAP && arrow_column->type()->id() != arrow::Type::STRUCT)
    {
-        auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values, read_ints_as_dates);
+        auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped);
+        if (skipped)
+            return {};
        auto nullmap_column = readByteMapFromArrowColumn(arrow_column);
        auto nullable_type = std::make_shared<DataTypeNullable>(std::move(nested_column.type));
        auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column);
@ -379,7 +387,10 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
        case arrow::Type::MAP:
        {
            auto arrow_nested_column = getNestedArrowColumn(arrow_column);
-            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates);
+            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped);
+            if (skipped)
+                return {};
+
            auto offsets_column = readOffsetsFromArrowListColumn(arrow_column);

            const auto * tuple_column = assert_cast<const ColumnTuple *>(nested_column.column.get());
@ -391,7 +402,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
        case arrow::Type::LIST:
        {
            auto arrow_nested_column = getNestedArrowColumn(arrow_column);
-            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates);
+            auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped);
+            if (skipped)
+                return {};
            auto offsets_column = readOffsetsFromArrowListColumn(arrow_column);
            auto array_column = ColumnArray::create(nested_column.column, offsets_column);
            auto array_type = std::make_shared<DataTypeArray>(nested_column.type);
@ -416,7 +429,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
            for (int i = 0; i != arrow_struct_type->num_fields(); ++i)
            {
                auto nested_arrow_column = std::make_shared<arrow::ChunkedArray>(nested_arrow_columns[i]);
-                auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates);
+                auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped);
+                if (skipped)
+                    return {};
                tuple_elements.emplace_back(std::move(element.column));
                tuple_types.emplace_back(std::move(element.type));
                tuple_names.emplace_back(std::move(element.name));
@ -439,7 +454,10 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
                    dict_array.emplace_back(dict_chunk.dictionary());
                }
                auto arrow_dict_column = std::make_shared<arrow::ChunkedArray>(dict_array);
-                auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates);
+                auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped);
+                /// An unsupported dictionary value type yields an empty column with a null type; stop before it is used below.
+                if (skipped)
+                    return {};

                /// We should convert read column to ColumnUnique.
                auto tmp_lc_column = DataTypeLowCardinality(dict_column.type).createColumn();
@ -469,9 +484,39 @@ static ColumnWithTypeAndName readColumnFromArrowColumn(
 #    undef DISPATCH
            // TODO: read JSON as a string?
            // TODO: read UUID as a string?
+        case arrow::Type::NA:
+        {
+            if (allow_null_type)
+            {
+                auto type = std::make_shared<DataTypeNothing>();
+                auto column = ColumnNothing::create(arrow_column->length());
+                return {std::move(column), type, column_name};
+            }
+            [[fallthrough]];
+        }
        default:
-            throw Exception(ErrorCodes::UNKNOWN_TYPE,
-                    "Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name);
+        {
+            if (skip_columns_with_unsupported_types)
+            {
+                skipped = true;
+                return {};
+            }
+
+            /// The skip setting is declared per settings group (arrow/orc/parquet). ArrowStream reads the
+            /// "arrow" group, so map it here; otherwise the hint would name a setting that does not exist.
+            auto settings_format_name = boost::algorithm::to_lower_copy(format_name);
+            if (settings_format_name == "arrowstream")
+                settings_format_name = "arrow";
+
+            throw Exception(
+                ErrorCodes::UNKNOWN_TYPE,
+                "Unsupported {} type '{}' of an input column '{}'. If it happens during schema inference and you want to skip columns with "
+                "unsupported types, you can enable setting input_format_{}_skip_columns_with_unsupported_types_in_schema_inference",
+                format_name,
+                arrow_column->type()->name(),
+                column_name,
+                settings_format_name);
+        }
    }
 }

@ -485,8 +524,9 @@ static void checkStatus(const arrow::Status & status, const String & column_name
        throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()};
 }

+
 Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
-    const arrow::Schema & schema, const std::string & format_name, const Block * hint_header, bool ignore_case)
+    const arrow::Schema & schema, const std::string & format_name, bool skip_columns_with_unsupported_types, const Block * hint_header, bool ignore_case)
 {
    ColumnsWithTypeAndName sample_columns;
    std::unordered_set<String> nested_table_names;
@ -512,9 +552,14 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
        arrow::ArrayVector array_vector = {arrow_array};
        auto arrow_column = std::make_shared<arrow::ChunkedArray>(array_vector);
        std::unordered_map<std::string, std::shared_ptr<ColumnWithTypeAndName>> dict_values;
-        ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false);
-
-        sample_columns.emplace_back(std::move(sample_column));
+        bool skipped = false;
+        bool allow_null_type = false;
+        if (hint_header && hint_header->has(field->name()) && hint_header->getByName(field->name()).type->isNullable())
+            allow_null_type = true;
+        ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(
+            arrow_column, field->name(), format_name, false, dict_values, false, allow_null_type, skip_columns_with_unsupported_types, skipped);
+        if (!skipped)
+            sample_columns.emplace_back(std::move(sample_column));
    }
    return Block(std::move(sample_columns));
 }
@ -559,6 +604,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
    UInt64 num_rows = name_to_column_ptr.begin()->second->length();
    columns_list.reserve(header.rows());
    std::unordered_map<String, BlockPtr> nested_tables;
+    bool skipped = false;
    for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i)
    {
        const ColumnWithTypeAndName & header_column = header.getByPosition(column_i);
@ -582,7 +628,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
                {
                    std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[search_nested_table_name];
                    ColumnsWithTypeAndName cols
-                        = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)};
+                        = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true, true, false, skipped)};
                    Block block(cols);
                    nested_tables[search_nested_table_name] = std::make_shared<Block>(Nested::flatten(block));
                }
@ -615,7 +661,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
        else
        {
            auto arrow_column = name_to_column_ptr[search_column_name];
-            column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true);
+            column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true, true, false, skipped);
        }

        try
@ -642,7 +688,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
 std::vector<size_t> ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const
 {
    std::vector<size_t> missing_columns;
-    auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header, case_insensitive_matching);
+    auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, false, &header, case_insensitive_matching);
    auto flatten_block_from_arrow = Nested::flatten(block_from_arrow);

    for (size_t i = 0, columns = header.columns(); i < columns; ++i)
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
@ -38,7 +38,11 @@ public:
    /// Transform arrow schema to ClickHouse header. If hint_header is provided,
    /// we will skip columns in schema that are not in hint_header.
    static Block arrowSchemaToCHHeader(
-        const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr, bool ignore_case = false);
+        const arrow::Schema & schema,
+        const std::string & format_name,
+        bool skip_columns_with_unsupported_types = false,
+        const Block * hint_header = nullptr,
+        bool ignore_case = false);

 private:
    const Block & header;
--- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
@ -924,12 +924,12 @@ void registerInputFormatAvro(FormatFactory & factory)

 void registerAvroSchemaReader(FormatFactory & factory)
 {
-    factory.registerSchemaReader("Avro", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    factory.registerSchemaReader("Avro", [](ReadBuffer & buf, const FormatSettings & settings)
    {
           return std::make_shared<AvroSchemaReader>(buf, false, settings);
    });

-    factory.registerSchemaReader("AvroConfluent", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    factory.registerSchemaReader("AvroConfluent", [](ReadBuffer & buf, const FormatSettings & settings)
    {
        return std::make_shared<AvroSchemaReader>(buf, true, settings);
    });
--- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp
@ -95,7 +95,7 @@ void BinaryFormatReader::skipField(size_t file_column)
 }

 BinaryWithNamesAndTypesSchemaReader::BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
-    : FormatWithNamesAndTypesSchemaReader(in_, 0, true, true, &reader), reader(in_, format_settings_)
+    : FormatWithNamesAndTypesSchemaReader(in_, format_settings_, true, true, &reader), reader(in_, format_settings_)
 {
 }

@ -119,7 +119,7 @@ void registerInputFormatRowBinary(FormatFactory & factory)

 void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory)
 {
-    factory.registerSchemaReader("RowBinaryWithNamesAndTypes", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    factory.registerSchemaReader("RowBinaryWithNamesAndTypes", [](ReadBuffer & buf, const FormatSettings & settings)
    {
        return std::make_shared<BinaryWithNamesAndTypesSchemaReader>(buf, settings);
    });
--- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
@ -9,7 +9,6 @@
 #include <Formats/EscapingRuleUtils.h>
 #include <Processors/Formats/Impl/CSVRowInputFormat.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
-#include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeString.h>


@ -259,16 +258,15 @@ bool CSVFormatReader::readField(
 }


-CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_, ContextPtr context_)
+CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_)
    : FormatWithNamesAndTypesSchemaReader(
        in_,
-        format_setting_.max_rows_to_read_for_schema_inference,
+        format_setting_,
        with_names_,
        with_types_,
        &reader,
        getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV))
    , reader(in_, format_setting_)
-    , context(context_)
 {
 }

@ -279,7 +277,7 @@ DataTypes CSVSchemaReader::readRowAndGetDataTypes()
        return {};

    auto fields = reader.readRow();
-    return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV, context);
+    return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV);
 }


@ -382,9 +380,9 @@ void registerCSVSchemaReader(FormatFactory & factory)
 {
    auto register_func = [&](const String & format_name, bool with_names, bool with_types)
    {
-        factory.registerSchemaReader(format_name, [with_names, with_types](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context)
+        factory.registerSchemaReader(format_name, [with_names, with_types](ReadBuffer & buf, const FormatSettings & settings)
        {
-            return std::make_shared<CSVSchemaReader>(buf, with_names, with_types, settings, context);
+            return std::make_shared<CSVSchemaReader>(buf, with_names, with_types, settings);
        });
    };

--- a/src/Processors/Formats/Impl/CSVRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h
@ -74,13 +74,12 @@ public:
 class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader
 {
 public:
-    CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_, ContextPtr context_);
+    CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_);

 private:
    DataTypes readRowAndGetDataTypes() override;

    CSVFormatReader reader;
-    ContextPtr context;
 };

 }
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
@ -289,17 +289,16 @@ void CustomSeparatedFormatReader::setReadBuffer(ReadBuffer & in_)
 }

 CustomSeparatedSchemaReader::CustomSeparatedSchemaReader(
-    ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_, ContextPtr context_)
+    ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_)
    : FormatWithNamesAndTypesSchemaReader(
        buf,
-        format_setting_.max_rows_to_read_for_schema_inference,
+        format_setting_,
        with_names_,
        with_types_,
        &reader,
        getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule))
    , buf(in_)
    , reader(buf, ignore_spaces_, updateFormatSettings(format_setting_))
-    , context(context_)
 {
 }

@ -315,7 +314,7 @@ DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypes()
        first_row = false;

    auto fields = reader.readRow();
-    return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), context);
+    return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule());
 }

 void registerInputFormatCustomSeparated(FormatFactory & factory)
@ -343,9 +342,9 @@ void registerCustomSeparatedSchemaReader(FormatFactory & factory)
    {
        auto register_func = [&](const String & format_name, bool with_names, bool with_types)
        {
-            factory.registerSchemaReader(format_name, [with_names, with_types, ignore_spaces](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context)
+            factory.registerSchemaReader(format_name, [with_names, with_types, ignore_spaces](ReadBuffer & buf, const FormatSettings & settings)
            {
-                return std::make_shared<CustomSeparatedSchemaReader>(buf, with_names, with_types, ignore_spaces, settings, context);
+                return std::make_shared<CustomSeparatedSchemaReader>(buf, with_names, with_types, ignore_spaces, settings);
            });
        };

--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
@ -92,14 +92,13 @@ private:
 class CustomSeparatedSchemaReader : public FormatWithNamesAndTypesSchemaReader
 {
 public:
-    CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_, ContextPtr context_);
+    CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_);

 private:
    DataTypes readRowAndGetDataTypes() override;

    PeekableReadBuffer buf;
    CustomSeparatedFormatReader reader;
-    ContextPtr context;
    bool first_row = true;
 };

--- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
@ -181,15 +181,10 @@ bool JSONCompactEachRowFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer &
    return true;
 }

-JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_)
+JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader(
+    ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_)
    : FormatWithNamesAndTypesSchemaReader(
-        in_,
-        format_settings_.max_rows_to_read_for_schema_inference,
-        with_names_,
-        with_types_,
-        &reader,
-        nullptr,
-        format_settings_.json.read_bools_as_numbers)
+        in_, format_settings_, with_names_, with_types_, &reader, nullptr, format_settings_.json.read_bools_as_numbers)
    , reader(in_, yield_strings_, format_settings_)
 {
 }
@ -239,7 +234,7 @@ void registerJSONCompactEachRowSchemaReader(FormatFactory & factory)
    {
        auto register_func = [&](const String & format_name, bool with_names, bool with_types)
        {
-            factory.registerSchemaReader(format_name, [=](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+            factory.registerSchemaReader(format_name, [=](ReadBuffer & buf, const FormatSettings & settings)
            {
                return std::make_shared<JSONCompactEachRowRowSchemaReader>(buf, with_names, with_types, json_strings, settings);
            });
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
@ -387,12 +387,12 @@ void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory

 void registerJSONEachRowSchemaReader(FormatFactory & factory)
 {
-    factory.registerSchemaReader("JSONEachRow", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    factory.registerSchemaReader("JSONEachRow", [](ReadBuffer & buf, const FormatSettings & settings)
    {
        return std::make_unique<JSONEachRowSchemaReader>(buf, false, settings);
    });

-    factory.registerSchemaReader("JSONStringsEachRow", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    factory.registerSchemaReader("JSONStringsEachRow", [](ReadBuffer & buf, const FormatSettings & settings)
    {
        return std::make_unique<JSONEachRowSchemaReader>(buf, true, settings);
    });
--- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
@ -414,7 +414,7 @@ void MsgPackRowInputFormat::setReadBuffer(ReadBuffer & in_)
 }

 MsgPackSchemaReader::MsgPackSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
-    : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference), buf(in_), number_of_columns(format_settings_.msgpack.number_of_columns)
+    : IRowSchemaReader(buf, format_settings_), buf(in_), number_of_columns(format_settings_.msgpack.number_of_columns)
 {
    if (!number_of_columns)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "You must specify setting input_format_msgpack_number_of_columns to extract table schema from MsgPack data");
@ -535,7 +535,7 @@ void registerInputFormatMsgPack(FormatFactory & factory)

 void registerMsgPackSchemaReader(FormatFactory & factory)
 {
-    factory.registerSchemaReader("MsgPack", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    factory.registerSchemaReader("MsgPack", [](ReadBuffer & buf, const FormatSettings & settings)
    {
        return std::make_shared<MsgPackSchemaReader>(buf, settings);
    });
--- a/src/Processors/Formats/Impl/NativeFormat.cpp
+++ b/src/Processors/Formats/Impl/NativeFormat.cpp
@ -133,7 +133,7 @@ void registerOutputFormatNative(FormatFactory & factory)

 void registerNativeSchemaReader(FormatFactory & factory)
 {
-    factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings &, ContextPtr)
+    factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings &)
    {
        return std::make_shared<NativeSchemaReader>(buf);
    });
--- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
@ -3,6 +3,7 @@
 #if USE_ORC

 #include <Formats/FormatFactory.h>
+#include <Formats/ReadSchemaUtils.h>
 #include <IO/ReadBufferFromMemory.h>
 #include <IO/WriteHelpers.h>
 #include <IO/copyData.h>
@ -183,8 +184,9 @@ NamesAndTypesList ORCSchemaReader::readSchema()
    std::shared_ptr<arrow::Schema> schema;
    std::atomic<int> is_stopped = 0;
    getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped);
-    auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, "ORC");
-    return header.getNamesAndTypesList();
+    auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
+        *schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference);
+    return getNamesAndRecursivelyNullableTypes(header);
 }

 void registerInputFormatORC(FormatFactory & factory)
@ -205,7 +207,7 @@ void registerORCSchemaReader(FormatFactory & factory)
 {
    factory.registerSchemaReader(
        "ORC",
-        [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+        [](ReadBuffer & buf, const FormatSettings & settings)
        {
            return std::make_shared<ORCSchemaReader>(buf, settings);
        }
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@ -4,6 +4,7 @@
 #if USE_PARQUET

 #include <Formats/FormatFactory.h>
+#include <Formats/ReadSchemaUtils.h>
 #include <IO/ReadBufferFromMemory.h>
 #include <IO/copyData.h>
 #include <arrow/api.h>
@ -176,8 +177,9 @@ NamesAndTypesList ParquetSchemaReader::readSchema()
    std::shared_ptr<arrow::Schema> schema;
    std::atomic<int> is_stopped = 0;
    getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped);
-    auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, "Parquet");
-    return header.getNamesAndTypesList();
+    auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
+        *schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference);
+    return getNamesAndRecursivelyNullableTypes(header);
 }

 void registerInputFormatParquet(FormatFactory & factory)
@ -198,7 +200,7 @@ void registerParquetSchemaReader(FormatFactory & factory)
 {
    factory.registerSchemaReader(
        "Parquet",
-        [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+        [](ReadBuffer & buf, const FormatSettings & settings)
        {
            return std::make_shared<ParquetSchemaReader>(buf, settings);
        }
--- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
@ -128,15 +128,14 @@ void RegexpRowInputFormat::setReadBuffer(ReadBuffer & in_)
    IInputFormat::setReadBuffer(*buf);
 }

-RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, ContextPtr context_)
+RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
    : IRowSchemaReader(
        buf,
-        format_settings_.max_rows_to_read_for_schema_inference,
+        format_settings_,
        getDefaultDataTypeForEscapingRule(format_settings_.regexp.escaping_rule))
    , format_settings(format_settings_)
    , field_extractor(format_settings)
    , buf(in_)
-    , context(context_)
 {
 }

@ -152,7 +151,7 @@ DataTypes RegexpSchemaReader::readRowAndGetDataTypes()
    for (size_t i = 0; i != field_extractor.getMatchedFieldsSize(); ++i)
    {
        String field(field_extractor.getField(i));
-        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule, context));
+        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule));
    }

    return data_types;
@ -203,9 +202,9 @@ void registerFileSegmentationEngineRegexp(FormatFactory & factory)

 void registerRegexpSchemaReader(FormatFactory & factory)
 {
-    factory.registerSchemaReader("Regexp", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context)
+    factory.registerSchemaReader("Regexp", [](ReadBuffer & buf, const FormatSettings & settings)
    {
-        return std::make_shared<RegexpSchemaReader>(buf, settings, context);
+        return std::make_shared<RegexpSchemaReader>(buf, settings);
    });
 }

--- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h
+++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h
@ -76,7 +76,7 @@ private:
 class RegexpSchemaReader : public IRowSchemaReader
 {
 public:
-    RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, ContextPtr context_);
+    RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings);

 private:
    DataTypes readRowAndGetDataTypes() override;
@ -85,7 +85,6 @@ private:
    const FormatSettings format_settings;
    RegexpFieldExtractor field_extractor;
    PeekableReadBuffer buf;
-    ContextPtr context;
 };

 }
--- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
@ -283,7 +283,7 @@ void registerInputFormatTSKV(FormatFactory & factory)
 }
 void registerTSKVSchemaReader(FormatFactory & factory)
 {
-    factory.registerSchemaReader("TSKV", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+    factory.registerSchemaReader("TSKV", [](ReadBuffer & buf, const FormatSettings & settings)
    {
        return std::make_shared<TSKVSchemaReader>(buf, settings);
    });
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@ -235,7 +235,7 @@ TabSeparatedSchemaReader::TabSeparatedSchemaReader(
    ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings_)
    : FormatWithNamesAndTypesSchemaReader(
        in_,
-        format_settings_.max_rows_to_read_for_schema_inference,
+        format_settings_,
        with_names_,
        with_types_,
        &reader,
@ -280,7 +280,7 @@ void registerTSVSchemaReader(FormatFactory & factory)
    {
        auto register_func = [&](const String & format_name, bool with_names, bool with_types)
        {
-            factory.registerSchemaReader(format_name, [with_names, with_types, is_raw](ReadBuffer & buf, const FormatSettings & settings, ContextPtr)
+            factory.registerSchemaReader(format_name, [with_names, with_types, is_raw](ReadBuffer & buf, const FormatSettings & settings)
            {
                return std::make_shared<TabSeparatedSchemaReader>(buf, with_names, with_types, is_raw, settings);
            });
--- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
@ -453,14 +453,12 @@ TemplateSchemaReader::TemplateSchemaReader(
    const ParsedTemplateFormatString & format_,
    const ParsedTemplateFormatString & row_format_,
    std::string row_between_delimiter,
-    const FormatSettings & format_settings_,
-    ContextPtr context_)
-    : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference)
+    const FormatSettings & format_settings_)
+    : IRowSchemaReader(buf, format_settings_, getDefaultDataTypeForEscapingRules(row_format_.escaping_rules))
    , buf(in_)
    , format(format_)
    , row_format(row_format_)
    , format_settings(format_settings_)
-    , context(context_)
    , format_reader(buf, ignore_spaces_, format, row_format, row_between_delimiter, format_settings)
 {
    setColumnNames(row_format.column_names);
@ -489,7 +487,7 @@ DataTypes TemplateSchemaReader::readRowAndGetDataTypes()
            format_settings.csv.delimiter = row_format.delimiters[i + 1].empty() ? format_settings.csv.delimiter : row_format.delimiters[i + 1].front();

        field = readFieldByEscapingRule(buf, row_format.escaping_rules[i], format_settings);
-        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i], context));
+        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i]));
    }

    format_reader.skipRowEndDelimiter();
@ -564,12 +562,12 @@ void registerTemplateSchemaReader(FormatFactory & factory)
 {
    for (bool ignore_spaces : {false, true})
    {
-        factory.registerSchemaReader(ignore_spaces ? "TemplateIgnoreSpaces" : "Template", [ignore_spaces](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context)
+        factory.registerSchemaReader(ignore_spaces ? "TemplateIgnoreSpaces" : "Template", [ignore_spaces](ReadBuffer & buf, const FormatSettings & settings)
        {
            size_t index = 0;
            auto idx_getter = [&](const String &) -> std::optional<size_t> { return index++; };
            auto row_format = fillRowFormat(settings, idx_getter, false);
-            return std::make_shared<TemplateSchemaReader>(buf, ignore_spaces, fillResultSetFormat(settings), row_format, settings.template_settings.row_between_delimiter, settings, context);
+            return std::make_shared<TemplateSchemaReader>(buf, ignore_spaces, fillResultSetFormat(settings), row_format, settings.template_settings.row_between_delimiter, settings);
        });
    }
 }
--- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h
+++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h
@ -116,8 +116,7 @@ public:
                         const ParsedTemplateFormatString & format_,
                         const ParsedTemplateFormatString & row_format_,
                         std::string row_between_delimiter,
-                         const FormatSettings & format_settings_,
-                         ContextPtr context_);
+                         const FormatSettings & format_settings_);

    DataTypes readRowAndGetDataTypes() override;

@ -126,7 +125,6 @@ private:
    const ParsedTemplateFormatString format;
    const ParsedTemplateFormatString row_format;
    FormatSettings format_settings;
-    ContextPtr context;
    TemplateFormatReader format_reader;
    bool first_row = true;
 };
--- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
@ -6,6 +6,7 @@
 #include <Processors/Formats/Impl/ValuesBlockInputFormat.h>
 #include <Formats/FormatFactory.h>
 #include <Formats/ReadSchemaUtils.h>
+#include <Formats/EscapingRuleUtils.h>
 #include <Core/Block.h>
 #include <base/find_symbols.h>
 #include <Common/typeid_cast.h>
@ -571,8 +572,8 @@ void ValuesBlockInputFormat::setReadBuffer(ReadBuffer & in_)
    IInputFormat::setReadBuffer(*buf);
 }

-ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, ContextPtr context_)
-    : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference), buf(in_), context(context_)
+ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
+    : IRowSchemaReader(buf, format_settings_), buf(in_), format_settings(format_settings_)
 {
 }

@ -589,38 +590,25 @@ DataTypes ValuesSchemaReader::readRowAndGetDataTypes()
        return {};

    assertChar('(', buf);
-    PeekableReadBufferCheckpoint checkpoint(buf);
-    skipToNextRow(&buf, 0, 1);
-    buf.makeContinuousMemoryFromCheckpointToPos();
-    buf.rollbackToCheckpoint();
-
-    Tokens tokens(buf.position(), buf.buffer().end());
-    IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth);
-
+    skipWhitespaceIfAny(buf);
    DataTypes data_types;
-    bool finish = false;
-    while (!finish)
+    String value;
+    while (!buf.eof() && *buf.position() != ')')
    {
-        Expected expected;
-        ASTPtr ast;
+        if (!data_types.empty())
+        {
+            skipWhitespaceIfAny(buf);
+            assertChar(',', buf);
+            skipWhitespaceIfAny(buf);
+        }

-        bool parsed = parser.parse(token_iterator, ast, expected);
-        /// Consider delimiter after value (',' or ')') as part of expression
-        parsed &= token_iterator->type == TokenType::Comma || token_iterator->type == TokenType::ClosingRoundBracket;
-
-        if (!parsed)
-            throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot parse expression here: {}, token: {}",
-                            String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position())), String(token_iterator.get().begin, token_iterator.get().end));
-
-        std::pair<Field, DataTypePtr> result = evaluateConstantExpression(ast, context);
-        data_types.push_back(generalizeDataType(result.second));
-
-        if (token_iterator->type == TokenType::ClosingRoundBracket)
-            finish = true;
-        ++token_iterator;
-        buf.position() = const_cast<char *>(token_iterator->begin);
+        readQuotedFieldIntoString(value, buf);
+        auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted);
+        data_types.push_back(std::move(type));
    }

+    assertChar(')', buf);
+
    skipWhitespaceIfAny(buf);
    if (!buf.eof() && *buf.position() == ',')
        ++buf.position();
@ -642,9 +630,9 @@ void registerInputFormatValues(FormatFactory & factory)

 void registerValuesSchemaReader(FormatFactory & factory)
 {
-    factory.registerSchemaReader("Values", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context)
+    factory.registerSchemaReader("Values", [](ReadBuffer & buf, const FormatSettings & settings)
    {
-        return std::make_shared<ValuesSchemaReader>(buf, settings, context);
+        return std::make_shared<ValuesSchemaReader>(buf, settings);
    });
 }

--- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
@ -97,13 +97,13 @@ private:
 class ValuesSchemaReader : public IRowSchemaReader
 {
 public:
-    ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, ContextPtr context_);
+    ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings);

 private:
    DataTypes readRowAndGetDataTypes() override;

    PeekableReadBuffer buf;
-    ContextPtr context;
+    const FormatSettings format_settings;
    ParserExpression parser;
    bool first_row = true;
 };
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
@ -293,13 +293,13 @@ void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_)

 FormatWithNamesAndTypesSchemaReader::FormatWithNamesAndTypesSchemaReader(
    ReadBuffer & in_,
-    size_t max_rows_to_read_,
+    const FormatSettings & format_settings,
    bool with_names_,
    bool with_types_,
    FormatWithNamesAndTypesReader * format_reader_,
    DataTypePtr default_type_,
    bool allow_bools_as_numbers_)
-    : IRowSchemaReader(in_, max_rows_to_read_, default_type_, allow_bools_as_numbers_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_)
+    : IRowSchemaReader(in_, format_settings, default_type_, allow_bools_as_numbers_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_)
 {
 }

--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
@ -124,7 +124,7 @@ class FormatWithNamesAndTypesSchemaReader : public IRowSchemaReader
 public:
    FormatWithNamesAndTypesSchemaReader(
        ReadBuffer & in,
-        size_t max_rows_to_read_,
+        const FormatSettings & format_settings,
        bool with_names_,
        bool with_types_,
        FormatWithNamesAndTypesReader * format_reader_,
--- a/tests/queries/0_stateless/01801_s3_cluster.reference
+++ b/tests/queries/0_stateless/01801_s3_cluster.reference
@ -2,30 +2,6 @@
 0	0	0
 0	0	0
 1	2	3
-10	11	12
-13	14	15
-16	17	18
-20	21	22
-23	24	25
-26	27	28
-4	5	6
-7	8	9
-0	0	0
-0	0	0
-0	0	0
-1	2	3
-10	11	12
-13	14	15
-16	17	18
-20	21	22
-23	24	25
-26	27	28
-4	5	6
-7	8	9
-0	0	0
-0	0	0
-0	0	0
-1	2	3
 4	5	6
 7	8	9
 10	11	12
@ -38,14 +14,26 @@
 0	0	0
 0	0	0
 1	2	3
+4	5	6
+7	8	9
 10	11	12
 13	14	15
 16	17	18
 20	21	22
 23	24	25
 26	27	28
+0	0	0
+0	0	0
+0	0	0
+1	2	3
 4	5	6
 7	8	9
+10	11	12
+13	14	15
+16	17	18
+20	21	22
+23	24	25
+26	27	28
 0	0	0
 0	0	0
 0	0	0
@ -62,14 +50,26 @@
 0	0	0
 0	0	0
 1	2	3
+4	5	6
+7	8	9
 10	11	12
 13	14	15
 16	17	18
 20	21	22
 23	24	25
 26	27	28
+0	0	0
+0	0	0
+0	0	0
+1	2	3
 4	5	6
 7	8	9
+10	11	12
+13	14	15
+16	17	18
+20	21	22
+23	24	25
+26	27	28
 0	0	0
 0	0	0
 0	0	0
--- a/tests/queries/0_stateless/02149_schema_inference.reference
+++ b/tests/queries/0_stateless/02149_schema_inference.reference
@ -1,17 +1,17 @@
 TSV
-c1	Nullable(String)					
+c1	Nullable(Float64)					
 c2	Nullable(String)					
-c3	Nullable(String)					
-c4	Nullable(String)					
-42	Some string	[1, 2, 3, 4]	(1, 2, 3)
-42	abcd	[]	(4, 5, 6)
+c3	Array(Nullable(Float64))					
+c4	Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))					
+42	Some string	[1,2,3,4]	(1,2,3)
+42	abcd	[]	(4,5,6)
 TSVWithNames
-number	Nullable(String)					
+number	Nullable(Float64)					
 string	Nullable(String)					
-array	Nullable(String)					
-tuple	Nullable(String)					
-42	Some string	[1, 2, 3, 4]	(1, 2, 3)
-42	abcd	[]	(4, 5, 6)
+array	Array(Nullable(Float64))					
+tuple	Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))					
+42	Some string	[1,2,3,4]	(1,2,3)
+42	abcd	[]	(4,5,6)
 CSV
 c1	Nullable(Float64)					
 c2	Nullable(String)					
@ -73,13 +73,13 @@ c	Array(Nullable(Float64))
 \N	\N	[]
 \N	\N	[3]
 TSKV
-a	Nullable(String)					
+a	Nullable(Float64)					
 b	Nullable(String)					
-c	Nullable(String)					
-1	s1	\N
+c	Array(Nullable(Float64))					
+1	s1	[]
 2	}	[2]
-\N	\N	\N
-\N	\N	\N
+\N	\N	[]
+\N	\N	[]
 \N	\N	[3]
 Values
 c1	Nullable(Float64)					
@ -96,7 +96,7 @@ c5	Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(Strin
 42.42	\N	[1,NULL,3]	(1,NULL)	([1,2],[(3,'4'),(5,'6')])
 \N	Some string	[10]	(1,2)	([],[])
 Regexp
-c1	Nullable(String)					
+c1	Nullable(Float64)					
 c2	Nullable(String)					
 c3	Nullable(String)					
 42	Some string 1	[([1, 2, 3], String 1), ([], String 1)]
--- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference
+++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference
@ -1,137 +1,137 @@
 Arrow
-int8	Int8					
-uint8	UInt8					
-int16	Int16					
-uint16	UInt16					
-int32	Int32					
-uint32	UInt32					
-int64	Int64					
-uint64	UInt64					
+int8	Nullable(Int8)					
+uint8	Nullable(UInt8)					
+int16	Nullable(Int16)					
+uint16	Nullable(UInt16)					
+int32	Nullable(Int32)					
+uint32	Nullable(UInt32)					
+int64	Nullable(Int64)					
+uint64	Nullable(UInt64)					
 0	0	0	0	0	0	0	0
 -1	1	-1	1	-1	1	-1	1
-float32	Float32					
-float64	Float64					
-decimal32	Decimal(9, 5)					
-decimal64	Decimal(18, 5)					
+float32	Nullable(Float32)					
+float64	Nullable(Float64)					
+decimal32	Nullable(Decimal(9, 5))					
+decimal64	Nullable(Decimal(18, 5))					
 0	0	0	0
 1.2	0.7692307692307692	3.33333	333.33333
-date	UInt16					
-date32	Date32					
+date	Nullable(UInt16)					
+date32	Nullable(Date32)					
 0	1970-01-01
 1	1970-01-02
-str	String					
-fixed_string	String					
+str	Nullable(String)					
+fixed_string	Nullable(String)					
 Str: 0	100
 Str: 1	200
-array	Array(UInt64)					
-tuple	Tuple(`tuple.0` UInt64, `tuple.1` String)					
-map	Map(String, UInt64)					
+array	Array(Nullable(UInt64))					
+tuple	Tuple(Nullable(UInt64), Nullable(String))					
+map	Map(String, Nullable(UInt64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64)))					
-nested2	Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8)					
+nested1	Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64))))					
+nested2	Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 ArrowStream
-int8	Int8					
-uint8	UInt8					
-int16	Int16					
-uint16	UInt16					
-int32	Int32					
-uint32	UInt32					
-int64	Int64					
-uint64	UInt64					
+int8	Nullable(Int8)					
+uint8	Nullable(UInt8)					
+int16	Nullable(Int16)					
+uint16	Nullable(UInt16)					
+int32	Nullable(Int32)					
+uint32	Nullable(UInt32)					
+int64	Nullable(Int64)					
+uint64	Nullable(UInt64)					
 0	0	0	0	0	0	0	0
 -1	1	-1	1	-1	1	-1	1
-float32	Float32					
-float64	Float64					
-decimal32	Decimal(9, 5)					
-decimal64	Decimal(18, 5)					
+float32	Nullable(Float32)					
+float64	Nullable(Float64)					
+decimal32	Nullable(Decimal(9, 5))					
+decimal64	Nullable(Decimal(18, 5))					
 0	0	0	0
 1.2	0.7692307692307692	3.33333	333.33333
-date	UInt16					
-date32	Date32					
+date	Nullable(UInt16)					
+date32	Nullable(Date32)					
 0	1970-01-01
 1	1970-01-02
-str	String					
-fixed_string	String					
+str	Nullable(String)					
+fixed_string	Nullable(String)					
 Str: 0	100
 Str: 1	200
-array	Array(UInt64)					
-tuple	Tuple(`tuple.0` UInt64, `tuple.1` String)					
-map	Map(String, UInt64)					
+array	Array(Nullable(UInt64))					
+tuple	Tuple(Nullable(UInt64), Nullable(String))					
+map	Map(String, Nullable(UInt64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64)))					
-nested2	Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8)					
+nested1	Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64))))					
+nested2	Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 Parquet
-int8	Int8					
-uint8	UInt8					
-int16	Int16					
-uint16	UInt16					
-int32	Int32					
-uint32	Int64					
-int64	Int64					
-uint64	UInt64					
+int8	Nullable(Int8)					
+uint8	Nullable(UInt8)					
+int16	Nullable(Int16)					
+uint16	Nullable(UInt16)					
+int32	Nullable(Int32)					
+uint32	Nullable(Int64)					
+int64	Nullable(Int64)					
+uint64	Nullable(UInt64)					
 0	0	0	0	0	0	0	0
 -1	1	-1	1	-1	1	-1	1
-float32	Float32					
-float64	Float64					
-decimal32	Decimal(9, 5)					
-decimal64	Decimal(18, 5)					
+float32	Nullable(Float32)					
+float64	Nullable(Float64)					
+decimal32	Nullable(Decimal(9, 5))					
+decimal64	Nullable(Decimal(18, 5))					
 0	0	0	0
 1.2	0.7692307692307692	3.33333	333.33333
-date	UInt16					
-date32	Date32					
+date	Nullable(UInt16)					
+date32	Nullable(Date32)					
 0	1970-01-01
 1	1970-01-02
-str	String					
-fixed_string	String					
+str	Nullable(String)					
+fixed_string	Nullable(String)					
 Str: 0	100
 Str: 1	200
-array	Array(UInt64)					
-tuple	Tuple(`tuple.0` UInt64, `tuple.1` String)					
-map	Map(String, UInt64)					
+array	Array(Nullable(UInt64))					
+tuple	Tuple(Nullable(UInt64), Nullable(String))					
+map	Map(String, Nullable(UInt64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64)))					
-nested2	Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8)					
+nested1	Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64))))					
+nested2	Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 ORC
-int8	Int8					
-uint8	Int8					
-int16	Int16					
-uint16	Int16					
-int32	Int32					
-uint32	Int32					
-int64	Int64					
-uint64	Int64					
+int8	Nullable(Int8)					
+uint8	Nullable(Int8)					
+int16	Nullable(Int16)					
+uint16	Nullable(Int16)					
+int32	Nullable(Int32)					
+uint32	Nullable(Int32)					
+int64	Nullable(Int64)					
+uint64	Nullable(Int64)					
 0	0	0	0	0	0	0	0
 -1	1	-1	1	-1	1	-1	1
-float32	Float32					
-float64	Float64					
-decimal32	Decimal(9, 5)					
-decimal64	Decimal(18, 5)					
+float32	Nullable(Float32)					
+float64	Nullable(Float64)					
+decimal32	Nullable(Decimal(9, 5))					
+decimal64	Nullable(Decimal(18, 5))					
 0	0	0	0
 1.2	0.7692307692307692	3.33333	333.33333
-date	Date32					
-date32	Date32					
+date	Nullable(Date32)					
+date32	Nullable(Date32)					
 1970-01-01	1970-01-01
 1970-01-02	1970-01-02
-str	String					
-fixed_string	String					
+str	Nullable(String)					
+fixed_string	Nullable(String)					
 Str: 0	100
 Str: 1	200
-array	Array(Int64)					
-tuple	Tuple(`tuple.0` Int64, `tuple.1` String)					
-map	Map(String, Int64)					
+array	Array(Nullable(Int64))					
+tuple	Tuple(Nullable(Int64), Nullable(String))					
+map	Map(String, Nullable(Int64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(`nested1.0` Array(Int64), `nested1.1` Map(String, Int64)))					
-nested2	Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(Int64)), `nested2.0.1` Map(Int64, Array(Tuple(`nested2.0.1.0` Int64, `nested2.0.1.1` String)))), `nested2.1` Int8)					
+nested1	Array(Tuple(Array(Nullable(Int64)), Map(String, Nullable(Int64))))					
+nested2	Tuple(Tuple(Array(Array(Nullable(Int64))), Map(Int64, Array(Tuple(Nullable(Int64), Nullable(String))))), Nullable(Int8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 Native
--- a/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference
+++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference
@ -1 +1 @@
-x	LowCardinality(UInt64)					
+x	LowCardinality(Nullable(UInt64))					
--- a/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh
+++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh
@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh

-$CLICKHOUSE_CLIENT -q "insert into table function file('arrow.dict', 'Arrow', 'x LowCardinality(UInt64)') select number from numbers(10) settings output_format_arrow_low_cardinality_as_dictionary=1"
+$CLICKHOUSE_CLIENT -q "insert into table function file('arrow.dict', 'Arrow', 'x LowCardinality(UInt64)') select number from numbers(10) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1"

 $CLICKHOUSE_CLIENT -q "desc file('arrow.dict', 'Arrow')"

--- a/tests/queries/0_stateless/02211_shcema_inference_from_stdin.reference
+++ b/tests/queries/0_stateless/02211_shcema_inference_from_stdin.reference
@ -9,7 +9,7 @@ x	Nullable(Float64)
 7
 8
 9
-c1	Nullable(String)					
-c2	Nullable(String)					
-c3	Nullable(String)					
+c1	Nullable(Float64)					
+c2	Nullable(Float64)					
+c3	Nullable(Float64)					
 1	2	3
--- a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference
+++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference
@ -1,8 +1,8 @@
-a	Nullable(String)					
+a	Nullable(Float64)					
 b	Nullable(String)					
-c	Nullable(String)					
-1	s1	\N
+c	Array(Nullable(Float64))					
+1	s1	[]
 2	}	[2]
-\N	\N	\N
-\N	\N	\N
+\N	\N	[]
+\N	\N	[]
 \N	\N	[3]
--- a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference
+++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference
@ -0,0 +1,40 @@
+Arrow
+x	Nullable(UInt64)					
+arr1	Array(Nullable(UInt64))					
+arr2	Array(Array(Nullable(String)))					
+arr3	Array(Tuple(Nullable(String), Nullable(UInt64)))					
+0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
+\N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
+2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
+\N	[NULL,4]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,3)]
+4	[4,5]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,4)]
+ArrowStream
+x	Nullable(UInt64)					
+arr1	Array(Nullable(UInt64))					
+arr2	Array(Array(Nullable(String)))					
+arr3	Array(Tuple(Nullable(String), Nullable(UInt64)))					
+0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
+\N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
+2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
+\N	[NULL,4]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,3)]
+4	[4,5]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,4)]
+Parquet
+x	Nullable(UInt64)					
+arr1	Array(Nullable(UInt64))					
+arr2	Array(Array(Nullable(String)))					
+arr3	Array(Tuple(Nullable(String), Nullable(UInt64)))					
+0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
+\N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
+2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
+\N	[NULL,4]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,3)]
+4	[4,5]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,4)]
+ORC
+x	Nullable(Int64)					
+arr1	Array(Nullable(Int64))					
+arr2	Array(Array(Nullable(String)))					
+arr3	Array(Tuple(Nullable(String), Nullable(Int64)))					
+0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
+\N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
+2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
+\N	[NULL,4]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,3)]
+4	[4,5]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,4)]
--- a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh
+++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh
@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+# Tags: no-parallel, no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+
+USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+FILE_NAME=test_02242.data
+DATA_FILE=$USER_FILES_PATH/$FILE_NAME
+
+for format in Arrow ArrowStream Parquet ORC
+do
+    echo $format
+    $CLICKHOUSE_CLIENT -q "select number % 2 ? NULL : number as x, [number % 2 ? NULL : number, number + 1] as arr1, [[NULL, 'String'], [NULL], []] as arr2, [(NULL, NULL), ('String', NULL), (NULL, number)] as arr3 from numbers(5) format $format" > $DATA_FILE
+    $CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', '$format')"
+    $CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', '$format')"
+done
+
+rm $DATA_FILE
--- a/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.reference
+++ b/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.reference
@ -0,0 +1 @@
+10
--- a/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.sh
+++ b/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.sh
@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+$CLICKHOUSE_CLIENT -q "drop table if exists test_02243"
+$CLICKHOUSE_CLIENT -q "create table test_02243 (image_path Nullable(String),
+                                caption Nullable(String),
+                                NSFW Nullable(String),
+                                similarity Nullable(Float64),
+                                LICENSE Nullable(String),
+                                url Nullable(String),
+                                key Nullable(UInt64),
+                                shard_id Nullable(UInt64),
+                                status Nullable(String),
+                                error_message Nullable(String),
+                                width Nullable(UInt32),
+                                height Nullable(UInt32),
+                                exif Nullable(String),
+                                original_width Nullable(UInt32),
+                                original_height Nullable(UInt32)) engine=Memory"
+# Feed the Parquet fixture on stdin; the test passes when the insert succeeds and the count below matches the reference.
+$CLICKHOUSE_CLIENT --stacktrace -q "insert into test_02243 format Parquet" < "$CUR_DIR"/data_parquet_bad_column/metadata_0.parquet
+
+$CLICKHOUSE_CLIENT -q "select count() from test_02243"
+$CLICKHOUSE_CLIENT -q "drop table test_02243"
--- a/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.reference
+++ b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.reference
@ -0,0 +1,8 @@
+x	Nullable(String)					
+y	Nullable(Float64)					
+x	Nullable(String)					
+y	Nullable(Float64)					
+x	Nullable(String)					
+y	Nullable(Float64)					
+x	Nullable(String)					
+y	Nullable(Float64)					
--- a/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql
+++ b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql
@ -0,0 +1,14 @@
+-- Tags: no-fasttest, no-parallel
+
+-- For each format that carries no column names, write one row and check that DESC reports the names from column_names_for_schema_inference.
+INSERT INTO FUNCTION file('test_02244', 'TSV', 'x String, y UInt32') SELECT 'Hello, world!', 42 SETTINGS engine_file_truncate_on_insert=1;
+DESC file('test_02244', 'TSV') SETTINGS column_names_for_schema_inference='x,y';
+
+INSERT INTO FUNCTION file('test_02244', 'CSV', 'x String, y UInt32') SELECT 'Hello, world!', 42 SETTINGS engine_file_truncate_on_insert=1;
+DESC file('test_02244', 'CSV') SETTINGS column_names_for_schema_inference='x,y';
+
+INSERT INTO FUNCTION file('test_02244', 'JSONCompactEachRow', 'x String, y UInt32') SELECT 'Hello, world!', 42 SETTINGS engine_file_truncate_on_insert=1;
+DESC file('test_02244', 'JSONCompactEachRow') SETTINGS column_names_for_schema_inference='x,y';
+
+INSERT INTO FUNCTION file('test_02244', 'Values', 'x String, y UInt32') SELECT 'Hello, world!', 42 SETTINGS engine_file_truncate_on_insert=1;
+DESC file('test_02244', 'Values') SETTINGS column_names_for_schema_inference='x,y';
--- a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.reference
+++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.reference
@ -0,0 +1,16 @@
+OK
+image_path	Nullable(String)					
+caption	Nullable(String)					
+NSFW	Nullable(String)					
+similarity	Nullable(Float64)					
+LICENSE	Nullable(String)					
+url	Nullable(String)					
+key	Nullable(Int64)					
+shard_id	Nullable(Int64)					
+status	Nullable(String)					
+width	Nullable(Int64)					
+height	Nullable(Int64)					
+exif	Nullable(String)					
+original_width	Nullable(Int64)					
+original_height	Nullable(Int64)					
+10
--- a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh
+++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh
@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+FILE_NAME=test_02245.parquet
+DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME  # abort instead of touching "/$FILE_NAME" if the lookup failed
+
+cp "$CUR_DIR"/data_parquet_bad_column/metadata_0.parquet "$DATA_FILE"
+
+# Without the skip setting schema inference must fail; with it the unsupported column is dropped and the data is readable.
+$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL"
+$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1"
+$CLICKHOUSE_CLIENT -q "select count(*) from file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1"
+rm "$DATA_FILE"
--- a/tests/queries/0_stateless/02245_s3_schema_desc.reference
+++ b/tests/queries/0_stateless/02245_s3_schema_desc.reference
@ -1,21 +1,21 @@
-c1	Nullable(String)					
-c2	Nullable(String)					
-c3	Nullable(String)					
-c1	Nullable(String)					
-c2	Nullable(String)					
-c3	Nullable(String)					
+c1	Nullable(Float64)					
+c2	Nullable(Float64)					
+c3	Nullable(Float64)					
+c1	Nullable(Float64)					
+c2	Nullable(Float64)					
+c3	Nullable(Float64)					
 c1	UInt64					
 c2	UInt64					
 c3	UInt64					
-c1	Nullable(String)					
-c2	Nullable(String)					
-c3	Nullable(String)					
+c1	Nullable(Float64)					
+c2	Nullable(Float64)					
+c3	Nullable(Float64)					
 c1	UInt64					
 c2	UInt64					
 c3	UInt64					
-c1	Nullable(String)					
-c2	Nullable(String)					
-c3	Nullable(String)					
+c1	Nullable(Float64)					
+c2	Nullable(Float64)					
+c3	Nullable(Float64)					
 c1	UInt64					
 c2	UInt64					
 c3	UInt64					
--- a/tests/queries/0_stateless/02245_s3_schema_desc.sql
+++ b/tests/queries/0_stateless/02245_s3_schema_desc.sql
@ -10,4 +10,5 @@ desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test
 desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64');
 desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto');

+
 SELECT * FROM s3(decodeURLComponent(NULL), [NULL]);  --{serverError 170}
--- a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference
+++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference
@ -0,0 +1,107 @@
+TSV
+c1	Nullable(Float64)					
+c2	Nullable(String)					
+c3	Array(Nullable(Float64))					
+c4	Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))					
+42	Some string	[1,2,3,4]	(1,2,3)
+42	abcd	[]	(4,5,6)
+c1	Nullable(String)					
+[({\'key\' : 42.42}, [\'String\', \'String2\'], 42.42), ({}, [], -42), ({\'key2\' : NULL}, [NULL], NULL)]
+[]
+[({}, [], 0)]
+[({}, [NULL], NULL)]
+[({}, [\'String3\'], NULL)]
+[({\'key3\': NULL}, []), NULL]
+c1	Array(Tuple(Map(String, Nullable(Float64)), Array(Nullable(String)), Nullable(Float64)))					
+[({'key':42.42},['String','String2'],42.42),({},[],-42),({'key2':NULL},[NULL],NULL)]
+[]
+[({},[],0)]
+[({},[NULL],NULL)]
+[({},['String3'],NULL)]
+[({'key3':NULL},[],NULL)]
+c1	Nullable(Bool)					
+true
+false
+\N
+c1	Array(Nullable(Bool))					
+[true,NULL]
+[]
+[NULL]
+[false]
+c1	Nullable(String)					
+[]
+c1	Nullable(String)					
+{}
+c1	Nullable(String)					
+()
+c1	Nullable(String)					
+[1, 2, 3
+c1	Nullable(String)					
+[(1, 2, 3 4)]
+c1	Nullable(String)					
+[1, 2, 3 + 4]
+c1	Nullable(String)					
+(1, 2,
+c1	Nullable(String)					
+[1, Some trash, 42.2]
+c1	Nullable(String)					
+[1, \'String\', {\'key\' : 2}]
+c1	Nullable(String)					
+{\'key\' : 1, [1] : 10}
+c1	Nullable(String)					
+{}{}
+c1	Nullable(String)					
+[1, 2, 3
+c1	Nullable(String)					
+[abc, def]
+c1	Array(Nullable(String))					
+['abc','def']
+c1	Nullable(String)					
+[\'string]
+c1	Nullable(String)					
+\'string
+c1	Nullable(Float64)					
+42.42
+c1	Nullable(String)					
+42.42sometrash
+c1	Nullable(String)					
+[42.42sometrash, 42.42]
+
+CSV
+c1	Nullable(String)					
+c2	Nullable(String)					
+c3	Array(Nullable(Float64))					
+c4	Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)))					
+42	Some string	[1,2,3,4]	[(1,2,3)]
+42\\	abcd	[]	[(4,5,6)]
+c1	Nullable(String)					
+[({\'key\' : 42.42}, [\'String\', \'String2\'], 42.42), ({}, [], -42), ({\'key2\' : NULL}, [NULL], NULL)]
+[]
+[({}, [], 0)]
+[({}, [NULL], NULL)]
+[({}, [\'String3\'], NULL)]
+[({\'key3\': NULL}, []), NULL]
+c1	Array(Tuple(Map(String, Nullable(Float64)), Array(Nullable(String)), Nullable(Float64)))					
+[({'key':42.42},['String','String2'],42.42),({},[],-42),({'key2':NULL},[NULL],NULL)]
+[]
+[({},[],0)]
+[({},[NULL],NULL)]
+[({},['String3'],NULL)]
+[({'key3':NULL},[],NULL)]
+c1	Nullable(Bool)					
+true
+false
+\N
+c1	Array(Nullable(Bool))					
+[true,NULL]
+[]
+[NULL]
+[false]
+c1	Nullable(String)					
+(1, 2, 3)
+c1	Nullable(String)					
+123.123
+c1	Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)))					
+[(1,2,3)]
+c1	Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)))					
+[(1,2,3)]
--- a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh
+++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh
@ -0,0 +1,220 @@
+#!/usr/bin/env bash
+# Tags: no-parallel, no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+
+USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+FILE_NAME=test_02149.data
+DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME
+
+touch $DATA_FILE
+
+echo "TSV"
+
+echo -e "42\tSome string\t[1, 2, 3, 4]\t(1, 2, 3)
+42\tabcd\t[]\t(4, 5, 6)" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]
+[]
+[({}, [], 0)]
+[({}, [NULL], NULL)]
+[({}, ['String3'], NULL)]
+[({'key3': NULL}, []), NULL]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV') settings input_format_tsv_use_best_effort_in_schema_inference=false"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV') settings input_format_tsv_use_best_effort_in_schema_inference=false"
+
+# Same rows, but the last tuple is well-formed and best effort is left at its default, so a nested type is expected.
+echo -e "[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]
+[]
+[({}, [], 0)]
+[({}, [NULL], NULL)]
+[({}, ['String3'], NULL)]
+[({'key3': NULL}, [], NULL)]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "true
+false
+\N" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[true, NULL]
+[]
+[NULL]
+[false]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "{}" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "()" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[1, 2, 3" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[(1, 2, 3 4)]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[1, 2, 3 + 4]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "(1, 2," > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[1, Some trash, 42.2]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[1, 'String', {'key' : 2}]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "{'key' : 1, [1] : 10}" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "{}{}" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[1, 2, 3" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[abc, def]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "['abc', 'def']" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "['string]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "'string" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "42.42" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "42.42sometrash" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+echo -e "[42.42sometrash, 42.42]" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')"
+
+
+echo
+echo "CSV"
+
+echo -e "42,Some string,'[1, 2, 3, 4]','[(1, 2, 3)]'
+42\,abcd,'[]','[(4, 5, 6)]'" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]\"
+'[]'
+'[({}, [], 0)]'
+'[({}, [NULL], NULL)]'
+\"[({}, ['String3'], NULL)]\"
+\"[({'key3': NULL}, []), NULL]\"" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false"
+
+echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]\"
+'[]'
+'[({}, [], 0)]'
+'[({}, [NULL], NULL)]'
+\"[({}, ['String3'], NULL)]\"
+\"[({'key3': NULL}, [], NULL)]\"" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+echo -e "true
+false
+\N" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+echo -e "'[true, NULL]'
+'[]'
+'[NULL]'
+'[false]'" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+
+echo -e "'(1, 2, 3)'" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+echo -e "'123.123'" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+echo -e "'[(1, 2, 3)]'" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+echo -e "\"[(1, 2, 3)]\"" > "$DATA_FILE"
+
+$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')"
+$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')"
+
+rm "$DATA_FILE"
--- a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.reference
+++ b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.reference
@ -1,15 +1,15 @@
-a	Nullable(String)					
+a	Nullable(Float64)					
 b	Nullable(String)					
-c	Nullable(String)					
-1	s1	\N
+c	Array(Nullable(Float64))					
+1	s1	[]
 2	}	[2]
-\N	\N	\N
-\N	\N	\N
+\N	\N	[]
+\N	\N	[]
 \N	\N	[3]
-b	Nullable(String)					
-a	Nullable(String)					
-c	Nullable(String)					
-e	Nullable(String)					
+b	Nullable(Float64)					
+a	Nullable(Float64)					
+c	Nullable(Float64)					
+e	Nullable(Float64)					
 1	\N	\N	\N
 \N	2	3	\N
 \N	\N	\N	\N
--- a/utils/check-style/codespell-ignore-words.list
+++ b/utils/check-style/codespell-ignore-words.list
@ -10,3 +10,4 @@ ths
 offsett
 numer
 ue
+alse