Merge pull request #44019 from Avogar/refactor-schema-inference

Refactor and improve schema inference for text formats
2024-11-25 17:12:03 +00:00 · 2022-12-20 17:29:03 +01:00 · 2022-12-20 17:29:03 +01:00 · 643a35bed1
commit 643a35bed1
parent 8b553d854a 37df9b9990
69 changed files with 1881 additions and 1209 deletions
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -3588,6 +3588,13 @@ y	Nullable(String)
 z	IPv4
 ```

+## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
+
+Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
+If the setting is enabled, all inferred types will be `Nullable`. If it is disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference.
+
+Default value: `true`.
+
 ## input_format_try_infer_integers {#input_format_try_infer_integers}

 If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`.
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -764,6 +764,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
    M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Arrow", 0) \
    M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
    M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
+    M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
    M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
    M(Bool, input_format_json_try_infer_numbers_from_strings, true, "Try to infer numbers from string fields while schema inference", 0) \
    M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
--- a/src/Core/Types.h
+++ b/src/Core/Types.h
@ -3,6 +3,7 @@
 #include <cstdint>
 #include <string>
 #include <vector>
+#include <unordered_set>
 #include <base/strong_typedef.h>
 #include <base/Decimal.h>
 #include <base/defines.h>
@ -93,4 +94,5 @@ using Int256 = ::Int256;

 /// Not a data type in database, defined just for convenience.
 using Strings = std::vector<String>;
+using TypeIndexesSet = std::unordered_set<TypeIndex>;
 }
--- a/src/DataTypes/DataTypeObject.h
+++ b/src/DataTypes/DataTypeObject.h
@ -41,6 +41,8 @@ public:
    SerializationPtr doGetDefaultSerialization() const override;

    bool hasNullableSubcolumns() const { return is_nullable; }
+
+    const String & getSchemaFormat() const { return schema_format; }
 };

 }
--- a/src/DataTypes/transformTypesRecursively.cpp
+++ b/src/DataTypes/transformTypesRecursively.cpp
@ -8,74 +8,62 @@
 namespace DB
 {

-void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &)> transform_simple_types, std::function<void(DataTypes &)> transform_complex_types)
+void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_complex_types)
 {
+    TypeIndexesSet type_indexes;
+    for (const auto & type : types)
+        type_indexes.insert(type->getTypeId());
+
+
+    /// Arrays
+    if (type_indexes.contains(TypeIndex::Array))
    {
-        /// Arrays
-        bool have_array = false;
-        bool all_arrays = true;
-        DataTypes nested_types;
-        for (const auto & type : types)
+        /// All types are Array
+        if (type_indexes.size() == 1)
        {
-            if (const DataTypeArray * type_array = typeid_cast<const DataTypeArray *>(type.get()))
-            {
-                have_array = true;
-                nested_types.push_back(type_array->getNestedType());
-            }
-            else
-                all_arrays = false;
+            DataTypes nested_types;
+            for (const auto & type : types)
+                nested_types.push_back(typeid_cast<const DataTypeArray *>(type.get())->getNestedType());
+
+            transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types);
+            for (size_t i = 0; i != types.size(); ++i)
+                types[i] = std::make_shared<DataTypeArray>(nested_types[i]);
        }

-        if (have_array)
-        {
-            if (all_arrays)
-            {
-                transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types);
-                for (size_t i = 0; i != types.size(); ++i)
-                    types[i] = std::make_shared<DataTypeArray>(nested_types[i]);
-            }
+        if (transform_complex_types)
+            transform_complex_types(types, type_indexes);

-            if (transform_complex_types)
-                transform_complex_types(types);
-
-            return;
-        }
+        return;
    }

+    /// Tuples
+    if (type_indexes.contains(TypeIndex::Tuple))
    {
-        /// Tuples
-        bool have_tuple = false;
-        bool all_tuples = true;
-        size_t tuple_size = 0;
-
-        std::vector<DataTypes> nested_types;
-
-        for (const auto & type : types)
+        /// All types are Tuple
+        if (type_indexes.size() == 1)
        {
-            if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
-            {
-                if (!have_tuple)
-                {
-                    tuple_size = type_tuple->getElements().size();
-                    nested_types.resize(tuple_size);
-                    for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
-                        nested_types[elem_idx].reserve(types.size());
-                }
-                else if (tuple_size != type_tuple->getElements().size())
-                    return;
+            std::vector<DataTypes> nested_types;
+            const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(types[0].get());
+            size_t tuple_size = type_tuple->getElements().size();
+            nested_types.resize(tuple_size);
+            for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
+                nested_types[elem_idx].reserve(types.size());

-                have_tuple = true;
+            bool sizes_are_equal = true;
+            for (const auto & type : types)
+            {
+                type_tuple = typeid_cast<const DataTypeTuple *>(type.get());
+                if (type_tuple->getElements().size() != tuple_size)
+                {
+                    sizes_are_equal = false;
+                    break;
+                }

                for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
                    nested_types[elem_idx].emplace_back(type_tuple->getElements()[elem_idx]);
            }
-            else
-                all_tuples = false;
-        }

-        if (have_tuple)
-        {
-            if (all_tuples)
+            if (sizes_are_equal)
            {
                std::vector<DataTypes> transposed_nested_types(types.size());
                for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
@ -88,56 +76,47 @@ void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &
                for (size_t i = 0; i != types.size(); ++i)
                    types[i] = std::make_shared<DataTypeTuple>(transposed_nested_types[i]);
            }
-
-            if (transform_complex_types)
-                transform_complex_types(types);
-
-            return;
        }
+
+        if (transform_complex_types)
+            transform_complex_types(types, type_indexes);
+
+        return;
    }

+    /// Maps
+    if (type_indexes.contains(TypeIndex::Map))
    {
-        /// Maps
-        bool have_maps = false;
-        bool all_maps = true;
-        DataTypes key_types;
-        DataTypes value_types;
-        key_types.reserve(types.size());
-        value_types.reserve(types.size());
-
-        for (const auto & type : types)
+        /// All types are Map
+        if (type_indexes.size() == 1)
        {
-            if (const DataTypeMap * type_map = typeid_cast<const DataTypeMap *>(type.get()))
+            DataTypes key_types;
+            DataTypes value_types;
+            key_types.reserve(types.size());
+            value_types.reserve(types.size());
+            for (const auto & type : types)
            {
-                have_maps = true;
+                const DataTypeMap * type_map = typeid_cast<const DataTypeMap *>(type.get());
                key_types.emplace_back(type_map->getKeyType());
                value_types.emplace_back(type_map->getValueType());
            }
-            else
-                all_maps = false;
+
+            transformTypesRecursively(key_types, transform_simple_types, transform_complex_types);
+            transformTypesRecursively(value_types, transform_simple_types, transform_complex_types);
+
+            for (size_t i = 0; i != types.size(); ++i)
+                types[i] = std::make_shared<DataTypeMap>(key_types[i], value_types[i]);
        }

-        if (have_maps)
-        {
-            if (all_maps)
-            {
-                transformTypesRecursively(key_types, transform_simple_types, transform_complex_types);
-                transformTypesRecursively(value_types, transform_simple_types, transform_complex_types);
+        if (transform_complex_types)
+            transform_complex_types(types, type_indexes);

-                for (size_t i = 0; i != types.size(); ++i)
-                    types[i] = std::make_shared<DataTypeMap>(key_types[i], value_types[i]);
-            }
-
-            if (transform_complex_types)
-                transform_complex_types(types);
-
-            return;
-        }
+        return;
    }

+    /// Nullable
+    if (type_indexes.contains(TypeIndex::Nullable))
    {
-        /// Nullable
-        bool have_nullable = false;
        std::vector<UInt8> is_nullable;
        is_nullable.reserve(types.size());
        DataTypes nested_types;
@ -146,7 +125,6 @@ void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &
        {
            if (const DataTypeNullable * type_nullable = typeid_cast<const DataTypeNullable *>(type.get()))
            {
-                have_nullable = true;
                is_nullable.push_back(1);
                nested_types.push_back(type_nullable->getNestedType());
            }
@ -157,28 +135,28 @@ void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &
            }
        }

-        if (have_nullable)
+        transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types);
+        for (size_t i = 0; i != types.size(); ++i)
        {
-            transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types);
-            for (size_t i = 0; i != types.size(); ++i)
-            {
-                if (is_nullable[i])
-                    types[i] = makeNullable(nested_types[i]);
-                else
-                    types[i] = nested_types[i];
-            }
-
-            return;
+            if (is_nullable[i])
+                types[i] = makeNullable(nested_types[i]);
+            else
+                types[i] = nested_types[i];
        }
+
+        if (transform_complex_types)
+            transform_complex_types(types, type_indexes);
+
+        return;
    }

-    transform_simple_types(types);
+    transform_simple_types(types, type_indexes);
 }

 void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback)
 {
    DataTypes types = {type};
-    transformTypesRecursively(types, [callback](auto & data_types){ callback(data_types[0]); }, {});
+    transformTypesRecursively(types, [callback](auto & data_types, const TypeIndexesSet &){ callback(data_types[0]); }, {});
 }

 }
--- a/src/DataTypes/transformTypesRecursively.h
+++ b/src/DataTypes/transformTypesRecursively.h
@ -12,7 +12,7 @@ namespace DB
 /// If not all types are the same complex type (Array/Map/Tuple), this function won't be called to nested types.
 /// Function transform_simple_types will be applied to resulting simple types after all recursive calls.
 /// Function transform_complex_types will be applied to complex types (Array/Map/Tuple) after recursive call to their nested types.
-void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &)> transform_simple_types, std::function<void(DataTypes &)> transform_complex_types);
+void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_complex_types);

 void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback);

--- a/src/Formats/EscapingRuleUtils.cpp
+++ b/src/Formats/EscapingRuleUtils.cpp
@ -1,21 +1,11 @@
 #include <Formats/EscapingRuleUtils.h>
-#include <Formats/JSONUtils.h>
-#include <Formats/ReadSchemaUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
 #include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeFactory.h>
-#include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeNothing.h>
-#include <DataTypes/DataTypeTuple.h>
-#include <DataTypes/DataTypeDate.h>
-#include <DataTypes/DataTypeDateTime64.h>
 #include <DataTypes/DataTypeLowCardinality.h>
-#include <DataTypes/DataTypeMap.h>
-#include <DataTypes/DataTypeObject.h>
-#include <DataTypes/getLeastSupertype.h>
-#include <DataTypes/transformTypesRecursively.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadBufferFromString.h>
@ -261,556 +251,76 @@ String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule e
    return readByEscapingRule<true>(buf, escaping_rule, format_settings);
 }

-void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, bool is_json, const std::unordered_set<const IDataType *> * numbers_parsed_from_json_strings = nullptr)
-{
-    /// Do nothing if we didn't try to infer something special.
-    if (!settings.try_infer_integers && !settings.try_infer_dates && !settings.try_infer_datetimes && !is_json)
-        return;
-
-    auto transform_simple_types = [&](DataTypes & data_types)
-    {
-        /// If we have floats and integers convert them all to float.
-        if (settings.try_infer_integers)
-        {
-            bool have_floats = false;
-            bool have_integers = false;
-            for (const auto & type : data_types)
-            {
-                have_floats |= isFloat(type);
-                have_integers |= isInteger(type) && !isBool(type);
-            }
-
-            if (have_floats && have_integers)
-            {
-                for (auto & type : data_types)
-                {
-                    if (isInteger(type))
-                        type = std::make_shared<DataTypeFloat64>();
-                }
-            }
-        }
-
-        /// If we have only dates and datetimes, convert dates to datetime.
-        /// If we have date/datetimes and smth else, convert them to string, because
-        /// There is a special case when we inferred both Date/DateTime and Int64 from Strings,
-        /// for example: "arr: ["2020-01-01", "2000"]" -> Tuple(Date, Int64),
-        /// so if we have Date/DateTime and smth else (not only String) we should
-        /// convert Date/DateTime back to String, so then we will be able to
-        /// convert Int64 back to String as well.
-        if (settings.try_infer_dates || settings.try_infer_datetimes)
-        {
-            bool have_dates = false;
-            bool have_datetimes = false;
-            bool all_dates_or_datetimes = true;
-
-            for (const auto & type : data_types)
-            {
-                have_dates |= isDate(type);
-                have_datetimes |= isDateTime64(type);
-                all_dates_or_datetimes &= isDate(type) || isDateTime64(type);
-            }
-
-            if (!all_dates_or_datetimes && (have_dates || have_datetimes))
-            {
-                for (auto & type : data_types)
-                {
-                    if (isDate(type) || isDateTime64(type))
-                        type = std::make_shared<DataTypeString>();
-                }
-            }
-            else if (have_dates && have_datetimes)
-            {
-                for (auto & type : data_types)
-                {
-                    if (isDate(type))
-                        type = std::make_shared<DataTypeDateTime64>(9);
-                }
-            }
-        }
-
-        if (!is_json)
-            return;
-
-        /// Check settings specific for JSON formats.
-
-        /// If we have numbers and strings, convert numbers to strings.
-        if (settings.json.try_infer_numbers_from_strings || settings.json.read_numbers_as_strings)
-        {
-            bool have_strings = false;
-            bool have_numbers = false;
-            for (const auto & type : data_types)
-            {
-                have_strings |= isString(type);
-                have_numbers |= isNumber(type);
-            }
-
-            if (have_strings && have_numbers)
-            {
-                for (auto & type : data_types)
-                {
-                    if (isNumber(type)
-                        && (settings.json.read_numbers_as_strings || !numbers_parsed_from_json_strings
-                            || numbers_parsed_from_json_strings->contains(type.get())))
-                        type = std::make_shared<DataTypeString>();
-                }
-            }
-        }
-
-        if (settings.json.read_bools_as_numbers)
-        {
-            /// Note that have_floats and have_integers both cannot be
-            /// equal to true as in one of previous checks we convert
-            /// integers to floats if we have both.
-            bool have_floats = false;
-            bool have_integers = false;
-            bool have_bools = false;
-            for (const auto & type : data_types)
-            {
-                have_floats |= isFloat(type);
-                have_integers |= isInteger(type) && !isBool(type);
-                have_bools |= isBool(type);
-            }
-
-            if (have_bools && (have_integers || have_floats))
-            {
-                for (auto & type : data_types)
-                {
-                    if (isBool(type))
-                    {
-                        if (have_integers)
-                            type = std::make_shared<DataTypeInt64>();
-                        else
-                            type = std::make_shared<DataTypeFloat64>();
-                    }
-                }
-            }
-        }
-    };
-
-    auto transform_complex_types = [&](DataTypes & data_types)
-    {
-        if (!is_json)
-            return;
-
-        bool have_maps = false;
-        bool have_objects = false;
-        bool have_strings = false;
-        bool are_maps_equal = true;
-        DataTypePtr first_map_type;
-        for (const auto & type : data_types)
-        {
-            if (isMap(type))
-            {
-                if (!have_maps)
-                {
-                    first_map_type = type;
-                    have_maps = true;
-                }
-                else
-                {
-                    are_maps_equal &= type->equals(*first_map_type);
-                }
-            }
-            else if (isObject(type))
-            {
-                have_objects = true;
-            }
-            else if (isString(type))
-            {
-                have_strings = false;
-            }
-        }
-
-        if (have_maps && (have_objects || !are_maps_equal))
-        {
-            for (auto & type : data_types)
-            {
-                if (isMap(type))
-                    type = std::make_shared<DataTypeObject>("json", true);
-            }
-        }
-
-        if (settings.json.read_objects_as_strings && have_strings && (have_maps || have_objects))
-        {
-            for (auto & type : data_types)
-            {
-                if (isMap(type) || isObject(type))
-                    type = std::make_shared<DataTypeString>();
-            }
-        }
-    };
-
-    transformTypesRecursively(types, transform_simple_types, transform_complex_types);
-}
-
-void transformInferredTypesIfNeeded(DataTypes & types, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule)
-{
-    transformInferredTypesIfNeededImpl(types, settings, escaping_rule == FormatSettings::EscapingRule::JSON);
-}
-
-void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule)
-{
-    DataTypes types = {first, second};
-    transformInferredTypesIfNeeded(types, settings, escaping_rule);
-    first = std::move(types[0]);
-    second = std::move(types[1]);
-}
-
-void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, const std::unordered_set<const IDataType *> * numbers_parsed_from_json_strings)
-{
-    transformInferredTypesIfNeededImpl(types, settings, true, numbers_parsed_from_json_strings);
-}
-
-void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings)
-{
-    DataTypes types = {first, second};
-    transformInferredJSONTypesIfNeeded(types, settings);
-    first = std::move(types[0]);
-    second = std::move(types[1]);
-}
-
-bool tryInferDate(const std::string_view & field)
-{
-    ReadBufferFromString buf(field);
-    DayNum tmp;
-    return tryReadDateText(tmp, buf) && buf.eof();
-}
-
-bool tryInferDateTime(const std::string_view & field, const FormatSettings & settings)
-{
-    if (field.empty())
-        return false;
-
-    ReadBufferFromString buf(field);
-    Float64 tmp_float;
-    /// Check if it's just a number, and if so, don't try to infer DateTime from it,
-    /// because we can interpret this number as a timestamp and it will lead to
-    /// inferring DateTime instead of simple Int64/Float64 in some cases.
-    if (tryReadFloatText(tmp_float, buf) && buf.eof())
-        return false;
-
-    buf.seek(0, SEEK_SET); /// Return position to the beginning
-    DateTime64 tmp;
-    switch (settings.date_time_input_format)
-    {
-        case FormatSettings::DateTimeInputFormat::Basic:
-            if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof())
-                return true;
-            break;
-        case FormatSettings::DateTimeInputFormat::BestEffort:
-            if (tryParseDateTime64BestEffort(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof())
-                return true;
-            break;
-        case FormatSettings::DateTimeInputFormat::BestEffortUS:
-            if (tryParseDateTime64BestEffortUS(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof())
-                return true;
-            break;
-    }
-
-    return false;
-}
-
-DataTypePtr tryInferDateOrDateTime(const std::string_view & field, const FormatSettings & settings)
-{
-    if (settings.try_infer_dates && tryInferDate(field))
-        return makeNullable(std::make_shared<DataTypeDate>());
-
-    if (settings.try_infer_datetimes && tryInferDateTime(field, settings))
-        return makeNullable(std::make_shared<DataTypeDateTime64>(9));
-
-    return nullptr;
-}
-
-static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBufferFromString & buf, const FormatSettings & settings)
-{
-    if (buf.eof())
-        return nullptr;
-
-    /// Array
-    if (checkChar('[', buf))
-    {
-        skipWhitespaceIfAny(buf);
-
-        DataTypes nested_types;
-        bool first = true;
-        while (!buf.eof() && *buf.position() != ']')
-        {
-            if (!first)
-            {
-                skipWhitespaceIfAny(buf);
-                if (!checkChar(',', buf))
-                    return nullptr;
-                skipWhitespaceIfAny(buf);
-            }
-            else
-                first = false;
-
-            auto nested_type = determineDataTypeForSingleFieldImpl(buf, settings);
-            if (!nested_type)
-                return nullptr;
-
-            nested_types.push_back(nested_type);
-        }
-
-        if (buf.eof())
-            return nullptr;
-
-        ++buf.position();
-
-        if (nested_types.empty())
-            return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNothing>());
-
-        transformInferredTypesIfNeeded(nested_types, settings);
-
-        auto least_supertype = tryGetLeastSupertype(nested_types);
-        if (!least_supertype)
-            return nullptr;
-
-        return std::make_shared<DataTypeArray>(least_supertype);
-    }
-
-    /// Tuple
-    if (checkChar('(', buf))
-    {
-        skipWhitespaceIfAny(buf);
-
-        DataTypes nested_types;
-        bool first = true;
-        while (!buf.eof() && *buf.position() != ')')
-        {
-            if (!first)
-            {
-                skipWhitespaceIfAny(buf);
-                if (!checkChar(',', buf))
-                    return nullptr;
-                skipWhitespaceIfAny(buf);
-            }
-            else
-                first = false;
-
-            auto nested_type = determineDataTypeForSingleFieldImpl(buf, settings);
-            if (!nested_type)
-                return nullptr;
-
-            nested_types.push_back(nested_type);
-        }
-
-        if (buf.eof() || nested_types.empty())
-            return nullptr;
-
-        ++buf.position();
-
-        return std::make_shared<DataTypeTuple>(nested_types);
-    }
-
-    /// Map
-    if (checkChar('{', buf))
-    {
-        skipWhitespaceIfAny(buf);
-
-        DataTypes key_types;
-        DataTypes value_types;
-        bool first = true;
-        while (!buf.eof() && *buf.position() != '}')
-        {
-            if (!first)
-            {
-                skipWhitespaceIfAny(buf);
-                if (!checkChar(',', buf))
-                    return nullptr;
-                skipWhitespaceIfAny(buf);
-            }
-            else
-                first = false;
-
-            auto key_type = determineDataTypeForSingleFieldImpl(buf, settings);
-            if (!key_type)
-                return nullptr;
-
-            key_types.push_back(key_type);
-
-            skipWhitespaceIfAny(buf);
-            if (!checkChar(':', buf))
-                return nullptr;
-            skipWhitespaceIfAny(buf);
-
-            auto value_type = determineDataTypeForSingleFieldImpl(buf, settings);
-            if (!value_type)
-                return nullptr;
-
-            value_types.push_back(value_type);
-        }
-
-        if (buf.eof())
-            return nullptr;
-
-        ++buf.position();
-        skipWhitespaceIfAny(buf);
-
-        if (key_types.empty())
-            return std::make_shared<DataTypeMap>(std::make_shared<DataTypeNothing>(), std::make_shared<DataTypeNothing>());
-
-        transformInferredTypesIfNeeded(key_types, settings);
-        transformInferredTypesIfNeeded(value_types, settings);
-
-        auto key_least_supertype = tryGetLeastSupertype(key_types);
-
-        auto value_least_supertype = tryGetLeastSupertype(value_types);
-        if (!key_least_supertype || !value_least_supertype)
-            return nullptr;
-
-        if (!DataTypeMap::checkKeyType(key_least_supertype))
-            return nullptr;
-
-        return std::make_shared<DataTypeMap>(key_least_supertype, value_least_supertype);
-    }
-
-    /// String
-    if (*buf.position() == '\'')
-    {
-        ++buf.position();
-        String field;
-        while (!buf.eof())
-        {
-            char * next_pos = find_first_symbols<'\\', '\''>(buf.position(), buf.buffer().end());
-            field.append(buf.position(), next_pos);
-            buf.position() = next_pos;
-
-            if (!buf.hasPendingData())
-                continue;
-
-            if (*buf.position() == '\'')
-                break;
-
-            field.push_back(*buf.position());
-            if (*buf.position() == '\\')
-                ++buf.position();
-        }
-
-        if (buf.eof())
-            return nullptr;
-
-        ++buf.position();
-        if (auto type = tryInferDateOrDateTime(field, settings))
-            return type;
-
-        return std::make_shared<DataTypeString>();
-    }
-
-    /// Bool
-    if (checkStringCaseInsensitive("true", buf) || checkStringCaseInsensitive("false", buf))
-        return DataTypeFactory::instance().get("Bool");
-
-    /// Null
-    if (checkStringCaseInsensitive("NULL", buf))
-        return std::make_shared<DataTypeNothing>();
-
-    /// Number
-    Float64 tmp;
-    auto * pos_before_float = buf.position();
-    if (tryReadFloatText(tmp, buf))
-    {
-        if (settings.try_infer_integers)
-        {
-            auto * float_end_pos = buf.position();
-            buf.position() = pos_before_float;
-            Int64 tmp_int;
-            if (tryReadIntText(tmp_int, buf) && buf.position() == float_end_pos)
-                return std::make_shared<DataTypeInt64>();
-
-            buf.position() = float_end_pos;
-        }
-
-        return std::make_shared<DataTypeFloat64>();
-    }
-
-    return nullptr;
-}
-
-static DataTypePtr determineDataTypeForSingleField(ReadBufferFromString & buf, const FormatSettings & settings)
-{
-    return makeNullableRecursivelyAndCheckForNothing(determineDataTypeForSingleFieldImpl(buf, settings));
-}
-
-DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule)
+DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info)
 {
    switch (escaping_rule)
    {
        case FormatSettings::EscapingRule::Quoted:
-        {
-            ReadBufferFromString buf(field);
-            auto type = determineDataTypeForSingleField(buf, format_settings);
-            return buf.eof() ? type : nullptr;
-        }
+            return tryInferDataTypeForSingleField(field, format_settings);
        case FormatSettings::EscapingRule::JSON:
-            return JSONUtils::getDataTypeFromField(field, format_settings);
+            return tryInferDataTypeForSingleJSONField(field, format_settings, json_info);
        case FormatSettings::EscapingRule::CSV:
        {
            if (!format_settings.csv.use_best_effort_in_schema_inference)
-                return makeNullable(std::make_shared<DataTypeString>());
+                return std::make_shared<DataTypeString>();

-            if (field.empty() || field == format_settings.csv.null_representation)
+            if (field.empty())
                return nullptr;

-            if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation)
-                return DataTypeFactory::instance().get("Nullable(Bool)");
+            if (field == format_settings.csv.null_representation)
+                return makeNullable(std::make_shared<DataTypeNothing>());

+            if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation)
+                return DataTypeFactory::instance().get("Bool");
+
+            /// In CSV complex types are serialized in quotes. If we have quotes, we should try to infer type
+            /// from data inside quotes.
            if (field.size() > 1 && ((field.front() == '\'' && field.back() == '\'') || (field.front() == '"' && field.back() == '"')))
            {
                auto data = std::string_view(field.data() + 1, field.size() - 2);
-                if (auto date_type = tryInferDateOrDateTime(data, format_settings))
+                /// First, try to infer dates and datetimes.
+                if (auto date_type = tryInferDateOrDateTimeFromString(data, format_settings))
                    return date_type;

-                ReadBufferFromString buf(data);
                /// Try to determine the type of value inside quotes
-                auto type = determineDataTypeForSingleField(buf, format_settings);
+                auto type = tryInferDataTypeForSingleField(data, format_settings);

-                if (!type)
-                    return nullptr;
-
-                /// If it's a number or tuple in quotes or there is some unread data in buffer, we determine it as a string.
-                if (isNumber(removeNullable(type)) || isTuple(type) || !buf.eof())
-                    return makeNullable(std::make_shared<DataTypeString>());
+                /// If we couldn't infer any type or it's a number or tuple in quotes, we determine it as a string.
+                if (!type || isNumber(removeNullable(type)) || isTuple(type))
+                    return std::make_shared<DataTypeString>();

                return type;
            }

            /// Case when CSV value is not in quotes. Check if it's a number, and if not, determine it's as a string.
-            if (format_settings.try_infer_integers)
-            {
-                ReadBufferFromString buf(field);
-                Int64 tmp_int;
-                if (tryReadIntText(tmp_int, buf) && buf.eof())
-                    return makeNullable(std::make_shared<DataTypeInt64>());
-            }
+            auto type = tryInferNumberFromString(field, format_settings);

-            ReadBufferFromString buf(field);
-            Float64 tmp;
-            if (tryReadFloatText(tmp, buf) && buf.eof())
-                return makeNullable(std::make_shared<DataTypeFloat64>());
+            if (!type)
+                return std::make_shared<DataTypeString>();

-            return makeNullable(std::make_shared<DataTypeString>());
+            return type;
        }
        case FormatSettings::EscapingRule::Raw: [[fallthrough]];
        case FormatSettings::EscapingRule::Escaped:
        {
            if (!format_settings.tsv.use_best_effort_in_schema_inference)
-                return makeNullable(std::make_shared<DataTypeString>());
+                return std::make_shared<DataTypeString>();

-            if (field.empty() || field == format_settings.tsv.null_representation)
+            if (field.empty())
                return nullptr;

-            if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation)
-                return DataTypeFactory::instance().get("Nullable(Bool)");
+            if (field == format_settings.tsv.null_representation)
+                return makeNullable(std::make_shared<DataTypeNothing>());

-            if (auto date_type = tryInferDateOrDateTime(field, format_settings))
+            if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation)
+                return DataTypeFactory::instance().get("Bool");
+
+            if (auto date_type = tryInferDateOrDateTimeFromString(field, format_settings))
                return date_type;

-            ReadBufferFromString buf(field);
-            auto type = determineDataTypeForSingleField(buf, format_settings);
-            if (!buf.eof())
-                return makeNullable(std::make_shared<DataTypeString>());
-
+            auto type = tryInferDataTypeForSingleField(field, format_settings);
+            if (!type)
+                return std::make_shared<DataTypeString>();
            return type;
        }
        default:
@ -818,15 +328,34 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe
    }
 }

-DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule)
+/// Infers a type for every field by calling tryInferDataTypeByEscapingRule on each of them with the same
+/// format settings, escaping rule and json_info.
+/// The result has exactly one entry per input field, in input order; an entry is whatever the
+/// single-field function returned for that field, so it can be nullptr.
+DataTypes tryInferDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info)
 {
    DataTypes data_types;
    data_types.reserve(fields.size());
    for (const auto & field : fields)
-        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, escaping_rule));
+        data_types.push_back(tryInferDataTypeByEscapingRule(field, format_settings, escaping_rule, json_info));
    return data_types;
 }

+/// Picks the type-transformation routine that corresponds to the escaping rule and applies it
+/// to the pair of inferred types (both are passed by reference and may be modified):
+///  - JSON: transformInferredJSONTypesIfNeeded, which additionally receives json_info;
+///  - CSV, Quoted, Raw, Escaped: transformInferredTypesIfNeeded;
+///  - any other escaping rule: BAD_ARGUMENTS exception.
+void transformInferredTypesByEscapingRuleIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info)
+{
+    switch (escaping_rule)
+    {
+        case FormatSettings::EscapingRule::CSV: [[fallthrough]];
+        case FormatSettings::EscapingRule::Quoted: [[fallthrough]];
+        case FormatSettings::EscapingRule::Raw: [[fallthrough]];
+        case FormatSettings::EscapingRule::Escaped:
+            transformInferredTypesIfNeeded(first, second, settings);
+            return;
+        case FormatSettings::EscapingRule::JSON:
+            transformInferredJSONTypesIfNeeded(first, second, settings, json_info);
+            return;
+        default:
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot transform inferred types for value with {} escaping rule", escapingRuleToString(escaping_rule));
+    }
+}
+
+
 DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule)
 {
    switch (escaping_rule)
@ -834,7 +363,7 @@ DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escap
        case FormatSettings::EscapingRule::CSV:
        case FormatSettings::EscapingRule::Escaped:
        case FormatSettings::EscapingRule::Raw:
-            return makeNullable(std::make_shared<DataTypeString>());
+            return std::make_shared<DataTypeString>();
        default:
            return nullptr;
    }
@ -851,9 +380,10 @@ DataTypes getDefaultDataTypeForEscapingRules(const std::vector<FormatSettings::E
 String getAdditionalFormatInfoForAllRowBasedFormats(const FormatSettings & settings)
 {
    return fmt::format(
-        "schema_inference_hints={}, max_rows_to_read_for_schema_inference={}",
+        "schema_inference_hints={}, max_rows_to_read_for_schema_inference={}, schema_inference_make_columns_nullable={}",
        settings.schema_inference_hints,
-        settings.max_rows_to_read_for_schema_inference);
+        settings.max_rows_to_read_for_schema_inference,
+        settings.schema_inference_make_columns_nullable);
 }

 String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule)
@ -890,7 +420,11 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo
                settings.csv.tuple_delimiter);
            break;
        case FormatSettings::EscapingRule::JSON:
-            result += fmt::format(", try_infer_numbers_from_strings={}, read_bools_as_numbers={}", settings.json.try_infer_numbers_from_strings, settings.json.read_bools_as_numbers);
+            result += fmt::format(
+                ", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, try_infer_objects={}",
+                settings.json.try_infer_numbers_from_strings,
+                settings.json.read_bools_as_numbers,
+                settings.json.try_infer_objects);
            break;
        default:
            break;
--- a/src/Formats/EscapingRuleUtils.h
+++ b/src/Formats/EscapingRuleUtils.h
@ -1,6 +1,7 @@
 #pragma once

 #include <Formats/FormatSettings.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <DataTypes/IDataType.h>
 #include <DataTypes/Serializations/ISerialization.h>
 #include <IO/ReadBuffer.h>
@ -38,45 +39,17 @@ String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule es

 /// Try to determine the type of the field written by a specific escaping rule.
 /// If cannot, return nullptr.
-/// - For Quoted escaping rule we can interpret a single field as a constant
-///   expression and get it's type by evaluation this expression.
-/// - For JSON escaping rule we can use JSON parser to parse a single field
-///   and then convert JSON type of this field to ClickHouse type.
-/// - For CSV escaping rule we can do the next:
-///    - If the field is an unquoted string, then we try to parse it as a number,
-///      and if we cannot, treat it as a String.
-///    - If the field is a string in quotes, then we try to use some
-///      tweaks and heuristics to determine the type inside quotes, and if we can't or
-///      the result is a number or tuple (we don't parse numbers in quotes and don't
-///      support tuples in CSV) we treat it as a String.
-///    - If input_format_csv_use_best_effort_in_schema_inference is disabled, we
-///      treat everything as a string.
-/// - For TSV and TSVRaw we try to use some tweaks and heuristics to determine the type
-///   of value if setting input_format_tsv_use_best_effort_in_schema_inference is enabled,
-///   otherwise we treat everything as a string.
-DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule);
-DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule);
+/// See tryInferDataTypeForSingle(JSON)Field in SchemaInferenceUtils.h
+DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info = nullptr);
+DataTypes tryInferDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info = nullptr);
+
+/// Check if the types inferred from data need to be transformed, and transform them if necessary.
+/// See transformInferred(JSON)TypesIfNeeded in SchemaInferenceUtils.h
+void transformInferredTypesByEscapingRuleIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule, JSONInferenceInfo * json_info = nullptr);

 DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule);
 DataTypes getDefaultDataTypeForEscapingRules(const std::vector<FormatSettings::EscapingRule> & escaping_rules);

-/// Try to infer Date or Datetime from string if corresponding settings are enabled.
-DataTypePtr tryInferDateOrDateTime(const std::string_view & field, const FormatSettings & settings);
-
-/// Check if we need to transform types inferred from data and transform it if necessary.
-/// It's used when we try to infer some not ordinary types from another types.
-/// For example dates from strings, we should check if dates were inferred from all strings
-/// in the same way and if not, transform inferred dates back to strings.
-/// For example, if we have array of strings and we tried to infer dates from them,
-/// to make the result type Array(Date) we should ensure that all strings were
-/// successfully parsed as dated and if not, convert all dates back to strings and make result type Array(String).
-void transformInferredTypesIfNeeded(DataTypes & types, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule = FormatSettings::EscapingRule::Escaped);
-void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule = FormatSettings::EscapingRule::Escaped);
-
-/// Same as transformInferredTypesIfNeeded but takes into account settings that are special for JSON formats.
-void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, const std::unordered_set<const IDataType *> * numbers_parsed_from_json_strings = nullptr);
-void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings);
-
 String getAdditionalFormatInfoForAllRowBasedFormats(const FormatSettings & settings);
 String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule);

--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@ -169,6 +169,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference;
    format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference;
    format_settings.schema_inference_hints = settings.schema_inference_hints;
+    format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable;
    format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name;
    format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names;
    format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size;
@ -182,6 +183,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string;
    format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference;
    format_settings.max_binary_string_size = settings.format_binary_max_string_size;
+    format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth;

    /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
    if (format_settings.schema.is_server)
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@ -71,6 +71,8 @@ struct FormatSettings
        Raw
    };

+    bool schema_inference_make_columns_nullable = true;
+
    DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;

    bool input_format_ipv4_default_on_conversion_error = false;
@ -81,6 +83,8 @@ struct FormatSettings

    UInt64 max_binary_string_size = 0;

+    UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH;
+
    struct
    {
        UInt64 row_group_size = 1000000;
--- a/src/Formats/JSONUtils.cpp
+++ b/src/Formats/JSONUtils.cpp
@ -6,19 +6,13 @@
 #include <IO/WriteBufferValidUTF8.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
 #include <DataTypes/DataTypeNullable.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeTuple.h>
-#include <DataTypes/DataTypeMap.h>
 #include <DataTypes/DataTypeObject.h>
 #include <DataTypes/DataTypeFactory.h>
-#include <Common/JSONParsers/SimdJSONParser.h>
-#include <Common/JSONParsers/RapidJSONParser.h>
-#include <Common/JSONParsers/DummyJSONParser.h>

 #include <base/find_symbols.h>

+#include <Common/logger_useful.h>
+
 namespace DB
 {

@ -122,206 +116,6 @@ namespace JSONUtils
        return {loadAtPosition(in, memory, pos), number_of_rows};
    }

-    template <const char opening_bracket, const char closing_bracket>
-    static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in)
-    {
-        Memory memory;
-        fileSegmentationEngineJSONEachRowImpl<opening_bracket, closing_bracket>(in, memory, 0, 1, 1);
-        return String(memory.data(), memory.size());
-    }
-
-    template <class Element>
-    DataTypePtr getDataTypeFromFieldImpl(const Element & field, const FormatSettings & settings, std::unordered_set<const IDataType *> & numbers_parsed_from_json_strings)
-    {
-        if (field.isNull())
-            return nullptr;
-
-        if (field.isBool())
-            return DataTypeFactory::instance().get("Nullable(Bool)");
-
-        if (field.isInt64() || field.isUInt64())
-        {
-            if (settings.try_infer_integers)
-                return makeNullable(std::make_shared<DataTypeInt64>());
-
-            return makeNullable(std::make_shared<DataTypeFloat64>());
-        }
-
-        if (field.isDouble())
-            return makeNullable(std::make_shared<DataTypeFloat64>());
-
-        if (field.isString())
-        {
-            if (auto date_type = tryInferDateOrDateTime(field.getString(), settings))
-                return date_type;
-
-            if (!settings.json.try_infer_numbers_from_strings)
-                return makeNullable(std::make_shared<DataTypeString>());
-
-            ReadBufferFromString buf(field.getString());
-
-            if (settings.try_infer_integers)
-            {
-                Int64 tmp_int;
-                if (tryReadIntText(tmp_int, buf) && buf.eof())
-                {
-                    auto type = std::make_shared<DataTypeInt64>();
-                    numbers_parsed_from_json_strings.insert(type.get());
-                    return makeNullable(type);
-                }
-            }
-
-            Float64 tmp;
-            if (tryReadFloatText(tmp, buf) && buf.eof())
-            {
-                auto type = std::make_shared<DataTypeFloat64>();
-                numbers_parsed_from_json_strings.insert(type.get());
-                return makeNullable(type);
-            }
-
-            return makeNullable(std::make_shared<DataTypeString>());
-        }
-
-        if (field.isArray())
-        {
-            auto array = field.getArray();
-
-            /// Return nullptr in case of empty array because we cannot determine nested type.
-            if (array.size() == 0)
-                return nullptr;
-
-            DataTypes nested_data_types;
-            /// If this array contains fields with different types we will treat it as Tuple.
-            bool are_types_the_same = true;
-            for (const auto element : array)
-            {
-                auto type = getDataTypeFromFieldImpl(element, settings, numbers_parsed_from_json_strings);
-                if (!type)
-                    return nullptr;
-
-                if (!nested_data_types.empty() && !type->equals(*nested_data_types.back()))
-                    are_types_the_same = false;
-
-                nested_data_types.push_back(std::move(type));
-            }
-
-            if (!are_types_the_same)
-            {
-                auto nested_types_copy = nested_data_types;
-                transformInferredJSONTypesIfNeeded(nested_types_copy, settings, &numbers_parsed_from_json_strings);
-                are_types_the_same = true;
-                for (size_t i = 1; i < nested_types_copy.size(); ++i)
-                    are_types_the_same &= nested_types_copy[i]->equals(*nested_types_copy[i - 1]);
-
-                if (are_types_the_same)
-                    nested_data_types = std::move(nested_types_copy);
-            }
-
-            if (!are_types_the_same)
-                return std::make_shared<DataTypeTuple>(nested_data_types);
-
-            return std::make_shared<DataTypeArray>(nested_data_types.back());
-        }
-
-        if (field.isObject())
-        {
-            auto object = field.getObject();
-            DataTypes value_types;
-            for (const auto key_value_pair : object)
-            {
-                auto type = getDataTypeFromFieldImpl(key_value_pair.second, settings, numbers_parsed_from_json_strings);
-                if (!type)
-                {
-                    /// If we couldn't infer nested type and Object type is not enabled,
-                    /// we can't determine the type of this JSON field.
-                    if (!settings.json.try_infer_objects)
-                    {
-                        /// If read_objects_as_strings is enabled, we can read objects into strings.
-                        if (settings.json.read_objects_as_strings)
-                            return makeNullable(std::make_shared<DataTypeString>());
-                        return nullptr;
-                    }
-
-                    continue;
-                }
-
-                if (settings.json.try_infer_objects && isObject(type))
-                    return std::make_shared<DataTypeObject>("json", true);
-
-                value_types.push_back(type);
-            }
-
-            if (value_types.empty())
-                return nullptr;
-
-            transformInferredJSONTypesIfNeeded(value_types, settings, &numbers_parsed_from_json_strings);
-            bool are_types_equal = true;
-            for (size_t i = 1; i < value_types.size(); ++i)
-                are_types_equal &= value_types[i]->equals(*value_types[0]);
-
-            if (!are_types_equal)
-            {
-                if (!settings.json.try_infer_objects)
-                {
-                    /// If read_objects_as_strings is enabled, we can read objects into strings.
-                    if (settings.json.read_objects_as_strings)
-                        return makeNullable(std::make_shared<DataTypeString>());
-                    return nullptr;
-                }
-                return std::make_shared<DataTypeObject>("json", true);
-            }
-
-            return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), value_types[0]);
-        }
-
-        throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"};
-    }
-
-    auto getJSONParserAndElement()
-    {
-#if USE_SIMDJSON
-        return std::pair<SimdJSONParser, SimdJSONParser::Element>();
-#elif USE_RAPIDJSON
-        return std::pair<RapidJSONParser, RapidJSONParser::Element>();
-#else
-        return std::pair<DummyJSONParser, DummyJSONParser::Element>();
-#endif
-    }
-
-    DataTypePtr getDataTypeFromField(const String & field, const FormatSettings & settings)
-    {
-        auto [parser, element] = getJSONParserAndElement();
-        bool parsed = parser.parse(field, element);
-        if (!parsed)
-            throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", field);
-
-        std::unordered_set<const IDataType *> numbers_parsed_from_json_strings;
-        return getDataTypeFromFieldImpl(element, settings, numbers_parsed_from_json_strings);
-    }
-
-    template <class Extractor, const char opening_bracket, const char closing_bracket>
-    static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, const FormatSettings & settings, bool /*json_strings*/, Extractor & extractor)
-    {
-        String line = readJSONEachRowLineIntoStringImpl<opening_bracket, closing_bracket>(in);
-        auto [parser, element] = getJSONParserAndElement();
-        bool parsed = parser.parse(line, element);
-        if (!parsed)
-            throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", line);
-
-        auto fields = extractor.extract(element);
-
-        DataTypes data_types;
-        data_types.reserve(fields.size());
-        std::unordered_set<const IDataType *> numbers_parsed_from_json_strings;
-        for (const auto & field : fields)
-            data_types.push_back(getDataTypeFromFieldImpl(field, settings, numbers_parsed_from_json_strings));
-
-        /// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings.
-        ///       Should we try to parse data inside strings somehow in this case?
-
-        return data_types;
-    }
-
    std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows)
    {
        return fileSegmentationEngineJSONEachRowImpl<'{', '}'>(in, memory, min_bytes, 1, max_rows);
@ -333,68 +127,56 @@ namespace JSONUtils
        return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_bytes, min_rows, max_rows);
    }

-    struct JSONEachRowFieldsExtractor
+    /// Reads a single JSONEachRow object `{"<column_name>" : <value>, ...}` from `in` and tries to infer
+    /// a type for every value with tryInferDataTypeForSingleJSONField.
+    /// Returns the (name, type) pairs in the order the keys appear; the type is whatever the
+    /// single-field inference returned for that value.
+    /// Throws INCORRECT_DATA if the buffer ends before the closing '}'.
+    NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info)
     {
-        template <class Element>
-        std::vector<Element> extract(const Element & element)
+        skipWhitespaceIfAny(in);
+        assertChar('{', in);
+        /// Whitespace may follow '{' (for example in "{ }"), skip it so that the loop condition
+        /// below looks at the first key or at the closing bracket.
+        skipWhitespaceIfAny(in);
+        bool first = true;
+        NamesAndTypesList names_and_types;
+        while (!in.eof() && *in.position() != '}')
         {
-            /// {..., "<column_name>" : <value>, ...}
+            if (!first)
+                skipComma(in);
+            else
+                first = false;

-            if (!element.isObject())
-                throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an object");
-
-            auto object = element.getObject();
-            std::vector<Element> fields;
-            fields.reserve(object.size());
-            column_names.reserve(object.size());
-            for (const auto & key_value_pair : object)
-            {
-                column_names.emplace_back(key_value_pair.first);
-                fields.push_back(key_value_pair.second);
-            }
-
-            return fields;
+            auto name = readFieldName(in);
+            auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info);
+            names_and_types.emplace_back(name, type);
+            /// Whitespace may follow the value (for example in `{"a" : 1 }`). Skip it here so that
+            /// the loop condition sees ',' or '}'. This is a no-op if it was already consumed.
+            skipWhitespaceIfAny(in);
         }

-        std::vector<String> column_names;
-    };
+        if (in.eof())
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading JSON object");

-    NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings)
-    {
-        JSONEachRowFieldsExtractor extractor;
-        auto data_types
-            = determineColumnDataTypesFromJSONEachRowDataImpl<JSONEachRowFieldsExtractor, '{', '}'>(in, settings, json_strings, extractor);
-        NamesAndTypesList result;
-        for (size_t i = 0; i != extractor.column_names.size(); ++i)
-            result.emplace_back(extractor.column_names[i], data_types[i]);
-        return result;
+        assertChar('}', in);
+        return names_and_types;
     }

-    struct JSONCompactEachRowFieldsExtractor
+    /// Reads a single JSONCompactEachRow array `[<value>, ...]` from `in` and tries to infer a type
+    /// for every value with tryInferDataTypeForSingleJSONField.
+    /// Returns one entry per value, in order; an entry is whatever the single-field inference
+    /// returned for that value.
+    /// Throws INCORRECT_DATA if the buffer ends before the closing ']'.
+    DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info)
     {
-        template <class Element>
-        std::vector<Element> extract(const Element & element)
+        skipWhitespaceIfAny(in);
+        assertChar('[', in);
+        /// Whitespace may follow '[' (for example in "[ ]"), skip it so that the loop condition
+        /// below looks at the first value or at the closing bracket.
+        skipWhitespaceIfAny(in);
+        bool first = true;
+        DataTypes types;
+        while (!in.eof() && *in.position() != ']')
         {
-            /// [..., <value>, ...]
-            if (!element.isArray())
-                throw Exception(ErrorCodes::INCORRECT_DATA, "Root JSON value is not an array");
-
-            auto array = element.getArray();
-            std::vector<Element> fields;
-            fields.reserve(array.size());
-            for (size_t i = 0; i != array.size(); ++i)
-                fields.push_back(array[i]);
-            return fields;
+            if (!first)
+                skipComma(in);
+            else
+                first = false;
+            auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info);
+            types.push_back(std::move(type));
+            /// Whitespace may follow the value (for example in "[1, 2 ]"). Skip it here so that
+            /// the loop condition sees ',' or ']'. This is a no-op if it was already consumed.
+            skipWhitespaceIfAny(in);
         }
-    };

-    DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings)
-    {
-        JSONCompactEachRowFieldsExtractor extractor;
-        return determineColumnDataTypesFromJSONEachRowDataImpl<JSONCompactEachRowFieldsExtractor, '[', ']'>(in, settings, json_strings, extractor);
+        if (in.eof())
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF while reading JSON array");
+
+        assertChar(']', in);
+        return types;
     }

-
    bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf)
    {
        /// For JSONEachRow we can safely skip whitespace characters
--- a/src/Formats/JSONUtils.h
+++ b/src/Formats/JSONUtils.h
@ -13,24 +13,21 @@
 namespace DB
 {

+struct JSONInferenceInfo;
+
 namespace JSONUtils
 {
    std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows);
    std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows);

-    /// Parse JSON from string and convert it's type to ClickHouse type. Make the result type always Nullable.
-    /// JSON array with different nested types is treated as Tuple.
-    /// If cannot convert (for example when field contains null), return nullptr.
-    DataTypePtr getDataTypeFromField(const String & field, const FormatSettings & settings);
-
    /// Read row in JSONEachRow format and try to determine type for each field.
    /// Return list of names and types.
    /// If cannot determine the type of some field, return nullptr for it.
-    NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings);
+    NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info);

    /// Read row in JSONCompactEachRow format and try to determine type for each field.
    /// If cannot determine the type of some field, return nullptr for it.
-    DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings);
+    DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info);

    bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf);

--- a/src/Formats/ReadSchemaUtils.cpp
+++ b/src/Formats/ReadSchemaUtils.cpp
@ -197,69 +197,6 @@ ColumnsDescription readSchemaFromFormat(const String & format_name, const std::o
    return readSchemaFromFormat(format_name, format_settings, read_buffer_iterator, retry, context, buf_out);
 }

-DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type)
-{
-    if (!type)
-        return nullptr;
-
-    WhichDataType which(type);
-
-    if (which.isNothing())
-        return nullptr;
-
-    if (which.isNullable())
-    {
-        const auto * nullable_type = assert_cast<const DataTypeNullable *>(type.get());
-        return makeNullableRecursivelyAndCheckForNothing(nullable_type->getNestedType());
-    }
-
-    if (which.isArray())
-    {
-        const auto * array_type = assert_cast<const DataTypeArray *>(type.get());
-        auto nested_type = makeNullableRecursivelyAndCheckForNothing(array_type->getNestedType());
-        return nested_type ? std::make_shared<DataTypeArray>(nested_type) : nullptr;
-    }
-
-    if (which.isTuple())
-    {
-        const auto * tuple_type = assert_cast<const DataTypeTuple *>(type.get());
-        DataTypes nested_types;
-        for (const auto & element : tuple_type->getElements())
-        {
-            auto nested_type = makeNullableRecursivelyAndCheckForNothing(element);
-            if (!nested_type)
-                return nullptr;
-            nested_types.push_back(nested_type);
-        }
-        return std::make_shared<DataTypeTuple>(std::move(nested_types));
-    }
-
-    if (which.isMap())
-    {
-        const auto * map_type = assert_cast<const DataTypeMap *>(type.get());
-        auto key_type = makeNullableRecursivelyAndCheckForNothing(map_type->getKeyType());
-        auto value_type = makeNullableRecursivelyAndCheckForNothing(map_type->getValueType());
-        return key_type && value_type ? std::make_shared<DataTypeMap>(removeNullable(key_type), value_type) : nullptr;
-    }
-
-    if (which.isLowCarnality())
-    {
-        const auto * lc_type = assert_cast<const DataTypeLowCardinality *>(type.get());
-        auto nested_type = makeNullableRecursivelyAndCheckForNothing(lc_type->getDictionaryType());
-        return nested_type ? std::make_shared<DataTypeLowCardinality>(nested_type) : nullptr;
-    }
-
-    return makeNullable(type);
-}
-
-NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header)
-{
-    NamesAndTypesList result;
-    for (auto & [name, type] : header.getNamesAndTypesList())
-        result.emplace_back(name, makeNullableRecursivelyAndCheckForNothing(type));
-    return result;
-}
-
 SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context)
 {
    return getKeysForSchemaCache({source}, format, format_settings, context).front();
--- a/src/Formats/ReadSchemaUtils.h
+++ b/src/Formats/ReadSchemaUtils.h
@ -35,21 +35,7 @@ ColumnsDescription readSchemaFromFormat(
    ContextPtr & context,
    std::unique_ptr<ReadBuffer> & buf_out);

-/// Make type Nullable recursively:
-/// - Type -> Nullable(type)
-/// - Array(Type) -> Array(Nullable(Type))
-/// - Tuple(Type1, ..., TypeN) -> Tuple(Nullable(Type1), ..., Nullable(TypeN))
-/// - Map(KeyType, ValueType) -> Map(KeyType, Nullable(ValueType))
-/// - LowCardinality(Type) -> LowCardinality(Nullable(Type))
-/// If type is Nothing or one of the nested types is Nothing, return nullptr.
-DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type);
-
-/// Call makeNullableRecursivelyAndCheckForNothing for all types
-/// in the block and return names and types.
-NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header);
-
 SchemaCache::Key  getKeyForSchemaCache(const String & source, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context);
 SchemaCache::Keys  getKeysForSchemaCache(const Strings & sources, const String & format, const std::optional<FormatSettings> & format_settings, const ContextPtr & context);

-void splitSchemaCacheKey(const String & key, String & source, String & format, String & additional_format_info);
 }
--- a/src/Formats/SchemaInferenceUtils.cpp
+++ b/src/Formats/SchemaInferenceUtils.cpp
--- a/src/Formats/SchemaInferenceUtils.h
+++ b/src/Formats/SchemaInferenceUtils.h
@ -0,0 +1,93 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <IO/ReadBuffer.h>
+
+namespace DB
+{
+
+/// Struct with some additional information about inferred types for JSON formats.
+struct JSONInferenceInfo
+{
+    /// We store numbers that were parsed from strings.
+    /// It's used in types transformation to change such numbers back to string if needed.
+    std::unordered_set<const IDataType *> numbers_parsed_from_json_strings;
+    /// Indicates if currently we are inferring type for Map/Object key.
+    bool is_object_key = false;
+};
+
+/// Try to determine datatype of the value in buffer/string. If the type cannot be inferred, return nullptr.
+/// In general, it tries to parse a type using the following logic:
+/// If we see '[', we try to parse an array of values and recursively determine datatype for each element.
+/// If we see '(', we try to parse a tuple of values and recursively determine datatype for each element.
+/// If we see '{', we try to parse a Map of keys and values and recursively determine datatype for each key/value.
+/// If we see a quote '\'', we treat it as a string and read until next quote.
+/// If we see NULL it returns Nullable(Nothing)
+/// Otherwise we try to read a number.
+DataTypePtr tryInferDataTypeForSingleField(ReadBuffer & buf, const FormatSettings & settings);
+DataTypePtr tryInferDataTypeForSingleField(std::string_view field, const FormatSettings & settings);
+
+/// The same as tryInferDataTypeForSingleField, but for JSON values.
+DataTypePtr tryInferDataTypeForSingleJSONField(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info);
+DataTypePtr tryInferDataTypeForSingleJSONField(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info);
+
+/// Try to parse Date or DateTime value from a string.
+DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings);
+
+/// Try to parse a number value from a string. By default, it tries to parse Float64,
+/// but if setting try_infer_integers is enabled, it also tries to parse Int64.
+DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings);
+
+/// It takes two types inferred for the same column and tries to transform them to a common type if possible.
+/// It's also used when we try to infer some non-ordinary types from other types.
+/// Example 1:
+///     Dates inferred from strings. In this case we should check if dates were inferred from all strings
+///     in the same way and if not, transform inferred dates back to strings.
+///     For example, when we have Array(Date) (like `['2020-01-01', '2020-02-02']`) and Array(String) (like `['string', 'abc']`)
+///     we will convert the first type to Array(String).
+/// Example 2:
+///     When we have integers and floats for the same value, we should convert all integers to floats.
+///     For example, when we have Array(Int64) (like `[123, 456]`) and Array(Float64) (like `[42.42, 4.42]`)
+///     we will convert the first type to Array(Float64)
+/// Example 3:
+///     When we have incomplete types like Nullable(Nothing), Array(Nullable(Nothing)) or Tuple(UInt64, Nullable(Nothing)),
+///     we try to complete them using the other type.
+///     For example, if we have Tuple(UInt64, Nullable(Nothing)) and Tuple(Nullable(Nothing), String) we will convert both
+///     types to common type Tuple(Nullable(UInt64), Nullable(String))
+void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings);
+
+/// The same as transformInferredTypesIfNeeded but uses some specific transformations for JSON.
+/// Example 1:
+///     When we have numbers inferred from strings and strings, we convert all such numbers back to string.
+///     For example, if we have Array(Int64) (like `['123', '456']`) and Array(String) (like `['str', 'abc']`)
+///     we will convert the first type to Array(String). Note that we collect information about numbers inferred
+///     from strings in json_info during inference and use it here, so we will know that Array(Int64) contains
+///     integer inferred from a string.
+/// Example 2:
+///     When we have maps with different value types, we convert all types to JSON object type.
+///     For example, if we have Map(String, UInt64) (like `{"a" : 123}`) and Map(String, String) (like `{"b" : 'abc'}`)
+///     we will convert both types to Object('JSON').
+void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info);
+
+/// Check if type is Tuple(...), try to transform nested types to find a common type for them and if all nested types
+/// are the same after transform, we convert this tuple to an Array with common nested type.
+/// For example, if we have Tuple(String, Nullable(Nothing)) we will convert it to Array(String).
+/// It's used when all rows were read and we have Tuple in the result type that can be actually an Array.
+void transformJSONTupleToArrayIfPossible(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info);
+
+/// Make type Nullable recursively:
+/// - Type -> Nullable(type)
+/// - Array(Type) -> Array(Nullable(Type))
+/// - Tuple(Type1, ..., TypeN) -> Tuple(Nullable(Type1), ..., Nullable(TypeN))
+/// - Map(KeyType, ValueType) -> Map(KeyType, Nullable(ValueType))
+/// - LowCardinality(Type) -> LowCardinality(Nullable(Type))
+DataTypePtr makeNullableRecursively(DataTypePtr type);
+
+/// Call makeNullableRecursively for all types
+/// in the block and return names and types.
+NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header);
+
+/// Check if type is complete, i.e. doesn't contain Nothing. For example, Array(Tuple(Nullable(Nothing), String)) is not complete.
+bool checkIfTypeIsComplete(const DataTypePtr & type);
+
+}
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@ -319,12 +319,17 @@ template void readStringUntilEOFInto<PaddedPODArray<UInt8>>(PaddedPODArray<UInt8
 /** Parse the escape sequence, which can be simple (one character after backslash) or more complex (multiple characters).
  * It is assumed that the cursor is located on the `\` symbol
  */
-template <typename Vector>
-static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
+template <typename Vector, typename ReturnType = void>
+static ReturnType parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
 {
    ++buf.position();
    if (buf.eof())
-        throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
+    {
+        if constexpr (std::is_same_v<ReturnType, void>)
+            throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
+        else
+            return ReturnType(false);
+    }

    char char_after_backslash = *buf.position();

@ -363,6 +368,8 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
        s.push_back(decoded_char);
        ++buf.position();
    }
+
+    return ReturnType(true);
 }


@ -521,14 +528,18 @@ template void readEscapedStringInto<NullOutput>(NullOutput & s, ReadBuffer & buf
  *  backslash escape sequences are also parsed,
  *  that could be slightly confusing.
  */
-template <char quote, bool enable_sql_style_quoting, typename Vector>
-static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf)
+template <char quote, bool enable_sql_style_quoting, typename Vector, typename ReturnType = void>
+static ReturnType readAnyQuotedStringInto(Vector & s, ReadBuffer & buf)
 {
+    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
    if (buf.eof() || *buf.position() != quote)
    {
-        throw ParsingException(ErrorCodes::CANNOT_PARSE_QUOTED_STRING,
-            "Cannot parse quoted string: expected opening quote '{}', got '{}'",
-            std::string{quote}, buf.eof() ? "EOF" : std::string{*buf.position()});
+        if constexpr (throw_exception)
+            throw ParsingException(ErrorCodes::CANNOT_PARSE_QUOTED_STRING,
+                "Cannot parse quoted string: expected opening quote '{}', got '{}'",
+                std::string{quote}, buf.eof() ? "EOF" : std::string{*buf.position()});
+        else
+            return ReturnType(false);
    }

    ++buf.position();
@ -554,15 +565,26 @@ static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf)
                continue;
            }

-            return;
+            return ReturnType(true);
        }

        if (*buf.position() == '\\')
-            parseComplexEscapeSequence(s, buf);
+        {
+            if constexpr (throw_exception)
+                parseComplexEscapeSequence<Vector, ReturnType>(s, buf);
+            else
+            {
+                if (!parseComplexEscapeSequence<Vector, ReturnType>(s, buf))
+                    return ReturnType(false);
+            }
+        }
    }

-    throw ParsingException("Cannot parse quoted string: expected closing quote",
-        ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
+    if constexpr (throw_exception)
+        throw ParsingException("Cannot parse quoted string: expected closing quote",
+            ErrorCodes::CANNOT_PARSE_QUOTED_STRING);
+    else
+        return ReturnType(false);
 }

 template <bool enable_sql_style_quoting, typename Vector>
@ -571,6 +593,14 @@ void readQuotedStringInto(Vector & s, ReadBuffer & buf)
    readAnyQuotedStringInto<'\'', enable_sql_style_quoting>(s, buf);
 }

+template <typename Vector>
+bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf)
+{
+    return readAnyQuotedStringInto<'\'', false, Vector, bool>(s, buf);
+}
+
+template bool tryReadQuotedStringInto(String & s, ReadBuffer & buf);
+
 template <bool enable_sql_style_quoting, typename Vector>
 void readDoubleQuotedStringInto(Vector & s, ReadBuffer & buf)
 {
@ -934,6 +964,7 @@ template void readJSONStringInto<PaddedPODArray<UInt8>, void>(PaddedPODArray<UIn
 template bool readJSONStringInto<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
 template void readJSONStringInto<NullOutput>(NullOutput & s, ReadBuffer & buf);
 template void readJSONStringInto<String>(String & s, ReadBuffer & buf);
+template bool readJSONStringInto<String, bool>(String & s, ReadBuffer & buf);

 template <typename Vector, typename ReturnType>
 ReturnType readJSONObjectPossiblyInvalid(Vector & s, ReadBuffer & buf)
@ -1501,6 +1532,43 @@ static void readParsedValueInto(Vector & s, ReadBuffer & buf, ParseFunc parse_fu
    peekable_buf.position() = end;
 }

+template <typename Vector>
+static void readQuotedStringFieldInto(Vector & s, ReadBuffer & buf)
+{
+    assertChar('\'', buf);
+    s.push_back('\'');
+    while (!buf.eof())
+    {
+        char * next_pos = find_first_symbols<'\\', '\''>(buf.position(), buf.buffer().end());
+
+        s.append(buf.position(), next_pos);
+        buf.position() = next_pos;
+
+        if (!buf.hasPendingData())
+            continue;
+
+        if (*buf.position() == '\'')
+            break;
+
+        s.push_back(*buf.position());
+        if (*buf.position() == '\\')
+        {
+            ++buf.position();
+            if (!buf.eof())
+            {
+                s.push_back(*buf.position());
+                ++buf.position();
+            }
+        }
+    }
+
+    if (buf.eof())
+        return;
+
+    ++buf.position();
+    s.push_back('\'');
+}
+
 template <char opening_bracket, char closing_bracket, typename Vector>
 static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf)
 {
@ -1518,20 +1586,19 @@ static void readQuotedFieldInBracketsInto(Vector & s, ReadBuffer & buf)
        if (!buf.hasPendingData())
            continue;

-        s.push_back(*buf.position());
-
        if (*buf.position() == '\'')
        {
-            readQuotedStringInto<false>(s, buf);
-            s.push_back('\'');
+            readQuotedStringFieldInto(s, buf);
        }
        else if (*buf.position() == opening_bracket)
        {
+            s.push_back(opening_bracket);
            ++balance;
            ++buf.position();
        }
        else if (*buf.position() == closing_bracket)
        {
+            s.push_back(closing_bracket);
            --balance;
            ++buf.position();
        }
@ -1554,11 +1621,7 @@ void readQuotedFieldInto(Vector & s, ReadBuffer & buf)
    /// - Number: integer, float, decimal.

    if (*buf.position() == '\'')
-    {
-        s.push_back('\'');
-        readQuotedStringInto<false>(s, buf);
-        s.push_back('\'');
-    }
+        readQuotedStringFieldInto(s, buf);
    else if (*buf.position() == '[')
        readQuotedFieldInBracketsInto<'[', ']'>(s, buf);
    else if (*buf.position() == '(')
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@ -613,6 +613,9 @@ bool tryReadJSONStringInto(Vector & s, ReadBuffer & buf)
    return readJSONStringInto<Vector, bool>(s, buf);
 }

+template <typename Vector>
+bool tryReadQuotedStringInto(Vector & s, ReadBuffer & buf);
+
 /// Reads chunk of data between {} in that way,
 /// that it has balanced parentheses sequence of {}.
 /// So, it may form a JSON object, but it can be incorrect.
--- a/src/Processors/Formats/ISchemaReader.cpp
+++ b/src/Processors/Formats/ISchemaReader.cpp
@ -1,6 +1,5 @@
 #include <Processors/Formats/ISchemaReader.h>
-#include <Formats/ReadSchemaUtils.h>
-#include <Formats/EscapingRuleUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <DataTypes/DataTypeString.h>
 #include <Interpreters/parseColumnsListForTableFunction.h>
 #include <boost/algorithm/string.hpp>
@ -11,65 +10,29 @@ namespace DB
 namespace ErrorCodes
 {
    extern const int ONLY_NULLS_WHILE_READING_SCHEMA;
-    extern const int TYPE_MISMATCH;
    extern const int INCORRECT_DATA;
    extern const int EMPTY_DATA_PASSED;
    extern const int BAD_ARGUMENTS;
 }

-void chooseResultColumnType(
-    DataTypePtr & type,
-    DataTypePtr & new_type,
-    std::function<void(DataTypePtr &, DataTypePtr &)> transform_types_if_needed,
-    const DataTypePtr & default_type,
-    const String & column_name,
-    size_t row)
+void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read)
 {
-    if (!type)
-    {
-        type = new_type;
-        return;
-    }
-
-    if (!new_type || type->equals(*new_type))
-        return;
-
-    transform_types_if_needed(type, new_type);
-    if (type->equals(*new_type))
-        return;
-
-    /// If the new type and the previous type for this column are different,
-    /// we will use default type if we have it or throw an exception.
-    if (default_type)
-        type = default_type;
-    else
-    {
-        throw Exception(
-            ErrorCodes::TYPE_MISMATCH,
-            "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. "
-            "You can specify the type for this column using setting schema_inference_hints",
-            type->getName(),
-            column_name,
-            row,
-            new_type->getName());
-    }
-}
-
-void checkResultColumnTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read)
-{
-    if (!type)
+    if (!checkIfTypeIsComplete(type))
    {
        if (!default_type)
            throw Exception(
                ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA,
                "Cannot determine type for column '{}' by first {} rows of data, most likely this column contains only Nulls or empty "
-                "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints",
+                "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. "
+                "If your data contains complex JSON objects, try enabling one of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings",
                name,
                rows_read);

        type = default_type;
    }
-    result.emplace_back(name, type);
+
+    if (settings.schema_inference_make_columns_nullable)
+        type = makeNullableRecursively(type);
 }

 IIRowSchemaReader::IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_)
@ -88,6 +51,11 @@ void IIRowSchemaReader::setContext(ContextPtr & context)
    }
 }

+void IIRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
+{
+    transformInferredTypesIfNeeded(type, new_type, format_settings);
+}
+
 IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
    : IIRowSchemaReader(in_, format_settings_), column_names(splitColumnNames(format_settings.column_names_for_schema_inference))
 {
@ -160,23 +128,28 @@ NamesAndTypesList IRowSchemaReader::readSchema()
        if (new_data_types.size() != data_types.size())
            throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values");

-        for (size_t i = 0; i != data_types.size(); ++i)
+        for (field_index = 0; field_index != data_types.size(); ++field_index)
        {
            /// Check if we couldn't determine the type of this column in a new row
            /// or the type for this column was taken from hints.
-            if (!new_data_types[i] || hints.contains(column_names[i]))
+            if (!new_data_types[field_index] || hints.contains(column_names[field_index]))
                continue;

-            auto transform_types_if_needed = [&](DataTypePtr & type, DataTypePtr & new_type){ transformTypesIfNeeded(type, new_type, i); };
-            chooseResultColumnType(data_types[i], new_data_types[i], transform_types_if_needed, getDefaultType(i), std::to_string(i + 1), rows_read);
+            chooseResultColumnType(*this, data_types[field_index], new_data_types[field_index], getDefaultType(field_index), std::to_string(field_index + 1), rows_read);
        }
    }

    NamesAndTypesList result;
-    for (size_t i = 0; i != data_types.size(); ++i)
+    for (field_index = 0; field_index != data_types.size(); ++field_index)
    {
-        /// Check that we could determine the type of this column.
-        checkResultColumnTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), rows_read);
+        /// Don't check/change types from hints.
+        if (!hints.contains(column_names[field_index]))
+        {
+            transformFinalTypeIfNeeded(data_types[field_index]);
+            /// Check that we could determine the type of this column.
+            checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read);
+        }
+        result.emplace_back(column_names[field_index], data_types[field_index]);
    }

    return result;
@ -208,11 +181,6 @@ DataTypePtr IRowSchemaReader::getDefaultType(size_t column) const
    return nullptr;
 }

-void IRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t)
-{
-    transformInferredTypesIfNeeded(type, new_type, format_settings);
-}
-
 IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_)
    : IIRowSchemaReader(in_, format_settings_, default_type_)
 {
@ -245,7 +213,6 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
        names_order.push_back(name);
    }

-    auto transform_types_if_needed = [&](DataTypePtr & type, DataTypePtr & new_type){ transformTypesIfNeeded(type, new_type); };
    for (rows_read = 1; rows_read < max_rows_to_read; ++rows_read)
    {
        auto new_names_and_types = readRowAndGetNamesAndDataTypes(eof);
@ -277,7 +244,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
                continue;

            auto & type = it->second;
-            chooseResultColumnType(type, new_type, transform_types_if_needed, default_type, name, rows_read);
+            chooseResultColumnType(*this, type, new_type, default_type, name, rows_read);
        }
    }

@ -285,20 +252,21 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
    if (names_to_types.empty())
        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data");

-    NamesAndTypesList result;
+    NamesAndTypesList result = getStaticNamesAndTypes();
    for (auto & name : names_order)
    {
        auto & type = names_to_types[name];
-        /// Check that we could determine the type of this column.
-        checkResultColumnTypeAndAppend(result, type, name, default_type, rows_read);
+        /// Don't check/change types from hints.
+        if (!hints.contains(name))
+        {
+            transformFinalTypeIfNeeded(type);
+            /// Check that we could determine the type of this column.
+            checkFinalInferredType(type, name, format_settings, default_type, rows_read);
+        }
+        result.emplace_back(name, type);
    }

    return result;
 }

-void IRowWithNamesSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
-{
-    transformInferredTypesIfNeeded(type, new_type, format_settings);
-}
-
 }
--- a/src/Processors/Formats/ISchemaReader.h
+++ b/src/Processors/Formats/ISchemaReader.h
@ -9,6 +9,11 @@
 namespace DB
 {

+namespace ErrorCodes
+{
+    extern const int TYPE_MISMATCH;
+}
+
 /// Base class for schema inference for the data in some specific format.
 /// It reads some data from read buffer and try to determine the schema
 /// from read data.
@ -45,10 +50,14 @@ public:
    bool needContext() const override { return !hints_str.empty(); }
    void setContext(ContextPtr & context) override;

+    virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type);
+
 protected:
    void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; }
    size_t getNumRowsRead() const override { return rows_read; }

+    virtual void transformFinalTypeIfNeeded(DataTypePtr &) {}
+
    size_t max_rows_to_read;
    size_t rows_read = 0;
    DataTypePtr default_type;
@ -82,7 +91,7 @@ protected:

    void setColumnNames(const std::vector<String> & names) { column_names = names; }

-    virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t index);
+    size_t field_index;

 private:
    DataTypePtr getDefaultType(size_t column) const;
@ -111,7 +120,10 @@ protected:
    /// Set eof = true if can't read more data.
    virtual NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) = 0;

-    virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type);
+    /// Get special static types that have the same name/type for each row.
+    /// For example, in JSONObjectEachRow format we have static column with
+    /// type String and name from a setting for object keys.
+    virtual NamesAndTypesList getStaticNamesAndTypes() { return {}; }
 };

 /// Base class for schema inference for formats that don't need any data to
@ -125,16 +137,46 @@ public:
    virtual ~IExternalSchemaReader() = default;
 };

+template <class SchemaReader>
 void chooseResultColumnType(
+    SchemaReader & schema_reader,
    DataTypePtr & type,
    DataTypePtr & new_type,
-    std::function<void(DataTypePtr &, DataTypePtr &)> transform_types_if_needed,
    const DataTypePtr & default_type,
    const String & column_name,
-    size_t row);
+    size_t row)
+{
+    if (!type)
+    {
+        type = new_type;
+        return;
+    }

-void checkResultColumnTypeAndAppend(
-    NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read);
+    if (!new_type || type->equals(*new_type))
+        return;
+
+    schema_reader.transformTypesIfNeeded(type, new_type);
+    if (type->equals(*new_type))
+        return;
+
+    /// If the new type and the previous type for this column are different,
+    /// we will use default type if we have it or throw an exception.
+    if (default_type)
+        type = default_type;
+    else
+    {
+        throw Exception(
+            ErrorCodes::TYPE_MISMATCH,
+            "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. "
+            "You can specify the type for this column using setting schema_inference_hints",
+            type->getName(),
+            column_name,
+            row,
+            new_type->getName());
+    }
+}
+
+void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read);

 Strings splitColumnNames(const String & column_names_str);

--- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
@ -3,7 +3,7 @@
 #if USE_ARROW

 #include <Formats/FormatFactory.h>
-#include <Formats/ReadSchemaUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <IO/ReadBufferFromMemory.h>
 #include <IO/WriteHelpers.h>
 #include <IO/copyData.h>
--- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
@ -772,27 +772,27 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo
        case BSONType::DOUBLE:
        {
            in.ignore(sizeof(Float64));
-            return makeNullable(std::make_shared<DataTypeFloat64>());
+            return std::make_shared<DataTypeFloat64>();
        }
        case BSONType::BOOL:
        {
            in.ignore(sizeof(UInt8));
-            return makeNullable(DataTypeFactory::instance().get("Bool"));
+            return DataTypeFactory::instance().get("Bool");
        }
        case BSONType::INT64:
        {
            in.ignore(sizeof(Int64));
-            return makeNullable(std::make_shared<DataTypeInt64>());
+            return std::make_shared<DataTypeInt64>();
        }
        case BSONType::DATETIME:
        {
            in.ignore(sizeof(Int64));
-            return makeNullable(std::make_shared<DataTypeDateTime64>(6, "UTC"));
+            return std::make_shared<DataTypeDateTime64>(6, "UTC");
        }
        case BSONType::INT32:
        {
            in.ignore(sizeof(Int32));
-            return makeNullable(std::make_shared<DataTypeInt32>());
+            return std::make_shared<DataTypeInt32>();
        }
        case BSONType::SYMBOL: [[fallthrough]];
        case BSONType::JAVA_SCRIPT_CODE: [[fallthrough]];
@ -802,7 +802,7 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo
            BSONSizeT size;
            readBinary(size, in);
            in.ignore(size);
-            return makeNullable(std::make_shared<DataTypeString>());
+            return std::make_shared<DataTypeString>();
        }
        case BSONType::DOCUMENT:
        {
@ -856,10 +856,10 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo
            {
                case BSONBinarySubtype::BINARY_OLD: [[fallthrough]];
                case BSONBinarySubtype::BINARY:
-                    return makeNullable(std::make_shared<DataTypeString>());
+                    return std::make_shared<DataTypeString>();
                case BSONBinarySubtype::UUID_OLD: [[fallthrough]];
                case BSONBinarySubtype::UUID:
-                    return makeNullable(std::make_shared<DataTypeUUID>());
+                    return std::make_shared<DataTypeUUID>();
                default:
                    throw Exception(ErrorCodes::UNKNOWN_TYPE, "BSON binary subtype {} is not supported", getBSONBinarySubtypeName(subtype));
            }
--- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
@ -274,15 +274,15 @@ void CSVFormatReader::skipPrefixBeforeHeader()
 }


-CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_)
+CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
    : FormatWithNamesAndTypesSchemaReader(
        in_,
-        format_setting_,
+        format_settings_,
        with_names_,
        with_types_,
        &reader,
        getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV))
-    , reader(in_, format_setting_)
+    , reader(in_, format_settings_)
 {
 }

@ -293,7 +293,7 @@ DataTypes CSVSchemaReader::readRowAndGetDataTypes()
        return {};

    auto fields = reader.readRow();
-    return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV);
+    return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV);
 }


--- a/src/Processors/Formats/Impl/CSVRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h
@ -75,7 +75,7 @@ public:
 class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader
 {
 public:
-    CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_);
+    CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_);

 private:
    DataTypes readRowAndGetDataTypes() override;
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
@ -1,6 +1,7 @@
 #include <Processors/Formats/Impl/CustomSeparatedRowInputFormat.h>
 #include <Processors/Formats/Impl/TemplateRowInputFormat.h>
 #include <Formats/EscapingRuleUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <Formats/registerWithNamesAndTypes.h>
 #include <IO/Operators.h>

@ -370,12 +371,12 @@ DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypes()
        first_row = false;

    auto fields = reader.readRow();
-    return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule());
+    return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info);
 }

-void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t)
+void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
 {
-    transformInferredTypesIfNeeded(type, new_type, format_settings, reader.getEscapingRule());
+    transformInferredTypesByEscapingRuleIfNeeded(type, new_type, format_settings, reader.getEscapingRule(), &json_inference_info);
 }

 void registerInputFormatCustomSeparated(FormatFactory & factory)
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
@ -2,6 +2,7 @@

 #include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
 #include <Formats/ParsedTemplateFormatString.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <IO/PeekableReadBuffer.h>
 #include <IO/ReadHelpers.h>

@ -100,11 +101,12 @@ public:
 private:
    DataTypes readRowAndGetDataTypes() override;

-    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override;
+    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;

    PeekableReadBuffer buf;
    CustomSeparatedFormatReader reader;
    bool first_row = true;
+    JSONInferenceInfo json_inference_info;
 };

 }
--- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp
+++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp
@ -2,8 +2,11 @@
 #include <Processors/Formats/ISchemaReader.h>
 #include <Formats/JSONUtils.h>
 #include <Formats/EscapingRuleUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
+#include <Interpreters/parseColumnsListForTableFunction.h>
 #include <IO/ReadHelpers.h>
 #include <base/find_symbols.h>
+#include <Common/logger_useful.h>

 namespace DB
 {
@ -170,19 +173,25 @@ JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase(
    ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsReaderBase> reader_)
    : ISchemaReader(in_)
    , format_settings(format_settings_)
+    , hints_str(format_settings_.schema_inference_hints)
    , reader(std::move(reader_))
    , column_names_from_settings(splitColumnNames(format_settings_.column_names_for_schema_inference))
 {
 }

-void JSONColumnsSchemaReaderBase::chooseResulType(DataTypePtr & type, DataTypePtr & new_type, const String & column_name, size_t row) const
+void JSONColumnsSchemaReaderBase::setContext(ContextPtr & ctx)
 {
-    auto convert_types_if_needed = [&](DataTypePtr & first, DataTypePtr & second)
+    ColumnsDescription columns;
+    if (tryParseColumnsListFromString(hints_str, columns, ctx))
    {
-        DataTypes types = {first, second};
-        transformInferredJSONTypesIfNeeded(types, format_settings);
-    };
-    chooseResultColumnType(type, new_type, convert_types_if_needed, nullptr, column_name, row);
+        for (const auto & [name, type] : columns.getAll())
+            hints[name] = type;
+    }
+}
+
+void JSONColumnsSchemaReaderBase::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
+{
+    transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info);
 }

 NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema()
@ -220,9 +229,18 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema()
            if (!names_to_types.contains(column_name))
                names_order.push_back(column_name);

-            rows_in_block = 0;
-            auto column_type = readColumnAndGetDataType(column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read);
-            chooseResulType(names_to_types[column_name], column_type, column_name, total_rows_read + 1);
+            if (const auto it = hints.find(column_name); it != hints.end())
+            {
+                names_to_types[column_name] = it->second;
+            }
+            else
+            {
+                rows_in_block = 0;
+                auto column_type = readColumnAndGetDataType(
+                    column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read);
+                chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1);
+            }
+
            ++iteration;
        }
        while (!reader->checkChunkEndOrSkipColumnDelimiter());
@ -237,8 +255,14 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema()
    for (auto & name : names_order)
    {
        auto & type = names_to_types[name];
-        /// Check that we could determine the type of this column.
-        checkResultColumnTypeAndAppend(result, type, name, nullptr, format_settings.max_rows_to_read_for_schema_inference);
+        /// Don't check/change types from hints.
+        if (!hints.contains(name))
+        {
+            transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info);
+            /// Check that we could determine the type of this column.
+            checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference);
+        }
+        result.emplace_back(name, type);
    }

    return result;
@ -262,8 +286,8 @@ DataTypePtr JSONColumnsSchemaReaderBase::readColumnAndGetDataType(const String &
        }

        readJSONField(field, in);
-        DataTypePtr field_type = JSONUtils::getDataTypeFromField(field, format_settings);
-        chooseResulType(column_type, field_type, column_name, rows_read);
+        DataTypePtr field_type = tryInferDataTypeForSingleJSONField(field, format_settings, &inference_info);
+        chooseResultColumnType(*this, column_type, field_type, nullptr, column_name, rows_read);
        ++rows_read;
    }
    while (!reader->checkColumnEndOrSkipFieldDelimiter());
--- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h
+++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h
@ -1,6 +1,7 @@
 #pragma once

 #include <Formats/FormatSettings.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <Processors/Formats/IInputFormat.h>
 #include <Processors/Formats/ISchemaReader.h>

@ -76,18 +77,23 @@ class JSONColumnsSchemaReaderBase : public ISchemaReader
 public:
    JSONColumnsSchemaReaderBase(ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr<JSONColumnsReaderBase> reader_);

+    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type);
+
+    bool needContext() const override { return !hints_str.empty(); }
+    void setContext(ContextPtr & ctx) override;
+
 private:
    NamesAndTypesList readSchema() override;

    /// Read whole column in the block (up to max_rows_to_read rows) and extract the data type.
    DataTypePtr readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read);

-    /// Choose result type for column from two inferred types from different rows.
-    void chooseResulType(DataTypePtr & type, DataTypePtr & new_type, const String & column_name, size_t row) const;
-
    const FormatSettings format_settings;
+    String hints_str;
+    std::unordered_map<String, DataTypePtr> hints;
    std::unique_ptr<JSONColumnsReaderBase> reader;
    Names column_names_from_settings;
+    JSONInferenceInfo inference_info;
 };

 }
--- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
@ -7,6 +7,7 @@
 #include <Formats/verbosePrintString.h>
 #include <Formats/JSONUtils.h>
 #include <Formats/EscapingRuleUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <Formats/registerWithNamesAndTypes.h>
 #include <DataTypes/NestedUtils.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
@ -202,12 +203,17 @@ DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes()
    if (in.eof())
        return {};

-    return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, format_settings, reader.yieldStrings());
+    return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, format_settings, &inference_info);
 }

-void JSONCompactEachRowRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t)
+void JSONCompactEachRowRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
 {
-    transformInferredJSONTypesIfNeeded(type, new_type, format_settings);
+    transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info);
+}
+
+void JSONCompactEachRowRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type)
+{
+    transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info);
 }

 void registerInputFormatJSONCompactEachRow(FormatFactory & factory)
--- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
@ -4,6 +4,7 @@
 #include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
 #include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <Common/HashTable/HashMap.h>

 namespace DB
@ -80,10 +81,12 @@ public:
 private:
    DataTypes readRowAndGetDataTypes() override;

-    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override;
+    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
+    void transformFinalTypeIfNeeded(DataTypePtr & type) override;

    JSONCompactEachRowFormatReader reader;
    bool first_row = true;
+    JSONInferenceInfo inference_info;
 };

 }
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp
@ -4,6 +4,7 @@
 #include <Processors/Formats/Impl/JSONEachRowRowInputFormat.h>
 #include <Formats/JSONUtils.h>
 #include <Formats/EscapingRuleUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <Formats/FormatFactory.h>
 #include <DataTypes/NestedUtils.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
@ -300,9 +301,8 @@ void JSONEachRowRowInputFormat::readSuffix()
    assertEOF(*in);
 }

-JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings_, const FormatSettings & format_settings_)
+JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
    : IRowWithNamesSchemaReader(in_, format_settings_)
-    , json_strings(json_strings_)
 {
 }

@ -336,12 +336,17 @@ NamesAndTypesList JSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes(bool &
        return {};
    }

-    return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, json_strings);
+    return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, &inference_info);
 }

 void JSONEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
 {
-    transformInferredJSONTypesIfNeeded(type, new_type, format_settings);
+    transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info);
+}
+
+void JSONEachRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type)
+{
+    transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info);
 }

 void registerInputFormatJSONEachRow(FormatFactory & factory)
@ -391,11 +396,11 @@ void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory

 void registerJSONEachRowSchemaReader(FormatFactory & factory)
 {
-    auto register_schema_reader = [&](const String & format_name, bool json_strings)
+    auto register_schema_reader = [&](const String & format_name)
    {
-        factory.registerSchemaReader(format_name, [json_strings](ReadBuffer & buf, const FormatSettings & settings)
+        factory.registerSchemaReader(format_name, [](ReadBuffer & buf, const FormatSettings & settings)
        {
-            return std::make_unique<JSONEachRowSchemaReader>(buf, json_strings, settings);
+            return std::make_unique<JSONEachRowSchemaReader>(buf, settings);
        });
        factory.registerAdditionalInfoForSchemaCacheGetter(format_name, [](const FormatSettings & settings)
        {
@ -403,10 +408,10 @@ void registerJSONEachRowSchemaReader(FormatFactory & factory)
        });
    };

-    register_schema_reader("JSONEachRow", false);
-    register_schema_reader("JSONLines", false);
-    register_schema_reader("NDJSON", false);
-    register_schema_reader("JSONStringsEachRow", true);
+    register_schema_reader("JSONEachRow");
+    register_schema_reader("JSONLines");
+    register_schema_reader("NDJSON");
+    register_schema_reader("JSONStringsEachRow");
 }

 }
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
@ -4,6 +4,7 @@
 #include <Processors/Formats/IRowInputFormat.h>
 #include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <Common/HashTable/HashMap.h>


@ -94,15 +95,16 @@ protected:
 class JSONEachRowSchemaReader : public IRowWithNamesSchemaReader
 {
 public:
-    JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings, const FormatSettings & format_settings_);
+    JSONEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);

 private:
    NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override;
    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
+    void transformFinalTypeIfNeeded(DataTypePtr & type) override;

-    bool json_strings;
    bool first_row = true;
    bool data_in_square_brackets = false;
+    JSONInferenceInfo inference_info;
 };

 }
--- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp
@ -2,6 +2,7 @@
 #include <Formats/JSONUtils.h>
 #include <Formats/FormatFactory.h>
 #include <Formats/EscapingRuleUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <DataTypes/DataTypeString.h>

 namespace DB
@ -85,15 +86,25 @@ NamesAndTypesList JSONObjectEachRowSchemaReader::readRowAndGetNamesAndDataTypes(
        JSONUtils::skipComma(in);

    JSONUtils::readFieldName(in);
-    auto names_and_types = JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false);
+    return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, &inference_info);
+}
+
+NamesAndTypesList JSONObjectEachRowSchemaReader::getStaticNamesAndTypes()
+{
    if (!format_settings.json_object_each_row.column_for_object_name.empty())
-        names_and_types.emplace_front(format_settings.json_object_each_row.column_for_object_name, std::make_shared<DataTypeString>());
-    return names_and_types;
+        return {{format_settings.json_object_each_row.column_for_object_name, std::make_shared<DataTypeString>()}};
+
+    return {};
 }

 void JSONObjectEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
 {
-    transformInferredJSONTypesIfNeeded(type, new_type, format_settings);
+    transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info);
+}
+
+void JSONObjectEachRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type)
+{
+    transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info);
 }

 void registerInputFormatJSONObjectEachRow(FormatFactory & factory)
--- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h
@ -4,6 +4,7 @@
 #include <Processors/Formats/Impl/JSONEachRowRowInputFormat.h>
 #include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <Common/HashTable/HashMap.h>


@ -41,9 +42,12 @@ public:

 private:
    NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override;
+    NamesAndTypesList getStaticNamesAndTypes() override;
    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
+    void transformFinalTypeIfNeeded(DataTypePtr & type) override;

    bool first_row = true;
+    JSONInferenceInfo inference_info;
 };

 std::optional<size_t> getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & settings);
--- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
@ -247,6 +247,14 @@ static void insertNull(IColumn & column, DataTypePtr type)

 static void insertUUID(IColumn & column, DataTypePtr type, const char * value, size_t size)
 {
+    auto insert_func = [&](IColumn & column_, DataTypePtr type_)
+    {
+        insertUUID(column_, type_, value, size);
+    };
+
+    if (checkAndInsertNullable(column, type, insert_func) || checkAndInsertLowCardinality(column, type, insert_func))
+        return;
+
    if (!isUUID(type))
        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack UUID into column with type {}.", type->getName());
    ReadBufferFromMemory buf(value, size);
@ -470,16 +478,16 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object)
    {
        case msgpack::type::object_type::POSITIVE_INTEGER: [[fallthrough]];
        case msgpack::type::object_type::NEGATIVE_INTEGER:
-            return makeNullable(std::make_shared<DataTypeInt64>());
+            return std::make_shared<DataTypeInt64>();
        case msgpack::type::object_type::FLOAT32:
-            return makeNullable(std::make_shared<DataTypeFloat32>());
+            return std::make_shared<DataTypeFloat32>();
        case msgpack::type::object_type::FLOAT64:
-            return makeNullable(std::make_shared<DataTypeFloat64>());
+            return std::make_shared<DataTypeFloat64>();
        case msgpack::type::object_type::BOOLEAN:
-            return makeNullable(std::make_shared<DataTypeUInt8>());
+            return std::make_shared<DataTypeUInt8>();
        case msgpack::type::object_type::BIN: [[fallthrough]];
        case msgpack::type::object_type::STR:
-            return makeNullable(std::make_shared<DataTypeString>());
+            return std::make_shared<DataTypeString>();
        case msgpack::type::object_type::ARRAY:
        {
            msgpack::object_array object_array = object.via.array;
--- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp
@ -435,7 +435,7 @@ DataTypes MySQLDumpSchemaReader::readRowAndGetDataTypes()
            skipFieldDelimiter(in);

        readQuotedField(value, in);
-        auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted);
+        auto type = tryInferDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted);
        data_types.push_back(std::move(type));
    }
    skipEndOfRow(in, table_name);
--- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
@ -3,7 +3,7 @@
 #if USE_ORC

 #include <Formats/FormatFactory.h>
-#include <Formats/ReadSchemaUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <IO/ReadBufferFromMemory.h>
 #include <IO/WriteHelpers.h>
 #include <IO/copyData.h>
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@ -4,7 +4,7 @@
 #if USE_PARQUET

 #include <Formats/FormatFactory.h>
-#include <Formats/ReadSchemaUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <IO/ReadBufferFromMemory.h>
 #include <IO/copyData.h>
 #include <arrow/api.h>
--- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
@ -3,6 +3,7 @@
 #include <Processors/Formats/Impl/RegexpRowInputFormat.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
 #include <Formats/EscapingRuleUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <Formats/newLineSegmentationEngine.h>
 #include <IO/ReadHelpers.h>

@ -155,15 +156,15 @@ DataTypes RegexpSchemaReader::readRowAndGetDataTypes()
    for (size_t i = 0; i != field_extractor.getMatchedFieldsSize(); ++i)
    {
        String field(field_extractor.getField(i));
-        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule));
+        data_types.push_back(tryInferDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule, &json_inference_info));
    }

    return data_types;
 }

-void RegexpSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t)
+void RegexpSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
 {
-    transformInferredTypesIfNeeded(type, new_type, format_settings, format_settings.regexp.escaping_rule);
+    transformInferredTypesByEscapingRuleIfNeeded(type, new_type, format_settings, format_settings.regexp.escaping_rule, &json_inference_info);
 }


--- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h
+++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h
@ -5,12 +5,13 @@
 #include <string>
 #include <vector>
 #include <Core/Block.h>
+#include <IO/PeekableReadBuffer.h>
 #include <Processors/Formats/IRowInputFormat.h>
 #include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 #include <Formats/FormatFactory.h>
-#include <IO/PeekableReadBuffer.h>
 #include <Formats/ParsedTemplateFormatString.h>
+#include <Formats/SchemaInferenceUtils.h>


 namespace DB
@ -81,12 +82,13 @@ public:
 private:
    DataTypes readRowAndGetDataTypes() override;

-    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override;
+    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;


    using EscapingRule = FormatSettings::EscapingRule;
    RegexpFieldExtractor field_extractor;
    PeekableReadBuffer buf;
+    JSONInferenceInfo json_inference_info;
 };

 }
--- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
@ -249,7 +249,7 @@ NamesAndTypesList TSKVSchemaReader::readRowAndGetNamesAndDataTypes(bool & eof)
        if (has_value)
        {
            readEscapedString(value, in);
-            names_and_types.emplace_back(std::move(name), determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped));
+            names_and_types.emplace_back(std::move(name), tryInferDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped));
        }
        else
        {
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@ -268,7 +268,7 @@ DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypes()
        return {};

    auto fields = reader.readRow();
-    return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule());
+    return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule());
 }

 void registerInputFormatTabSeparated(FormatFactory & factory)
--- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
@ -2,6 +2,7 @@
 #include <Formats/FormatFactory.h>
 #include <Formats/verbosePrintString.h>
 #include <Formats/EscapingRuleUtils.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <IO/Operators.h>
 #include <DataTypes/DataTypeNothing.h>
 #include <DataTypes/Serializations/SerializationNullable.h>
@ -511,16 +512,16 @@ DataTypes TemplateSchemaReader::readRowAndGetDataTypes()
        format_reader.skipDelimiter(i);
        updateFormatSettingsIfNeeded(row_format.escaping_rules[i], format_settings, row_format, default_csv_delimiter, i);
        field = readFieldByEscapingRule(buf, row_format.escaping_rules[i], format_settings);
-        data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i]));
+        data_types.push_back(tryInferDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i], &json_inference_info));
    }

    format_reader.skipRowEndDelimiter();
    return data_types;
 }

-void TemplateSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t column_idx)
+void TemplateSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
 {
-    transformInferredTypesIfNeeded(type, new_type, format_settings, row_format.escaping_rules[column_idx]);
+    transformInferredTypesByEscapingRuleIfNeeded(type, new_type, format_settings, row_format.escaping_rules[field_index], &json_inference_info);
 }

 static ParsedTemplateFormatString fillResultSetFormat(const FormatSettings & settings)
--- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h
+++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h
@ -5,6 +5,7 @@
 #include <Processors/Formats/ISchemaReader.h>
 #include <Formats/FormatSettings.h>
 #include <Formats/ParsedTemplateFormatString.h>
+#include <Formats/SchemaInferenceUtils.h>
 #include <IO/ReadHelpers.h>
 #include <IO/PeekableReadBuffer.h>
 #include <Interpreters/Context.h>
@ -121,13 +122,14 @@ public:
    DataTypes readRowAndGetDataTypes() override;

 private:
-    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t column_idx) override;
+    void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;

    PeekableReadBuffer buf;
    const ParsedTemplateFormatString format;
    const ParsedTemplateFormatString row_format;
    TemplateFormatReader format_reader;
    bool first_row = true;
+    JSONInferenceInfo json_inference_info;
    const char default_csv_delimiter;
 };

--- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
@ -593,14 +593,14 @@ DataTypes ValuesSchemaReader::readRowAndGetDataTypes()
    {
        if (!data_types.empty())
        {
-            skipWhitespaceIfAny(buf);
            assertChar(',', buf);
            skipWhitespaceIfAny(buf);
        }

        readQuotedField(value, buf);
-        auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted);
+        auto type = tryInferDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted);
        data_types.push_back(std::move(type));
+        skipWhitespaceIfAny(buf);
    }

    assertChar(')', buf);
--- a/tests/performance/schema_inference_text_formats.xml
+++ b/tests/performance/schema_inference_text_formats.xml
@ -0,0 +1,23 @@
+<test>
+
+<substitutions>
+    <substitution>
+        <name>format</name>
+        <values>
+            <value>TabSeparated</value>
+            <value>CSV</value>
+            <value>Values</value>
+            <value>JSONEachRow</value>
+            <value>JSONCompactEachRow</value>
+        </values>
+    </substitution>
+</substitutions>
+    
+
+<fill_query>INSERT INTO function file(data.{format}) SELECT WatchID, Title, EventTime, RefererCategories, RefererRegions FROM test.hits LIMIT 25000 SETTINGS engine_file_truncate_on_insert=1</fill_query>
+
+<query>DESC file(data.{format}) SETTINGS schema_inference_use_cache_for_file=0</query>
+
+<drop_query>INSERT INTO FUNCTION file(data.{format}) SELECT * FROM numbers(0) SETTINGS engine_file_truncate_on_insert=1</drop_query>
+
+</test>
--- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference
+++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference
@ -24,12 +24,12 @@ fixed_string	Nullable(String)
 Str: 0	100
 Str: 1	200
 array	Array(Nullable(UInt64))					
-tuple	Tuple(Nullable(UInt64), Nullable(String))					
+tuple	Tuple(`1` Nullable(UInt64), `2` Nullable(String))					
 map	Map(String, Nullable(UInt64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64))))					
-nested2	Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8))					
+nested1	Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64))))					
+nested2	Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 ArrowStream
@ -58,12 +58,12 @@ fixed_string	Nullable(String)
 Str: 0	100
 Str: 1	200
 array	Array(Nullable(UInt64))					
-tuple	Tuple(Nullable(UInt64), Nullable(String))					
+tuple	Tuple(`1` Nullable(UInt64), `2` Nullable(String))					
 map	Map(String, Nullable(UInt64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64))))					
-nested2	Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8))					
+nested1	Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64))))					
+nested2	Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 Parquet
@ -92,12 +92,12 @@ fixed_string	Nullable(String)
 Str: 0	100
 Str: 1	200
 array	Array(Nullable(UInt64))					
-tuple	Tuple(Nullable(UInt64), Nullable(String))					
+tuple	Tuple(`1` Nullable(UInt64), `2` Nullable(String))					
 map	Map(String, Nullable(UInt64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64))))					
-nested2	Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8))					
+nested1	Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64))))					
+nested2	Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 ORC
@ -126,12 +126,12 @@ fixed_string	Nullable(String)
 Str: 0	100
 Str: 1	200
 array	Array(Nullable(Int64))					
-tuple	Tuple(Nullable(Int64), Nullable(String))					
+tuple	Tuple(`1` Nullable(Int64), `2` Nullable(String))					
 map	Map(String, Nullable(Int64))					
 [0,1]	(0,'0')	{'0':0}
 [1,2]	(1,'1')	{'1':1}
-nested1	Array(Tuple(Array(Nullable(Int64)), Map(String, Nullable(Int64))))					
-nested2	Tuple(Tuple(Array(Array(Nullable(Int64))), Map(Int64, Array(Tuple(Nullable(Int64), Nullable(String))))), Nullable(Int8))					
+nested1	Array(Tuple(`1` Array(Nullable(Int64)), `2` Map(String, Nullable(Int64))))					
+nested2	Tuple(`1` Tuple(`1` Array(Array(Nullable(Int64))), `2` Map(Int64, Array(Tuple(`1` Nullable(Int64), `2` Nullable(String))))), `2` Nullable(Int8))					
 [([0,1],{'42':0}),([],{}),([42],{'42':42})]	(([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42)
 [([1,2],{'42':1}),([],{}),([42],{'42':42})]	(([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42)
 Native
--- a/tests/queries/0_stateless/02187_msg_pack_uuid.reference
+++ b/tests/queries/0_stateless/02187_msg_pack_uuid.reference
@ -1,4 +1,4 @@
 5e7084e0-019f-461f-9e70-84e0019f561f
 5e7084e0-019f-461f-9e70-84e0019f561f
 5e7084e0-019f-461f-9e70-84e0019f561f
-5e7084e0-019f-461f-9e70-84e0019f561f	UUID
+5e7084e0-019f-461f-9e70-84e0019f561f	Nullable(UUID)
--- a/tests/queries/0_stateless/02223_insert_select_schema_inference.sql
+++ b/tests/queries/0_stateless/02223_insert_select_schema_inference.sql
@ -1,5 +1,5 @@
 drop table if exists test;
 create table test (x UInt32, y String, d Date) engine=Memory() as select number as x, toString(number) as y, toDate(number) as d from numbers(10);
-insert into table function file('data.native.zst') select * from test;
+insert into table function file('data.native.zst') select * from test settings engine_file_truncate_on_insert=1;
 desc file('data.native.zst');
 select * from file('data.native.zst');
--- a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference
+++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference
@ -2,7 +2,7 @@ Arrow
 x	Nullable(UInt64)					
 arr1	Array(Nullable(UInt64))					
 arr2	Array(Array(Nullable(String)))					
-arr3	Array(Tuple(Nullable(String), Nullable(UInt64)))					
+arr3	Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64)))					
 0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
 \N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
 2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
@ -12,7 +12,7 @@ ArrowStream
 x	Nullable(UInt64)					
 arr1	Array(Nullable(UInt64))					
 arr2	Array(Array(Nullable(String)))					
-arr3	Array(Tuple(Nullable(String), Nullable(UInt64)))					
+arr3	Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64)))					
 0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
 \N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
 2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
@ -22,7 +22,7 @@ Parquet
 x	Nullable(UInt64)					
 arr1	Array(Nullable(UInt64))					
 arr2	Array(Array(Nullable(String)))					
-arr3	Array(Tuple(Nullable(String), Nullable(UInt64)))					
+arr3	Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64)))					
 0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
 \N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
 2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
@ -32,7 +32,7 @@ ORC
 x	Nullable(Int64)					
 arr1	Array(Nullable(Int64))					
 arr2	Array(Array(Nullable(String)))					
-arr3	Array(Tuple(Nullable(String), Nullable(Int64)))					
+arr3	Array(Tuple(`1` Nullable(String), `2` Nullable(Int64)))					
 0	[0,1]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,0)]
 \N	[NULL,2]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,1)]
 2	[2,3]	[[NULL,'String'],[NULL],[]]	[(NULL,NULL),('String',NULL),(NULL,2)]
--- a/tests/queries/0_stateless/02245_s3_support_read_nested_column.reference
+++ b/tests/queries/0_stateless/02245_s3_support_read_nested_column.reference
@ -4,7 +4,8 @@ drop table if exists test_02245_s3_nested_parquet2;
 set input_format_parquet_import_nested = 1;
 create table test_02245_s3_nested_parquet1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet1_{_partition_id}', format='Parquet') partition by a;
 insert into test_02245_s3_nested_parquet1 values (1, (2, 'a'));
-select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet');  -- { serverError 47 }
+select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet');
+1	2	a
 create table test_02245_s3_nested_parquet2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet2_{_partition_id}', format='Parquet') partition by a;
 insert into test_02245_s3_nested_parquet2 values (1, (2, (3, 'a')));
 select a, b.a, b.b.c, b.b.d from s3(s3_conn, filename='test_02245_s3_nested_parquet2_*', format='Parquet', structure='a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))');
@ -14,7 +15,8 @@ drop table if exists test_02245_s3_nested_arrow2;
 set input_format_arrow_import_nested=1;
 create table test_02245_s3_nested_arrow1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow1_{_partition_id}', format='Arrow') partition by a;
 insert into test_02245_s3_nested_arrow1 values (1, (2, 'a'));
-select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow');  -- { serverError 47 }
+select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow');
+1	2	a
 create table test_02245_s3_nested_arrow2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow2_{_partition_id}', format='Arrow') partition by a;
 insert into test_02245_s3_nested_arrow2 values (1, (2, (3, 'a')));
 select a, b.a, b.b.c, b.b.d from s3(s3_conn, filename='test_02245_s3_nested_arrow2_*', format='Arrow', structure='a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))');
@ -24,7 +26,8 @@ drop table if exists test_02245_s3_nested_orc2;
 set input_format_orc_import_nested=1;
 create table test_02245_s3_nested_orc1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_orc1_{_partition_id}', format='ORC') partition by a;
 insert into test_02245_s3_nested_orc1 values (1, (2, 'a'));
-select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC');  -- { serverError 47 }
+select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC');
+1	2	a
 create table test_02245_s3_nested_orc2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_orc2_{_partition_id}', format='ORC') partition by a;
 insert into test_02245_s3_nested_orc2 values (1, (2, (3, 'a')));
 select a, b.a, b.b.c, b.b.d from s3(s3_conn, filename='test_02245_s3_nested_orc2_*', format='ORC', structure='a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))');
--- a/tests/queries/0_stateless/02245_s3_support_read_nested_column.sql
+++ b/tests/queries/0_stateless/02245_s3_support_read_nested_column.sql
@ -8,7 +8,7 @@ set input_format_parquet_import_nested = 1;
 create table test_02245_s3_nested_parquet1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet1_{_partition_id}', format='Parquet') partition by a;
 insert into test_02245_s3_nested_parquet1 values (1, (2, 'a'));

-select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet');  -- { serverError 47 }
+select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet');

 create table test_02245_s3_nested_parquet2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet2_{_partition_id}', format='Parquet') partition by a;
 insert into test_02245_s3_nested_parquet2 values (1, (2, (3, 'a')));
@ -22,7 +22,7 @@ set input_format_arrow_import_nested=1;
 create table test_02245_s3_nested_arrow1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow1_{_partition_id}', format='Arrow') partition by a;
 insert into test_02245_s3_nested_arrow1 values (1, (2, 'a'));

-select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow');  -- { serverError 47 }
+select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow');

 create table test_02245_s3_nested_arrow2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow2_{_partition_id}', format='Arrow') partition by a;
 insert into test_02245_s3_nested_arrow2 values (1, (2, (3, 'a')));
@ -36,7 +36,7 @@ set input_format_orc_import_nested=1;
 create table test_02245_s3_nested_orc1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_orc1_{_partition_id}', format='ORC') partition by a;
 insert into test_02245_s3_nested_orc1 values (1, (2, 'a'));

-select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC');  -- { serverError 47 }
+select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC');

 create table test_02245_s3_nested_orc2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_orc2_{_partition_id}', format='ORC') partition by a;
 insert into test_02245_s3_nested_orc2 values (1, (2, (3, 'a')));
--- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sh
+++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh
@ -14,5 +14,5 @@ $CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.
 $CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data4.jsonl', 'TSV') select 1 as x";
 $CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.jsonl', 'TSV') select [1,2,3] as x SETTINGS engine_file_truncate_on_insert = 1";

-$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') settings schema_inference_use_cache_for_file=0" 2>&1 | grep -F -q "INCORRECT_DATA" && echo "OK" || echo "FAIL";
+$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') settings schema_inference_use_cache_for_file=0" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo "OK" || echo "FAIL";

--- a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql
+++ b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql
@ -1,7 +1,7 @@
 -- Tags: no-fasttest

 insert into function file('02268_data.jsonl', 'TSV') select 1;
-select * from file('02268_data.jsonl'); --{serverError 117}
+select * from file('02268_data.jsonl'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED}

 insert into function file('02268_data.jsonCompactEachRow', 'TSV') select 1;
-select * from file('02268_data.jsonCompactEachRow'); --{serverError 117}
+select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED}
--- a/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql
+++ b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql
@ -1,2 +1,2 @@
-insert into function file('02269_data', 'RowBinary') select 1;
+insert into function file('02269_data', 'RowBinary') select 1 settings engine_file_truncate_on_insert=1;
 select * from file('02269_data', 'RowBinary', 'x UInt8');
--- a/tests/queries/0_stateless/02475_bson_each_row_format.reference
+++ b/tests/queries/0_stateless/02475_bson_each_row_format.reference
@ -233,11 +233,11 @@ Schema inference
 x	Nullable(Int32)					
 x	Nullable(Int64)					
 x	Nullable(Int64)					
-FAIL
+OK
 x	Array(Nullable(Int32))					
 x	Array(Nullable(Int64))					
 x	Array(Nullable(Int64))					
-FAIL
+OK
 OK
 OK
 OK
--- a/tests/queries/0_stateless/02475_bson_each_row_format.sh
+++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh
@ -164,7 +164,7 @@ $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select
 $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)"
 $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Int64 as x from numbers(2)"
 $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)"
-$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::UInt64 as x from numbers(2)"
+$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select toString(number) as x from numbers(2)"
 $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL"

 $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Bool] as x from numbers(2) settings engine_file_truncate_on_insert=1"
@ -174,7 +174,7 @@ $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select
 $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)"
 $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Int64] as x from numbers(2)"
 $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)"
-$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::UInt64] as x from numbers(2)"
+$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [toString(number)] as x from numbers(2)"
 $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL"

 $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [] as x from numbers(2) settings engine_file_truncate_on_insert=1"
--- a/tests/queries/0_stateless/02497_schema_inference_nulls.reference
+++ b/tests/queries/0_stateless/02497_schema_inference_nulls.reference
@ -0,0 +1,48 @@
+JSONEachRow
+x	Array(Nullable(Int64))					
+x	Array(Nullable(Int64))					
+x	Array(Nullable(Int64))					
+x	Array(Nullable(Int64))					
+x	Tuple(Nullable(String), Nullable(Int64))					
+x	Tuple(Nullable(String), Nullable(Int64))					
+x	Map(String, Nullable(Int64))					
+x	Map(String, Nullable(Int64))					
+x	Array(Nullable(Int64))					
+x	Array(Array(Nullable(Int64)))					
+x	Array(Map(String, Nullable(Int64)))					
+x	Array(Array(Nullable(String)))					
+x	Array(Int64)					
+x	Array(Nullable(Int64))					
+x	Array(Int64)					
+x	Array(Nullable(Int64))					
+JSONCompactEachRow
+c1	Array(Nullable(Int64))					
+c1	Array(Nullable(Int64))					
+c1	Array(Nullable(Int64))					
+c1	Array(Nullable(Int64))					
+c1	Tuple(Nullable(String), Nullable(Int64))					
+c1	Tuple(Nullable(String), Nullable(Int64))					
+c1	Map(String, Nullable(Int64))					
+c1	Map(String, Nullable(Int64))					
+c1	Array(Nullable(Int64))					
+c1	Array(Array(Nullable(Int64)))					
+c1	Array(Map(String, Nullable(Int64)))					
+c1	Array(Array(Nullable(String)))					
+c1	Array(Int64)					
+c1	Array(Nullable(Int64))					
+c1	Array(Int64)					
+c1	Array(Nullable(Int64))					
+CSV
+c1	Array(Nullable(Int64))					
+c1	Array(Nullable(Int64))					
+c1	Array(Nullable(Int64))					
+c1	Array(Nullable(Int64))					
+c1	Map(String, Nullable(Int64))					
+c1	Map(String, Nullable(Int64))					
+c1	Array(Array(Nullable(Int64)))					
+c1	Array(Map(String, Nullable(Int64)))					
+c1	Array(Array(Nullable(String)))					
+c1	Array(Int64)					
+c1	Array(Nullable(Int64))					
+c1	Array(Int64)					
+c1	Array(Nullable(Int64))					
--- a/tests/queries/0_stateless/02497_schema_inference_nulls.sql
+++ b/tests/queries/0_stateless/02497_schema_inference_nulls.sql
@ -0,0 +1,63 @@
+select 'JSONEachRow';
+set schema_inference_make_columns_nullable=1;
+desc format(JSONEachRow, '{"x" : 1234}, {"x" : "String"}') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError TYPE_MISMATCH }
+desc format(JSONEachRow, '{"x" : [null, 1]}');
+desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : []}');
+desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : [null]}');
+desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : [1, null]}');
+desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : ["abc", 1]}');
+desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : ["abc", null]}');
+desc format(JSONEachRow, '{"x" : {}}, {"x" : {"a" : 1}}');
+desc format(JSONEachRow, '{"x" : {"a" : null}}, {"x" : {"b" : 1}}');
+desc format(JSONEachRow, '{"x" : null}, {"x" : [1, 2]}');
+desc format(JSONEachRow, '{"x" : [[], [null], [1, 2, 3]]}');
+desc format(JSONEachRow, '{"x" : [{"a" : null}, {"b" : 1}]}');
+desc format(JSONEachRow, '{"x" : [["2020-01-01", null, "1234"], ["abcd"]]}');
+
+set schema_inference_make_columns_nullable=0;
+desc format(JSONEachRow, '{"x" : [1, 2]}');
+desc format(JSONEachRow, '{"x" : [null, 1]}');
+desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [3]}');
+desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [null]}');
+
+select 'JSONCompactEachRow';
+set schema_inference_make_columns_nullable=1;
+desc format(JSONCompactEachRow, '[1234], ["String"]') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError TYPE_MISMATCH }
+desc format(JSONCompactEachRow, '[[null, 1]]');
+desc format(JSONCompactEachRow, '[[null, 1]], [[]]');
+desc format(JSONCompactEachRow, '[[null, 1]], [[null]]');
+desc format(JSONCompactEachRow, '[[null, 1]], [[1, null]]');
+desc format(JSONCompactEachRow, '[[null, 1]], [["abc", 1]]');
+desc format(JSONCompactEachRow, '[[null, 1]], [["abc", null]]');
+desc format(JSONCompactEachRow, '[{}], [{"a" : 1}]');
+desc format(JSONCompactEachRow, '[{"a" : null}], [{"b" : 1}]');
+desc format(JSONCompactEachRow, '[null], [[1, 2]]');
+desc format(JSONCompactEachRow, '[[[], [null], [1, 2, 3]]]');
+desc format(JSONCompactEachRow, '[[{"a" : null}, {"b" : 1}]]');
+desc format(JSONCompactEachRow, '[[["2020-01-01", null, "1234"], ["abcd"]]]');
+
+set schema_inference_make_columns_nullable=0;
+desc format(JSONCompactEachRow, '[[1, 2]]');
+desc format(JSONCompactEachRow, '[[null, 1]]');
+desc format(JSONCompactEachRow, '[[1, 2]], [[3]]');
+desc format(JSONCompactEachRow, '[[1, 2]], [[null]]');
+
+
+select 'CSV';
+set schema_inference_make_columns_nullable=1;
+desc format(CSV, '"[null, 1]"');
+desc format(CSV, '"[null, 1]"\n"[]"');
+desc format(CSV, '"[null, 1]"\n"[null]"');
+desc format(CSV, '"[null, 1]"\n"[1, null]"');
+desc format(CSV, '"{}"\n"{\'a\' : 1}"');
+desc format(CSV, '"{\'a\' : null}"\n"{\'b\' : 1}"');
+desc format(CSV, '"[[], [null], [1, 2, 3]]"');
+desc format(CSV, '"[{\'a\' : null}, {\'b\' : 1}]"');
+desc format(CSV, '"[[\'2020-01-01\', null, \'1234\'], [\'abcd\']]"');
+
+set schema_inference_make_columns_nullable=0;
+desc format(CSV, '"[1,2]"');
+desc format(CSV, '"[NULL, 1]"');
+desc format(CSV, '"[1, 2]"\n"[3]"');
+desc format(CSV, '"[1, 2]"\n"[null]"');
+
--- a/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.reference
+++ b/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.reference
@ -0,0 +1 @@
+s	Nullable(String)					
--- a/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.sh
+++ b/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.sh
@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+$CLICKHOUSE_LOCAL -q "select randomString(100) as s format JSONEachRow" | $CLICKHOUSE_LOCAL -q "desc test" --table='test' --input-format='JSONEachRow' 
--- a/tests/queries/0_stateless/02499_escaped_quote_schema_inference.reference
+++ b/tests/queries/0_stateless/02499_escaped_quote_schema_inference.reference
@ -0,0 +1,2 @@
+c1	Array(Nullable(String))					
+c1	Nullable(String)					
--- a/tests/queries/0_stateless/02499_escaped_quote_schema_inference.sql
+++ b/tests/queries/0_stateless/02499_escaped_quote_schema_inference.sql
@ -0,0 +1,2 @@
+desc format(CSV, '"[\'abc\\\'\']"');
+desc format(Values, '(\'abc\\\'\')');
--- a/tests/queries/0_stateless/02500_numbers_inference.reference
+++ b/tests/queries/0_stateless/02500_numbers_inference.reference
@ -0,0 +1,20 @@
+x	Nullable(Float64)					
+x	Nullable(Float64)					
+x	Nullable(Int64)					
+x	Nullable(Int64)					
+x	Nullable(Float64)					
+x	Nullable(Float64)					
+x	Array(Nullable(Float64))					
+x	Array(Nullable(Float64))					
+x	Array(Nullable(Float64))					
+x	Array(Nullable(Float64))					
+c1	Nullable(Float64)					
+c1	Nullable(Float64)					
+c1	Nullable(Int64)					
+c1	Nullable(Int64)					
+c1	Nullable(Float64)					
+c1	Nullable(Float64)					
+c1	Array(Nullable(Float64))					
+c1	Array(Nullable(Float64))					
+c1	Array(Nullable(Float64))					
+c1	Array(Nullable(Float64))					
--- a/tests/queries/0_stateless/02500_numbers_inference.sh
+++ b/tests/queries/0_stateless/02500_numbers_inference.sh
@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1.2}')";
+echo '{"x" : 1.2}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test";
+$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1}')";
+echo '{"x" : 1}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test";
+$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1e10}')";
+echo '{"x" : 1e10}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test";
+$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, 1, 1e10]}')";
+echo '{"x" : [1, 42.42, 1, 1e10]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test";
+$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, false]}')";
+echo '{"x" : [1, 42.42, false]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test";
+
+$CLICKHOUSE_LOCAL -q "desc format(TSV, '1.2')";
+echo '1.2' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test";
+$CLICKHOUSE_LOCAL -q "desc format(TSV, '1')";
+echo '1' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test";
+$CLICKHOUSE_LOCAL -q "desc format(TSV, '1e10')";
+echo '1e10' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test";
+$CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, 1, 1e10]')";
+echo '[1, 42.42, 1, 1e10]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test";
+$CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, false]')";
+echo '[1, 42.42, false]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test";
+
--- a/tests/queries/0_stateless/02501_deep_recusion_schema_inference.reference
+++ b/tests/queries/0_stateless/02501_deep_recusion_schema_inference.reference
@ -0,0 +1 @@
+OK
--- a/tests/queries/0_stateless/02501_deep_recusion_schema_inference.sh
+++ b/tests/queries/0_stateless/02501_deep_recusion_schema_inference.sh
@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# Tags: no-parallel
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+$CLICKHOUSE_LOCAL -q "select repeat('[', 10000) || '1,2,3' || repeat(']', 10000)" > 02501_deep_nested_array.tsv
+$CLICKHOUSE_LOCAL -q "desc file(02501_deep_nested_array.tsv)" 2>&1 | grep -q -F "TOO_DEEP_RECURSION" && echo "OK" || echo "FAIL"
+rm 02501_deep_nested_array.tsv
+
--- a/tests/queries/0_stateless/02502_bad_values_schema_inference.reference
+++ b/tests/queries/0_stateless/02502_bad_values_schema_inference.reference
--- a/tests/queries/0_stateless/02502_bad_values_schema_inference.sql
+++ b/tests/queries/0_stateless/02502_bad_values_schema_inference.sql
@ -0,0 +1,2 @@
+desc format(Values, '(\'abc)'); -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED }
+
				`@ -0,0 +1,2 @@`
				`desc format(Values, '(\'abc)'); -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED }`