ClickHouse/src/Formats/SchemaInferenceUtils.cpp

#include <Formats/SchemaInferenceUtils.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/transformTypesRecursively.h>
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/DataTypeFactory.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/parseDateTimeBestEffort.h>
#include <IO/PeekableReadBuffer.h>

#include <Core/Block.h>
#include <Common/assert_cast.h>
#include <Common/SipHash.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int TOO_DEEP_RECURSION;
    extern const int NOT_IMPLEMENTED;
    extern const int INCORRECT_DATA;
    extern const int ONLY_NULLS_WHILE_READING_SCHEMA;
}

namespace
{
    /// Special data type that represents JSON object as a set of paths and their types.
    /// It supports merging two JSON objects and creating Named Tuple from itself.
    /// It's used only for schema inference of Named Tuples from JSON objects.
    /// Example:
    /// JSON objects:
    /// "obj1" : {"a" : {"b" : 1, "c" : {"d" : 'Hello'}}, "e" : "World"}
    /// "obj2" : {"a" : {"b" : 2, "f" : [1,2,3]}, "g" : {"h" : 42}}
    /// JSONPaths type for each object:
    /// obj1 : {'a.b' : Int64, 'a.c.d' : String, 'e' : String}
    /// obj2 : {'a.b' : Int64, 'a.f' : Array(Int64), 'g.h' : Int64}
    /// Merged JSONPaths type for obj1 and obj2:
    /// obj1 ⋃ obj2 : {'a.b' : Int64, 'a.c.d' : String, 'a.f' : Array(Int64), 'e' : String, 'g.h' : Int64}
    /// Result Named Tuple:
    /// Tuple(a Tuple(b Int64, c Tuple(d String), f Array(Int64)), e String, g Tuple(h Int64))
    class DataTypeJSONPaths : public IDataTypeDummy
    {
    public:
        /// We create DataTypeJSONPaths on each row in input data, to
        /// compare and merge such types faster, we use hash map to
        /// store mapping path -> data_type. Path is a vector
        /// of path components, to use hash map we need a hash
        /// for std::vector<String>. We cannot just concatenate
        /// components with '.' and store it as a string,
        /// because components can also contain '.'
        struct PathHash
        {
            size_t operator()(const std::vector<String> & path) const
            {
                SipHash hash;
                hash.update(path.size());
                for (const auto & part : path)
                    hash.update(part);
                return hash.get64();
            }
        };

        using Paths = std::unordered_map<std::vector<String>, DataTypePtr, PathHash>;

        explicit DataTypeJSONPaths(Paths paths_) : paths(std::move(paths_))
        {
        }

        DataTypeJSONPaths() = default;

        const char * getFamilyName() const override { return "JSONPaths"; }
        String doGetName() const override { return finalize()->getName(); }
        TypeIndex getTypeId() const override { return TypeIndex::JSONPaths; }

        bool isParametric() const override
        {
            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method isParametric is not implemented for JSONObjectForInference type");
        }

        bool equals(const IDataType & rhs) const override
        {
            if (this == &rhs)
                return true;

            if (rhs.getTypeId() != getTypeId())
                return false;

            const auto & rhs_paths = assert_cast<const DataTypeJSONPaths &>(rhs).paths;
            if (paths.size() != rhs_paths.size())
                return false;

            for (const auto & [path, type] : paths)
            {
                auto it = rhs_paths.find(path);
                if (it == rhs_paths.end() || !it->second->equals(*type))
                    return false;
            }

            return true;
        }

        bool merge(const DataTypeJSONPaths & rhs, std::function<void(DataTypePtr & type1, DataTypePtr & type2)> transform_types)
        {
            for (const auto & [rhs_path, rhs_type] : rhs.paths)
            {
                auto [it, inserted] = paths.insert({rhs_path, rhs_type});
                if (!inserted)
                {
                    auto & type = it->second;
                    /// If types are different, try to apply provided transform function.
                    if (!type->equals(*rhs_type))
                    {
                        auto rhs_type_copy = rhs_type;
                        transform_types(type, rhs_type_copy);
                        /// If types for the same path are different even after transform, we cannot merge these objects.
                        if (!type->equals(*rhs_type_copy))
                            return false;
                    }
                }
            }

            return true;
        }

        bool empty() const { return paths.empty(); }

        DataTypePtr finalize() const
        {
            if (paths.empty())
                throw Exception(ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, "Cannot infer named Tuple from JSON object because object is empty");

            /// Construct a path tree from list of paths and their types and convert it to named Tuple.
            /// Example:
            /// Paths : {'a.b' : Int64, 'a.c.d' : String, 'e' : String, 'f.g' : Array(Int64), 'f.h' : String}
            /// Tree:
            ///              ┌─ 'c' ─ 'd' (String)
            ///       ┌─ 'a' ┴─ 'b' (Int64)
            /// root ─┼─ 'e' (String)
            ///       └─ 'f' ┬─ 'g' (Array(Int64))
            ///              └─ 'h' (String)
            /// Result Named Tuple:
            /// Tuple('a' Tuple('b' Int64, 'c' Tuple('d' String)), 'e' String, 'f' Tuple('g' Array(Int64), 'h' String))
            PathNode root_node;
            for (const auto & [path, type] : paths)
            {
                PathNode * current_node = &root_node;
                String current_path;
                for (const auto & name : path)
                {
                    current_path += (current_path.empty() ? "" : ".") + name;
                    current_node = &current_node->nodes[name];
                    current_node->path = current_path;
                }

                current_node->leaf_type = type;
            }

            return root_node.getType();
        }

    private:
        struct PathNode
        {
            /// Use just map to have result tuple with names in lexicographic order.
            /// No strong reason for it, made for consistency.
            std::map<String, PathNode> nodes;
            DataTypePtr leaf_type;
            /// Store path to this node for better exception message in case of ambiguous paths.
            String path;

            DataTypePtr getType() const
            {
                /// Check if we have ambiguous paths.
                /// For example:
                /// 'a.b.c' : Int32 and 'a.b' : String
                /// Also check if leaf type is Nothing, because the next situation is possible:
                /// {"a" : {"b" : null}} -> 'a.b' : Nullable(Nothing)
                /// {"a" : {"b" : {"c" : 42}}} -> 'a.b.c' : Int32
                /// And after merge we will have ambiguous paths 'a.b.c' : Int32 and 'a.b' : Nullable(Nothing),
                /// but it's a valid case and we should ignore path 'a.b'.
                if (leaf_type && !isNothing(removeNullable(leaf_type)) && !nodes.empty())
                    throw Exception(ErrorCodes::INCORRECT_DATA, "JSON objects have ambiguous paths: '{}' with type {} and '{}'", path, leaf_type->getName(), nodes.begin()->second.path);

                if (nodes.empty())
                    return leaf_type;

                Names node_names;
                node_names.reserve(nodes.size());
                DataTypes node_types;
                node_types.reserve(nodes.size());
                for (const auto & [name, node] : nodes)
                {
                    node_names.push_back(name);
                    node_types.push_back(node.getType());
                }

                return std::make_shared<DataTypeTuple>(std::move(node_types), std::move(node_names));
            }
        };

        Paths paths;
    };

    bool checkIfTypesAreEqual(const DataTypes & types)
    {
        if (types.empty())
            return true;

        for (size_t i = 1; i < types.size(); ++i)
        {
            if (!types[0]->equals(*types[i]))
                return false;
        }
        return true;
    }

    /// Rebuilds type_indexes from scratch so that it holds exactly the type ids present in data_types.
    void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        type_indexes.clear();
        for (size_t i = 0; i != data_types.size(); ++i)
            type_indexes.insert(data_types[i]->getTypeId());
    }

    /// When Nothing is mixed with other types, every Nothing is replaced with the first
    /// non Nothing type of the list.
    /// For example types [Nothing, String, Nothing] become [String, String, String].
    void transformNothingSimpleTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        /// Nothing to do unless Nothing is present together with at least one other type index.
        if (type_indexes.size() <= 1 || !type_indexes.contains(TypeIndex::Nothing))
            return;

        /// Pick the first type that is not Nothing as the replacement.
        DataTypePtr replacement = nullptr;
        for (const auto & candidate : data_types)
        {
            if (isNothing(candidate))
                continue;

            replacement = candidate;
            break;
        }

        for (auto & slot : data_types)
        {
            if (isNothing(slot))
                slot = replacement;
        }

        type_indexes.erase(TypeIndex::Nothing);
    }

    /// When both Int64 and UInt64 are present, every Int64 becomes UInt64,
    /// because UInt64 is inferred only in case of Int64 overflow.
    void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        const bool have_signed = type_indexes.contains(TypeIndex::Int64);
        const bool have_unsigned = type_indexes.contains(TypeIndex::UInt64);
        if (!(have_signed && have_unsigned))
            return;

        for (size_t i = 0; i != data_types.size(); ++i)
        {
            /// A separate type object is created for every replaced element.
            if (WhichDataType(data_types[i]).isInt64())
                data_types[i] = std::make_shared<DataTypeUInt64>();
        }

        type_indexes.erase(TypeIndex::Int64);
    }

    /// When integers (Int64/UInt64) are mixed with Float64, every integer becomes Float64.
    void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        const bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64);
        const bool have_floats = type_indexes.contains(TypeIndex::Float64);
        if (!(have_integers && have_floats))
            return;

        for (size_t i = 0; i != data_types.size(); ++i)
        {
            const WhichDataType which(data_types[i]);
            const bool is_integer = which.isInt64() || which.isUInt64();
            if (is_integer)
                data_types[i] = std::make_shared<DataTypeFloat64>();
        }

        type_indexes.erase(TypeIndex::Int64);
        type_indexes.erase(TypeIndex::UInt64);
    }

    /// If we have only Date and DateTime64 types, convert Date to DateTime64(9),
    /// otherwise (Date/DateTime64 mixed with any other type), convert all Date and DateTime64 to String.
    /// type_indexes is kept in sync with the conversions made in data_types.
    void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        bool have_dates = type_indexes.contains(TypeIndex::Date);
        bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64);
        /// True when the set of type indexes consists of nothing but Date and/or DateTime64.
        bool all_dates_or_datetimes = (type_indexes.size() == (static_cast<size_t>(have_dates) + static_cast<size_t>(have_datetimes)));

        if (!all_dates_or_datetimes && (have_dates || have_datetimes))
        {
            for (auto & type : data_types)
            {
                if (isDate(type) || isDateTime64(type))
                    type = std::make_shared<DataTypeString>();
            }

            /// The types converted above are Date and DateTime64, so these are the
            /// indexes that must be dropped (DateTime is never tracked by this function).
            type_indexes.erase(TypeIndex::Date);
            type_indexes.erase(TypeIndex::DateTime64);
            type_indexes.insert(TypeIndex::String);
            return;
        }

        if (have_dates && have_datetimes)
        {
            for (auto & type : data_types)
            {
                if (isDate(type))
                    type = std::make_shared<DataTypeDateTime64>(9);
            }

            type_indexes.erase(TypeIndex::Date);
        }
    }

    /// When numbers (Int64/UInt64/Float64) are mixed with String, numbers are turned back into String.
    /// A number is converted if read_numbers_as_strings is enabled, if no json_info is provided,
    /// or if json_info records this exact type object as parsed from a JSON string.
    void transformJSONNumbersBackToString(
        DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
    {
        const bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64)
            || type_indexes.contains(TypeIndex::Float64);
        const bool have_strings = type_indexes.contains(TypeIndex::String);
        if (!(have_strings && have_numbers))
            return;

        /// In these two cases every number is converted regardless of where it came from.
        const bool convert_every_number = settings.json.read_numbers_as_strings || !json_info;

        for (auto & type : data_types)
        {
            if (!isNumber(type))
                continue;

            if (convert_every_number || json_info->numbers_parsed_from_json_strings.contains(type.get()))
                type = std::make_shared<DataTypeString>();
        }

        updateTypeIndexes(data_types, type_indexes);
    }

    /// When Bool is mixed with numbers (Int64/UInt64/Float64), every Bool becomes a number:
    /// Int64 if Int64 is present, else UInt64 if UInt64 is present, else Float64.
    void transformBoolsAndNumbersToNumbers(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        const bool have_bools = type_indexes.contains(TypeIndex::UInt8);
        const bool have_signed_integers = type_indexes.contains(TypeIndex::Int64);
        const bool have_unsigned_integers = type_indexes.contains(TypeIndex::UInt64);
        const bool have_floats = type_indexes.contains(TypeIndex::Float64);

        /// Both a Bool and some Integer/Float are required.
        const bool have_numbers = have_signed_integers || have_unsigned_integers || have_floats;
        if (!(have_bools && have_numbers))
            return;

        for (auto & type : data_types)
        {
            if (!isBool(type))
                continue;

            if (have_signed_integers)
                type = std::make_shared<DataTypeInt64>();
            else if (have_unsigned_integers)
                type = std::make_shared<DataTypeUInt64>();
            else
                type = std::make_shared<DataTypeFloat64>();
        }

        type_indexes.erase(TypeIndex::UInt8);
    }

    /// When Nothing/Nullable(Nothing) is mixed with other types, every Nothing/Nullable(Nothing)
    /// is replaced with a non Nothing type from the list. The scan does not stop at the first
    /// match, so the replacement is the last non Nothing type encountered.
    /// For example, [Nothing, Array(Int64)] becomes [Array(Int64), Array(Int64)]
    /// (it can happen when transforming complex nested types like [Array(Nothing), Array(Array(Int64))])
    void transformNothingComplexTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        DataTypePtr replacement = nullptr;
        bool saw_nothing = false;
        for (size_t i = 0; i != data_types.size(); ++i)
        {
            if (isNothing(removeNullable(data_types[i])))
                saw_nothing = true;
            else
                replacement = data_types[i];
        }

        /// Need at least one Nothing and at least one real type.
        if (!(saw_nothing && replacement))
            return;

        for (auto & slot : data_types)
        {
            if (isNothing(removeNullable(slot)))
                slot = replacement;
        }

        updateTypeIndexes(data_types, type_indexes);
    }

    /// When at least one type is Nullable, every type that can be inside Nullable is made Nullable.
    void transformNullableTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        const bool have_nullable = type_indexes.contains(TypeIndex::Nullable);
        if (!have_nullable)
            return;

        for (size_t i = 0; i != data_types.size(); ++i)
        {
            auto & slot = data_types[i];
            if (slot->canBeInsideNullable())
                slot = makeNullable(slot);
        }

        updateTypeIndexes(data_types, type_indexes);
    }

    /// An unnamed Tuple whose elements all have the same type, like Tuple(Int64, Int64),
    /// is converted to Array(Int64). It's used for JSON values.
    /// For example a Tuple(Int64, Nullable(Nothing)) that was previously transformed to
    /// Tuple(Nullable(Int64), Nullable(Int64)) is turned into Array(Nullable(Int64)) here.
    /// As soon as a Tuple with explicit names is met, the function returns immediately.
    void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        if (!type_indexes.contains(TypeIndex::Tuple))
            return;

        /// Stays true only if every tuple was converted to an array.
        bool all_tuples_converted = true;
        for (auto & type : data_types)
        {
            if (!isTuple(type))
                continue;

            const auto * tuple = assert_cast<const DataTypeTuple *>(type.get());
            if (tuple->haveExplicitNames())
                return;

            const auto & elements = tuple->getElements();
            if (!checkIfTypesAreEqual(elements))
            {
                all_tuples_converted = false;
                continue;
            }

            type = std::make_shared<DataTypeArray>(elements.back());
        }

        if (all_tuples_converted)
            type_indexes.erase(TypeIndex::Tuple);
    }

    /// Forward declaration: the definition appears further down in this file, but the
    /// helpers located between here and the definition already call it.
    /// json_info defaults to nullptr.
    template <bool is_json>
    void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info = nullptr);

    /// Unnamed Tuples and Arrays are all converted to Array when a common type
    /// exists for all of their nested types.
    /// For example, [Tuple(Nullable(Nothing), String), Array(Date), Tuple(Date, String)]
    /// is converted to Array(String) for every element.
    /// If any Tuple has explicit names, nothing is changed.
    void transformJSONTuplesAndArraysToArrays(
        DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
    {
        if (!type_indexes.contains(TypeIndex::Tuple))
            return;

        const bool have_arrays = type_indexes.contains(TypeIndex::Array);

        /// Compare the size of every tuple with the size of the first tuple that has a non-zero size.
        size_t expected_tuple_size = 0;
        bool tuple_sizes_are_equal = true;
        for (const auto & type : data_types)
        {
            if (!isTuple(type))
                continue;

            const auto & tuple = assert_cast<const DataTypeTuple &>(*type);
            if (tuple.haveExplicitNames())
                return;

            const size_t current_size = tuple.getElements().size();
            if (expected_tuple_size == 0)
                expected_tuple_size = current_size;
            else if (current_size != expected_tuple_size)
                tuple_sizes_are_equal = false;
        }

        /// Proceed only if there are arrays, or all tuples have the same size.
        if (!(have_arrays || tuple_sizes_are_equal))
            return;

        /// Gather the nested type of every array and every element type of every tuple.
        DataTypes all_nested_types;
        for (const auto & type : data_types)
        {
            if (isArray(type))
            {
                all_nested_types.push_back(assert_cast<const DataTypeArray &>(*type).getNestedType());
            }
            else if (isTuple(type))
            {
                for (const auto & element : assert_cast<const DataTypeTuple &>(*type).getElements())
                    all_nested_types.push_back(element);
            }
        }

        transformInferredTypesIfNeededImpl<true>(all_nested_types, settings, json_info);
        if (!checkIfTypesAreEqual(all_nested_types))
            return;

        /// A common nested type was found: every array and tuple becomes an Array of it.
        for (auto & type : data_types)
        {
            if (isArray(type) || isTuple(type))
                type = std::make_shared<DataTypeArray>(all_nested_types.back());
        }

        type_indexes.erase(TypeIndex::Tuple);
    }

    /// When both Map and String are present, every Map is converted to String.
    void transformMapsAndStringsToStrings(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        const bool have_maps = type_indexes.contains(TypeIndex::Map);
        const bool have_strings = type_indexes.contains(TypeIndex::String);
        if (!(have_maps && have_strings))
            return;

        for (size_t i = 0; i != data_types.size(); ++i)
        {
            if (isMap(data_types[i]))
                data_types[i] = std::make_shared<DataTypeString>();
        }

        type_indexes.erase(TypeIndex::Map);
    }

    /// Merges all JSONPaths types of the list into a single JSONPaths type and
    /// replaces each of them with this merged type.
    /// Types found under the same path are unified with transformInferredJSONTypesIfNeeded.
    /// NOTE(review): the boolean result of DataTypeJSONPaths::merge is not inspected here,
    /// so a failed merge still ends with the replacement below - confirm this is intended.
    void mergeJSONPaths(DataTypes & data_types, TypeIndexesSet & type_indexes, const FormatSettings & settings, JSONInferenceInfo * json_info)
    {
        if (!type_indexes.contains(TypeIndex::JSONPaths))
            return;

        auto merged_paths = std::make_shared<DataTypeJSONPaths>();
        auto unify_types = [&](DataTypePtr & type1, DataTypePtr & type2)
        {
            transformInferredJSONTypesIfNeeded(type1, type2, settings, json_info);
        };

        /// First pass: accumulate the paths of every JSONPaths type.
        for (const auto & type : data_types)
        {
            const auto * paths_type = typeid_cast<const DataTypeJSONPaths *>(type.get());
            if (paths_type)
                merged_paths->merge(*paths_type, unify_types);
        }

        /// Second pass: substitute the merged type for every JSONPaths type.
        for (auto & type : data_types)
        {
            if (type->getTypeId() == TypeIndex::JSONPaths)
                type = merged_paths;
        }
    }

    /// Merges all named Tuples of the list into one named Tuple that contains the union of
    /// their element names, and replaces each named Tuple with it.
    /// If some element has types in different tuples that cannot be brought to a common type,
    /// nothing is changed.
    void mergeNamedTuples(DataTypes & data_types, TypeIndexesSet & type_indexes, const FormatSettings & settings, JSONInferenceInfo * json_info)
    {
        if (!type_indexes.contains(TypeIndex::Tuple))
            return;

        /// For each element name, all the types seen for it across the named tuples.
        std::unordered_map<String, DataTypes> types_by_name;
        /// Element names in the order of their first appearance.
        Names ordered_names;
        for (const auto & type : data_types)
        {
            const auto * named_tuple = typeid_cast<const DataTypeTuple *>(type.get());
            if (!named_tuple || !named_tuple->haveExplicitNames())
                continue;

            const auto & tuple_elements = named_tuple->getElements();
            const auto & tuple_names = named_tuple->getElementNames();
            for (size_t pos = 0; pos != tuple_elements.size(); ++pos)
            {
                auto [entry, first_seen] = types_by_name.try_emplace(tuple_names[pos]);
                if (first_seen)
                    ordered_names.push_back(tuple_names[pos]);
                entry->second.push_back(tuple_elements[pos]);
            }
        }

        /// Look for a common type of every element name.
        DataTypes merged_elements;
        merged_elements.reserve(types_by_name.size());
        for (const auto & name : ordered_names)
        {
            /// Work on a copy, the collected types themselves are left untouched.
            DataTypes candidates = types_by_name[name];
            transformInferredTypesIfNeededImpl<true>(candidates, settings, json_info);

            /// Types of this element stay different, there is nothing we can do.
            if (!checkIfTypesAreEqual(candidates))
                return;

            merged_elements.push_back(candidates.front());
        }

        DataTypePtr merged_tuple = std::make_shared<DataTypeTuple>(merged_elements, ordered_names);

        for (auto & type : data_types)
        {
            const auto * named_tuple = typeid_cast<const DataTypeTuple *>(type.get());
            if (named_tuple && named_tuple->haveExplicitNames())
                type = merged_tuple;
        }
    }

    /// Tries to bring the given inferred types to a common type by running a fixed
    /// sequence of transformations over them.
    /// Two callbacks are built - one for "simple" and one for "complex" types - and both are
    /// handed to transformTypesRecursively together with the types.
    /// When is_json is false, each callback returns right after its format-independent part;
    /// the JSON specific transformations run only for is_json = true.
    /// The order of the transformations inside each callback is significant: every step
    /// works on data_types/type_indexes as left by the previous one.
    template <bool is_json>
    void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info)
    {
        auto transform_simple_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes)
        {
            /// Remove all Nothing type if possible.
            transformNothingSimpleTypes(data_types, type_indexes);

            if (settings.try_infer_integers)
            {
                /// Transform Int64 to UInt64 if needed.
                transformIntegers(data_types, type_indexes);
                /// Transform integers to floats if needed.
                transformIntegersAndFloatsToFloats(data_types, type_indexes);
            }

            /// Transform Date to DateTime or both to String if needed.
            if (settings.try_infer_dates || settings.try_infer_datetimes)
                transformDatesAndDateTimes(data_types, type_indexes);

            /// Everything below is applied for JSON formats only.
            if constexpr (!is_json)
                return;

            /// Check settings specific for JSON formats.

            /// Convert numbers inferred from strings back to strings if needed.
            if (settings.json.try_infer_numbers_from_strings || settings.json.read_numbers_as_strings)
                transformJSONNumbersBackToString(data_types, settings, type_indexes, json_info);

            /// Convert Bool to number (Int64/Float64) if needed.
            if (settings.json.read_bools_as_numbers)
                transformBoolsAndNumbersToNumbers(data_types, type_indexes);

            /// Merge JSONPaths types (objects inferred as sets of paths) into a single one.
            if (settings.json.try_infer_objects_as_tuples)
                mergeJSONPaths(data_types, type_indexes, settings, json_info);
        };

        auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes)
        {
            /// Make types Nullable if needed.
            transformNullableTypes(data_types, type_indexes);

            /// If we have type Nothing, it means that we had empty Array/Map while inference.
            /// If there is at least one non Nothing type, change all Nothing types to it.
            transformNothingComplexTypes(data_types, type_indexes);

            /// Everything below is applied for JSON formats only.
            if constexpr (!is_json)
                return;

            /// Convert JSON tuples with same nested types to arrays.
            transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes);

            /// Convert JSON tuples and arrays to arrays if possible.
            transformJSONTuplesAndArraysToArrays(data_types, settings, type_indexes, json_info);

            /// Convert Map to String when both are present and objects may be read as strings.
            if (settings.json.read_objects_as_strings)
                transformMapsAndStringsToStrings(data_types, type_indexes);

            /// Named tuples are merged only when json_info is provided and explicitly allows it.
            if (json_info && json_info->allow_merging_named_tuples)
                mergeNamedTuples(data_types, type_indexes, settings, json_info);
        };

        transformTypesRecursively(types, transform_simple_types, transform_complex_types);
    }

    /// Forward declaration: tryInferArray and tryInferTuple below call it to infer the type
    /// of each nested element, passing an increased depth. depth defaults to 1.
    template <bool is_json>
    DataTypePtr tryInferDataTypeForSingleFieldImpl(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth = 1);

    bool tryInferDate(std::string_view field)
    {
        if (field.empty())
            return false;

        ReadBufferFromString buf(field);
        Float64 tmp_float;
        /// Check if it's just a number, and if so, don't try to infer Date from it,
        /// because we can interpret this number as a Date (for example 20000101 will be 2000-01-01)
        /// and it will lead to inferring Date instead of simple Int64/UInt64 in some cases.
        if (tryReadFloatText(tmp_float, buf) && buf.eof())
            return false;

        buf.seek(0, SEEK_SET); /// Return position to the beginning

        DayNum tmp;
        return tryReadDateText(tmp, buf) && buf.eof();
    }

    bool tryInferDateTime(std::string_view field, const FormatSettings & settings)
    {
        if (field.empty())
            return false;

        ReadBufferFromString buf(field);
        Float64 tmp_float;
        /// Check if it's just a number, and if so, don't try to infer DateTime from it,
        /// because we can interpret this number as a timestamp and it will lead to
        /// inferring DateTime instead of simple Int64/Float64 in some cases.
        if (tryReadFloatText(tmp_float, buf) && buf.eof())
            return false;

        buf.seek(0, SEEK_SET); /// Return position to the beginning
        DateTime64 tmp;
        switch (settings.date_time_input_format)
        {
            case FormatSettings::DateTimeInputFormat::Basic:
                if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof())
                    return true;
                break;
            case FormatSettings::DateTimeInputFormat::BestEffort:
                if (tryParseDateTime64BestEffort(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof())
                    return true;
                break;
            case FormatSettings::DateTimeInputFormat::BestEffortUS:
                if (tryParseDateTime64BestEffortUS(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof())
                    return true;
                break;
        }

        return false;
    }

    /// Tries to infer the type of an array written as '[elem, elem, ...]' at the current position of buf.
    /// Returns:
    ///  - nullptr if the text is malformed or the type of some element could not be inferred;
    ///  - Array(Nothing) for an empty array;
    ///  - Array(T) if all elements have (or can be transformed to) a common type T;
    ///  - otherwise an unnamed Tuple of the element types for JSON, and nullptr for other formats.
    template <bool is_json>
    DataTypePtr tryInferArray(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth)
    {
        assertChar('[', buf);
        skipWhitespaceIfAny(buf);

        DataTypes element_types;
        bool all_elements_valid = true;
        for (bool is_first_element = true; !buf.eof() && *buf.position() != ']'; is_first_element = false)
        {
            if (!is_first_element)
            {
                /// Elements must be separated by a comma.
                if (!checkChar(',', buf))
                    return nullptr;
                skipWhitespaceIfAny(buf);
            }

            auto element_type = tryInferDataTypeForSingleFieldImpl<is_json>(buf, settings, json_info, depth + 2);
            if (element_type)
                element_types.push_back(element_type);
            else
                all_elements_valid = false;

            skipWhitespaceIfAny(buf);
        }

        /// The data ended before the closing ']'.
        if (buf.eof())
            return nullptr;

        assertChar(']', buf);
        skipWhitespaceIfAny(buf);

        /// Some element had no valid type. This is reported only after the whole array was consumed.
        if (!all_elements_valid)
            return nullptr;

        /// Empty array has type Array(Nothing)
        if (element_types.empty())
            return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNothing>());

        if (checkIfTypesAreEqual(element_types))
            return std::make_shared<DataTypeArray>(std::move(element_types.back()));

        /// Element types differ, so a common type has to be looked for.
        /// If they are still different after the transformation, the result is a Tuple for JSON
        /// and nullptr for other formats (nullptr means the type could not be inferred).
        if constexpr (is_json)
        {
            /// For JSON, types that are not complete are not transformed at all and
            /// the result is returned as a Tuple.
            /// For example, types [Float64, Nullable(Nothing), Float64] can be
            /// Array(Float64) or Tuple(Float64, <some_type>, Float64), and it is impossible
            /// to tell which one it is right now. It becomes possible later,
            /// when types from other rows for this column are available.
            /// For example, if the next row gives types [Nullable(Nothing), String, Float64],
            /// the type of the column is Tuple(Nullable(Float64), Nullable(String), Float64).
            for (const auto & element_type : element_types)
            {
                if (!checkIfTypeIsComplete(element_type))
                    return std::make_shared<DataTypeTuple>(element_types);
            }

            /// Transform a copy: if no common type is found, the Tuple keeps the original types.
            auto transformed_types = element_types;
            transformInferredTypesIfNeededImpl<is_json>(transformed_types, settings, json_info);

            if (checkIfTypesAreEqual(transformed_types))
                return std::make_shared<DataTypeArray>(transformed_types.back());

            return std::make_shared<DataTypeTuple>(element_types);
        }
        else
        {
            transformInferredTypesIfNeededImpl<is_json>(element_types, settings);
            if (checkIfTypesAreEqual(element_types))
                return std::make_shared<DataTypeArray>(element_types.back());

            /// No common type for the array elements.
            return nullptr;
        }
    }

    /// Tries to infer the type of a tuple written as '(elem, elem, ...)' at the current position of buf.
    /// Returns an unnamed Tuple of the element types, or nullptr if the text is malformed,
    /// the tuple is empty, or the type of some element could not be inferred.
    /// Elements are always inferred with is_json = false.
    DataTypePtr tryInferTuple(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth)
    {
        assertChar('(', buf);
        skipWhitespaceIfAny(buf);

        DataTypes element_types;
        bool all_elements_valid = true;
        for (bool is_first_element = true; !buf.eof() && *buf.position() != ')'; is_first_element = false)
        {
            if (!is_first_element)
            {
                /// Elements must be separated by a comma.
                if (!checkChar(',', buf))
                    return nullptr;
                skipWhitespaceIfAny(buf);
            }

            auto element_type = tryInferDataTypeForSingleFieldImpl<false>(buf, settings, json_info, depth + 1);
            if (element_type)
                element_types.push_back(element_type);
            else
                all_elements_valid = false;

            skipWhitespaceIfAny(buf);
        }

        /// The data ended before the closing ')'.
        if (buf.eof())
            return nullptr;

        assertChar(')', buf);
        skipWhitespaceIfAny(buf);

        /// Some element had no valid type, or there were no elements at all.
        if (!all_elements_valid || element_types.empty())
            return nullptr;

        return std::make_shared<DataTypeTuple>(element_types);
    }

    /// Tries to infer a numeric type from the data at the current buffer position.
    /// With settings.try_infer_integers enabled the result is:
    ///  - Int64, if the data can be read as a signed integer and float parsing stops at the same position;
    ///  - UInt64, if signed parsing failed, unsigned parsing succeeded and float parsing stops at the same position;
    ///  - Float64, if only float parsing succeeds (or it consumes more data than the integer parsing).
    /// With settings.try_infer_integers disabled the result can only be Float64.
    /// Returns nullptr if the data is not a number or the buffer is at eof.
    DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings)
    {
        if (buf.eof())
            return nullptr;

        Float64 tmp_float;
        if (settings.try_infer_integers)
        {
            /// If we read from String, we can do it in a more efficient way.
            /// Note: string_buf itself is not used below, the cast only detects the kind of the buffer.
            if (auto * string_buf = dynamic_cast<ReadBufferFromString *>(&buf))
            {
                /// Remember the pointer to the start of the number to rollback to it.
                char * number_start = buf.position();
                Int64 tmp_int;
                bool read_int = tryReadIntText(tmp_int, buf);
                /// If we reached eof, it cannot be float (it requires no less data than integer)
                if (buf.eof())
                    return read_int ? std::make_shared<DataTypeInt64>() : nullptr;

                char * int_end = buf.position();
                /// We can safely get back to the start of the number, because we read from a string and we didn't reach eof.
                buf.position() = number_start;

                bool read_uint = false;
                char * uint_end = nullptr;
                /// In case of Int64 overflow we can try to infer UInt64.
                if (!read_int)
                {
                    UInt64 tmp_uint;
                    read_uint = tryReadIntText(tmp_uint, buf);
                    /// If we reached eof, it cannot be float (it requires no less data than integer)
                    if (buf.eof())
                        return read_uint ? std::make_shared<DataTypeUInt64>() : nullptr;

                    uint_end = buf.position();
                    buf.position() = number_start;
                }

                /// The last attempt: parse as float and compare where it stopped with
                /// the positions where the integer attempts stopped.
                if (tryReadFloatText(tmp_float, buf))
                {
                    if (read_int && buf.position() == int_end)
                        return std::make_shared<DataTypeInt64>();
                    if (read_uint && buf.position() == uint_end)
                        return std::make_shared<DataTypeUInt64>();
                    return std::make_shared<DataTypeFloat64>();
                }

                return nullptr;
            }

            /// We should use PeekableReadBuffer, because we need to
            /// rollback to the start of number to parse it as integer first
            /// and then as float.
            PeekableReadBuffer peekable_buf(buf);
            PeekableReadBufferCheckpoint checkpoint(peekable_buf);
            Int64 tmp_int;
            bool read_int = tryReadIntText(tmp_int, peekable_buf);
            /// Remember where the integer parsing stopped before returning to the start of the number.
            auto * int_end = peekable_buf.position();
            peekable_buf.rollbackToCheckpoint(true);

            bool read_uint = false;
            char * uint_end = nullptr;
            /// In case of Int64 overflow we can try to infer UInt64.
            if (!read_int)
            {
                PeekableReadBufferCheckpoint new_checkpoint(peekable_buf);
                UInt64 tmp_uint;
                read_uint = tryReadIntText(tmp_uint, peekable_buf);
                uint_end = peekable_buf.position();
                peekable_buf.rollbackToCheckpoint(true);
            }

            if (tryReadFloatText(tmp_float, peekable_buf))
            {
                /// Float parsing reads no fewer bytes than integer parsing,
                /// so position of the buffer is either the same, or further.
                /// If it's the same, then it's integer.
                if (read_int && peekable_buf.position() == int_end)
                    return std::make_shared<DataTypeInt64>();
                if (read_uint && peekable_buf.position() == uint_end)
                    return std::make_shared<DataTypeUInt64>();
                return std::make_shared<DataTypeFloat64>();
            }
        }
        else if (tryReadFloatText(tmp_float, buf))
        {
            /// Integers are not inferred separately, any number is Float64.
            return std::make_shared<DataTypeFloat64>();
        }

        /// This is not a number.
        return nullptr;
    }

    /// Reads a string literal (JSON string for JSON, quoted string otherwise) and infers a type from its content:
    /// Date/DateTime64 if the content looks like a date, a number type for JSON if
    /// settings.json.try_infer_numbers_from_strings is enabled, and String in all other cases.
    /// JSON object keys are always inferred as String.
    /// Returns nullptr if the string literal couldn't be read.
    template <bool is_json>
    DataTypePtr tryInferString(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info)
    {
        String value;
        if constexpr (is_json)
        {
            if (!tryReadJSONStringInto(value, buf))
                return nullptr;
        }
        else
        {
            if (!tryReadQuotedStringInto(value, buf))
                return nullptr;
        }

        skipWhitespaceIfAny(buf);

        if constexpr (is_json)
        {
            /// The content of an object key is never interpreted, it's just a String.
            if (json_info->is_object_key)
                return std::make_shared<DataTypeString>();
        }

        if (auto date_type = tryInferDateOrDateTimeFromString(value, settings))
            return date_type;

        if constexpr (is_json)
        {
            if (settings.json.try_infer_numbers_from_strings)
            {
                auto number_type = tryInferNumberFromString(value, settings);
                if (number_type)
                {
                    /// Remember that this particular type object was obtained from a string.
                    json_info->numbers_parsed_from_json_strings.insert(number_type.get());
                    return number_type;
                }
            }
        }

        return std::make_shared<DataTypeString>();
    }

    /// Reads a JSON object from the buffer and stores the inferred type of every non-object value in `paths`
    /// under its full path (`path` extended with the keys leading to the value).
    /// Values that are objects themselves are processed recursively with the extended path.
    /// Returns false if the object is malformed or the type of some value couldn't be inferred.
    /// Throws TOO_DEEP_RECURSION if `depth` exceeds settings.max_parser_depth.
    bool tryReadJSONObject(ReadBuffer & buf, const FormatSettings & settings, DataTypeJSONPaths::Paths & paths, const std::vector<String> & path, JSONInferenceInfo * json_info, size_t depth)
    {
        if (depth > settings.max_parser_depth)
            throw Exception(ErrorCodes::TOO_DEEP_RECURSION,
                            "Maximum parse depth ({}) exceeded. Consider raising max_parser_depth setting.", settings.max_parser_depth);

        assertChar('{', buf);
        skipWhitespaceIfAny(buf);

        bool object_is_empty = true;
        while (!buf.eof() && *buf.position() != '}')
        {
            if (object_is_empty)
            {
                object_is_empty = false;
            }
            else
            {
                /// All key-value pairs except the first one must be preceded by a comma.
                if (!checkChar(',', buf))
                    return false;
                skipWhitespaceIfAny(buf);
            }

            String key;
            if (!tryReadJSONStringInto(key, buf))
                return false;

            skipWhitespaceIfAny(buf);
            if (!checkChar(':', buf))
                return false;

            std::vector<String> nested_path(path);
            nested_path.push_back(std::move(key));

            skipWhitespaceIfAny(buf);

            const bool value_is_object = !buf.eof() && *buf.position() == '{';
            if (value_is_object)
            {
                /// Nested object: its values are registered under the extended path.
                if (!tryReadJSONObject(buf, settings, paths, nested_path, json_info, depth + 1))
                    return false;
            }
            else
            {
                auto leaf_type = tryInferDataTypeForSingleFieldImpl<true>(buf, settings, json_info, depth + 1);
                if (!leaf_type)
                    return false;

                paths[std::move(nested_path)] = leaf_type;
            }

            skipWhitespaceIfAny(buf);
        }

        /// The data ended before the closing '}'.
        if (buf.eof())
            return false;

        assertChar('}', buf);
        skipWhitespaceIfAny(buf);

        /// An empty non-root object is registered as a path with type Nothing (treated as null),
        /// so the path is not lost if this key contains an empty object in all sample data.
        /// This case is processed in JSONPaths type during finalize.
        if (object_is_empty && !path.empty())
            paths[path] = std::make_shared<DataTypeNothing>();
        return true;
    }

    /// Reads a JSON object from the buffer and wraps the collected paths with their types into DataTypeJSONPaths.
    /// Returns nullptr if the object couldn't be read.
    DataTypePtr tryInferJSONPaths(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth)
    {
        DataTypeJSONPaths::Paths collected_paths;
        const bool parsed = tryReadJSONObject(buf, settings, collected_paths, {}, json_info, depth);
        if (parsed)
            return std::make_shared<DataTypeJSONPaths>(std::move(collected_paths));
        return nullptr;
    }

    /// Infers a type from text of the form {key1 : value1, key2 : value2, ...}.
    /// For JSON the result is Object('json') if settings.json.allow_object_type is enabled,
    /// String if settings.json.read_objects_as_strings is enabled, and Map with the last key type otherwise.
    /// For other formats the result is Map if all keys and all values can be brought to common types
    /// and the key type is allowed for Map.
    /// An empty map is inferred as Map(Nothing, Nothing) (unless the Object type is used for JSON).
    /// Returns nullptr if the data is malformed or the type couldn't be inferred.
    template <bool is_json>
    DataTypePtr tryInferMapOrObject(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth)
    {
        assertChar('{', buf);
        skipWhitespaceIfAny(buf);

        DataTypes inferred_keys;
        DataTypes inferred_values;
        bool all_nested_valid = true;
        for (bool is_first_pair = true; !buf.eof() && *buf.position() != '}'; is_first_pair = false)
        {
            /// All key-value pairs except the first one must be preceded by a comma.
            if (!is_first_pair)
            {
                if (!checkChar(',', buf))
                    return nullptr;
                skipWhitespaceIfAny(buf);
            }

            DataTypePtr key_type;
            if constexpr (is_json)
            {
                /// In JSON a key must be a String. The flag tells tryInferString not to
                /// infer dates or numbers from the key content.
                json_info->is_object_key = true;
                key_type = tryInferString<is_json>(buf, settings, json_info);
                json_info->is_object_key = false;
            }
            else
            {
                key_type = tryInferDataTypeForSingleFieldImpl<is_json>(buf, settings, nullptr, depth + 1);
            }

            if (key_type)
                inferred_keys.push_back(key_type);
            else
                all_nested_valid = false;

            skipWhitespaceIfAny(buf);
            if (!checkChar(':', buf))
                return nullptr;
            skipWhitespaceIfAny(buf);

            if (auto value_type = tryInferDataTypeForSingleFieldImpl<is_json>(buf, settings, json_info, depth + 1))
                inferred_values.push_back(value_type);
            else
                all_nested_valid = false;
            skipWhitespaceIfAny(buf);
        }

        /// The data ended before the closing '}'.
        if (buf.eof())
            return nullptr;

        assertChar('}', buf);
        skipWhitespaceIfAny(buf);

        /// Some key or value has a type that couldn't be inferred.
        if (!all_nested_valid)
            return nullptr;

        /// If the Object type is allowed, any JSON object (empty or not) is inferred as Object('json').
        if constexpr (is_json)
        {
            if (settings.json.allow_object_type)
                return std::make_shared<DataTypeObject>("json", true);
        }

        /// Empty Map is Map(Nothing, Nothing)
        if (inferred_keys.empty())
            return std::make_shared<DataTypeMap>(std::make_shared<DataTypeNothing>(), std::make_shared<DataTypeNothing>());

        if constexpr (is_json)
        {
            if (settings.json.read_objects_as_strings)
                return std::make_shared<DataTypeString>();

            /// Only the values are unified here.
            transformInferredTypesIfNeededImpl<is_json>(inferred_values, settings, json_info);
            if (!checkIfTypesAreEqual(inferred_values))
                return nullptr;

            return std::make_shared<DataTypeMap>(inferred_keys.back(), inferred_values.back());
        }

        /// For other formats both keys and values must be brought to common types.
        if (!checkIfTypesAreEqual(inferred_keys))
            transformInferredTypesIfNeededImpl<is_json>(inferred_keys, settings);
        if (!checkIfTypesAreEqual(inferred_values))
            transformInferredTypesIfNeededImpl<is_json>(inferred_values, settings);

        if (!checkIfTypesAreEqual(inferred_keys) || !checkIfTypesAreEqual(inferred_values))
            return nullptr;

        /// The Nullable wrapper is stripped from the key type before it's validated and used.
        auto map_key_type = removeNullable(inferred_keys.back());
        if (!DataTypeMap::checkKeyType(map_key_type))
            return nullptr;

        return std::make_shared<DataTypeMap>(map_key_type, inferred_values.back());
    }

    /// Infers the data type of a single field from its text representation at the current buffer position.
    /// Depending on the first character the field is treated as:
    ///  - '['  : Array;
    ///  - '('  : Tuple (not for JSON);
    ///  - '{'  : Map/Object (for JSON: named Tuple paths if objects are inferred as tuples);
    ///  - quote: String ('"' for JSON, '\'' otherwise), which may be inferred as a date or a number;
    ///  - t/f  : Bool (case insensitive "true"/"false");
    ///  - n    : Nullable(Nothing) for "null" or Float64 for "nan" (case insensitive);
    ///  - anything else: number.
    /// Returns nullptr if the type couldn't be inferred.
    /// Throws TOO_DEEP_RECURSION if `depth` exceeds settings.max_parser_depth.
    template <bool is_json>
    DataTypePtr tryInferDataTypeForSingleFieldImpl(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info, size_t depth)
    {
        if (depth > settings.max_parser_depth)
            throw Exception(ErrorCodes::TOO_DEEP_RECURSION,
                "Maximum parse depth ({}) exceeded. Consider raising max_parser_depth setting.", settings.max_parser_depth);

        skipWhitespaceIfAny(buf);

        if (buf.eof())
            return nullptr;

        const char first_char = *buf.position();

        /// Array [field1, field2, ...]
        if (first_char == '[')
            return tryInferArray<is_json>(buf, settings, json_info, depth);

        /// Tuple (field1, field2, ...), if format is not JSON
        if constexpr (!is_json)
        {
            if (first_char == '(')
                return tryInferTuple(buf, settings, json_info, depth);
        }

        /// Map/Object for JSON { key1 : value1, key2 : value2, ...}
        if (first_char == '{')
        {
            if constexpr (is_json)
            {
                if (!settings.json.allow_object_type && settings.json.try_infer_objects_as_tuples)
                    return tryInferJSONPaths(buf, settings, json_info, depth);
            }

            return tryInferMapOrObject<is_json>(buf, settings, json_info, depth);
        }

        /// String
        char quote = is_json ? '"' : '\'';
        if (first_char == quote)
            return tryInferString<is_json>(buf, settings, json_info);

        /// Bool.
        /// A field that starts with 't' or 'f' can only be a valid value if it's the whole keyword.
        /// We must not continue with the number inference after a failed keyword match, because
        /// the match may have already consumed a part of the field and the rest of it
        /// (for example "5" in "t5") would be wrongly inferred as a number.
        if (first_char == 't' || first_char == 'T')
        {
            if (checkStringCaseInsensitive("true", buf))
                return DataTypeFactory::instance().get("Bool");
            return nullptr;
        }

        if (first_char == 'f' || first_char == 'F')
        {
            if (checkStringCaseInsensitive("false", buf))
                return DataTypeFactory::instance().get("Bool");
            return nullptr;
        }

        /// Null or NaN
        if (checkCharCaseInsensitive('n', buf))
        {
            if (checkStringCaseInsensitive("ull", buf))
                return makeNullable(std::make_shared<DataTypeNothing>());
            if (checkStringCaseInsensitive("an", buf))
                return std::make_shared<DataTypeFloat64>();

            /// The leading 'n' is already consumed and it's neither null nor nan.
            /// It cannot be a number, otherwise "n42" would be inferred as Int64.
            return nullptr;
        }

        /// Number
        return tryInferNumber(buf, settings);
    }
}

/// Applies the non-JSON type transformation to a pair of inferred types
/// and writes the (possibly changed) types back to `first` and `second`.
void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings)
{
    DataTypes both_types;
    both_types.push_back(first);
    both_types.push_back(second);

    transformInferredTypesIfNeededImpl<false>(both_types, settings, nullptr);

    first = std::move(both_types[0]);
    second = std::move(both_types[1]);
}

/// Applies the JSON type transformation to a pair of inferred types
/// and writes the (possibly changed) types back to `first` and `second`.
void transformInferredJSONTypesIfNeeded(
    DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
    DataTypes both_types;
    both_types.push_back(first);
    both_types.push_back(second);

    transformInferredTypesIfNeededImpl<true>(both_types, settings, json_info);

    first = std::move(both_types[0]);
    second = std::move(both_types[1]);
}

/// Same as transformInferredJSONTypesIfNeeded, but uses a fresh JSONInferenceInfo
/// with merging of named Tuples enabled.
void transformInferredJSONTypesFromDifferentFilesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings)
{
    JSONInferenceInfo info_for_merge;
    info_for_merge.allow_merging_named_tuples = true;

    transformInferredJSONTypesIfNeeded(first, second, settings, &info_for_merge);
}

/// Recursively applies the final transformations to the inferred JSON type in place:
///  - Nothing is replaced with String if settings.json.infer_incomplete_types_as_strings is enabled
///    (unless remain_nothing_types is set);
///  - DataTypeJSONPaths is replaced with the result of its finalize() (or with String if it's empty
///    and infer_incomplete_types_as_strings is enabled);
///  - unnamed Tuple is replaced with Array if its elements can be brought to a common type;
///  - nested types of Nullable, Array, Map and Tuple are processed recursively.
/// Does nothing if data_type is nullptr or is none of the types listed above.
void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info, bool remain_nothing_types = false)
{
    if (!data_type)
        return;

    if (!remain_nothing_types && isNothing(data_type) && settings.json.infer_incomplete_types_as_strings)
    {
        data_type = std::make_shared<DataTypeString>();
        return;
    }

    /// Nullable: transform the nested type and wrap it back.
    if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(data_type.get()))
    {
        auto nested_type = nullable_type->getNestedType();
        transformFinalInferredJSONTypeIfNeededImpl(nested_type, settings, json_info, remain_nothing_types);
        data_type = std::make_shared<DataTypeNullable>(std::move(nested_type));
        return;
    }

    if (const auto * json_paths = typeid_cast<const DataTypeJSONPaths *>(data_type.get()))
    {
        /// If all objects were empty, use type String, so these JSON objects will be read as Strings.
        if (json_paths->empty() && settings.json.infer_incomplete_types_as_strings)
        {
            data_type = std::make_shared<DataTypeString>();
            return;
        }

        /// Replace the paths with the finalized type and process the result recursively.
        data_type = json_paths->finalize();
        transformFinalInferredJSONTypeIfNeededImpl(data_type, settings, json_info, remain_nothing_types);
        return;
    }

    /// Array: transform the type of elements.
    if (const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get()))
    {
        auto nested_type = array_type->getNestedType();
        transformFinalInferredJSONTypeIfNeededImpl(nested_type, settings, json_info, remain_nothing_types);
        data_type = std::make_shared<DataTypeArray>(nested_type);
        return;
    }

    if (const auto * map_type = typeid_cast<const DataTypeMap *>(data_type.get()))
    {
        auto key_type = map_type->getKeyType();
        /// If all inferred Maps are empty, use type String, so these JSON objects will be read as Strings.
        if (isNothing(key_type) && settings.json.infer_incomplete_types_as_strings)
            key_type = std::make_shared<DataTypeString>();

        auto value_type = map_type->getValueType();

        /// Only the value type is transformed recursively.
        transformFinalInferredJSONTypeIfNeededImpl(value_type, settings, json_info, remain_nothing_types);
        data_type = std::make_shared<DataTypeMap>(key_type, value_type);
        return;
    }

    if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(data_type.get()))
    {
        auto nested_types = tuple_type->getElements();

        /// Named Tuple: transform the elements and keep the names. It's never converted to Array.
        if (tuple_type->haveExplicitNames())
        {
            for (auto & nested_type : nested_types)
                transformFinalInferredJSONTypeIfNeededImpl(nested_type, settings, json_info, remain_nothing_types);
            data_type = std::make_shared<DataTypeTuple>(nested_types, tuple_type->getElementNames());
            return;
        }

        for (auto & nested_type : nested_types)
            /// Don't change Nothing to String in nested types here, because we are not sure yet if it's Array or actual Tuple
            transformFinalInferredJSONTypeIfNeededImpl(nested_type, settings, json_info, /*remain_nothing_types=*/ true);

        /// Try to unify the element types on a copy, so the original element types
        /// stay untouched if the result remains a Tuple.
        auto nested_types_copy = nested_types;
        transformInferredTypesIfNeededImpl<true>(nested_types_copy, settings, json_info);
        if (checkIfTypesAreEqual(nested_types_copy))
        {
            data_type = std::make_shared<DataTypeArray>(nested_types_copy.back());
        }
        else
        {
            /// Now we should run transform one more time to convert Nothing to String if needed.
            if (!remain_nothing_types)
            {
                for (auto & nested_type : nested_types)
                    transformFinalInferredJSONTypeIfNeededImpl(nested_type, settings, json_info);
            }

            data_type = std::make_shared<DataTypeTuple>(nested_types);
        }

        return;
    }
}

/// Public entry point for the final transformation of an inferred JSON type.
/// Forwards to transformFinalInferredJSONTypeIfNeededImpl with the default remain_nothing_types (false).
void transformFinalInferredJSONTypeIfNeeded(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
    transformFinalInferredJSONTypeIfNeededImpl(data_type, settings, json_info);
}

/// Tries to interpret the whole field as a number.
/// Returns Int64 or UInt64 (only if settings.try_infer_integers is enabled), Float64,
/// or nullptr if the field is not a number or has some data after the number.
DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings)
{
    ReadBufferFromString in(field);

    /// The whole field is in memory, so every attempt can start again from the first byte.
    auto rewind = [&in] { in.position() = in.buffer().begin(); };

    if (settings.try_infer_integers)
    {
        Int64 signed_value;
        if (tryReadIntText(signed_value, in) && in.eof())
            return std::make_shared<DataTypeInt64>();

        rewind();

        /// In case of Int64 overflow, try to infer UInt64
        UInt64 unsigned_value;
        if (tryReadIntText(unsigned_value, in) && in.eof())
            return std::make_shared<DataTypeUInt64>();

        rewind();
    }

    Float64 float_value;
    if (tryReadFloatText(float_value, in) && in.eof())
        return std::make_shared<DataTypeFloat64>();

    return nullptr;
}

/// Tries to interpret the field as a date or a date with time.
/// Returns Date if settings.try_infer_dates is enabled and the field is a date,
/// DateTime64(9) if settings.try_infer_datetimes is enabled and the field is a date with time,
/// and nullptr otherwise. Date is checked first.
DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings)
{
    const bool is_date = settings.try_infer_dates && tryInferDate(field);
    if (is_date)
        return std::make_shared<DataTypeDate>();

    const bool is_datetime = settings.try_infer_datetimes && tryInferDateTime(field, settings);
    if (is_datetime)
        return std::make_shared<DataTypeDateTime64>(9);

    return nullptr;
}

/// Infers the type of a single non-JSON field read from the buffer (no JSON inference info is used).
/// Returns nullptr if the type couldn't be inferred. Unlike the overload that takes std::string_view,
/// it doesn't check whether any data is left in the buffer after the field.
DataTypePtr tryInferDataTypeForSingleField(ReadBuffer & buf, const FormatSettings & settings)
{
    return tryInferDataTypeForSingleFieldImpl<false>(buf, settings, nullptr);
}

/// Infers the type of a single non-JSON field given as a string.
/// The whole string must be consumed by the inference, otherwise nullptr is returned.
DataTypePtr tryInferDataTypeForSingleField(std::string_view field, const FormatSettings & settings)
{
    ReadBufferFromString in(field);
    auto inferred_type = tryInferDataTypeForSingleFieldImpl<false>(in, settings, nullptr);

    /// Unread data after the value means that the field is not a value of the inferred type.
    const bool consumed_everything = in.eof();
    return consumed_everything ? inferred_type : nullptr;
}

/// Infers the type of a single JSON field read from the buffer, passing json_info to the inference.
/// Returns nullptr if the type couldn't be inferred. Unlike the overload that takes std::string_view,
/// it doesn't check whether any data is left in the buffer after the field.
DataTypePtr tryInferDataTypeForSingleJSONField(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
    return tryInferDataTypeForSingleFieldImpl<true>(buf, settings, json_info);
}

/// Infers the type of a single JSON field given as a string.
/// The whole string must be consumed by the inference, otherwise nullptr is returned.
DataTypePtr tryInferDataTypeForSingleJSONField(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
    ReadBufferFromString in(field);
    auto inferred_type = tryInferDataTypeForSingleFieldImpl<true>(in, settings, json_info);

    /// Unread data after the value means that the field is not a value of the inferred type.
    const bool consumed_everything = in.eof();
    return consumed_everything ? inferred_type : nullptr;
}

/// Makes the type Nullable, descending into composite types:
///  - Nullable types are returned as is;
///  - for Array, Tuple, Map and LowCardinality the nested types are processed recursively
///    (Tuple element names are preserved, the Nullable wrapper is removed from Map keys);
///  - Object is recreated with nullable subcolumns if it doesn't have them;
///  - any other type is wrapped with makeNullable.
/// Returns nullptr if the type (or the result for any nested type) is nullptr.
DataTypePtr makeNullableRecursively(DataTypePtr type)
{
    if (!type)
        return nullptr;

    WhichDataType kind(type);

    if (kind.isNullable())
        return type;

    if (kind.isArray())
    {
        const auto * as_array = assert_cast<const DataTypeArray *>(type.get());
        auto nullable_element = makeNullableRecursively(as_array->getNestedType());
        if (!nullable_element)
            return nullptr;
        return std::make_shared<DataTypeArray>(nullable_element);
    }

    if (kind.isTuple())
    {
        const auto * as_tuple = assert_cast<const DataTypeTuple *>(type.get());
        const auto & elements = as_tuple->getElements();

        DataTypes nullable_elements;
        nullable_elements.reserve(elements.size());
        for (const auto & element : elements)
        {
            auto nullable_element = makeNullableRecursively(element);
            if (!nullable_element)
                return nullptr;
            nullable_elements.push_back(nullable_element);
        }

        /// Keep the element names of named Tuples.
        if (as_tuple->haveExplicitNames())
            return std::make_shared<DataTypeTuple>(std::move(nullable_elements), as_tuple->getElementNames());

        return std::make_shared<DataTypeTuple>(std::move(nullable_elements));
    }

    if (kind.isMap())
    {
        const auto * as_map = assert_cast<const DataTypeMap *>(type.get());
        auto nullable_key = makeNullableRecursively(as_map->getKeyType());
        auto nullable_value = makeNullableRecursively(as_map->getValueType());
        if (!nullable_key || !nullable_value)
            return nullptr;

        /// The key is processed recursively too, but its own Nullable wrapper is removed.
        return std::make_shared<DataTypeMap>(removeNullable(nullable_key), nullable_value);
    }

    if (kind.isLowCardinality())
    {
        const auto * as_low_cardinality = assert_cast<const DataTypeLowCardinality *>(type.get());
        auto nullable_dictionary_type = makeNullableRecursively(as_low_cardinality->getDictionaryType());
        if (!nullable_dictionary_type)
            return nullptr;
        return std::make_shared<DataTypeLowCardinality>(nullable_dictionary_type);
    }

    if (kind.isObject())
    {
        const auto * as_object = assert_cast<const DataTypeObject *>(type.get());
        if (as_object->hasNullableSubcolumns())
            return type;
        return std::make_shared<DataTypeObject>(as_object->getSchemaFormat(), true);
    }

    return makeNullable(type);
}

/// Returns the columns of the header with their types made Nullable recursively.
/// Column names and order are kept.
NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header)
{
    NamesAndTypesList nullable_columns;
    for (const auto & column : header.getNamesAndTypesList())
        nullable_columns.emplace_back(column.name, makeNullableRecursively(column.type));
    return nullable_columns;
}

bool checkIfTypeIsComplete(const DataTypePtr & type)
{
    if (!type)
        return false;

    WhichDataType which(type);

    if (which.isNothing())
        return false;

    if (which.isNullable())
        return checkIfTypeIsComplete(assert_cast<const DataTypeNullable *>(type.get())->getNestedType());

    if (which.isArray())
        return checkIfTypeIsComplete(assert_cast<const DataTypeArray *>(type.get())->getNestedType());

    if (which.isTuple())
    {
        const auto * tuple_type = assert_cast<const DataTypeTuple *>(type.get());
        for (const auto & element : tuple_type->getElements())
        {
            if (!checkIfTypeIsComplete(element))
                return false;
        }
        return true;
    }

    if (which.isMap())
    {
        const auto * map_type = assert_cast<const DataTypeMap *>(type.get());
        if (!checkIfTypeIsComplete(map_type->getKeyType()))
            return false;
        return checkIfTypeIsComplete(map_type->getValueType());
    }

    return true;
}

}