2021-04-23 23:56:26 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Core/Block.h>
|
|
|
|
#include <Core/NamesAndTypes.h>
|
2021-05-26 02:41:38 +00:00
|
|
|
#include <Common/FieldVisitors.h>
|
2021-07-23 16:30:18 +00:00
|
|
|
#include <Storages/ColumnsDescription.h>
|
2021-12-01 02:58:24 +00:00
|
|
|
#include <DataTypes/DataTypeTuple.h>
|
2022-02-09 20:47:53 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
|
|
#include <Columns/ColumnObject.h>
|
2021-04-23 23:56:26 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2022-02-17 19:00:25 +00:00
|
|
|
/// Returns number of dimensions in Array type. 0 if type is not array.
|
2021-05-06 05:33:06 +00:00
|
|
|
size_t getNumberOfDimensions(const IDataType & type);
|
2022-02-17 19:00:25 +00:00
|
|
|
|
|
|
|
/// Returns number of dimensions in Array column. 0 if column is not array.
|
2021-05-06 05:33:06 +00:00
|
|
|
size_t getNumberOfDimensions(const IColumn & column);
|
2022-02-17 19:00:25 +00:00
|
|
|
|
|
|
|
/// Returns type of scalars of Array of arbitrary dimensions.
|
2021-06-10 13:57:31 +00:00
|
|
|
DataTypePtr getBaseTypeOfArray(const DataTypePtr & type);
|
2022-02-17 19:00:25 +00:00
|
|
|
|
|
|
|
/// Returns Array type with requested scalar type and number of dimensions.
|
2022-01-27 00:24:34 +00:00
|
|
|
DataTypePtr createArrayOfType(DataTypePtr type, size_t num_dimensions);
|
2021-05-06 05:33:06 +00:00
|
|
|
|
2022-02-17 19:00:25 +00:00
|
|
|
/// Returns column of scalars of Array of arbitrary dimensions.
|
2021-12-23 12:28:40 +00:00
|
|
|
ColumnPtr getBaseColumnOfArray(const ColumnPtr & column);
|
2022-02-17 19:00:25 +00:00
|
|
|
|
|
|
|
/// Returns empty Array column with requested scalar column and number of dimensions.
|
2021-12-23 12:28:40 +00:00
|
|
|
ColumnPtr createArrayOfColumn(const ColumnPtr & column, size_t num_dimensions);
|
|
|
|
|
2022-02-17 19:00:25 +00:00
|
|
|
/// Returns Array with requested number of dimensions and no scalars.
|
|
|
|
Array createEmptyArrayField(size_t num_dimensions);
|
|
|
|
|
|
|
|
/// Tries to get data type by column. Only a limited subset of types is supported.
|
2021-04-23 23:56:26 +00:00
|
|
|
DataTypePtr getDataTypeByColumn(const IColumn & column);
|
2022-02-17 19:00:25 +00:00
|
|
|
|
|
|
|
/// Converts Object types and columns to Tuples in @block
|
|
|
|
/// and checks that types are consistent with types in @extended_storage_columns.
|
2022-03-31 13:26:32 +00:00
|
|
|
void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_storage_columns);
|
2022-02-17 19:00:25 +00:00
|
|
|
|
|
|
|
/// Checks that each path is not the prefix of any other path.
|
2022-02-01 11:14:27 +00:00
|
|
|
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths);
|
2022-02-17 19:00:25 +00:00
|
|
|
|
|
|
|
/// Receives several Tuple types and deduces the least common type among them.
|
2021-09-13 12:40:39 +00:00
|
|
|
DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambiguos_paths = false);
|
2022-02-17 19:00:25 +00:00
|
|
|
|
|
|
|
/// Converts types of object columns to tuples in @columns_list
|
|
|
|
/// according to @object_columns and adds all tuple's subcolumns if needed.
|
2021-07-23 16:30:18 +00:00
|
|
|
void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescription & object_columns, bool with_subcolumns);
|
|
|
|
|
2022-02-09 20:47:53 +00:00
|
|
|
NameSet getNamesOfObjectColumns(const NamesAndTypesList & columns_list);
|
|
|
|
bool hasObjectColumns(const ColumnsDescription & columns);
|
2022-05-18 14:15:16 +00:00
|
|
|
void finalizeObjectColumns(const MutableColumns & columns);
|
2022-02-09 20:47:53 +00:00
|
|
|
|
2022-02-17 19:00:25 +00:00
|
|
|
/// Updates types of objects in @object_columns inplace
|
|
|
|
/// according to types in @new_columns.
|
2022-02-09 20:47:53 +00:00
|
|
|
void updateObjectColumns(ColumnsDescription & object_columns, const NamesAndTypesList & new_columns);
|
|
|
|
|
2021-12-01 02:58:24 +00:00
|
|
|
using DataTypeTuplePtr = std::shared_ptr<DataTypeTuple>;
|
|
|
|
|
2022-02-17 19:00:25 +00:00
|
|
|
/// Flattens nested Tuple to plain Tuple. I.e. extracts all paths and types from tuple.
|
|
|
|
/// E.g. Tuple(t Tuple(c1 UInt32, c2 String), c3 UInt64) -> Tuple(t.c1 UInt32, t.c2 String, c3 UInt64)
|
2022-02-01 11:14:27 +00:00
|
|
|
std::pair<PathsInData, DataTypes> flattenTuple(const DataTypePtr & type);
|
2022-02-17 19:00:25 +00:00
|
|
|
|
|
|
|
/// Flattens nested Tuple column to plain Tuple column.
|
2022-01-27 00:24:34 +00:00
|
|
|
ColumnPtr flattenTuple(const ColumnPtr & column);
|
2021-12-01 02:58:24 +00:00
|
|
|
|
2022-02-17 19:00:25 +00:00
|
|
|
/// The reverse operation to 'flattenTuple'.
|
|
|
|
/// Creates nested Tuple from all paths and types.
|
|
|
|
/// E.g. Tuple(t.c1 UInt32, t.c2 String, c3 UInt64) -> Tuple(t Tuple(c1 UInt32, c2 String), c3 UInt64)
|
2021-12-01 02:58:24 +00:00
|
|
|
DataTypePtr unflattenTuple(
|
2022-02-01 11:14:27 +00:00
|
|
|
const PathsInData & paths,
|
2021-12-01 02:58:24 +00:00
|
|
|
const DataTypes & tuple_types);
|
|
|
|
|
2022-05-20 15:04:55 +00:00
|
|
|
std::pair<ColumnPtr, DataTypePtr> unflattenObjectToTuple(const ColumnObject & column);
|
|
|
|
|
2022-01-27 00:24:34 +00:00
|
|
|
std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
|
2022-02-01 11:14:27 +00:00
|
|
|
const PathsInData & paths,
|
2021-12-01 02:58:24 +00:00
|
|
|
const DataTypes & tuple_types,
|
|
|
|
const Columns & tuple_columns);
|
|
|
|
|
2022-05-20 15:04:55 +00:00
|
|
|
|
2022-02-17 19:00:25 +00:00
|
|
|
/// For all columns which exist in @expected_columns and
|
|
|
|
/// don't exist in @available_columns adds to WITH clause
|
|
|
|
/// an alias with column name to literal of default value of column type.
|
2021-07-23 16:30:18 +00:00
|
|
|
void replaceMissedSubcolumnsByConstants(
|
|
|
|
const ColumnsDescription & expected_columns,
|
|
|
|
const ColumnsDescription & available_columns,
|
|
|
|
ASTPtr query);
|
2021-07-09 03:15:41 +00:00
|
|
|
|
2022-04-08 14:58:02 +00:00
|
|
|
/// Visitor that keeps @num_dimensions_to_keep dimensions in arrays
|
|
|
|
/// and replaces all scalars or nested arrays to @replacement at that level.
|
|
|
|
class FieldVisitorReplaceScalars : public StaticVisitor<Field>
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
FieldVisitorReplaceScalars(const Field & replacement_, size_t num_dimensions_to_keep_)
|
|
|
|
: replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
Field operator()(const Array & x) const;
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
Field operator()(const T &) const { return replacement; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
const Field & replacement;
|
|
|
|
size_t num_dimensions_to_keep;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Calculates number of dimensions in array field.
|
|
|
|
/// Returns 0 for scalar fields.
|
|
|
|
class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t>
|
|
|
|
{
|
|
|
|
public:
|
2022-08-22 10:34:53 +00:00
|
|
|
size_t operator()(const Array & x);
|
2022-04-08 14:58:02 +00:00
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
size_t operator()(const T &) const { return 0; }
|
2022-08-22 10:34:53 +00:00
|
|
|
|
|
|
|
bool need_fold_dimension = false;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Fold field to the higher dimension, e.g. `1` -- fold 2 --> `[[1]]`
|
|
|
|
/// used to normalize dimension of element in an array. e.g [1, [2]] --> [[1], [2]]
|
|
|
|
class FieldVisitorFoldDimension : public StaticVisitor<Field>
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
explicit FieldVisitorFoldDimension(size_t num_dimensions_to_fold_) : num_dimensions_to_fold(num_dimensions_to_fold_) { }
|
|
|
|
Field operator()(const Array & x) const;
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
Field operator()(const T & x) const
|
|
|
|
{
|
|
|
|
if (num_dimensions_to_fold == 0)
|
|
|
|
return x;
|
|
|
|
Array res;
|
|
|
|
res.push_back(x);
|
|
|
|
for (size_t i = 1; i < num_dimensions_to_fold; ++i)
|
|
|
|
{
|
|
|
|
Array new_res;
|
|
|
|
new_res.push_back(res);
|
|
|
|
res = new_res;
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
size_t num_dimensions_to_fold;
|
2022-04-08 14:58:02 +00:00
|
|
|
};
|
|
|
|
|
2022-02-17 19:00:25 +00:00
|
|
|
/// Receives range of objects, which contains collections
|
|
|
|
/// of columns-like objects (e.g. ColumnsDescription or NamesAndTypesList)
|
|
|
|
/// and deduces the common types of object columns for all entries.
|
|
|
|
/// @entry_columns_getter should extract reference to collection of
|
|
|
|
/// columns-like objects from entry to which Iterator points.
|
|
|
|
/// columns-like object should have fields "name" and "type".
|
2022-02-09 20:47:53 +00:00
|
|
|
template <typename Iterator, typename EntryColumnsGetter>
|
|
|
|
ColumnsDescription getObjectColumns(
|
|
|
|
Iterator begin, Iterator end,
|
|
|
|
const ColumnsDescription & storage_columns,
|
|
|
|
EntryColumnsGetter && entry_columns_getter)
|
|
|
|
{
|
|
|
|
ColumnsDescription res;
|
|
|
|
|
|
|
|
if (begin == end)
|
|
|
|
{
|
|
|
|
for (const auto & column : storage_columns)
|
|
|
|
{
|
|
|
|
if (isObject(column.type))
|
|
|
|
{
|
|
|
|
auto tuple_type = std::make_shared<DataTypeTuple>(
|
|
|
|
DataTypes{std::make_shared<DataTypeUInt8>()},
|
|
|
|
Names{ColumnObject::COLUMN_NAME_DUMMY});
|
|
|
|
|
|
|
|
res.add({column.name, std::move(tuple_type)});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::unordered_map<String, DataTypes> types_in_entries;
|
|
|
|
|
|
|
|
for (auto it = begin; it != end; ++it)
|
|
|
|
{
|
|
|
|
const auto & entry_columns = entry_columns_getter(*it);
|
|
|
|
for (const auto & column : entry_columns)
|
|
|
|
{
|
|
|
|
auto storage_column = storage_columns.tryGetPhysical(column.name);
|
|
|
|
if (storage_column && isObject(storage_column->type))
|
|
|
|
types_in_entries[column.name].push_back(column.type);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const auto & [name, types] : types_in_entries)
|
2022-03-01 16:32:55 +00:00
|
|
|
res.add({name, getLeastCommonTypeForObject(types)});
|
2022-02-09 20:47:53 +00:00
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2021-04-23 23:56:26 +00:00
|
|
|
}
|