ClickHouse/src/DataTypes/ObjectUtils.h

204 lines
7.2 KiB
C++
Raw Normal View History

2021-04-23 23:56:26 +00:00
#pragma once
#include <Core/Block.h>
#include <Core/NamesAndTypes.h>
#include <Common/FieldVisitors.h>
#include <Storages/ColumnsDescription.h>
2021-12-01 02:58:24 +00:00
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnObject.h>
2021-04-23 23:56:26 +00:00
namespace DB
{
2022-02-17 19:00:25 +00:00
/// Returns number of dimensions in Array type. 0 if type is not array.
size_t getNumberOfDimensions(const IDataType & type);
2022-02-17 19:00:25 +00:00
/// Returns number of dimensions in Array column. 0 if column is not array.
size_t getNumberOfDimensions(const IColumn & column);
2022-02-17 19:00:25 +00:00
/// Returns type of scalars of Array of arbitrary dimensions.
DataTypePtr getBaseTypeOfArray(const DataTypePtr & type);
2022-02-17 19:00:25 +00:00
/// Returns Array type with requested scalar type and number of dimensions.
2022-01-27 00:24:34 +00:00
DataTypePtr createArrayOfType(DataTypePtr type, size_t num_dimensions);
2022-02-17 19:00:25 +00:00
/// Returns column of scalars of Array of arbitrary dimensions.
2021-12-23 12:28:40 +00:00
ColumnPtr getBaseColumnOfArray(const ColumnPtr & column);
2022-02-17 19:00:25 +00:00
/// Returns empty Array column with requested scalar column and number of dimensions.
2021-12-23 12:28:40 +00:00
ColumnPtr createArrayOfColumn(const ColumnPtr & column, size_t num_dimensions);
2022-02-17 19:00:25 +00:00
/// Returns Array with requested number of dimensions and no scalars.
Array createEmptyArrayField(size_t num_dimensions);
/// Tries to get data type by column. Only a limited subset of types is supported.
2021-04-23 23:56:26 +00:00
DataTypePtr getDataTypeByColumn(const IColumn & column);
2022-02-17 19:00:25 +00:00
/// Converts Object types and columns to Tuples in @block
/// and checks that types are consistent with types in @extended_storage_columns.
void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_storage_columns);
2022-02-17 19:00:25 +00:00
/// Checks that each path is not the prefix of any other path.
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths);
2022-02-17 19:00:25 +00:00
/// Receives several Tuple types and deduces the least common type among them.
DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambiguos_paths = false);
2022-02-17 19:00:25 +00:00
/// Converts types of object columns to tuples in @columns_list
/// according to @object_columns and adds all tuple's subcolumns if needed.
void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescription & object_columns, bool with_subcolumns);
/// NOTE(review): the two helpers below are only declared in this header; their descriptions
/// are inferred from names and signatures and should be confirmed against the definitions.
/// Presumably returns the names of the columns in @columns_list that have Object type.
NameSet getNamesOfObjectColumns(const NamesAndTypesList & columns_list);
/// Presumably returns true if at least one column in @columns has Object type.
bool hasObjectColumns(const ColumnsDescription & columns);
2022-05-18 14:15:16 +00:00
/// Presumably finalizes the Object columns found among @columns.
/// NOTE(review): only the declaration is visible here — confirm against the definition.
void finalizeObjectColumns(const MutableColumns & columns);
2022-02-17 19:00:25 +00:00
/// Updates types of objects in @object_columns inplace
/// according to types in @new_columns.
void updateObjectColumns(ColumnsDescription & object_columns, const NamesAndTypesList & new_columns);
2021-12-01 02:58:24 +00:00
using DataTypeTuplePtr = std::shared_ptr<DataTypeTuple>;
2022-02-17 19:00:25 +00:00
/// Flattens nested Tuple to plain Tuple. I.e. extracts all paths and types from tuple.
/// E.g. Tuple(t Tuple(c1 UInt32, c2 String), c3 UInt64) -> Tuple(t.c1 UInt32, t.c2 String, c3 UInt64)
std::pair<PathsInData, DataTypes> flattenTuple(const DataTypePtr & type);
2022-02-17 19:00:25 +00:00
/// Flattens nested Tuple column to plain Tuple column.
2022-01-27 00:24:34 +00:00
ColumnPtr flattenTuple(const ColumnPtr & column);
2021-12-01 02:58:24 +00:00
2022-02-17 19:00:25 +00:00
/// The reverse operation to 'flattenTuple'.
/// Creates nested Tuple from all paths and types.
/// E.g. Tuple(t.c1 UInt32, t.c2 String, c3 UInt64) -> Tuple(t Tuple(c1 UInt32, c2 String), c3 UInt64)
2021-12-01 02:58:24 +00:00
DataTypePtr unflattenTuple(
const PathsInData & paths,
2021-12-01 02:58:24 +00:00
const DataTypes & tuple_types);
/// Takes a ColumnObject and returns a column together with its data type.
/// NOTE(review): presumably the result is a nested Tuple built from the paths stored
/// in the object column — only the declaration is visible here, confirm against the definition.
std::pair<ColumnPtr, DataTypePtr> unflattenObjectToTuple(const ColumnObject & column);
2022-01-27 00:24:34 +00:00
/// Overload of 'unflattenTuple' that additionally receives @tuple_columns
/// and returns a column together with its data type.
/// NOTE(review): presumably @paths, @tuple_types and @tuple_columns are parallel
/// (one entry per flattened element) — confirm against the definition.
std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
const PathsInData & paths,
2021-12-01 02:58:24 +00:00
const DataTypes & tuple_types,
const Columns & tuple_columns);
2022-02-17 19:00:25 +00:00
/// For all columns which exist in @expected_columns and
/// don't exist in @available_columns adds to WITH clause
/// an alias with column name to literal of default value of column type.
void replaceMissedSubcolumnsByConstants(
const ColumnsDescription & expected_columns,
const ColumnsDescription & available_columns,
ASTPtr query);
/// Field visitor that keeps the outer @num_dimensions_to_keep levels of array nesting
/// and puts @replacement in place of whatever is found at that level
/// (scalars as well as deeper nested arrays).
///
/// NOTE: the visitor stores only a reference to the replacement value,
/// so the Field passed to the constructor must outlive the visitor.
class FieldVisitorReplaceScalars : public StaticVisitor<Field>
{
public:
    FieldVisitorReplaceScalars(const Field & replacement_, size_t num_dimensions_to_keep_)
        : replacement(replacement_)
        , num_dimensions_to_keep(num_dimensions_to_keep_)
    {
    }

    /// Array overload; only declared here, the definition lives outside of this class.
    Field operator()(const Array & x) const;

    /// Every non-Array value is replaced unconditionally.
    template <typename T>
    Field operator()(const T &) const
    {
        return replacement;
    }

private:
    const Field & replacement;
    size_t num_dimensions_to_keep;
};
/// Field visitor that computes the number of dimensions of an array field.
/// A scalar (any non-Array field) has 0 dimensions.
class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t>
{
public:
    /// Array overload; only declared here. Unlike the scalar overload it is not const.
    /// NOTE(review): presumably because it updates need_fold_dimension — confirm in the definition.
    size_t operator()(const Array & x);

    /// Scalars contribute no dimensions.
    template <typename T>
    size_t operator()(const T &) const
    {
        return 0;
    }

    /// NOTE(review): looks like a flag telling the caller that elements of the visited array
    /// have to be folded to a common dimension (see FieldVisitorFoldDimension) —
    /// confirm against the Array overload, which is not defined in this header.
    bool need_fold_dimension = false;
};
/// Folds a field to a higher dimension, e.g. `1` -- fold 2 --> `[[1]]`.
/// Used to normalize the dimension of elements in an array, e.g. [1, [2]] --> [[1], [2]].
class FieldVisitorFoldDimension : public StaticVisitor<Field>
{
public:
    explicit FieldVisitorFoldDimension(size_t num_dimensions_to_fold_) : num_dimensions_to_fold(num_dimensions_to_fold_) { }

    /// Array overload; only declared here, the definition lives outside of this class.
    Field operator()(const Array & x) const;

    /// Wraps the non-Array value @x into `num_dimensions_to_fold` nested single-element arrays.
    /// Returns @x itself when `num_dimensions_to_fold` is 0.
    template <typename T>
    Field operator()(const T & x) const
    {
        if (num_dimensions_to_fold == 0)
            return x;

        Array res;
        res.push_back(x);
        for (size_t i = 1; i < num_dimensions_to_fold; ++i)
        {
            /// Move the array built so far into the new outer level: copying it here
            /// (and copying the wrapper back) would duplicate the whole nested structure
            /// on every iteration for no benefit.
            Array new_res;
            new_res.push_back(std::move(res));
            res = std::move(new_res);
        }
        return res;
    }

private:
    size_t num_dimensions_to_fold;
};
2022-02-17 19:00:25 +00:00
/// Deduces the common types of Object columns over a range of entries.
///
/// Each entry in [@begin, @end) holds a collection of column-like objects
/// (e.g. ColumnsDescription or NamesAndTypesList); @entry_columns_getter must return
/// a reference to that collection for the entry an Iterator points to.
/// A column-like object is required to have the fields "name" and "type".
///
/// Only columns that are physical columns of @storage_columns and have Object type there
/// are taken into account. If the range is empty, every Object column of @storage_columns
/// gets a Tuple with a single UInt8 element named ColumnObject::COLUMN_NAME_DUMMY.
template <typename Iterator, typename EntryColumnsGetter>
ColumnsDescription getObjectColumns(
    Iterator begin, Iterator end,
    const ColumnsDescription & storage_columns,
    EntryColumnsGetter && entry_columns_getter)
{
    ColumnsDescription res;

    /// Nothing to deduce from: describe each Object column with the dummy Tuple.
    if (begin == end)
    {
        for (const auto & storage_column : storage_columns)
        {
            if (!isObject(storage_column.type))
                continue;

            auto dummy_tuple = std::make_shared<DataTypeTuple>(
                DataTypes{std::make_shared<DataTypeUInt8>()},
                Names{ColumnObject::COLUMN_NAME_DUMMY});

            res.add({storage_column.name, std::move(dummy_tuple)});
        }

        return res;
    }

    /// Column name -> types this column has in the individual entries.
    std::unordered_map<String, DataTypes> types_by_column;

    for (auto entry = begin; entry != end; ++entry)
    {
        const auto & columns_of_entry = entry_columns_getter(*entry);
        for (const auto & entry_column : columns_of_entry)
        {
            auto physical_column = storage_columns.tryGetPhysical(entry_column.name);

            /// Skip columns unknown to the storage and columns that are not Object there.
            if (!physical_column || !isObject(physical_column->type))
                continue;

            types_by_column[entry_column.name].push_back(entry_column.type);
        }
    }

    for (const auto & [column_name, column_types] : types_by_column)
        res.add({column_name, getLeastCommonTypeForObject(column_types)});

    return res;
}
2021-04-23 23:56:26 +00:00
}