mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
add comments and small refactoring
This commit is contained in:
parent
edd686e1d9
commit
0a7895ebb9
@ -1,13 +1,10 @@
|
||||
#include <Core/Field.h>
|
||||
#include <Columns/ColumnObject.h>
|
||||
#include <Columns/ColumnSparse.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/FieldToDataType.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
@ -15,8 +12,6 @@
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
|
||||
#include <Common/FieldVisitorToString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -32,6 +27,7 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Recreates column with default scalar values and keeps sizes of arrays.
|
||||
ColumnPtr recreateColumnWithDefaultValues(
|
||||
const ColumnPtr & column, const DataTypePtr & scalar_type, size_t num_dimensions)
|
||||
{
|
||||
@ -47,43 +43,44 @@ ColumnPtr recreateColumnWithDefaultValues(
|
||||
return createArrayOfType(scalar_type, num_dimensions)->createColumn()->cloneResized(column->size());
|
||||
}
|
||||
|
||||
/// Replaces NULL fields with the given field or an empty array.
|
||||
class FieldVisitorReplaceNull : public StaticVisitor<Field>
|
||||
{
|
||||
public:
|
||||
[[maybe_unused]] explicit FieldVisitorReplaceNull(
|
||||
explicit FieldVisitorReplaceNull(
|
||||
const Field & replacement_, size_t num_dimensions_)
|
||||
: replacement(replacement_)
|
||||
, num_dimensions(num_dimensions_)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Field operator()(const T & x) const
|
||||
Field operator()(const Null &) const
|
||||
{
|
||||
if constexpr (std::is_same_v<T, Null>)
|
||||
{
|
||||
return num_dimensions
|
||||
? createEmptyArrayField(num_dimensions)
|
||||
: replacement;
|
||||
}
|
||||
else if constexpr (std::is_same_v<T, Array>)
|
||||
{
|
||||
assert(num_dimensions > 0);
|
||||
const size_t size = x.size();
|
||||
Array res(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
res[i] = applyVisitor(FieldVisitorReplaceNull(replacement, num_dimensions - 1), x[i]);
|
||||
return res;
|
||||
}
|
||||
else
|
||||
return x;
|
||||
return num_dimensions
|
||||
? createEmptyArrayField(num_dimensions)
|
||||
: replacement;
|
||||
}
|
||||
|
||||
Field operator()(const Array & x) const
|
||||
{
|
||||
assert(num_dimensions > 0);
|
||||
const size_t size = x.size();
|
||||
Array res(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
res[i] = applyVisitor(FieldVisitorReplaceNull(replacement, num_dimensions - 1), x[i]);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Field operator()(const T & x) const { return x; }
|
||||
|
||||
private:
|
||||
const Field & replacement;
|
||||
size_t num_dimensions;
|
||||
};
|
||||
|
||||
/// Calculates number of dimensions in array field.
|
||||
/// Returns 0 for scalar fields.
|
||||
class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t>
|
||||
{
|
||||
public:
|
||||
@ -114,6 +111,9 @@ public:
|
||||
size_t operator()(const T &) const { return 0; }
|
||||
};
|
||||
|
||||
/// Visitor that allows to get type of scalar field
|
||||
/// or least common type of scalars in array.
|
||||
/// More optimized version of FieldToDataType.
|
||||
class FieldVisitorToScalarType : public StaticVisitor<>
|
||||
{
|
||||
public:
|
||||
@ -160,8 +160,7 @@ public:
|
||||
template <typename T>
|
||||
void operator()(const T &)
|
||||
{
|
||||
auto field_type = Field::TypeToEnum<NearestFieldType<T>>::value;
|
||||
field_types.insert(field_type);
|
||||
field_types.insert(Field::TypeToEnum<NearestFieldType<T>>::value);
|
||||
type_indexes.insert(TypeToTypeIndex<NearestFieldType<T>>);
|
||||
}
|
||||
|
||||
@ -280,18 +279,10 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
|
||||
if (is_nullable)
|
||||
base_type = makeNullable(base_type);
|
||||
|
||||
DataTypePtr value_type;
|
||||
if (!is_nullable && info.have_nulls)
|
||||
{
|
||||
auto default_value = base_type->getDefault();
|
||||
value_type = createArrayOfType(base_type, value_dim);
|
||||
field = applyVisitor(FieldVisitorReplaceNull(default_value, value_dim), std::move(field));
|
||||
}
|
||||
else
|
||||
{
|
||||
value_type = createArrayOfType(base_type, value_dim);
|
||||
}
|
||||
field = applyVisitor(FieldVisitorReplaceNull(base_type->getDefault(), value_dim), std::move(field));
|
||||
|
||||
auto value_type = createArrayOfType(base_type, value_dim);
|
||||
bool type_changed = false;
|
||||
|
||||
if (data.empty())
|
||||
@ -311,12 +302,9 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
|
||||
}
|
||||
|
||||
if (type_changed || info.need_convert)
|
||||
{
|
||||
auto converted_field = convertFieldToTypeOrThrow(std::move(field), *value_type);
|
||||
data.back()->insert(std::move(converted_field));
|
||||
}
|
||||
else
|
||||
data.back()->insert(std::move(field));
|
||||
field = convertFieldToTypeOrThrow(std::move(field), *value_type);
|
||||
|
||||
data.back()->insert(std::move(field));
|
||||
}
|
||||
|
||||
void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length)
|
||||
@ -372,6 +360,10 @@ void ColumnObject::Subcolumn::finalize()
|
||||
auto offsets = ColumnUInt64::create();
|
||||
auto & offsets_data = offsets->getData();
|
||||
|
||||
/// We need to convert only non-default values and then recreate column
|
||||
/// with default value of new type, because default values (which represent missing values in data)
|
||||
/// may be inconsistent between types (e.g. "0" in UInt64 and empty string in String).
|
||||
|
||||
part->getIndicesOfNonDefaultRows(offsets_data, 0, part_size);
|
||||
|
||||
if (offsets->size() == part_size)
|
||||
@ -448,16 +440,16 @@ Field ColumnObject::Subcolumn::getLastField() const
|
||||
|
||||
ColumnObject::Subcolumn ColumnObject::Subcolumn::recreateWithDefaultValues(const FieldInfo & field_info) const
|
||||
{
|
||||
auto scalar_type = field_info.scalar_type;
|
||||
if (is_nullable)
|
||||
scalar_type = makeNullable(scalar_type);
|
||||
|
||||
Subcolumn new_subcolumn;
|
||||
new_subcolumn.least_common_type = createArrayOfType(field_info.scalar_type, field_info.num_dimensions);
|
||||
new_subcolumn.least_common_type = createArrayOfType(scalar_type, field_info.num_dimensions);
|
||||
new_subcolumn.is_nullable = is_nullable;
|
||||
new_subcolumn.num_of_defaults_in_prefix = num_of_defaults_in_prefix;
|
||||
new_subcolumn.data.reserve(data.size());
|
||||
|
||||
auto scalar_type = field_info.scalar_type;
|
||||
if (new_subcolumn.is_nullable)
|
||||
scalar_type = makeNullable(scalar_type);
|
||||
|
||||
for (const auto & part : data)
|
||||
new_subcolumn.data.push_back(recreateColumnWithDefaultValues(
|
||||
part, scalar_type, field_info.num_dimensions));
|
||||
@ -524,6 +516,7 @@ size_t ColumnObject::size() const
|
||||
|
||||
MutableColumnPtr ColumnObject::cloneResized(size_t new_size) const
|
||||
{
|
||||
/// cloneResized with new_size == 0 is used for cloneEmpty().
|
||||
if (new_size != 0)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
|
||||
"ColumnObject doesn't support resize to non-zero length");
|
||||
@ -663,7 +656,7 @@ const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & ke
|
||||
ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key)
|
||||
{
|
||||
if (const auto * node = subcolumns.findLeaf(key))
|
||||
return const_cast<SubcolumnsTree::Leaf *>(node)->data;
|
||||
return const_cast<SubcolumnsTree::Node *>(node)->data;
|
||||
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObject", key.getPath());
|
||||
}
|
||||
@ -702,23 +695,29 @@ void ColumnObject::addNestedSubcolumn(const PathInData & key, const FieldInfo &
|
||||
"Cannot add Nested subcolumn, because path doesn't contain Nested");
|
||||
|
||||
bool inserted = false;
|
||||
/// We find node that represents the same Nested type as @key.
|
||||
const auto * nested_node = subcolumns.findBestMatch(key);
|
||||
|
||||
if (nested_node)
|
||||
{
|
||||
/// Find any leaf of Nested subcolumn.
|
||||
const auto * leaf = subcolumns.findLeaf(nested_node, [&](const auto &) { return true; });
|
||||
assert(leaf);
|
||||
|
||||
/// Recreate subcolumn with default values and the same sizes of arrays.
|
||||
auto new_subcolumn = leaf->data.recreateWithDefaultValues(field_info);
|
||||
|
||||
/// It's possible that we have already inserted value from current row
|
||||
/// to this subcolumn. So, adjust size to expected.
|
||||
if (new_subcolumn.size() > new_size)
|
||||
new_subcolumn.popBack(new_subcolumn.size() - new_size);
|
||||
else if (new_subcolumn.size() < new_size)
|
||||
new_subcolumn.insertManyDefaults(new_size - new_subcolumn.size());
|
||||
|
||||
assert(new_subcolumn.size() == new_size);
|
||||
inserted = subcolumns.add(key, new_subcolumn);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// If node was not found just add subcolumn with empty arrays.
|
||||
inserted = subcolumns.add(key, Subcolumn(new_size, is_nullable));
|
||||
}
|
||||
|
||||
@ -751,6 +750,8 @@ void ColumnObject::finalize()
|
||||
for (auto && entry : subcolumns)
|
||||
{
|
||||
const auto & least_common_type = entry->data.getLeastCommonType();
|
||||
|
||||
/// Do not add subcolumns that consist only of NULLs.
|
||||
if (isNothing(getBaseTypeOfArray(least_common_type)))
|
||||
continue;
|
||||
|
||||
@ -758,6 +759,8 @@ void ColumnObject::finalize()
|
||||
new_subcolumns.add(entry->path, std::move(entry->data));
|
||||
}
|
||||
|
||||
/// If all subcolumns were skipped add a dummy subcolumn,
|
||||
/// because Tuple type must have at least one element.
|
||||
if (new_subcolumns.empty())
|
||||
new_subcolumns.add(PathInData{COLUMN_NAME_DUMMY}, Subcolumn{ColumnUInt8::create(old_size), is_nullable});
|
||||
|
||||
|
@ -18,19 +18,43 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/// Info that represents a scalar or array field in a decomposed view.
|
||||
/// It allows to recreate field with different number
|
||||
/// of dimensions or nullability.
|
||||
struct FieldInfo
|
||||
{
|
||||
/// The common type of all scalars in field.
|
||||
DataTypePtr scalar_type;
|
||||
|
||||
/// Do we have NULL scalar in field.
|
||||
bool have_nulls;
|
||||
|
||||
/// If true then we have scalars with different types in array and
|
||||
/// we need to convert scalars to the common type.
|
||||
bool need_convert;
|
||||
|
||||
/// Number of dimensions in array. 0 if field is scalar.
|
||||
size_t num_dimensions;
|
||||
};
|
||||
|
||||
FieldInfo getFieldInfo(const Field & field);
|
||||
|
||||
/** A column that represents object with dynamic set of subcolumns.
|
||||
* Subcolumns are identified by paths in document and are stored in
|
||||
* a trie-like structure. ColumnObject is not suitable for writing into tables
|
||||
* and it should be converted to Tuple with fixed set of subcolumns before that.
|
||||
*/
|
||||
class ColumnObject final : public COWHelper<IColumn, ColumnObject>
|
||||
{
|
||||
public:
|
||||
/** Class that represents one subcolumn.
|
||||
* It stores values in several parts of column
|
||||
* and keeps current common type of all parts.
|
||||
* We add a new column part with a new type, when we insert a field,
|
||||
* which can't be converted to the current common type.
|
||||
* After insertion of all values subcolumn should be finalized
|
||||
* for writing and other operations.
|
||||
*/
|
||||
class Subcolumn
|
||||
{
|
||||
public:
|
||||
@ -44,8 +68,12 @@ public:
|
||||
|
||||
bool isFinalized() const { return data.size() == 1 && num_of_defaults_in_prefix == 0; }
|
||||
const DataTypePtr & getLeastCommonType() const { return least_common_type; }
|
||||
|
||||
/// Checks the consistency of column's parts stored in @data.
|
||||
void checkTypes() const;
|
||||
|
||||
/// Inserts a field, whose scalars can be arbitrary, but number of
|
||||
/// dimensions should be consistent with current common type.
|
||||
void insert(Field field);
|
||||
void insert(Field field, FieldInfo info);
|
||||
|
||||
@ -54,11 +82,19 @@ public:
|
||||
void insertRangeFrom(const Subcolumn & src, size_t start, size_t length);
|
||||
void popBack(size_t n);
|
||||
|
||||
/// Converts all column's parts to the common type and
|
||||
/// creates a single column that stores all values.
|
||||
void finalize();
|
||||
|
||||
/// Returns last inserted field.
|
||||
Field getLastField() const;
|
||||
|
||||
/// Recreates subcolumn with default scalar values and keeps sizes of arrays.
|
||||
/// Used to create columns of type Nested with consistent array sizes.
|
||||
Subcolumn recreateWithDefaultValues(const FieldInfo & field_info) const;
|
||||
|
||||
/// Returns single column if subcolumn is finalized.
|
||||
/// Otherwise -- undefined behaviour.
|
||||
IColumn & getFinalizedColumn();
|
||||
const IColumn & getFinalizedColumn() const;
|
||||
const ColumnPtr & getFinalizedColumnPtr() const;
|
||||
@ -66,15 +102,28 @@ public:
|
||||
friend class ColumnObject;
|
||||
|
||||
private:
|
||||
/// Current least common type of all values inserted to this subcolumn.
|
||||
DataTypePtr least_common_type;
|
||||
|
||||
/// If true then common type of subcolumn is Nullable
|
||||
/// and default values are NULLs.
|
||||
bool is_nullable = false;
|
||||
|
||||
/// Parts of column. Parts should be in increasing order in terms of subtypes/supertypes.
|
||||
/// That means that the least common type for i-th prefix is the type of i-th part
|
||||
/// and it's the supertype for all types of column parts from 0 to i-1.
|
||||
std::vector<WrappedPtr> data;
|
||||
|
||||
/// Until we insert any non-default field we don't know further
|
||||
/// least common type and we count number of defaults in prefix,
|
||||
/// which will be converted to the default type of final common type.
|
||||
size_t num_of_defaults_in_prefix = 0;
|
||||
};
|
||||
|
||||
using SubcolumnsTree = SubcolumnsTree<Subcolumn>;
|
||||
|
||||
private:
|
||||
/// If true then all subcolumns are nullable.
|
||||
const bool is_nullable;
|
||||
|
||||
SubcolumnsTree subcolumns;
|
||||
@ -86,6 +135,7 @@ public:
|
||||
explicit ColumnObject(bool is_nullable_);
|
||||
ColumnObject(SubcolumnsTree && subcolumns_, bool is_nullable_);
|
||||
|
||||
/// Checks that all subcolumns have consistent sizes.
|
||||
void checkConsistency() const;
|
||||
|
||||
bool hasSubcolumn(const PathInData & key) const;
|
||||
@ -95,16 +145,23 @@ public:
|
||||
|
||||
void incrementNumRows() { ++num_rows; }
|
||||
|
||||
/// Adds a subcolumn from existing IColumn.
|
||||
void addSubcolumn(const PathInData & key, MutableColumnPtr && subcolumn);
|
||||
|
||||
/// Adds a subcolumn of specific size with default values.
|
||||
void addSubcolumn(const PathInData & key, size_t new_size);
|
||||
|
||||
/// Adds a subcolumn of type Nested of specific size with default values.
|
||||
/// It cares about consistency of sizes of Nested arrays.
|
||||
void addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size);
|
||||
|
||||
const SubcolumnsTree & getSubcolumns() const { return subcolumns; }
|
||||
SubcolumnsTree & getSubcolumns() { return subcolumns; }
|
||||
PathsInData getKeys() const;
|
||||
|
||||
bool isFinalized() const;
|
||||
/// Finalizes all subcolumns.
|
||||
void finalize();
|
||||
bool isFinalized() const;
|
||||
|
||||
/// Part of interface
|
||||
|
||||
|
@ -13,7 +13,6 @@
|
||||
#cmakedefine01 USE_CASSANDRA
|
||||
#cmakedefine01 USE_SENTRY
|
||||
#cmakedefine01 USE_GRPC
|
||||
#cmakedefine01 USE_STATS
|
||||
#cmakedefine01 USE_SIMDJSON
|
||||
#cmakedefine01 USE_RAPIDJSON
|
||||
|
||||
|
@ -759,27 +759,27 @@ private:
|
||||
using Row = std::vector<Field>;
|
||||
|
||||
|
||||
template <> struct Field::TypeToEnum<Null> { static const Types::Which value = Types::Null; };
|
||||
template <> struct Field::TypeToEnum<UInt64> { static const Types::Which value = Types::UInt64; };
|
||||
template <> struct Field::TypeToEnum<UInt128> { static const Types::Which value = Types::UInt128; };
|
||||
template <> struct Field::TypeToEnum<UInt256> { static const Types::Which value = Types::UInt256; };
|
||||
template <> struct Field::TypeToEnum<Int64> { static const Types::Which value = Types::Int64; };
|
||||
template <> struct Field::TypeToEnum<Int128> { static const Types::Which value = Types::Int128; };
|
||||
template <> struct Field::TypeToEnum<Int256> { static const Types::Which value = Types::Int256; };
|
||||
template <> struct Field::TypeToEnum<UUID> { static const Types::Which value = Types::UUID; };
|
||||
template <> struct Field::TypeToEnum<Float64> { static const Types::Which value = Types::Float64; };
|
||||
template <> struct Field::TypeToEnum<String> { static const Types::Which value = Types::String; };
|
||||
template <> struct Field::TypeToEnum<Array> { static const Types::Which value = Types::Array; };
|
||||
template <> struct Field::TypeToEnum<Tuple> { static const Types::Which value = Types::Tuple; };
|
||||
template <> struct Field::TypeToEnum<Map> { static const Types::Which value = Types::Map; };
|
||||
template <> struct Field::TypeToEnum<Object> { static const Types::Which value = Types::Object; };
|
||||
template <> struct Field::TypeToEnum<DecimalField<Decimal32>>{ static const Types::Which value = Types::Decimal32; };
|
||||
template <> struct Field::TypeToEnum<DecimalField<Decimal64>>{ static const Types::Which value = Types::Decimal64; };
|
||||
template <> struct Field::TypeToEnum<DecimalField<Decimal128>>{ static const Types::Which value = Types::Decimal128; };
|
||||
template <> struct Field::TypeToEnum<DecimalField<Decimal256>>{ static const Types::Which value = Types::Decimal256; };
|
||||
template <> struct Field::TypeToEnum<DecimalField<DateTime64>>{ static const Types::Which value = Types::Decimal64; };
|
||||
template <> struct Field::TypeToEnum<AggregateFunctionStateData>{ static const Types::Which value = Types::AggregateFunctionState; };
|
||||
template <> struct Field::TypeToEnum<bool>{ static const Types::Which value = Types::Bool; };
|
||||
template <> struct Field::TypeToEnum<Null> { static constexpr Types::Which value = Types::Null; };
|
||||
template <> struct Field::TypeToEnum<UInt64> { static constexpr Types::Which value = Types::UInt64; };
|
||||
template <> struct Field::TypeToEnum<UInt128> { static constexpr Types::Which value = Types::UInt128; };
|
||||
template <> struct Field::TypeToEnum<UInt256> { static constexpr Types::Which value = Types::UInt256; };
|
||||
template <> struct Field::TypeToEnum<Int64> { static constexpr Types::Which value = Types::Int64; };
|
||||
template <> struct Field::TypeToEnum<Int128> { static constexpr Types::Which value = Types::Int128; };
|
||||
template <> struct Field::TypeToEnum<Int256> { static constexpr Types::Which value = Types::Int256; };
|
||||
template <> struct Field::TypeToEnum<UUID> { static constexpr Types::Which value = Types::UUID; };
|
||||
template <> struct Field::TypeToEnum<Float64> { static constexpr Types::Which value = Types::Float64; };
|
||||
template <> struct Field::TypeToEnum<String> { static constexpr Types::Which value = Types::String; };
|
||||
template <> struct Field::TypeToEnum<Array> { static constexpr Types::Which value = Types::Array; };
|
||||
template <> struct Field::TypeToEnum<Tuple> { static constexpr Types::Which value = Types::Tuple; };
|
||||
template <> struct Field::TypeToEnum<Map> { static constexpr Types::Which value = Types::Map; };
|
||||
template <> struct Field::TypeToEnum<Object> { static constexpr Types::Which value = Types::Object; };
|
||||
template <> struct Field::TypeToEnum<DecimalField<Decimal32>>{ static constexpr Types::Which value = Types::Decimal32; };
|
||||
template <> struct Field::TypeToEnum<DecimalField<Decimal64>>{ static constexpr Types::Which value = Types::Decimal64; };
|
||||
template <> struct Field::TypeToEnum<DecimalField<Decimal128>>{ static constexpr Types::Which value = Types::Decimal128; };
|
||||
template <> struct Field::TypeToEnum<DecimalField<Decimal256>>{ static constexpr Types::Which value = Types::Decimal256; };
|
||||
template <> struct Field::TypeToEnum<DecimalField<DateTime64>>{ static constexpr Types::Which value = Types::Decimal64; };
|
||||
template <> struct Field::TypeToEnum<AggregateFunctionStateData>{ static constexpr Types::Which value = Types::AggregateFunctionState; };
|
||||
template <> struct Field::TypeToEnum<bool>{ static constexpr Types::Which value = Types::Bool; };
|
||||
|
||||
template <> struct Field::EnumToType<Field::Types::Null> { using Type = Null; };
|
||||
template <> struct Field::EnumToType<Field::Types::UInt64> { using Type = UInt64; };
|
||||
|
@ -26,7 +26,7 @@ DataTypeObject::DataTypeObject(const String & schema_format_, bool is_nullable_)
|
||||
bool DataTypeObject::equals(const IDataType & rhs) const
|
||||
{
|
||||
if (const auto * object = typeid_cast<const DataTypeObject *>(&rhs))
|
||||
return schema_format == object->schema_format;
|
||||
return schema_format == object->schema_format && is_nullable == object->is_nullable;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -408,7 +408,10 @@ inline bool isNothing(const DataTypePtr & data_type) { return WhichDataType(data
|
||||
inline bool isUUID(const DataTypePtr & data_type) { return WhichDataType(data_type).isUUID(); }
|
||||
|
||||
template <typename T>
|
||||
inline bool isObject(const T & data_type) {return WhichDataType(data_type).isObject(); }
|
||||
inline bool isObject(const T & data_type)
|
||||
{
|
||||
return WhichDataType(data_type).isObject();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool isUInt8(const T & data_type)
|
||||
|
@ -6,25 +6,18 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/FieldToDataType.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Columns/ColumnObject.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <base/EnumReflection.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -52,8 +45,9 @@ size_t getNumberOfDimensions(const IColumn & column)
|
||||
|
||||
DataTypePtr getBaseTypeOfArray(const DataTypePtr & type)
|
||||
{
|
||||
/// Get raw pointers to avoid extra copying of type pointers.
|
||||
const DataTypeArray * last_array = nullptr;
|
||||
const IDataType * current_type = type.get();
|
||||
const auto * current_type = type.get();
|
||||
while (const auto * type_array = typeid_cast<const DataTypeArray *>(current_type))
|
||||
{
|
||||
current_type = type_array->getNestedType().get();
|
||||
@ -65,8 +59,9 @@ DataTypePtr getBaseTypeOfArray(const DataTypePtr & type)
|
||||
|
||||
ColumnPtr getBaseColumnOfArray(const ColumnPtr & column)
|
||||
{
|
||||
/// Get raw pointers to avoid extra copying of column pointers.
|
||||
const ColumnArray * last_array = nullptr;
|
||||
const IColumn * current_column = column.get();
|
||||
const auto * current_column = column.get();
|
||||
while (const auto * column_array = checkAndGetColumn<ColumnArray>(current_column))
|
||||
{
|
||||
current_column = &column_array->getData();
|
||||
@ -92,6 +87,9 @@ ColumnPtr createArrayOfColumn(ColumnPtr column, size_t num_dimensions)
|
||||
|
||||
Array createEmptyArrayField(size_t num_dimensions)
|
||||
{
|
||||
if (num_dimensions == 0)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create array field with 0 dimensions");
|
||||
|
||||
Array array;
|
||||
Array * current_array = &array;
|
||||
for (size_t i = 1; i < num_dimensions; ++i)
|
||||
@ -138,53 +136,53 @@ void convertObjectsToTuples(NamesAndTypesList & columns_list, Block & block, con
|
||||
|
||||
for (auto & name_type : columns_list)
|
||||
{
|
||||
if (isObject(name_type.type))
|
||||
if (!isObject(name_type.type))
|
||||
continue;
|
||||
|
||||
auto & column = block.getByName(name_type.name);
|
||||
if (!isObject(column.type))
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Type for column '{}' mismatch in columns list and in block. In list: {}, in block: {}",
|
||||
name_type.name, name_type.type->getName(), column.type->getName());
|
||||
|
||||
const auto & column_object = assert_cast<const ColumnObject &>(*column.column);
|
||||
const auto & subcolumns = column_object.getSubcolumns();
|
||||
|
||||
if (!column_object.isFinalized())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot convert to tuple column '{}' from type {}. Column should be finalized first",
|
||||
name_type.name, name_type.type->getName());
|
||||
|
||||
PathsInData tuple_paths;
|
||||
DataTypes tuple_types;
|
||||
Columns tuple_columns;
|
||||
|
||||
for (const auto & entry : subcolumns)
|
||||
{
|
||||
auto & column = block.getByName(name_type.name);
|
||||
|
||||
if (!isObject(column.type))
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Type for column '{}' mismatch in columns list and in block. In list: {}, in block: {}",
|
||||
name_type.name, name_type.type->getName(), column.type->getName());
|
||||
|
||||
const auto & column_object = assert_cast<const ColumnObject &>(*column.column);
|
||||
const auto & subcolumns_map = column_object.getSubcolumns();
|
||||
|
||||
if (!column_object.isFinalized())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot convert to tuple column '{}' from type {}. Column should be finalized first",
|
||||
name_type.name, name_type.type->getName());
|
||||
|
||||
PathsInData tuple_paths;
|
||||
DataTypes tuple_types;
|
||||
Columns tuple_columns;
|
||||
|
||||
for (const auto & entry : subcolumns_map)
|
||||
{
|
||||
tuple_paths.emplace_back(entry->path);
|
||||
tuple_types.emplace_back(entry->data.getLeastCommonType());
|
||||
tuple_columns.emplace_back(entry->data.getFinalizedColumnPtr());
|
||||
}
|
||||
|
||||
auto it = storage_columns_map.find(name_type.name);
|
||||
if (it == storage_columns_map.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", name_type.name);
|
||||
|
||||
std::tie(column.column, column.type) = unflattenTuple(tuple_paths, tuple_types, tuple_columns);
|
||||
name_type.type = column.type;
|
||||
|
||||
getLeastCommonTypeForObject({column.type, it->second}, true);
|
||||
tuple_paths.emplace_back(entry->path);
|
||||
tuple_types.emplace_back(entry->data.getLeastCommonType());
|
||||
tuple_columns.emplace_back(entry->data.getFinalizedColumnPtr());
|
||||
}
|
||||
|
||||
auto it = storage_columns_map.find(name_type.name);
|
||||
if (it == storage_columns_map.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", name_type.name);
|
||||
|
||||
std::tie(column.column, column.type) = unflattenTuple(tuple_paths, tuple_types, tuple_columns);
|
||||
name_type.type = column.type;
|
||||
|
||||
/// Check that constructed Tuple type and type in storage are compatible.
|
||||
getLeastCommonTypeForObject({column.type, it->second}, true);
|
||||
}
|
||||
}
|
||||
|
||||
static bool isPrefix(const PathInData::Parts & prefix, const PathInData::Parts & strings)
|
||||
static bool isPrefix(const PathInData::Parts & prefix, const PathInData::Parts & parts)
|
||||
{
|
||||
if (prefix.size() > strings.size())
|
||||
if (prefix.size() > parts.size())
|
||||
return false;
|
||||
|
||||
for (size_t i = 0; i < prefix.size(); ++i)
|
||||
if (prefix[i].key != strings[i].key)
|
||||
if (prefix[i].key != parts[i].key)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@ -192,19 +190,15 @@ static bool isPrefix(const PathInData::Parts & prefix, const PathInData::Parts &
|
||||
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
|
||||
{
|
||||
size_t size = paths.size();
|
||||
std::vector<PathInData::Parts> names_parts(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
names_parts[i] = paths[i].getParts();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < i; ++j)
|
||||
{
|
||||
if (isPrefix(names_parts[i], names_parts[j]) || isPrefix(names_parts[j], names_parts[i]))
|
||||
if (isPrefix(paths[i].getParts(), paths[j].getParts())
|
||||
|| isPrefix(paths[j].getParts(), paths[i].getParts()))
|
||||
throw Exception(ErrorCodes::DUPLICATE_COLUMN,
|
||||
"Data in Object has ambiguous paths: '{}' and '{}'",
|
||||
paths[i].getPath(), paths[i].getPath());
|
||||
paths[i].getPath(), paths[j].getPath());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -227,8 +221,11 @@ DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambi
|
||||
if (all_equal)
|
||||
return types[0];
|
||||
|
||||
/// Types of subcolumns by path from all tuples.
|
||||
std::unordered_map<PathInData, DataTypes, PathInData::Hash> subcolumns_types;
|
||||
|
||||
/// First we flatten tuples, then get common type for paths
|
||||
/// and finally unflatten paths and create new tuple type.
|
||||
for (const auto & type : types)
|
||||
{
|
||||
const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get());
|
||||
@ -246,6 +243,7 @@ DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambi
|
||||
PathsInData tuple_paths;
|
||||
DataTypes tuple_types;
|
||||
|
||||
/// Get the least common type for all paths.
|
||||
for (const auto & [key, subtypes] : subcolumns_types)
|
||||
{
|
||||
assert(!subtypes.empty());
|
||||
@ -312,7 +310,7 @@ void updateObjectColumns(ColumnsDescription & object_columns, const NamesAndType
|
||||
{
|
||||
for (const auto & new_column : new_columns)
|
||||
{
|
||||
auto object_column = object_columns.tryGetPhysical(new_column.name);
|
||||
auto object_column = object_columns.tryGetColumn(GetColumnsOptions::All, new_column.name);
|
||||
if (object_column && !object_column->type->equals(*new_column.type))
|
||||
{
|
||||
object_columns.modify(new_column.name, [&](auto & column)
|
||||
@ -326,10 +324,14 @@ void updateObjectColumns(ColumnsDescription & object_columns, const NamesAndType
|
||||
namespace
|
||||
{
|
||||
|
||||
void flattenTupleImpl(PathInDataBuilder & builder, DataTypePtr type, size_t array_level, PathsInData & new_paths, DataTypes & new_types)
|
||||
void flattenTupleImpl(
|
||||
PathInDataBuilder & builder,
|
||||
DataTypePtr type,
|
||||
size_t array_level,
|
||||
PathsInData & new_paths,
|
||||
DataTypes & new_types)
|
||||
{
|
||||
bool is_nested = isNested(type);
|
||||
|
||||
if (is_nested)
|
||||
type = assert_cast<const DataTypeArray &>(*type).getNestedType();
|
||||
|
||||
@ -356,13 +358,14 @@ void flattenTupleImpl(PathInDataBuilder & builder, DataTypePtr type, size_t arra
|
||||
}
|
||||
}
|
||||
|
||||
/// @offsets_columns are used as stack of array offsets and allows to recreate Array columns.
|
||||
void flattenTupleImpl(const ColumnPtr & column, Columns & new_columns, Columns & offsets_columns)
|
||||
{
|
||||
if (const auto * column_tuple = checkAndGetColumn<ColumnTuple>(column.get()))
|
||||
{
|
||||
const auto & subcolumns = column_tuple->getColumns();
|
||||
for (const auto & subcolumn : subcolumns)
|
||||
flattenTupleImpl(subcolumn, new_columns,offsets_columns);
|
||||
flattenTupleImpl(subcolumn, new_columns, offsets_columns);
|
||||
}
|
||||
else if (const auto * column_array = checkAndGetColumn<ColumnArray>(column.get()))
|
||||
{
|
||||
@ -375,8 +378,8 @@ void flattenTupleImpl(const ColumnPtr & column, Columns & new_columns, Columns &
|
||||
if (!offsets_columns.empty())
|
||||
{
|
||||
auto new_column = ColumnArray::create(column, offsets_columns.back());
|
||||
for (ssize_t i = static_cast<ssize_t>(offsets_columns.size()) - 2; i >= 0; --i)
|
||||
new_column = ColumnArray::create(new_column, offsets_columns[i]);
|
||||
for (auto it = offsets_columns.rbegin() + 1; it != offsets_columns.rend(); ++it)
|
||||
new_column = ColumnArray::create(new_column, *it);
|
||||
|
||||
new_columns.push_back(std::move(new_column));
|
||||
}
|
||||
@ -422,9 +425,8 @@ struct ColumnWithTypeAndDimensions
|
||||
size_t array_dimensions;
|
||||
};
|
||||
|
||||
using SubcolumnsTreeWithTypes = SubcolumnsTree<ColumnWithTypeAndDimensions, ColumnWithTypeAndDimensions>;
|
||||
using SubcolumnsTreeWithTypes = SubcolumnsTree<ColumnWithTypeAndDimensions>;
|
||||
using Node = SubcolumnsTreeWithTypes::Node;
|
||||
using Leaf = SubcolumnsTreeWithTypes::Leaf;
|
||||
|
||||
std::pair<ColumnPtr, DataTypePtr> createTypeFromNode(const Node * node)
|
||||
{
|
||||
@ -438,6 +440,7 @@ std::pair<ColumnPtr, DataTypePtr> createTypeFromNode(const Node * node)
|
||||
tuple_elements.emplace_back(name, std::move(column), std::move(type));
|
||||
}
|
||||
|
||||
/// Sort to always create the same type for the same set of subcolumns.
|
||||
std::sort(tuple_elements.begin(), tuple_elements.end(),
|
||||
[](const auto & lhs, const auto & rhs) { return std::get<0>(lhs) < std::get<0>(rhs); });
|
||||
|
||||
@ -450,8 +453,7 @@ std::pair<ColumnPtr, DataTypePtr> createTypeFromNode(const Node * node)
|
||||
|
||||
if (node->kind == Node::SCALAR)
|
||||
{
|
||||
const auto * leaf = typeid_cast<const Leaf *>(node);
|
||||
return {leaf->data.column, leaf->data.type};
|
||||
return {node->data.column, node->data.type};
|
||||
}
|
||||
else if (node->kind == Node::NESTED)
|
||||
{
|
||||
@ -474,9 +476,9 @@ std::pair<ColumnPtr, DataTypePtr> createTypeFromNode(const Node * node)
|
||||
auto result_column = ColumnArray::create(ColumnTuple::create(tuple_columns), offsets_columns.back());
|
||||
auto result_type = createNested(tuple_types, tuple_names);
|
||||
|
||||
for (ssize_t i = static_cast<ssize_t>(offsets_columns.size()) - 2; i >= 0; --i)
|
||||
for (auto it = offsets_columns.rbegin() + 1; it != offsets_columns.rend(); ++it)
|
||||
{
|
||||
result_column = ColumnArray::create(result_column, offsets_columns[i]);
|
||||
result_column = ColumnArray::create(result_column, *it);
|
||||
result_type = std::make_shared<DataTypeArray>(result_type);
|
||||
}
|
||||
|
||||
@ -533,6 +535,9 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
|
||||
assert(paths.size() == tuple_types.size());
|
||||
assert(paths.size() == tuple_columns.size());
|
||||
|
||||
/// We add all paths to the subcolumn tree and then create a type from it.
|
||||
/// The tree stores column, type and number of array dimensions
|
||||
/// for each intermediate node.
|
||||
SubcolumnsTreeWithTypes tree;
|
||||
|
||||
for (size_t i = 0; i < paths.size(); ++i)
|
||||
@ -562,10 +567,9 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
|
||||
ColumnWithTypeAndDimensions current_column;
|
||||
if (kind == Node::NESTED)
|
||||
{
|
||||
size_t dimensions_to_reduce = array_level - nested_level;
|
||||
assert(parts[pos].is_nested);
|
||||
|
||||
++dimensions_to_reduce;
|
||||
size_t dimensions_to_reduce = array_level - nested_level + 1;
|
||||
--nested_level;
|
||||
|
||||
current_column = ColumnWithTypeAndDimensions{column, type, dimensions_to_reduce};
|
||||
@ -579,15 +583,16 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
|
||||
array_level -= dimensions_to_reduce;
|
||||
}
|
||||
else
|
||||
{
|
||||
current_column = ColumnWithTypeAndDimensions{column, type, 0};
|
||||
}
|
||||
|
||||
++pos;
|
||||
|
||||
if (exists)
|
||||
return nullptr;
|
||||
|
||||
return kind == Node::SCALAR
|
||||
? std::make_shared<Leaf>(paths[i], current_column)
|
||||
? std::make_shared<Node>(kind, current_column, paths[i])
|
||||
: std::make_shared<Node>(kind, current_column);
|
||||
});
|
||||
}
|
||||
|
@ -12,31 +12,64 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Returns number of dimensions in Array type. 0 if type is not array.
|
||||
size_t getNumberOfDimensions(const IDataType & type);
|
||||
size_t getNumberOfDimensions(const IColumn & column);
|
||||
DataTypePtr getBaseTypeOfArray(const DataTypePtr & type);
|
||||
DataTypePtr createArrayOfType(DataTypePtr type, size_t num_dimensions);
|
||||
Array createEmptyArrayField(size_t num_dimensions);
|
||||
|
||||
/// Returns number of dimensions in Array column. 0 if column is not array.
|
||||
size_t getNumberOfDimensions(const IColumn & column);
|
||||
|
||||
/// Returns type of scalars of Array of arbitrary dimensions.
|
||||
DataTypePtr getBaseTypeOfArray(const DataTypePtr & type);
|
||||
|
||||
/// Returns Array type with requested scalar type and number of dimensions.
|
||||
DataTypePtr createArrayOfType(DataTypePtr type, size_t num_dimensions);
|
||||
|
||||
/// Returns column of scalars of Array of arbitrary dimensions.
|
||||
ColumnPtr getBaseColumnOfArray(const ColumnPtr & column);
|
||||
|
||||
/// Returns empty Array column with requested scalar column and number of dimensions.
|
||||
ColumnPtr createArrayOfColumn(const ColumnPtr & column, size_t num_dimensions);
|
||||
|
||||
/// Returns Array with requested number of dimensions and no scalars.
|
||||
Array createEmptyArrayField(size_t num_dimensions);
|
||||
|
||||
/// Tries to get data type by column. Only a limited subset of types is supported.
|
||||
DataTypePtr getDataTypeByColumn(const IColumn & column);
|
||||
|
||||
/// Converts Object types and columns to Tuples in @columns_list and @block
|
||||
/// and checks that types are consistent with types in @extended_storage_columns.
|
||||
void convertObjectsToTuples(NamesAndTypesList & columns_list, Block & block, const NamesAndTypesList & extended_storage_columns);
|
||||
|
||||
/// Checks that each path is not the prefix of any other path.
|
||||
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths);
|
||||
|
||||
/// Receives several Tuple types and deduces the least common type among them.
|
||||
DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambiguos_paths = false);
|
||||
|
||||
/// Converts types of object columns to tuples in @columns_list
|
||||
/// according to @object_columns and adds all tuple's subcolumns if needed.
|
||||
void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescription & object_columns, bool with_subcolumns);
|
||||
|
||||
NameSet getNamesOfObjectColumns(const NamesAndTypesList & columns_list);
|
||||
bool hasObjectColumns(const ColumnsDescription & columns);
|
||||
void finalizeObjectColumns(MutableColumns & columns);
|
||||
|
||||
/// Updates types of objects in @object_columns inplace
|
||||
/// according to types in @new_columns.
|
||||
void updateObjectColumns(ColumnsDescription & object_columns, const NamesAndTypesList & new_columns);
|
||||
|
||||
using DataTypeTuplePtr = std::shared_ptr<DataTypeTuple>;
|
||||
|
||||
/// Flattens nested Tuple to plain Tuple, i.e. extracts all paths and types from the tuple.
|
||||
/// E.g. Tuple(t Tuple(c1 UInt32, c2 String), c3 UInt64) -> Tuple(t.c1 UInt32, t.c2 String, c3 UInt64)
|
||||
std::pair<PathsInData, DataTypes> flattenTuple(const DataTypePtr & type);
|
||||
|
||||
/// Flattens nested Tuple column to plain Tuple column.
|
||||
ColumnPtr flattenTuple(const ColumnPtr & column);
|
||||
|
||||
/// The reverse operation to 'flattenTuple'.
|
||||
/// Creates nested Tuple from all paths and types.
|
||||
/// E.g. Tuple(t.c1 UInt32, t.c2 String, c3 UInt64) -> Tuple(t Tuple(c1 UInt32, c2 String), c3 UInt64)
|
||||
DataTypePtr unflattenTuple(
|
||||
const PathsInData & paths,
|
||||
const DataTypes & tuple_types);
|
||||
@ -46,13 +79,20 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
|
||||
const DataTypes & tuple_types,
|
||||
const Columns & tuple_columns);
|
||||
|
||||
/// For all columns which exist in @expected_columns and
|
||||
/// don't exist in @available_columns adds to WITH clause
|
||||
/// an alias with column name to literal of default value of column type.
|
||||
void replaceMissedSubcolumnsByConstants(
|
||||
const ColumnsDescription & expected_columns,
|
||||
const ColumnsDescription & available_columns,
|
||||
ASTPtr query);
|
||||
|
||||
void finalizeObjectColumns(MutableColumns & columns);
|
||||
|
||||
/// Receives range of objects, which contains collections
|
||||
/// of columns-like objects (e.g. ColumnsDescription or NamesAndTypesList)
|
||||
/// and deduces the common types of object columns for all entries.
|
||||
/// @entry_columns_getter should extract reference to collection of
|
||||
/// columns-like objects from entry to which Iterator points.
|
||||
/// columns-like object should have fields "name" and "type".
|
||||
template <typename Iterator, typename EntryColumnsGetter>
|
||||
ColumnsDescription getObjectColumns(
|
||||
Iterator begin, Iterator end,
|
||||
|
@ -136,6 +136,7 @@ public:
|
||||
/// Index of tuple element (starting at 1) or its name.
|
||||
String tuple_element_name;
|
||||
|
||||
/// Name of subcolumn of object column.
|
||||
String object_key_name;
|
||||
|
||||
/// Do we need to escape a dot in filenames for tuple elements.
|
||||
|
@ -61,19 +61,21 @@ private:
|
||||
size_t num_dimensions_to_keep;
|
||||
};
|
||||
|
||||
using Node = typename ColumnObject::SubcolumnsTree::Node;
|
||||
|
||||
bool tryInsertDefaultFromNested(
|
||||
ColumnObject::SubcolumnsTree::LeafPtr entry, const ColumnObject::SubcolumnsTree & subcolumns)
|
||||
std::shared_ptr<Node> entry, const ColumnObject::SubcolumnsTree & subcolumns)
|
||||
{
|
||||
if (!entry->path.hasNested())
|
||||
return false;
|
||||
|
||||
const ColumnObject::SubcolumnsTree::Node * node = subcolumns.findLeaf(entry->path);
|
||||
const ColumnObject::SubcolumnsTree::Leaf * leaf = nullptr;
|
||||
const Node * current_node = subcolumns.findLeaf(entry->path);
|
||||
const Node * leaf = nullptr;
|
||||
size_t num_skipped_nested = 0;
|
||||
|
||||
while (node)
|
||||
while (current_node)
|
||||
{
|
||||
const auto * node_nested = subcolumns.findParent(node,
|
||||
const auto * node_nested = subcolumns.findParent(current_node,
|
||||
[](const auto & candidate) { return candidate.isNested(); });
|
||||
|
||||
if (!node_nested)
|
||||
@ -88,7 +90,7 @@ bool tryInsertDefaultFromNested(
|
||||
if (leaf)
|
||||
break;
|
||||
|
||||
node = node_nested->parent;
|
||||
current_node = node_nested->parent;
|
||||
++num_skipped_nested;
|
||||
}
|
||||
|
||||
|
@ -8,9 +8,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct EmptyNodeData {};
|
||||
|
||||
template <typename LeafData, typename NodeData = EmptyNodeData>
|
||||
template <typename NodeData>
|
||||
class SubcolumnsTree
|
||||
{
|
||||
public:
|
||||
@ -25,40 +23,31 @@ public:
|
||||
|
||||
explicit Node(Kind kind_) : kind(kind_) {}
|
||||
Node(Kind kind_, const NodeData & data_) : kind(kind_), data(data_) {}
|
||||
Node(Kind kind_, const NodeData & data_, const PathInData & path_)
|
||||
: kind(kind_), data(data_), path(path_) {}
|
||||
|
||||
Kind kind = TUPLE;
|
||||
const Node * parent = nullptr;
|
||||
|
||||
std::map<String, std::shared_ptr<Node>, std::less<>> children;
|
||||
|
||||
NodeData data;
|
||||
PathInData path;
|
||||
|
||||
bool isNested() const { return kind == NESTED; }
|
||||
bool isScalar() const { return kind == SCALAR; }
|
||||
|
||||
void addChild(const String & key, std::shared_ptr<Node> next_node)
|
||||
{
|
||||
next_node->parent = this;
|
||||
children[key] = std::move(next_node);
|
||||
}
|
||||
|
||||
virtual ~Node() = default;
|
||||
};
|
||||
|
||||
struct Leaf : public Node
|
||||
{
|
||||
Leaf(const PathInData & path_, const LeafData & data_)
|
||||
: Node(Node::SCALAR), path(path_), data(data_)
|
||||
{
|
||||
}
|
||||
|
||||
PathInData path;
|
||||
LeafData data;
|
||||
};
|
||||
|
||||
using NodeKind = typename Node::Kind;
|
||||
using NodePtr = std::shared_ptr<Node>;
|
||||
using LeafPtr = std::shared_ptr<Leaf>;
|
||||
|
||||
bool add(const PathInData & path, const LeafData & leaf_data)
|
||||
bool add(const PathInData & path, const NodeData & leaf_data)
|
||||
{
|
||||
return add(path, [&](NodeKind kind, bool exists) -> NodePtr
|
||||
{
|
||||
@ -66,7 +55,7 @@ public:
|
||||
return nullptr;
|
||||
|
||||
if (kind == Node::SCALAR)
|
||||
return std::make_shared<Leaf>(path, leaf_data);
|
||||
return std::make_shared<Node>(kind, leaf_data, path);
|
||||
|
||||
return std::make_shared<Node>(kind);
|
||||
});
|
||||
@ -94,9 +83,8 @@ public:
|
||||
{
|
||||
current_node = it->second.get();
|
||||
node_creator(current_node->kind, true);
|
||||
bool current_node_is_nested = current_node->kind == Node::NESTED;
|
||||
|
||||
if (current_node_is_nested != parts[i].is_nested)
|
||||
if (current_node->isNested() != parts[i].is_nested)
|
||||
return false;
|
||||
}
|
||||
else
|
||||
@ -114,10 +102,7 @@ public:
|
||||
|
||||
auto next_node = node_creator(Node::SCALAR, false);
|
||||
current_node->addChild(String(parts.back().key), next_node);
|
||||
|
||||
auto leaf = std::dynamic_pointer_cast<Leaf>(next_node);
|
||||
assert(leaf);
|
||||
leaves.push_back(std::move(leaf));
|
||||
leaves.push_back(std::move(next_node));
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -132,22 +117,28 @@ public:
|
||||
return findImpl(path, true);
|
||||
}
|
||||
|
||||
const Leaf * findLeaf(const PathInData & path) const
|
||||
const Node * findLeaf(const PathInData & path) const
|
||||
{
|
||||
return typeid_cast<const Leaf *>(findExact(path));
|
||||
const auto * candidate = findExact(path);
|
||||
if (!candidate || !candidate->isScalar())
|
||||
return nullptr;
|
||||
return candidate;
|
||||
}
|
||||
|
||||
using LeafPredicate = std::function<bool(const Leaf &)>;
|
||||
using NodePredicate = std::function<bool(const Node &)>;
|
||||
|
||||
const Leaf * findLeaf(const LeafPredicate & predicate)
|
||||
const Node * findLeaf(const NodePredicate & predicate)
|
||||
{
|
||||
return findLeaf(root.get(), predicate);
|
||||
}
|
||||
|
||||
static const Leaf * findLeaf(const Node * node, const LeafPredicate & predicate)
|
||||
static const Node * findLeaf(const Node * node, const NodePredicate & predicate)
|
||||
{
|
||||
if (const auto * leaf = typeid_cast<const Leaf *>(node))
|
||||
return predicate(*leaf) ? leaf : nullptr;
|
||||
if (!node)
|
||||
return nullptr;
|
||||
|
||||
if (node->isScalar())
|
||||
return predicate(*node) ? node : nullptr;
|
||||
|
||||
for (const auto & [_, child] : node->children)
|
||||
if (const auto * leaf = findLeaf(child.get(), predicate))
|
||||
@ -156,8 +147,6 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
using NodePredicate = std::function<bool(const Node &)>;
|
||||
|
||||
static const Node * findParent(const Node * node, const NodePredicate & predicate)
|
||||
{
|
||||
while (node && !predicate(*node))
|
||||
@ -168,12 +157,13 @@ public:
|
||||
bool empty() const { return root == nullptr; }
|
||||
size_t size() const { return leaves.size(); }
|
||||
|
||||
using Leaves = std::vector<LeafPtr>;
|
||||
const Leaves & getLeaves() const { return leaves; }
|
||||
using Nodes = std::vector<NodePtr>;
|
||||
|
||||
const Nodes & getLeaves() const { return leaves; }
|
||||
const Node * getRoot() const { return root.get(); }
|
||||
|
||||
using iterator = typename Leaves::iterator;
|
||||
using const_iterator = typename Leaves::const_iterator;
|
||||
using iterator = typename Nodes::iterator;
|
||||
using const_iterator = typename Nodes::const_iterator;
|
||||
|
||||
iterator begin() { return leaves.begin(); }
|
||||
iterator end() { return leaves.end(); }
|
||||
@ -200,11 +190,10 @@ private:
|
||||
}
|
||||
|
||||
return current_node;
|
||||
|
||||
}
|
||||
|
||||
NodePtr root;
|
||||
Leaves leaves;
|
||||
Nodes leaves;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -14,7 +14,6 @@ namespace DB
|
||||
* Examples: there is no least common supertype for Array(UInt8), Int8.
|
||||
*/
|
||||
DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_string = false);
|
||||
DataTypePtr getLeastSupertype(const DataTypePtr & lhs, const DataTypePtr & rhs, bool allow_conversion_to_string = false);
|
||||
|
||||
using TypeIndexSet = std::unordered_set<TypeIndex>;
|
||||
DataTypePtr getLeastSupertype(const TypeIndexSet & types, bool allow_conversion_to_string = false);
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <Storages/AlterCommands.h>
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
Loading…
Reference in New Issue
Block a user