#include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; extern const int DUPLICATE_COLUMN; extern const int NUMBER_OF_DIMENSIONS_MISMATHED; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int ARGUMENT_OUT_OF_BOUND; } namespace { /// Recreates column with default scalar values and keeps sizes of arrays. ColumnPtr recreateColumnWithDefaultValues( const ColumnPtr & column, const DataTypePtr & scalar_type, size_t num_dimensions) { const auto * column_array = checkAndGetColumn(column.get()); if (column_array && num_dimensions) { return ColumnArray::create( recreateColumnWithDefaultValues( column_array->getDataPtr(), scalar_type, num_dimensions - 1), IColumn::mutate(column_array->getOffsetsPtr())); } return createArrayOfType(scalar_type, num_dimensions)->createColumn()->cloneResized(column->size()); } /// Replaces NULL fields to given field or empty array. class FieldVisitorReplaceNull : public StaticVisitor { public: explicit FieldVisitorReplaceNull( const Field & replacement_, size_t num_dimensions_) : replacement(replacement_) , num_dimensions(num_dimensions_) { } Field operator()(const Null &) const { return num_dimensions ? Array() : replacement; } Field operator()(const Array & x) const { assert(num_dimensions > 0); const size_t size = x.size(); Array res(size); for (size_t i = 0; i < size; ++i) res[i] = applyVisitor(FieldVisitorReplaceNull(replacement, num_dimensions - 1), x[i]); return res; } template Field operator()(const T & x) const { return x; } private: const Field & replacement; size_t num_dimensions; }; /// Visitor that allows to get type of scalar field /// or least common type of scalars in array. /// More optimized version of FieldToDataType. 
class FieldVisitorToScalarType : public StaticVisitor<>
{
public:
    using FieldType = Field::Types::Which;

    /// Arrays are flattened: every element is visited with the same visitor,
    /// so the collected sets describe the scalars of all nesting levels.
    void operator()(const Array & x)
    {
        size_t size = x.size();
        for (size_t i = 0; i < size; ++i)
            applyVisitor(*this, x[i]);
    }

    /// Records the narrowest unsigned integer type that can hold @x.
    void operator()(const UInt64 & x)
    {
        field_types.insert(FieldType::UInt64);
        if (x <= std::numeric_limits<UInt8>::max())
            type_indexes.insert(TypeIndex::UInt8);
        else if (x <= std::numeric_limits<UInt16>::max())
            type_indexes.insert(TypeIndex::UInt16);
        else if (x <= std::numeric_limits<UInt32>::max())
            type_indexes.insert(TypeIndex::UInt32);
        else
            type_indexes.insert(TypeIndex::UInt64);
    }

    /// Records the narrowest signed integer type that can hold @x.
    void operator()(const Int64 & x)
    {
        field_types.insert(FieldType::Int64);
        if (x <= std::numeric_limits<Int8>::max() && x >= std::numeric_limits<Int8>::min())
            type_indexes.insert(TypeIndex::Int8);
        else if (x <= std::numeric_limits<Int16>::max() && x >= std::numeric_limits<Int16>::min())
            type_indexes.insert(TypeIndex::Int16);
        else if (x <= std::numeric_limits<Int32>::max() && x >= std::numeric_limits<Int32>::min())
            type_indexes.insert(TypeIndex::Int32);
        else
            type_indexes.insert(TypeIndex::Int64);
    }

    /// Booleans are accounted as UInt64 fields with UInt8 type index.
    void operator()(const bool &)
    {
        field_types.insert(FieldType::UInt64);
        type_indexes.insert(TypeIndex::UInt8);
    }

    /// NULLs do not contribute a type, only the flag is remembered.
    void operator()(const Null &)
    {
        have_nulls = true;
    }

    /// NOTE(review): the name of the nested template wrapped around T was missing
    /// in the reviewed copy; NearestFieldType is presumed - confirm against version control.
    template <typename T>
    void operator()(const T &)
    {
        field_types.insert(Field::TypeToEnum<NearestFieldType<T>>::value);
        type_indexes.insert(TypeToTypeIndex<NearestFieldType<T>>);
    }

    /// Least supertype (or String) of all collected type indexes.
    DataTypePtr getScalarType() const { return getLeastSupertypeOrString(type_indexes); }
    bool haveNulls() const { return have_nulls; }
    /// True if scalars of more than one Field type were visited.
    bool needConvertField() const { return field_types.size() > 1; }

private:
    TypeIndexSet type_indexes;
    std::unordered_set<FieldType> field_types;
    bool have_nulls = false;
};

}

/// Collects information about @field with two visitor passes:
/// scalar type, presence of NULLs, whether scalars of different Field types
/// were met, and the number of dimensions reported by FieldVisitorToNumberOfDimensions.
FieldInfo getFieldInfo(const Field & field)
{
    FieldVisitorToScalarType to_scalar_type_visitor;
    applyVisitor(to_scalar_type_visitor, field);

    return
    {
        to_scalar_type_visitor.getScalarType(),
        to_scalar_type_visitor.haveNulls(),
        to_scalar_type_visitor.needConvertField(),
        applyVisitor(FieldVisitorToNumberOfDimensions(), field),
    };
}
ColumnObject::Subcolumn::Subcolumn(MutableColumnPtr && data_, bool is_nullable_) : least_common_type(getDataTypeByColumn(*data_)) , is_nullable(is_nullable_) , num_rows(data_->size()) { data.push_back(std::move(data_)); } ColumnObject::Subcolumn::Subcolumn( size_t size_, bool is_nullable_) : least_common_type(std::make_shared()) , is_nullable(is_nullable_) , num_of_defaults_in_prefix(size_) , num_rows(size_) { } size_t ColumnObject::Subcolumn::size() const { return num_rows; } size_t ColumnObject::Subcolumn::byteSize() const { size_t res = 0; for (const auto & part : data) res += part->byteSize(); return res; } size_t ColumnObject::Subcolumn::allocatedBytes() const { size_t res = 0; for (const auto & part : data) res += part->allocatedBytes(); return res; } void ColumnObject::Subcolumn::get(size_t n, Field & res) const { if (isFinalized()) { getFinalizedColumn().get(n, res); return; } size_t ind = n; if (ind < num_of_defaults_in_prefix) { res = least_common_type.get()->getDefault(); return; } ind -= num_of_defaults_in_prefix; for (const auto & part : data) { if (ind < part->size()) { part->get(ind, res); res = convertFieldToTypeOrThrow(res, *least_common_type.get()); return; } ind -= part->size(); } throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); } void ColumnObject::Subcolumn::checkTypes() const { DataTypes prefix_types; prefix_types.reserve(data.size()); for (size_t i = 0; i < data.size(); ++i) { auto current_type = getDataTypeByColumn(*data[i]); prefix_types.push_back(current_type); auto prefix_common_type = getLeastSupertype(prefix_types); if (!prefix_common_type->equals(*current_type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Data type {} of column at position {} cannot represent all columns from i-th prefix", current_type->getName(), i); } } void ColumnObject::Subcolumn::insert(Field field) { auto info = DB::getFieldInfo(field); insert(std::move(field), std::move(info)); } void 
ColumnObject::Subcolumn::addNewColumnPart(DataTypePtr type) { auto serialization = type->getSerialization(ISerialization::Kind::SPARSE); data.push_back(type->createColumn(*serialization)); least_common_type = LeastCommonType{std::move(type)}; } static bool isConversionRequiredBetweenIntegers(const IDataType & lhs, const IDataType & rhs) { /// If both of types are signed/unsigned integers and size of left field type /// is less than right type, we don't need to convert field, /// because all integer fields are stored in Int64/UInt64. WhichDataType which_lhs(lhs); WhichDataType which_rhs(rhs); bool is_native_int = which_lhs.isNativeInt() && which_rhs.isNativeInt(); bool is_native_uint = which_lhs.isNativeUInt() && which_rhs.isNativeUInt(); return (!is_native_int && !is_native_uint) || lhs.getSizeOfValueInMemory() > rhs.getSizeOfValueInMemory(); } void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) { auto base_type = std::move(info.scalar_type); if (isNothing(base_type) && info.num_dimensions == 0) { insertDefault(); return; } auto column_dim = least_common_type.getNumberOfDimensions(); auto value_dim = info.num_dimensions; if (isNothing(least_common_type.get())) column_dim = value_dim; if (isNothing(base_type)) value_dim = column_dim; if (value_dim != column_dim) throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATHED, "Dimension of types mismatched between inserted value and column. " "Dimension of value: {}. 
Dimension of column: {}", value_dim, column_dim); if (is_nullable) base_type = makeNullable(base_type); if (!is_nullable && info.have_nulls) field = applyVisitor(FieldVisitorReplaceNull(base_type->getDefault(), value_dim), std::move(field)); bool type_changed = false; const auto & least_common_base_type = least_common_type.getBase(); if (data.empty()) { addNewColumnPart(createArrayOfType(std::move(base_type), value_dim)); } else if (!least_common_base_type->equals(*base_type) && !isNothing(base_type)) { if (isConversionRequiredBetweenIntegers(*base_type, *least_common_base_type)) { base_type = getLeastSupertypeOrString(DataTypes{std::move(base_type), least_common_base_type}); type_changed = true; if (!least_common_base_type->equals(*base_type)) addNewColumnPart(createArrayOfType(std::move(base_type), value_dim)); } } if (type_changed || info.need_convert) field = convertFieldToTypeOrThrow(field, *least_common_type.get()); data.back()->insert(field); ++num_rows; } void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length) { assert(start + length <= src.size()); size_t end = start + length; num_rows += length; if (data.empty()) { addNewColumnPart(src.getLeastCommonType()); } else if (!least_common_type.get()->equals(*src.getLeastCommonType())) { auto new_least_common_type = getLeastSupertypeOrString(DataTypes{least_common_type.get(), src.getLeastCommonType()}); if (!new_least_common_type->equals(*least_common_type.get())) addNewColumnPart(std::move(new_least_common_type)); } if (end <= src.num_of_defaults_in_prefix) { data.back()->insertManyDefaults(length); return; } if (start < src.num_of_defaults_in_prefix) data.back()->insertManyDefaults(src.num_of_defaults_in_prefix - start); auto insert_from_part = [&](const auto & column, size_t from, size_t n) { assert(from + n <= column->size()); auto column_type = getDataTypeByColumn(*column); if (column_type->equals(*least_common_type.get())) { data.back()->insertRangeFrom(*column, 
from, n); return; } /// If we need to insert large range, there is no sense to cut part of column and cast it. /// Casting of all column and inserting from it can be faster. /// Threshold is just a guess. if (n * 3 >= column->size()) { auto casted_column = castColumn({column, column_type, ""}, least_common_type.get()); data.back()->insertRangeFrom(*casted_column, from, n); return; } auto casted_column = column->cut(from, n); casted_column = castColumn({casted_column, column_type, ""}, least_common_type.get()); data.back()->insertRangeFrom(*casted_column, 0, n); }; size_t pos = 0; size_t processed_rows = src.num_of_defaults_in_prefix; /// Find the first part of the column that intersects the range. while (pos < src.data.size() && processed_rows + src.data[pos]->size() < start) { processed_rows += src.data[pos]->size(); ++pos; } /// Insert from the first part of column. if (pos < src.data.size() && processed_rows < start) { size_t part_start = start - processed_rows; size_t part_length = std::min(src.data[pos]->size() - part_start, end - start); insert_from_part(src.data[pos], part_start, part_length); processed_rows += src.data[pos]->size(); ++pos; } /// Insert from the parts of column in the middle of range. while (pos < src.data.size() && processed_rows + src.data[pos]->size() < end) { insert_from_part(src.data[pos], 0, src.data[pos]->size()); processed_rows += src.data[pos]->size(); ++pos; } /// Insert from the last part of column if needed. 
if (pos < src.data.size() && processed_rows < end) { size_t part_end = end - processed_rows; insert_from_part(src.data[pos], 0, part_end); } } bool ColumnObject::Subcolumn::isFinalized() const { return num_of_defaults_in_prefix == 0 && (data.empty() || (data.size() == 1 && !data[0]->isSparse())); } void ColumnObject::Subcolumn::finalize() { if (isFinalized()) return; if (data.size() == 1 && num_of_defaults_in_prefix == 0) { data[0] = data[0]->convertToFullColumnIfSparse(); return; } const auto & to_type = least_common_type.get(); auto result_column = to_type->createColumn(); if (num_of_defaults_in_prefix) result_column->insertManyDefaults(num_of_defaults_in_prefix); for (auto & part : data) { part = part->convertToFullColumnIfSparse(); auto from_type = getDataTypeByColumn(*part); size_t part_size = part->size(); if (!from_type->equals(*to_type)) { auto offsets = ColumnUInt64::create(); auto & offsets_data = offsets->getData(); /// We need to convert only non-default values and then recreate column /// with default value of new type, because default values (which represents misses in data) /// may be inconsistent between types (e.g "0" in UInt64 and empty string in String). 
part->getIndicesOfNonDefaultRows(offsets_data, 0, part_size); if (offsets->size() == part_size) { part = castColumn({part, from_type, ""}, to_type); } else { auto values = part->index(*offsets, offsets->size()); values = castColumn({values, from_type, ""}, to_type); part = values->createWithOffsets(offsets_data, to_type->getDefault(), part_size, /*shift=*/ 0); } } result_column->insertRangeFrom(*part, 0, part_size); } data = { std::move(result_column) }; num_of_defaults_in_prefix = 0; } void ColumnObject::Subcolumn::insertDefault() { if (data.empty()) ++num_of_defaults_in_prefix; else data.back()->insertDefault(); ++num_rows; } void ColumnObject::Subcolumn::insertManyDefaults(size_t length) { if (data.empty()) num_of_defaults_in_prefix += length; else data.back()->insertManyDefaults(length); num_rows += length; } void ColumnObject::Subcolumn::popBack(size_t n) { assert(n <= size()); num_rows -= n; size_t num_removed = 0; for (auto it = data.rbegin(); it != data.rend(); ++it) { if (n == 0) break; auto & column = *it; if (n < column->size()) { column->popBack(n); n = 0; } else { ++num_removed; n -= column->size(); } } data.resize(data.size() - num_removed); num_of_defaults_in_prefix -= n; } ColumnObject::Subcolumn ColumnObject::Subcolumn::cut(size_t start, size_t length) const { Subcolumn new_subcolumn(0, is_nullable); new_subcolumn.insertRangeFrom(*this, start, length); return new_subcolumn; } Field ColumnObject::Subcolumn::getLastField() const { if (data.empty()) return Field(); const auto & last_part = data.back(); assert(!last_part->empty()); return (*last_part)[last_part->size() - 1]; } FieldInfo ColumnObject::Subcolumn::getFieldInfo() const { const auto & base_type = least_common_type.getBase(); return FieldInfo { .scalar_type = base_type, .have_nulls = base_type->isNullable(), .need_convert = false, .num_dimensions = least_common_type.getNumberOfDimensions(), }; } ColumnObject::Subcolumn ColumnObject::Subcolumn::recreateWithDefaultValues(const FieldInfo & 
field_info) const { auto scalar_type = field_info.scalar_type; if (is_nullable) scalar_type = makeNullable(scalar_type); Subcolumn new_subcolumn(*this); new_subcolumn.least_common_type = LeastCommonType{createArrayOfType(scalar_type, field_info.num_dimensions)}; for (auto & part : new_subcolumn.data) part = recreateColumnWithDefaultValues(part, scalar_type, field_info.num_dimensions); return new_subcolumn; } IColumn & ColumnObject::Subcolumn::getFinalizedColumn() { assert(isFinalized()); return *data[0]; } const IColumn & ColumnObject::Subcolumn::getFinalizedColumn() const { assert(isFinalized()); return *data[0]; } const ColumnPtr & ColumnObject::Subcolumn::getFinalizedColumnPtr() const { assert(isFinalized()); return data[0]; } ColumnObject::Subcolumn::LeastCommonType::LeastCommonType() : type(std::make_shared()) , base_type(type) , num_dimensions(0) { } ColumnObject::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_) : type(std::move(type_)) , base_type(getBaseTypeOfArray(type)) , num_dimensions(DB::getNumberOfDimensions(*type)) { } ColumnObject::ColumnObject(bool is_nullable_) : is_nullable(is_nullable_) , num_rows(0) { } ColumnObject::ColumnObject(Subcolumns && subcolumns_, bool is_nullable_) : is_nullable(is_nullable_) , subcolumns(std::move(subcolumns_)) , num_rows(subcolumns.empty() ? 0 : (*subcolumns.begin())->data.size()) { checkConsistency(); } void ColumnObject::checkConsistency() const { if (subcolumns.empty()) return; for (const auto & leaf : subcolumns) { if (num_rows != leaf->data.size()) { throw Exception(ErrorCodes::LOGICAL_ERROR, "Sizes of subcolumns are inconsistent in ColumnObject." 
" Subcolumn '{}' has {} rows, but expected size is {}", leaf->path.getPath(), leaf->data.size(), num_rows); } } } size_t ColumnObject::size() const { #ifndef NDEBUG checkConsistency(); #endif return num_rows; } size_t ColumnObject::byteSize() const { size_t res = 0; for (const auto & entry : subcolumns) res += entry->data.byteSize(); return res; } size_t ColumnObject::allocatedBytes() const { size_t res = 0; for (const auto & entry : subcolumns) res += entry->data.allocatedBytes(); return res; } void ColumnObject::forEachSubcolumn(ColumnCallback callback) { for (auto & entry : subcolumns) for (auto & part : entry->data.data) callback(part); } void ColumnObject::insert(const Field & field) { const auto & object = field.get(); HashSet inserted_paths; size_t old_size = size(); for (const auto & [key_str, value] : object) { PathInData key(key_str); inserted_paths.insert(key_str); if (!hasSubcolumn(key)) addSubcolumn(key, old_size); auto & subcolumn = getSubcolumn(key); subcolumn.insert(value); } for (auto & entry : subcolumns) { if (!inserted_paths.has(entry->path.getPath())) { bool inserted = tryInsertDefaultFromNested(entry); if (!inserted) entry->data.insertDefault(); } } ++num_rows; } void ColumnObject::insertDefault() { for (auto & entry : subcolumns) entry->data.insertDefault(); ++num_rows; } Field ColumnObject::operator[](size_t n) const { Field object; get(n, object); return object; } void ColumnObject::get(size_t n, Field & res) const { assert(n < size()); res = Object(); auto & object = res.get(); for (const auto & entry : subcolumns) { auto it = object.try_emplace(entry->path.getPath()).first; entry->data.get(n, it->second); } } void ColumnObject::insertFrom(const IColumn & src, size_t n) { insert(src[n]); finalize(); } void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) { const auto & src_object = assert_cast(src); for (const auto & entry : src_object.subcolumns) { if (!hasSubcolumn(entry->path)) { if 
(entry->path.hasNested()) addNestedSubcolumn(entry->path, entry->data.getFieldInfo(), num_rows); else addSubcolumn(entry->path, num_rows); } auto & subcolumn = getSubcolumn(entry->path); subcolumn.insertRangeFrom(entry->data, start, length); } for (auto & entry : subcolumns) { if (!src_object.hasSubcolumn(entry->path)) { bool inserted = tryInsertManyDefaultsFromNested(entry); if (!inserted) entry->data.insertManyDefaults(length); } } num_rows += length; finalize(); } void ColumnObject::popBack(size_t length) { for (auto & entry : subcolumns) entry->data.popBack(length); num_rows -= length; } template MutableColumnPtr ColumnObject::applyForSubcolumns(Func && func) const { if (!isFinalized()) { auto finalized = IColumn::mutate(getPtr()); auto & finalized_object = assert_cast(*finalized); finalized_object.finalize(); return finalized_object.applyForSubcolumns(std::forward(func)); } auto res = ColumnObject::create(is_nullable); for (const auto & subcolumn : subcolumns) { auto new_subcolumn = func(subcolumn->data.getFinalizedColumn()); res->addSubcolumn(subcolumn->path, new_subcolumn->assumeMutable()); } return res; } ColumnPtr ColumnObject::permute(const Permutation & perm, size_t limit) const { return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.permute(perm, limit); }); } ColumnPtr ColumnObject::filter(const Filter & filter, ssize_t result_size_hint) const { return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.filter(filter, result_size_hint); }); } ColumnPtr ColumnObject::index(const IColumn & indexes, size_t limit) const { return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.index(indexes, limit); }); } ColumnPtr ColumnObject::replicate(const Offsets & offsets) const { return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.replicate(offsets); }); } MutableColumnPtr ColumnObject::cloneResized(size_t new_size) const { if (new_size == 0) return ColumnObject::create(is_nullable); return 
applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.cloneResized(new_size); }); } const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const { if (const auto * node = subcolumns.findLeaf(key)) return node->data; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObject", key.getPath()); } ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) { if (const auto * node = subcolumns.findLeaf(key)) return const_cast(node)->data; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObject", key.getPath()); } bool ColumnObject::hasSubcolumn(const PathInData & key) const { return subcolumns.findLeaf(key) != nullptr; } void ColumnObject::addSubcolumn(const PathInData & key, MutableColumnPtr && subcolumn) { size_t new_size = subcolumn->size(); bool inserted = subcolumns.add(key, Subcolumn(std::move(subcolumn), is_nullable)); if (!inserted) throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key.getPath()); if (num_rows == 0) num_rows = new_size; else if (new_size != num_rows) throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of subcolumn {} ({}) is inconsistent with column size ({})", key.getPath(), new_size, num_rows); } void ColumnObject::addSubcolumn(const PathInData & key, size_t new_size) { bool inserted = subcolumns.add(key, Subcolumn(new_size, is_nullable)); if (!inserted) throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key.getPath()); if (num_rows == 0) num_rows = new_size; else if (new_size != num_rows) throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Required size of subcolumn {} ({}) is inconsistent with column size ({})", key.getPath(), new_size, num_rows); } void ColumnObject::addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size) { if (!key.hasNested()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add Nested subcolumn, 
because path doesn't contain Nested"); bool inserted = false; /// We find node that represents the same Nested type as @key. const auto * nested_node = subcolumns.findBestMatch(key); if (nested_node) { /// Find any leaf of Nested subcolumn. const auto * leaf = subcolumns.findLeaf(nested_node, [&](const auto &) { return true; }); assert(leaf); /// Recreate subcolumn with default values and the same sizes of arrays. auto new_subcolumn = leaf->data.recreateWithDefaultValues(field_info); /// It's possible that we have already inserted value from current row /// to this subcolumn. So, adjust size to expected. if (new_subcolumn.size() > new_size) new_subcolumn.popBack(new_subcolumn.size() - new_size); assert(new_subcolumn.size() == new_size); inserted = subcolumns.add(key, new_subcolumn); } else { /// If node was not found just add subcolumn with empty arrays. inserted = subcolumns.add(key, Subcolumn(new_size, is_nullable)); } if (!inserted) throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Subcolumn '{}' already exists", key.getPath()); if (num_rows == 0) num_rows = new_size; else if (new_size != num_rows) throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Required size of subcolumn {} ({}) is inconsistent with column size ({})", key.getPath(), new_size, num_rows); } const ColumnObject::Subcolumns::Node * ColumnObject::getLeafOfTheSameNested(const Subcolumns::NodePtr & entry) const { if (!entry->path.hasNested()) return nullptr; size_t old_size = entry->data.size(); const auto * current_node = subcolumns.findLeaf(entry->path); const Subcolumns::Node * leaf = nullptr; while (current_node) { /// Try to find the first Nested up to the current node. const auto * node_nested = subcolumns.findParent(current_node, [](const auto & candidate) { return candidate.isNested(); }); if (!node_nested) break; /// Find the leaf with subcolumn that contains values /// for the last rows. /// If there are no leaves, skip current node and find /// the next node up to the current. 
leaf = subcolumns.findLeaf(node_nested, [&](const auto & candidate) { return candidate.data.size() > old_size; }); if (leaf) break; current_node = node_nested->parent; } if (leaf && isNothing(leaf->data.getLeastCommonTypeBase())) return nullptr; return leaf; } bool ColumnObject::tryInsertManyDefaultsFromNested(const Subcolumns::NodePtr & entry) const { const auto * leaf = getLeafOfTheSameNested(entry); if (!leaf) return false; size_t old_size = entry->data.size(); auto field_info = entry->data.getFieldInfo(); /// Cut the needed range from the found leaf /// and replace scalar values to the correct /// default values for given entry. auto new_subcolumn = leaf->data .cut(old_size, leaf->data.size() - old_size) .recreateWithDefaultValues(field_info); entry->data.insertRangeFrom(new_subcolumn, 0, new_subcolumn.size()); return true; } bool ColumnObject::tryInsertDefaultFromNested(const Subcolumns::NodePtr & entry) const { const auto * leaf = getLeafOfTheSameNested(entry); if (!leaf) return false; auto last_field = leaf->data.getLastField(); if (last_field.isNull()) return false; size_t leaf_num_dimensions = leaf->data.getNumberOfDimensions(); size_t entry_num_dimensions = entry->data.getNumberOfDimensions(); auto default_scalar = entry_num_dimensions > leaf_num_dimensions ? 
createEmptyArrayField(entry_num_dimensions - leaf_num_dimensions) : entry->data.getLeastCommonTypeBase()->getDefault(); auto default_field = applyVisitor(FieldVisitorReplaceScalars(default_scalar, leaf_num_dimensions), last_field); entry->data.insert(std::move(default_field)); return true; } PathsInData ColumnObject::getKeys() const { PathsInData keys; keys.reserve(subcolumns.size()); for (const auto & entry : subcolumns) keys.emplace_back(entry->path); return keys; } bool ColumnObject::isFinalized() const { return std::all_of(subcolumns.begin(), subcolumns.end(), [](const auto & entry) { return entry->data.isFinalized(); }); } void ColumnObject::finalize() { size_t old_size = size(); Subcolumns new_subcolumns; for (auto && entry : subcolumns) { const auto & least_common_type = entry->data.getLeastCommonType(); /// Do not add subcolumns, which consist only from NULLs. if (isNothing(getBaseTypeOfArray(least_common_type))) continue; entry->data.finalize(); new_subcolumns.add(entry->path, entry->data); } /// If all subcolumns were skipped add a dummy subcolumn, /// because Tuple type must have at least one element. if (new_subcolumns.empty()) new_subcolumns.add(PathInData{COLUMN_NAME_DUMMY}, Subcolumn{ColumnUInt8::create(old_size, 0), is_nullable}); std::swap(subcolumns, new_subcolumns); checkObjectHasNoAmbiguosPaths(getKeys()); } }