mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-20 05:05:38 +00:00
fix filling of empty Nested
This commit is contained in:
parent
2d30524d72
commit
57c1d7a101
@ -90,7 +90,9 @@ void IDataType::forEachSubcolumn(
|
||||
{
|
||||
auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len);
|
||||
auto subdata = ISerialization::createFromPath(subpath, prefix_len);
|
||||
callback(subpath, name, subdata);
|
||||
auto path_copy = subpath;
|
||||
path_copy.resize(prefix_len);
|
||||
callback(path_copy, name, subdata);
|
||||
}
|
||||
subpath[i].visited = true;
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
@ -66,6 +67,36 @@ DataTypePtr getBaseTypeOfArray(const DataTypePtr & type)
|
||||
return last_array ? last_array->getNestedType() : type;
|
||||
}
|
||||
|
||||
/// Unwraps a type down to its base: Array layers are always stripped,
/// and Tuple layers are entered by name, consuming `tuple_elements` in order.
/// The descent stops at the first type that is neither an Array nor a Tuple,
/// at a Tuple when no element names are left, or at a Tuple that has no element
/// with the next requested name. The type reached at that point is returned.
DataTypePtr getBaseTypeOfArray(DataTypePtr type, const Names & tuple_elements)
{
    auto next_element = tuple_elements.begin();
    const auto elements_end = tuple_elements.end();

    for (;;)
    {
        /// Arrays are transparent: just step into the nested type.
        if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get()))
        {
            type = array_type->getNestedType();
            continue;
        }

        /// Anything that is not a Tuple is the base type;
        /// a Tuple is also final once all requested names are consumed.
        const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get());
        if (!tuple_type || next_element == elements_end)
            return type;

        /// The next requested name is not an element of this Tuple: stop here.
        auto element_pos = tuple_type->tryGetPositionByName(*next_element);
        if (!element_pos)
            return type;

        ++next_element;
        type = tuple_type->getElement(*element_pos);
    }
}
|
||||
|
||||
ColumnPtr getBaseColumnOfArray(const ColumnPtr & column)
|
||||
{
|
||||
/// Get raw pointers to avoid extra copying of column pointers.
|
||||
|
@ -27,6 +27,9 @@ size_t getNumberOfDimensions(const IColumn & column);
|
||||
/// Returns type of scalars of Array of arbitrary dimensions.
|
||||
DataTypePtr getBaseTypeOfArray(const DataTypePtr & type);
|
||||
|
||||
/// The same as above but takes into account Tuples of Nested.
|
||||
DataTypePtr getBaseTypeOfArray(DataTypePtr type, const Names & tuple_elements);
|
||||
|
||||
/// Returns Array type with requested scalar type and number of dimensions.
|
||||
DataTypePtr createArrayOfType(DataTypePtr type, size_t num_dimensions);
|
||||
|
||||
|
@ -195,7 +195,7 @@ public:
|
||||
/// Types of substreams that can have arbitrary name.
|
||||
static const std::set<Type> named_types;
|
||||
|
||||
Type type;
|
||||
Type type = Type::Regular;
|
||||
|
||||
/// The name of a variant element type.
|
||||
String variant_element_name;
|
||||
@ -212,6 +212,7 @@ public:
|
||||
/// Flag, that may help to traverse substream paths.
|
||||
mutable bool visited = false;
|
||||
|
||||
Substream() = default;
|
||||
Substream(Type type_) : type(type_) {} /// NOLINT
|
||||
String toString() const;
|
||||
};
|
||||
|
@ -283,6 +283,9 @@ static ColumnPtr createColumnWithDefaultValue(const IDataType & data_type, const
|
||||
{
|
||||
auto column = data_type.createColumnConstWithDefaultValue(num_rows);
|
||||
|
||||
/// We must turn a constant column into a full column because the interpreter could infer
|
||||
/// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
|
||||
|
||||
if (subcolumn_name.empty())
|
||||
return column->convertToFullColumnIfConst();
|
||||
|
||||
@ -293,6 +296,35 @@ static ColumnPtr createColumnWithDefaultValue(const IDataType & data_type, const
|
||||
return ColumnConst::create(std::move(column), num_rows)->convertToFullColumnIfConst();
|
||||
}
|
||||
|
||||
static bool hasDefault(const StorageMetadataPtr & metadata_snapshot, const NameAndTypePair & column)
|
||||
{
|
||||
if (!metadata_snapshot)
|
||||
return false;
|
||||
|
||||
const auto & columns = metadata_snapshot->getColumns();
|
||||
if (columns.has(column.name))
|
||||
return columns.hasDefault(column.name);
|
||||
|
||||
auto name_in_storage = column.getNameInStorage();
|
||||
return columns.hasDefault(name_in_storage);
|
||||
}
|
||||
|
||||
/// Strips tuple element names from a dot-separated subcolumn name.
///
/// For every non-empty name in `tuple_elements`, the first occurrence of "<name>."
/// is removed from the subcolumn name together with its separating dot, e.g.
/// "a.size0" with elements {"a"} becomes "size0", and "a" with {"a"} becomes "".
/// Elements that do not occur in the name are ignored.
///
/// NOTE(review): the match is a plain substring search, so an element name may also
/// match the tail of a longer path component ("b" matches inside "ab.") - confirm
/// that callers never pass such names.
static String removeTupleElementsFromSubcolumn(String subcolumn_name, const Names & tuple_elements)
{
    /// Add a dot to the end of the name so that the last component is matched like any other.
    subcolumn_name += ".";

    for (const auto & elem : tuple_elements)
    {
        /// An empty name would match a bare "." and delete an unrelated separator.
        if (elem.empty())
            continue;

        const auto pos = subcolumn_name.find(elem + ".");
        if (pos != std::string::npos)
        {
            /// Erase the separating dot together with the element name,
            /// otherwise stray dots are left in the result (".size0").
            subcolumn_name.erase(pos, elem.size() + 1);
        }
    }

    /// Drop the dot appended above if it is still there.
    if (!subcolumn_name.empty() && subcolumn_name.back() == '.')
        subcolumn_name.pop_back();

    return subcolumn_name;
}
|
||||
|
||||
void fillMissingColumns(
|
||||
Columns & res_columns,
|
||||
size_t num_rows,
|
||||
@ -321,10 +353,8 @@ void fillMissingColumns(
|
||||
if (res_columns[i] && partially_read_columns.contains(requested_column->name))
|
||||
res_columns[i] = nullptr;
|
||||
|
||||
if (res_columns[i])
|
||||
continue;
|
||||
|
||||
if (metadata_snapshot && metadata_snapshot->getColumns().hasDefault(requested_column->getNameInStorage()))
|
||||
/// Nothing to fill or default should be filled in evaluateMissingDefaults
|
||||
if (res_columns[i] || hasDefault(metadata_snapshot, *requested_column))
|
||||
continue;
|
||||
|
||||
std::vector<ColumnPtr> current_offsets;
|
||||
@ -365,19 +395,30 @@ void fillMissingColumns(
|
||||
|
||||
if (!current_offsets.empty())
|
||||
{
|
||||
|
||||
Names tuple_elements;
|
||||
auto serialization = IDataType::getSerialization(*requested_column);
|
||||
|
||||
IDataType::forEachSubcolumn([&](const auto & path, const auto &, const auto &)
|
||||
{
|
||||
if (path.back().type == ISerialization::Substream::TupleElement)
|
||||
tuple_elements.push_back(path.back().name_of_substream);
|
||||
}, ISerialization::SubstreamData(serialization));
|
||||
|
||||
size_t num_empty_dimensions = num_dimensions - current_offsets.size();
|
||||
auto scalar_type = createArrayOfType(getBaseTypeOfArray(requested_column->getTypeInStorage()), num_empty_dimensions);
|
||||
auto base_type = getBaseTypeOfArray(requested_column->getTypeInStorage(), tuple_elements);
|
||||
auto scalar_type = createArrayOfType(base_type, num_empty_dimensions);
|
||||
|
||||
size_t data_size = assert_cast<const ColumnUInt64 &>(*current_offsets.back()).getData().back();
|
||||
res_columns[i] = createColumnWithDefaultValue(*scalar_type, requested_column->getSubcolumnName(), data_size);
|
||||
auto subcolumn_name = removeTupleElementsFromSubcolumn(requested_column->getSubcolumnName(), tuple_elements);
|
||||
|
||||
res_columns[i] = createColumnWithDefaultValue(*scalar_type, subcolumn_name, data_size);
|
||||
|
||||
for (auto it = current_offsets.rbegin(); it != current_offsets.rend(); ++it)
|
||||
res_columns[i] = ColumnArray::create(res_columns[i], *it);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// We must turn a constant column into a full column because the interpreter could infer
|
||||
/// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
|
||||
res_columns[i] = createColumnWithDefaultValue(*requested_column->getTypeInStorage(), requested_column->getSubcolumnName(), num_rows);
|
||||
}
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ IMergeTreeReader::IMergeTreeReader(
|
||||
, alter_conversions(data_part_info_for_read->getAlterConversions())
|
||||
/// For wide parts convert plain arrays of Nested to subcolumns
|
||||
/// to allow to use shared offset column from cache.
|
||||
, original_requested_columns(columns_)
|
||||
, requested_columns(data_part_info_for_read->isWidePart()
|
||||
? Nested::convertToSubcolumns(columns_)
|
||||
: columns_)
|
||||
@ -139,7 +140,7 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
|
||||
{
|
||||
try
|
||||
{
|
||||
size_t num_columns = requested_columns.size();
|
||||
size_t num_columns = original_requested_columns.size();
|
||||
|
||||
if (res_columns.size() != num_columns)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "invalid number of columns passed to MergeTreeReader::fillMissingColumns. "
|
||||
@ -151,7 +152,7 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
|
||||
/// Convert columns list to block. And convert subcolumns to full columns.
|
||||
/// TODO: rewrite with columns interface. It will be possible after changes in ExpressionActions.
|
||||
|
||||
auto it = requested_columns.begin();
|
||||
auto it = original_requested_columns.begin();
|
||||
for (size_t pos = 0; pos < num_columns; ++pos, ++it)
|
||||
{
|
||||
auto name_in_storage = it->getNameInStorage();
|
||||
@ -178,7 +179,7 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
|
||||
}
|
||||
|
||||
/// Move columns from block.
|
||||
it = requested_columns.begin();
|
||||
it = original_requested_columns.begin();
|
||||
for (size_t pos = 0; pos < num_columns; ++pos, ++it)
|
||||
{
|
||||
auto name_in_storage = it->getNameInStorage();
|
||||
|
@ -112,6 +112,9 @@ protected:
|
||||
|
||||
private:
|
||||
/// Columns that are requested to read.
|
||||
NamesAndTypesList original_requested_columns;
|
||||
|
||||
/// The same as above but with converted Arrays to subcolumns of Nested.
|
||||
NamesAndTypesList requested_columns;
|
||||
|
||||
/// Actual columns description in part.
|
||||
|
Loading…
Reference in New Issue
Block a user