fix filling of missed Nested columns with multiple levels
Commit 0dbbca5b3e (parent 3215c98319)
@@ -132,29 +132,29 @@ namespace
         offset_values.resize(i);
     }
 }

+ColumnPtr arraySizesToOffsets(const IColumn & column)
+{
+    const auto & column_sizes = assert_cast<const ColumnArray::ColumnOffsets &>(column);
+    MutableColumnPtr column_offsets = column_sizes.cloneEmpty();
+
+    if (column_sizes.empty())
+        return column_offsets;
+
+    const auto & sizes_data = column_sizes.getData();
+    auto & offsets_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_offsets).getData();
+
+    offsets_data.resize(sizes_data.size());
+
+    IColumn::Offset prev_offset = 0;
+    for (size_t i = 0, size = sizes_data.size(); i < size; ++i)
+    {
+        prev_offset += sizes_data[i];
+        offsets_data[i] = prev_offset;
+    }
+
+    return column_offsets;
+}
+
 ColumnPtr arrayOffsetsToSizes(const IColumn & column)
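Note on the new helper: it is a running (prefix) sum over per-row array sizes, turning sizes like {2, 0, 3} into cumulative offsets {2, 2, 5}. A minimal standalone sketch of the same transformation on plain std::vector, not part of the patch and not using ClickHouse's column types:

#include <cstddef>
#include <cstdint>
#include <vector>

// Mirror of arraySizesToOffsets on plain vectors: each output element is the
// running total of all sizes up to and including that row.
std::vector<uint64_t> sizesToOffsets(const std::vector<uint64_t> & sizes)
{
    std::vector<uint64_t> offsets(sizes.size());
    uint64_t prev_offset = 0;
    for (std::size_t i = 0; i < sizes.size(); ++i)
    {
        prev_offset += sizes[i];
        offsets[i] = prev_offset;
    }
    return offsets;  // {2, 0, 3} -> {2, 2, 5}
}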
@@ -80,5 +80,6 @@ private:
 };

 ColumnPtr arrayOffsetsToSizes(const IColumn & column);
+ColumnPtr arraySizesToOffsets(const IColumn & column);

 }
@@ -12,6 +12,7 @@
 #include <Parsers/ASTFunction.h>
 #include <utility>
 #include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/ObjectUtils.h>
 #include <Interpreters/RequiredSourceColumnsVisitor.h>
 #include <Common/checkStackSize.h>
 #include <Storages/ColumnsDescription.h>
@@ -198,6 +199,9 @@ static bool arrayHasNoElementsRead(const IColumn & column)
     if (!size)
         return false;

+    if (const auto * nested_array = typeid_cast<const ColumnArray *>(&column_array->getData()))
+        return arrayHasNoElementsRead(*nested_array);
+
     size_t data_size = column_array->getData().size();
     if (data_size)
         return false;
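With the added recursion the emptiness check descends through every nesting level: for an Array(Array(T)) column read without elements, each offsets level still carries one entry per row and only the innermost data column is empty. A rough standalone model of that recursion with simplified stand-in types (illustrative only, not the patch itself):

#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// Simplified stand-in for ColumnArray: per-row offsets plus either a nested
// array level or a flat count of scalar values at the innermost level.
struct FakeArray
{
    std::vector<uint64_t> offsets;
    std::shared_ptr<FakeArray> nested;   // set for Array(Array(...))
    std::size_t scalar_data_size = 0;    // used at the innermost level only
};

// Mirrors the shape of the fixed arrayHasNoElementsRead: recurse to the
// innermost level before deciding whether sizes were read but data was not.
bool hasNoElementsRead(const FakeArray & column)
{
    std::size_t size = column.offsets.size();
    if (!size)
        return false;

    if (column.nested)
        return hasNoElementsRead(*column.nested);

    if (column.scalar_data_size)
        return false;

    return column.offsets.back() != 0;  // sizes present, element data missing
}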
@@ -210,6 +214,7 @@ void fillMissingColumns(
     Columns & res_columns,
     size_t num_rows,
     const NamesAndTypesList & requested_columns,
+    const NamesAndTypesList & available_columns,
     StorageMetadataPtr metadata_snapshot)
 {
     size_t num_columns = requested_columns.size();
@@ -224,26 +229,35 @@ void fillMissingColumns(
     /// First, collect offset columns for all arrays in the block.

     std::unordered_map<String, ColumnPtr> offset_columns;
-    auto requested_column = requested_columns.begin();
-    for (size_t i = 0; i < num_columns; ++i, ++requested_column)
+    auto available_column = available_columns.begin();
+    for (size_t i = 0; i < num_columns; ++i, ++available_column)
     {
         if (res_columns[i] == nullptr)
             continue;

-        if (const auto * array = typeid_cast<const ColumnArray *>(res_columns[i].get()))
-        {
-            String offsets_name = Nested::extractTableName(requested_column->name);
-            auto & offsets_column = offset_columns[offsets_name];
+        auto serialization = IDataType::getSerialization(*available_column);
+        auto name_in_storage = Nested::extractTableName(available_column->name);
+
+        ISerialization::SubstreamPath path;
+        serialization->enumerateStreams(path, [&](const auto & subpath)
+        {
+            if (subpath.empty() || subpath.back().type != ISerialization::Substream::ArraySizes)
+                return;
+
+            auto subname = ISerialization::getSubcolumnNameForStream(subpath);
+            auto & offsets_column = offset_columns[Nested::concatenateName(name_in_storage, subname)];

             /// If for some reason multiple offsets columns are present for the same nested data structure,
             /// choose the one that is not empty.
             /// TODO: more optimal
             if (!offsets_column || offsets_column->empty())
-                offsets_column = array->getOffsetsPtr();
-        }
+                offsets_column = arraySizesToOffsets(*subpath.back().data.column);
+
+        }, {serialization, available_column->type, res_columns[i], nullptr});
     }

     /// insert default values only for columns without default expressions
-    requested_column = requested_columns.begin();
+    auto requested_column = requested_columns.begin();
     for (size_t i = 0; i < num_columns; ++i, ++requested_column)
     {
         const auto & [name, type] = *requested_column;
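The point of the new map key is that every nesting level gets its own offsets entry, instead of one entry per Nested table as before. A hedged sketch of the resulting map shape; the key spellings below are illustrative guesses at what ISerialization::getSubcolumnNameForStream and Nested::concatenateName produce:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main()
{
    // Stand-in for offset_columns after the collection loop, for a Nested
    // column n.a of type Array(Array(String)) with rows [[["x","y"]], []]:
    std::map<std::string, std::vector<uint64_t>> offset_columns;
    offset_columns["n.size0"] = {1, 1};    // outer level: one element in row 0, none added in row 1
    offset_columns["n.a.size1"] = {2, 2};  // inner level: two strings in that one element

    // Each nesting level is now addressable on its own.
    for (const auto & [name, offsets] : offset_columns)
        std::cout << name << ": " << offsets.size() << " offsets\n";
}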
@@ -256,19 +270,44 @@ void fillMissingColumns(
         if (metadata_snapshot && metadata_snapshot->getColumns().hasDefault(name))
             continue;

-        String offsets_name = Nested::extractTableName(name);
-        auto offset_it = offset_columns.find(offsets_name);
+        std::vector<ColumnPtr> current_offsets;
+        bool has_all_offsets = true;

         const auto * array_type = typeid_cast<const DataTypeArray *>(type.get());
-        if (offset_it != offset_columns.end() && array_type)
+        if (array_type)
         {
-            const auto & nested_type = array_type->getNestedType();
-            ColumnPtr offsets_column = offset_it->second;
-            size_t nested_rows = typeid_cast<const ColumnUInt64 &>(*offsets_column).getData().back();
-
-            ColumnPtr nested_column =
-                nested_type->createColumnConstWithDefaultValue(nested_rows)->convertToFullColumnIfConst();
-
-            res_columns[i] = ColumnArray::create(nested_column, offsets_column);
+            auto serialization = IDataType::getSerialization(*requested_column);
+            auto name_in_storage = Nested::extractTableName(requested_column->name);
+
+            ISerialization::SubstreamPath path;
+            serialization->enumerateStreams(path, [&](const auto & subpath)
+            {
+                if (!has_all_offsets)
+                    return;
+
+                if (subpath.empty() || subpath.back().type != ISerialization::Substream::ArraySizes)
+                    return;
+
+                auto subname = ISerialization::getSubcolumnNameForStream(subpath);
+                auto it = offset_columns.find(Nested::concatenateName(name_in_storage, subname));
+                if (it != offset_columns.end())
+                    current_offsets.emplace_back(it->second);
+                else
+                    has_all_offsets = false;
+
+            }, {serialization, type, nullptr, nullptr});
+        }
+
+        if (array_type && has_all_offsets)
+        {
+            assert(!current_offsets.empty());
+            auto scalar_type = getBaseTypeOfArray(type);
+
+            size_t data_size = assert_cast<const ColumnUInt64 &>(*current_offsets.back()).getData().back();
+            res_columns[i] = scalar_type->createColumnConstWithDefaultValue(data_size)->convertToFullColumnIfConst();
+
+            for (auto it = current_offsets.rbegin(); it != current_offsets.rend(); ++it)
+                res_columns[i] = ColumnArray::create(res_columns[i], *it);
         }
         else
         {
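The rebuild of a fully missing nested column runs inside-out: the last (innermost) offsets column fixes how many default scalars to materialize, then each collected offsets column, iterated rbegin() to rend(), wraps the result in one more array level. A self-contained sketch of that order on plain vectors (all names illustrative, not from the patch):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    // Offsets collected outermost-first, like current_offsets in the patch,
    // for a two-level Array(Array(String)) column with two rows.
    std::vector<std::vector<uint64_t>> current_offsets = {
        {1, 2},  // level 0: row 0 ends at element 1, row 1 at element 2
        {2, 5},  // level 1: inner arrays end at positions 2 and 5
    };

    // Innermost offsets give the count of default scalars, as in
    // scalar_type->createColumnConstWithDefaultValue(data_size).
    std::size_t data_size = current_offsets.back().back();
    std::cout << data_size << " default scalars\n";

    // Wrap inside-out; each pass corresponds to one
    // res_columns[i] = ColumnArray::create(res_columns[i], *it).
    std::size_t elements = data_size;
    for (auto it = current_offsets.rbegin(); it != current_offsets.rend(); ++it)
    {
        std::cout << "wrap " << elements << " elements into "
                  << it->size() << " arrays\n";
        elements = it->size();
    }
}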
@@ -43,6 +43,7 @@ void fillMissingColumns(
     Columns & res_columns,
     size_t num_rows,
     const NamesAndTypesList & requested_columns,
+    const NamesAndTypesList & available_columns,
     StorageMetadataPtr metadata_snapshot);

 }
@@ -66,7 +66,11 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e
 {
     try
     {
-        DB::fillMissingColumns(res_columns, num_rows, columns, metadata_snapshot);
+        NamesAndTypesList available_columns;
+        for (const auto & column : columns)
+            available_columns.push_back(getColumnFromPart(column));
+
+        DB::fillMissingColumns(res_columns, num_rows, columns, available_columns, metadata_snapshot);
         should_evaluate_missing_defaults = std::any_of(
             res_columns.begin(), res_columns.end(), [](const auto & column) { return column == nullptr; });
     }
@@ -91,7 +91,7 @@ protected:
         ++name_and_type;
     }

-    fillMissingColumns(columns, src.rows(), column_names_and_types, /*metadata_snapshot=*/ nullptr);
+    fillMissingColumns(columns, src.rows(), column_names_and_types, column_names_and_types, /*metadata_snapshot=*/ nullptr);
     assert(std::all_of(columns.begin(), columns.end(), [](const auto & column) { return column != nullptr; }));

     return Chunk(std::move(columns), src.rows());
tests/queries/0_stateless/01825_type_json_17.reference (new file, 7 lines)
@@ -0,0 +1,7 @@
+Tuple(arr Nested(k1 Nested(k2 String, k3 String, k4 Int8), k5 Tuple(k6 String)), id Int8)
+{"obj":{"arr":[{"k1":[{"k2":"aaa","k3":"bbb","k4":0},{"k2":"ccc","k3":"","k4":0}],"k5":{"k6":""}}],"id":1}}
+{"obj":{"arr":[{"k1":[{"k2":"","k3":"ddd","k4":10},{"k2":"","k3":"","k4":20}],"k5":{"k6":"foo"}}],"id":2}}
+[['bbb','']] [['aaa','ccc']]
+[['ddd','']] [['','']]
+[[0,0]]
+[[10,20]]
tests/queries/0_stateless/01825_type_json_17.sql (new file, 18 lines)
@@ -0,0 +1,18 @@
+-- Tags: no-fasttest
+
+DROP TABLE IF EXISTS t_json_17;
+SET allow_experimental_object_type = 1;
+SET output_format_json_named_tuples_as_objects = 1;
+
+CREATE TABLE t_json_17(obj JSON)
+ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO t_json_17 FORMAT JSONAsObject {"id": 1, "arr": [{"k1": [{"k2": "aaa", "k3": "bbb"}, {"k2": "ccc"}]}]}
+INSERT INTO t_json_17 FORMAT JSONAsObject {"id": 2, "arr": [{"k1": [{"k3": "ddd", "k4": 10}, {"k4": 20}], "k5": {"k6": "foo"}}]}
+
+SELECT toTypeName(obj) FROM t_json_17 LIMIT 1;
+SELECT obj FROM t_json_17 ORDER BY obj.id FORMAT JSONEachRow;
+SELECT obj.arr.k1.k3, obj.arr.k1.k2 FROM t_json_17 ORDER BY obj.id;
+SELECT obj.arr.k1.k4 FROM t_json_17 ORDER BY obj.id;
+
+DROP TABLE IF EXISTS t_json_17;