mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-13 09:52:38 +00:00
fix filling of missed subcolumns
This commit is contained in:
parent
5943d60f26
commit
a2b17b01f9
@ -18,6 +18,7 @@
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Storages/StorageInMemoryMetadata.h>
|
||||
|
||||
@ -35,8 +36,13 @@ namespace
|
||||
|
||||
/// Add all required expressions for missing columns calculation
|
||||
void addDefaultRequiredExpressionsRecursively(
|
||||
const Block & block, const String & required_column_name, DataTypePtr required_column_type,
|
||||
const ColumnsDescription & columns, ASTPtr default_expr_list_accum, NameSet & added_columns, bool null_as_default)
|
||||
const Block & block,
|
||||
const String & required_column_name,
|
||||
DataTypePtr required_column_type,
|
||||
const ColumnsDescription & columns,
|
||||
ASTPtr default_expr_list_accum,
|
||||
NameSet & added_columns,
|
||||
bool null_as_default)
|
||||
{
|
||||
checkStackSize();
|
||||
|
||||
@ -273,6 +279,20 @@ static std::unordered_map<String, ColumnPtr> collectOffsetsColumns(
|
||||
return offsets_columns;
|
||||
}
|
||||
|
||||
/// Creates a full (non-constant) column of `num_rows` rows filled with the default value of `data_type`.
/// If `subcolumn_name` is not empty, the result is the corresponding subcolumn of that default column,
/// obtained through `data_type.getSubcolumn`.
static ColumnPtr createColumnWithDefaultValue(const IDataType & data_type, const String & subcolumn_name, size_t num_rows)
|
||||
{
|
||||
auto column = data_type.createColumnConstWithDefaultValue(num_rows);
|
||||
|
||||
/// No subcolumn requested: just materialize the constant column.
if (subcolumn_name.empty())
|
||||
return column->convertToFullColumnIfConst();
|
||||
|
||||
/// First extract the subcolumn from the data column held by the constant column, then replicate it to `num_rows` rows.
/// NOTE(review): presumably the data column of the constant holds a single default row - confirm against ColumnConst.
|
||||
column = assert_cast<const ColumnConst &>(*column).getDataColumnPtr();
|
||||
column = data_type.getSubcolumn(subcolumn_name, column);
|
||||
|
||||
/// Wrap the extracted subcolumn into a constant of `num_rows` rows and materialize it.
return ColumnConst::create(std::move(column), num_rows)->convertToFullColumnIfConst();
|
||||
}
|
||||
|
||||
void fillMissingColumns(
|
||||
Columns & res_columns,
|
||||
size_t num_rows,
|
||||
@ -298,21 +318,19 @@ void fillMissingColumns(
|
||||
auto requested_column = requested_columns.begin();
|
||||
for (size_t i = 0; i < num_columns; ++i, ++requested_column)
|
||||
{
|
||||
const auto & [name, type] = *requested_column;
|
||||
|
||||
if (res_columns[i] && partially_read_columns.contains(name))
|
||||
if (res_columns[i] && partially_read_columns.contains(requested_column->name))
|
||||
res_columns[i] = nullptr;
|
||||
|
||||
if (res_columns[i])
|
||||
continue;
|
||||
|
||||
if (metadata_snapshot && metadata_snapshot->getColumns().hasDefault(name))
|
||||
if (metadata_snapshot && metadata_snapshot->getColumns().hasDefault(requested_column->getNameInStorage()))
|
||||
continue;
|
||||
|
||||
std::vector<ColumnPtr> current_offsets;
|
||||
size_t num_dimensions = 0;
|
||||
|
||||
const auto * array_type = typeid_cast<const DataTypeArray *>(type.get());
|
||||
const auto * array_type = typeid_cast<const DataTypeArray *>(requested_column->type.get());
|
||||
if (array_type && !offsets_columns.empty())
|
||||
{
|
||||
num_dimensions = getNumberOfDimensions(*array_type);
|
||||
@ -348,10 +366,10 @@ void fillMissingColumns(
|
||||
if (!current_offsets.empty())
|
||||
{
|
||||
size_t num_empty_dimensions = num_dimensions - current_offsets.size();
|
||||
auto scalar_type = createArrayOfType(getBaseTypeOfArray(type), num_empty_dimensions);
|
||||
auto scalar_type = createArrayOfType(getBaseTypeOfArray(requested_column->getTypeInStorage()), num_empty_dimensions);
|
||||
|
||||
size_t data_size = assert_cast<const ColumnUInt64 &>(*current_offsets.back()).getData().back();
|
||||
res_columns[i] = scalar_type->createColumnConstWithDefaultValue(data_size)->convertToFullColumnIfConst();
|
||||
res_columns[i] = createColumnWithDefaultValue(*scalar_type, requested_column->getSubcolumnName(), data_size);
|
||||
|
||||
for (auto it = current_offsets.rbegin(); it != current_offsets.rend(); ++it)
|
||||
res_columns[i] = ColumnArray::create(res_columns[i], *it);
|
||||
@ -360,7 +378,7 @@ void fillMissingColumns(
|
||||
{
|
||||
/// We must turn a constant column into a full column because the interpreter could infer
|
||||
/// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
|
||||
res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
|
||||
res_columns[i] = createColumnWithDefaultValue(*requested_column->getTypeInStorage(), requested_column->getSubcolumnName(), num_rows);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
|
||||
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Compression/CachedCompressedReadBuffer.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
@ -144,19 +145,26 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "invalid number of columns passed to MergeTreeReader::fillMissingColumns. "
|
||||
"Expected {}, got {}", num_columns, res_columns.size());
|
||||
|
||||
/// Convert columns list to block.
|
||||
/// TODO: rewrite with columns interface. It will be possible after changes in ExpressionActions.
|
||||
auto name_and_type = requested_columns.begin();
|
||||
for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
|
||||
{
|
||||
if (res_columns[pos] == nullptr)
|
||||
continue;
|
||||
NameSet full_requested_columns_set;
|
||||
NamesAndTypesList full_requested_columns;
|
||||
|
||||
additional_columns.insert({res_columns[pos], name_and_type->type, name_and_type->name});
|
||||
/// Convert columns list to block. And convert subcolumns to full columns.
|
||||
/// TODO: rewrite with columns interface. It will be possible after changes in ExpressionActions.
|
||||
|
||||
auto it = requested_columns.begin();
|
||||
for (size_t pos = 0; pos < num_columns; ++pos, ++it)
|
||||
{
|
||||
auto name_in_storage = it->getNameInStorage();
|
||||
|
||||
if (full_requested_columns_set.emplace(name_in_storage).second)
|
||||
full_requested_columns.emplace_back(name_in_storage, it->getTypeInStorage());
|
||||
|
||||
if (res_columns[pos])
|
||||
additional_columns.insert({res_columns[pos], it->type, it->name});
|
||||
}
|
||||
|
||||
auto dag = DB::evaluateMissingDefaults(
|
||||
additional_columns, requested_columns,
|
||||
additional_columns, full_requested_columns,
|
||||
storage_snapshot->metadata->getColumns(),
|
||||
data_part_info_for_read->getContext());
|
||||
|
||||
@ -170,9 +178,18 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
|
||||
}
|
||||
|
||||
/// Move columns from block.
|
||||
name_and_type = requested_columns.begin();
|
||||
for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
|
||||
res_columns[pos] = std::move(additional_columns.getByName(name_and_type->name).column);
|
||||
it = requested_columns.begin();
|
||||
for (size_t pos = 0; pos < num_columns; ++pos, ++it)
|
||||
{
|
||||
auto name_in_storage = it->getNameInStorage();
|
||||
res_columns[pos] = additional_columns.getByName(name_in_storage).column;
|
||||
|
||||
if (it->isSubcolumn())
|
||||
{
|
||||
const auto & type_in_storage = it->getTypeInStorage();
|
||||
res_columns[pos] = type_in_storage->getSubcolumn(it->getSubcolumnName(), res_columns[pos]);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
@ -192,7 +209,12 @@ bool IMergeTreeReader::isSubcolumnOffsetsOfNested(const String & name_in_storage
|
||||
if (!data_part_info_for_read->isWidePart() || subcolumn_name != "size0")
|
||||
return false;
|
||||
|
||||
return Nested::isSubcolumnOfNested(name_in_storage, part_columns);
|
||||
auto split = Nested::splitName(name_in_storage);
|
||||
if (split.second.empty())
|
||||
return false;
|
||||
|
||||
auto nested_column = part_columns.tryGetColumn(GetColumnsOptions::All, split.first);
|
||||
return nested_column && isNested(nested_column->type);
|
||||
}
|
||||
|
||||
String IMergeTreeReader::getColumnNameInPart(const NameAndTypePair & required_column) const
|
||||
|
@ -60,39 +60,25 @@ void MergeTreeReaderCompact::fillColumnPositions()
|
||||
|
||||
for (size_t i = 0; i < columns_num; ++i)
|
||||
{
|
||||
const auto & column_to_read = columns_to_read[i];
|
||||
|
||||
auto & column_to_read = columns_to_read[i];
|
||||
auto position = data_part_info_for_read->getColumnPosition(column_to_read.getNameInStorage());
|
||||
bool is_array = isArray(column_to_read.type);
|
||||
|
||||
if (column_to_read.isSubcolumn())
|
||||
{
|
||||
auto storage_column_from_part = getColumnInPart(
|
||||
{column_to_read.getNameInStorage(), column_to_read.getTypeInStorage()});
|
||||
NameAndTypePair column_in_storage{column_to_read.getNameInStorage(), column_to_read.getTypeInStorage()};
|
||||
auto storage_column_from_part = getColumnInPart(column_in_storage);
|
||||
|
||||
auto subcolumn_name = column_to_read.getSubcolumnName();
|
||||
if (!storage_column_from_part.type->hasSubcolumn(subcolumn_name))
|
||||
position.reset();
|
||||
}
|
||||
|
||||
column_positions[i] = std::move(position);
|
||||
|
||||
/// If array of Nested column is missing in part,
|
||||
/// we have to read its offsets if they exist.
|
||||
if (!position && is_array)
|
||||
{
|
||||
auto column_to_read_with_subcolumns = getColumnConvertedToSubcolumnOfNested(column_to_read);
|
||||
auto name_level_for_offsets = findColumnForOffsets(column_to_read_with_subcolumns);
|
||||
|
||||
if (name_level_for_offsets.has_value())
|
||||
{
|
||||
column_positions[i] = data_part_info_for_read->getColumnPosition(name_level_for_offsets->first);
|
||||
columns_for_offsets[i] = name_level_for_offsets;
|
||||
partially_read_columns.insert(column_to_read.name);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
column_positions[i] = std::move(position);
|
||||
}
|
||||
if (!column_positions[i])
|
||||
findPositionForMissedNested(i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -125,6 +111,38 @@ NameAndTypePair MergeTreeReaderCompact::getColumnConvertedToSubcolumnOfNested(co
|
||||
Nested::concatenateName(name_in_storage, subcolumn_name));
|
||||
}
|
||||
|
||||
/// Tries to find a position to read from for the column `columns_to_read[pos]`
/// by looking up another column that can provide its offsets (see `findColumnForOffsets`).
/// Only handles columns whose type is an array, and `size0` subcolumns of columns whose type in storage is an array;
/// for any other column, or if no column for offsets is found, nothing is changed.
void MergeTreeReaderCompact::findPositionForMissedNested(size_t pos)
|
||||
{
|
||||
/// Taken by non-const reference: the column to read may be replaced below.
auto & column = columns_to_read[pos];
|
||||
|
||||
bool is_array = isArray(column.type);
|
||||
/// The requested column is the `size0` subcolumn of a column that is an array in storage.
bool is_offsets_subcolumn = isArray(column.getTypeInStorage()) && column.getSubcolumnName() == "size0";
|
||||
|
||||
if (!is_array && !is_offsets_subcolumn)
|
||||
return;
|
||||
|
||||
/// The lookup is done for the whole column in storage, not for the requested subcolumn.
NameAndTypePair column_in_storage{column.getNameInStorage(), column.getTypeInStorage()};
|
||||
|
||||
auto column_to_read_with_subcolumns = getColumnConvertedToSubcolumnOfNested(column_in_storage);
|
||||
auto name_level_for_offsets = findColumnForOffsets(column_to_read_with_subcolumns);
|
||||
|
||||
if (!name_level_for_offsets)
|
||||
return;
|
||||
|
||||
/// Read from the position of the column that was found for offsets.
column_positions[pos] = data_part_info_for_read->getColumnPosition(name_level_for_offsets->first);
|
||||
|
||||
if (is_offsets_subcolumn)
|
||||
{
|
||||
/// Read offsets from another array from the same Nested column.
/// The column to read is replaced: the found column's name is used with the same subcolumn name and types.
|
||||
column = {name_level_for_offsets->first, column.getSubcolumnName(), column.getTypeInStorage(), column.type};
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Remember which column provides the offsets and record the requested column in `partially_read_columns`.
/// NOTE(review): presumably only offsets are read here and the values are filled with defaults later - confirm against fillMissingColumns.
columns_for_offsets[pos] = std::move(name_level_for_offsets);
|
||||
partially_read_columns.insert(column.name);
|
||||
}
|
||||
}
|
||||
|
||||
void MergeTreeReaderCompact::readData(
|
||||
const NameAndTypePair & name_and_type,
|
||||
ColumnPtr & column,
|
||||
|
@ -36,6 +36,7 @@ public:
|
||||
protected:
|
||||
void fillColumnPositions();
|
||||
NameAndTypePair getColumnConvertedToSubcolumnOfNested(const NameAndTypePair & column);
|
||||
void findPositionForMissedNested(size_t pos);
|
||||
|
||||
using InputStreamGetter = ISerialization::InputStreamGetter;
|
||||
|
||||
|
@ -0,0 +1,31 @@
|
||||
0
|
||||
2
|
||||
4
|
||||
6
|
||||
8
|
||||
0
|
||||
2
|
||||
4
|
||||
6
|
||||
8
|
||||
1 ['aaa',NULL] [NULL,NULL]
|
||||
2 ['ccc'] [NULL]
|
||||
3 [NULL] [NULL]
|
||||
4 [NULL,'bbb'] ['ddd',NULL]
|
||||
5 [NULL] [NULL]
|
||||
1 2 2
|
||||
2 1 1
|
||||
3 1 1
|
||||
4 2 2
|
||||
5 1 1
|
||||
1 [0,1] [1,1]
|
||||
2 [0] [1]
|
||||
3 [1] [1]
|
||||
4 [1,0] [0,1]
|
||||
5 [1] [1]
|
||||
1 ('foo','bar') [1,NULL,3]
|
||||
2 ('aaa','bbb') [1,NULL,3]
|
||||
3 ('ccc','ddd') [4,5,6]
|
||||
1 foo bar 3 [0,1,0]
|
||||
2 foo bar 3 [0,1,0]
|
||||
3 ccc ddd 3 [0,0,0]
|
47
tests/queries/0_stateless/03203_fill_missed_subcolumns.sql
Normal file
47
tests/queries/0_stateless/03203_fill_missed_subcolumns.sql
Normal file
@ -0,0 +1,47 @@
|
||||
DROP TABLE IF EXISTS t_missed_subcolumns;
|
||||
|
||||
-- .null subcolumn
|
||||
|
||||
CREATE TABLE t_missed_subcolumns (x UInt32) ENGINE = MergeTree ORDER BY tuple();
|
||||
INSERT INTO t_missed_subcolumns SELECT * FROM numbers(10);
|
||||
|
||||
ALTER TABLE t_missed_subcolumns ADD COLUMN `y` Nullable(UInt32);
|
||||
|
||||
INSERT INTO t_missed_subcolumns SELECT number, if(number % 2, NULL, number) FROM numbers(10);
|
||||
|
||||
SELECT x FROM t_missed_subcolumns WHERE y IS NOT NULL SETTINGS optimize_functions_to_subcolumns = 1;
|
||||
SELECT x FROM t_missed_subcolumns WHERE y IS NOT NULL SETTINGS optimize_functions_to_subcolumns = 0;
|
||||
|
||||
DROP TABLE IF EXISTS t_missed_subcolumns;
|
||||
|
||||
-- .null and .size0 subcolumn in array
|
||||
|
||||
CREATE TABLE t_missed_subcolumns (id UInt64, `n.a` Array(Nullable(String))) ENGINE = MergeTree ORDER BY id;
|
||||
|
||||
INSERT INTO t_missed_subcolumns VALUES (1, ['aaa', NULL]) (2, ['ccc']) (3, [NULL]);
|
||||
ALTER TABLE t_missed_subcolumns ADD COLUMN `n.b` Array(Nullable(String));
|
||||
INSERT INTO t_missed_subcolumns VALUES (4, [NULL, 'bbb'], ['ddd', NULL]), (5, [NULL], [NULL]);
|
||||
|
||||
SELECT id, n.a, n.b FROM t_missed_subcolumns ORDER BY id;
|
||||
SELECT id, n.a.size0, n.b.size0 FROM t_missed_subcolumns ORDER BY id;
|
||||
SELECT id, n.a.null, n.b.null FROM t_missed_subcolumns ORDER BY id;
|
||||
|
||||
DROP TABLE IF EXISTS t_missed_subcolumns;
|
||||
|
||||
-- subcolumns and custom defaults
|
||||
|
||||
CREATE TABLE t_missed_subcolumns (id UInt64) ENGINE = MergeTree ORDER BY id;
|
||||
SYSTEM STOP MERGES t_missed_subcolumns;
|
||||
|
||||
INSERT INTO t_missed_subcolumns VALUES (1);
|
||||
|
||||
ALTER TABLE t_missed_subcolumns ADD COLUMN t Tuple(a String, b String) DEFAULT ('foo', 'bar');
|
||||
INSERT INTO t_missed_subcolumns VALUES (2, ('aaa', 'bbb'));
|
||||
|
||||
ALTER TABLE t_missed_subcolumns ADD COLUMN arr Array(Nullable(UInt64)) DEFAULT [1, NULL, 3];
|
||||
INSERT INTO t_missed_subcolumns VALUES (3, ('ccc', 'ddd'), [4, 5, 6]);
|
||||
|
||||
SELECT id, t, arr FROM t_missed_subcolumns ORDER BY id;
|
||||
SELECT id, t.a, t.b, arr.size0, arr.null FROM t_missed_subcolumns ORDER BY id;
|
||||
|
||||
DROP TABLE t_missed_subcolumns;
|
Loading…
Reference in New Issue
Block a user