Merge pull request #62180 from Algunenano/i35215

Add test for #35215
This commit is contained in:
Raúl Marín 2024-04-11 22:16:21 +00:00 committed by GitHub
commit 1ae8a9fc6f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 32 additions and 15 deletions

View File

@ -432,13 +432,12 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
{
const auto & keys = aggregating.getParams().keys;
const auto & aggregates = aggregating.getParams().aggregates;
Block key_virtual_columns = reading.getMergeTreeData().getHeaderWithVirtualsForFilter();
const auto metadata = reading.getStorageMetadata();
Block key_virtual_columns = reading.getMergeTreeData().getHeaderWithVirtualsForFilter(metadata);
AggregateProjectionCandidates candidates;
const auto & parts = reading.getParts();
const auto metadata = reading.getStorageMetadata();
ContextPtr context = reading.getContext();
const auto & projections = metadata->projections;

View File

@ -1415,7 +1415,8 @@ static void buildIndexes(
indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */);
}
indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context);
indexes->part_values
= MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(metadata_snapshot, data, parts, filter_actions_dag, context);
MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset(indexes->part_offset_condition, filter_actions_dag, context);
indexes->use_skip_indexes = settings.use_skip_indexes;

View File

@ -1031,19 +1031,26 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat
const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value", "_part_data_version"};
Block MergeTreeData::getHeaderWithVirtualsForFilter() const
/// Builds the sample block of virtual columns that may be used for filtering parts.
/// Walks the names in `virtuals_useful_for_filter` and inserts each one that
///  - is not already the name of a physical column in `metadata`, and
///  - is known to the storage's virtual columns description.
/// Every inserted entry carries a column freshly created from the virtual column's type.
Block MergeTreeData::getHeaderWithVirtualsForFilter(const StorageMetadataPtr & metadata) const
{
const auto columns = metadata->getColumns().getAllPhysical();
Block header;
auto virtuals_desc = getVirtualsPtr();
for (const auto & name : virtuals_useful_for_filter)
{
/// Skip names that collide with a physical column of the table (e.g. a user column called `_part`).
if (columns.contains(name))
continue;
/// Names missing from the virtuals description are silently left out of the header.
if (auto column = virtuals_desc->tryGet(name))
header.insert({column->type->createColumn(), column->type, name});
}
return header;
}
Block MergeTreeData::getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const
Block MergeTreeData::getBlockWithVirtualsForFilter(
const StorageMetadataPtr & metadata, const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const
{
auto block = getHeaderWithVirtualsForFilter();
auto block = getHeaderWithVirtualsForFilter(metadata);
for (const auto & part_or_projection : parts)
{
@ -1072,7 +1079,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
return 0;
auto metadata_snapshot = getInMemoryMetadataPtr();
auto virtual_columns_block = getBlockWithVirtualsForFilter({parts[0]});
auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]});
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr);
if (!filter_dag)
@ -1091,7 +1098,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
std::unordered_set<String> part_values;
if (valid)
{
virtual_columns_block = getBlockWithVirtualsForFilter(parts);
virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, parts);
VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context);
part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
if (part_values.empty())
@ -6694,11 +6701,11 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
};
Block virtual_columns_block;
auto virtual_block = getHeaderWithVirtualsForFilter();
auto virtual_block = getHeaderWithVirtualsForFilter(metadata_snapshot);
bool has_virtual_column = std::any_of(required_columns.begin(), required_columns.end(), [&](const auto & name) { return virtual_block.has(name); });
if (has_virtual_column || filter_dag)
{
virtual_columns_block = getBlockWithVirtualsForFilter(parts, /*ignore_empty=*/ true);
virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, parts, /*ignore_empty=*/true);
if (virtual_columns_block.rows() == 0)
return {};
}

View File

@ -990,10 +990,11 @@ public:
static const Names virtuals_useful_for_filter;
/// Construct a sample block of virtual columns.
Block getHeaderWithVirtualsForFilter() const;
Block getHeaderWithVirtualsForFilter(const StorageMetadataPtr & metadata) const;
/// Construct a block consisting only of possible virtual columns for part pruning.
Block getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const;
Block getBlockWithVirtualsForFilter(
const StorageMetadataPtr & metadata, const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const;
/// In merge tree we do inserts with several steps. One of them:
/// X. write part to temporary directory with some temp name

View File

@ -473,6 +473,7 @@ void MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset(
}
std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(
const StorageMetadataPtr & metadata_snapshot,
const MergeTreeData & data,
const MergeTreeData::DataPartsVector & parts,
const ActionsDAGPtr & filter_dag,
@ -481,12 +482,12 @@ std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPar
if (!filter_dag)
return {};
auto sample = data.getHeaderWithVirtualsForFilter();
auto sample = data.getHeaderWithVirtualsForFilter(metadata_snapshot);
auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample);
if (!dag)
return {};
auto virtual_columns_block = data.getBlockWithVirtualsForFilter(parts);
auto virtual_columns_block = data.getBlockWithVirtualsForFilter(metadata_snapshot, parts);
VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context);
return VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
}

View File

@ -166,6 +166,7 @@ public:
/// Example: SELECT count() FROM table WHERE _part = 'part_name'
/// If expression found, return a set with allowed part names (std::nullopt otherwise).
static std::optional<std::unordered_set<String>> filterPartsByVirtualColumns(
const StorageMetadataPtr & metadata_snapshot,
const MergeTreeData & data,
const MergeTreeData::DataPartsVector & parts,
const ActionsDAGPtr & filter_dag,

View File

@ -0,0 +1,3 @@
-- Test added for issue #35215: a MergeTree table whose physical column is named `_part`,
-- the same name as one of the MergeTree virtual columns.
DROP TABLE IF EXISTS override_test;
-- The table is created and filled in one statement with the single value 1.
CREATE TABLE override_test (_part UInt32) ENGINE = MergeTree ORDER BY tuple() AS SELECT 1;
-- Selects the column by the colliding name.
SELECT _part FROM override_test;

View File

@ -0,0 +1,2 @@
-- Variant with a Float32 physical column named `_part` (table name suggests a fuzzer-derived case).
CREATE TABLE override_test__fuzz_45 (`_part` Float32) ENGINE = MergeTree ORDER BY tuple() AS SELECT 1;
-- Selects the colliding column name under a GROUP BY.
SELECT _part FROM override_test__fuzz_45 GROUP BY materialize(6), 1;