fix reading from Merge tables

This commit is contained in:
Anton Popov 2024-07-25 12:20:39 +00:00
parent 36b264fe01
commit 97ceca4b92
5 changed files with 23 additions and 7 deletions

View File

@ -41,12 +41,19 @@ bool canUseProjectionForReadingStep(ReadFromMergeTree * reading)
if (reading->readsInOrder())
return false;
const auto & query_settings = reading->getContext()->getSettingsRef();
// Currently projection don't support deduplication when moving parts between shards.
if (reading->getContext()->getSettingsRef().allow_experimental_query_deduplication)
if (query_settings.allow_experimental_query_deduplication)
return false;
// Currently projection don't support settings which implicitly modify aggregate functions.
if (reading->getContext()->getSettingsRef().aggregate_functions_null_for_empty)
if (query_settings.aggregate_functions_null_for_empty)
return false;
/// Don't use projections if have mutations to apply
/// because we need to apply them on original data.
if (query_settings.apply_mutations_on_fly && reading->getMutationsSnapshot()->hasDataMutations())
return false;
return true;

View File

@ -271,6 +271,7 @@ public:
/// Return true if the trivial count query could be optimized without reading the data at all
/// in totalRows() or totalRowsByPartitionPredicate() methods or with optimized reading in read() method.
/// 'storage_snapshot' may be nullptr.
virtual bool supportsTrivialCountOptimization(const StorageSnapshotPtr & /*storage_snapshot*/, ContextPtr /*query_context*/) const
{
return false;

View File

@ -8444,10 +8444,16 @@ void MergeTreeData::updateObjectColumns(const DataPartPtr & part, const DataPart
DB::updateObjectColumns(object_columns, columns, part->getColumns());
}
bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr) const
bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr query_context) const
{
if (hasLightweightDeletedMask())
return false;
if (!storage_snapshot)
return !query_context->getSettingsRef().apply_mutations_on_fly;
const auto & snapshot_data = assert_cast<const MergeTreeData::SnapshotData &>(*storage_snapshot->data);
return !hasLightweightDeletedMask() && !snapshot_data.mutations_snapshot->hasDataMutations();
return !snapshot_data.mutations_snapshot->hasDataMutations();
}
Int64 MergeTreeData::getMinMetadataVersion(const DataPartsVector & parts)

View File

@ -443,7 +443,7 @@ public:
bool areAsynchronousInsertsEnabled() const override;
bool supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr) const override;
bool supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr query_context) const override;
/// A snapshot of pending mutations that weren't applied to some of the parts yet
/// and should be applied on the fly (i.e. when reading from the part).

View File

@ -1617,9 +1617,11 @@ std::tuple<bool /* is_regexp */, ASTPtr> StorageMerge::evaluateDatabaseName(cons
return {false, ast};
}
bool StorageMerge::supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr ctx) const
bool StorageMerge::supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr ctx) const
{
return getFirstTable([&](const auto & table) { return !table->supportsTrivialCountOptimization(storage_snapshot, ctx); }) == nullptr;
/// Here we actually need storage snapshot of all nested tables.
/// But to avoid complexity pass nullptr to make more lightweight check in MergeTreeData.
return getFirstTable([&](const auto & table) { return !table->supportsTrivialCountOptimization(nullptr, ctx); }) == nullptr;
}
std::optional<UInt64> StorageMerge::totalRows(const Settings & settings) const