Fix invalid virtual column expr

This commit is contained in:
Amos Bird 2021-04-20 22:53:17 +08:00
parent 1bdd699f20
commit 50f2e488bd
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
8 changed files with 70 additions and 19 deletions

View File

@ -688,9 +688,11 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat
std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
const SelectQueryInfo & query_info, ContextPtr local_context, const DataPartsVector & parts) const
{
if (parts.empty())
return 0u;
auto metadata_snapshot = getInMemoryMetadataPtr();
ASTPtr expression_ast;
Block virtual_columns_block = MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns();
Block virtual_columns_block = MergeTreeDataSelectExecutor::getBlockWithVirtualPartColumns(parts, true /* one_part */);
// Generate valid expressions for filtering
bool valid = VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, local_context, virtual_columns_block, expression_ast);
@ -702,7 +704,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
std::unordered_set<String> part_values;
if (valid && expression_ast)
{
MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(parts, virtual_columns_block);
virtual_columns_block = MergeTreeDataSelectExecutor::getBlockWithVirtualPartColumns(parts, false /* one_part */);
VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, local_context, expression_ast);
part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
if (part_values.empty())

View File

@ -71,17 +71,13 @@ MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & d
{
}
Block MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns()
Block MergeTreeDataSelectExecutor::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part)
{
return Block(std::initializer_list<ColumnWithTypeAndName>{
Block block(std::initializer_list<ColumnWithTypeAndName>{
ColumnWithTypeAndName(ColumnString::create(), std::make_shared<DataTypeString>(), "_part"),
ColumnWithTypeAndName(ColumnString::create(), std::make_shared<DataTypeString>(), "_partition_id"),
ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared<DataTypeUUID>(), "_part_uuid")});
}
void MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, Block & block)
{
MutableColumns columns = block.mutateColumns();
auto & part_column = columns[0];
@ -93,9 +89,17 @@ void MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(const MergeTre
part_column->insert(part->name);
partition_id_column->insert(part->info.partition_id);
part_uuid_column->insert(part->uuid);
if (one_part)
{
part_column = ColumnConst::create(std::move(part_column), 1);
partition_id_column = ColumnConst::create(std::move(partition_id_column), 1);
part_uuid_column = ColumnConst::create(std::move(part_uuid_column), 1);
break;
}
}
block.setColumns(std::move(columns));
return block;
}
@ -179,6 +183,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
Names real_column_names;
size_t total_parts = parts.size();
if (total_parts == 0)
return std::make_unique<QueryPlan>();
bool sample_factor_column_queried = false;
Float64 used_sample_factor = 1;
@ -220,7 +226,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
std::unordered_set<String> part_values;
ASTPtr expression_ast;
auto virtual_columns_block = getSampleBlockWithVirtualPartColumns();
auto virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */);
// Generate valid expressions for filtering
VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, context, virtual_columns_block, expression_ast);
@ -228,7 +234,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
// If there is still something left, fill the virtual block and do the filtering.
if (expression_ast)
{
fillBlockWithVirtualPartColumns(parts, virtual_columns_block);
virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */);
VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context, expression_ast);
part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
if (part_values.empty())

View File

@ -45,11 +45,9 @@ public:
unsigned num_streams,
const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr) const;
/// Construct a sample block consisting only of possible virtual columns for part pruning.
static Block getSampleBlockWithVirtualPartColumns();
/// Fill in values of possible virtual columns for part pruning.
static void fillBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, Block & block);
/// Construct a block consisting only of possible virtual columns for part pruning.
/// If one_part is true, fill in at most one part.
static Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part);
private:
const MergeTreeData & data;

View File

@ -14,6 +14,7 @@
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/FilterDescription.h>
@ -27,6 +28,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
@ -124,6 +130,21 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va
bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block block, ASTPtr & expression_ast)
{
if (block.rows() == 0)
throw Exception("Cannot prepare filter with empty block", ErrorCodes::LOGICAL_ERROR);
/// Take the first row of the input block to build a constant block
auto columns = block.getColumns();
Columns const_columns(columns.size());
for (size_t i = 0; i < columns.size(); ++i)
{
if (isColumnConst(*columns[i]))
const_columns[i] = columns[i]->cloneResized(1);
else
const_columns[i] = ColumnConst::create(columns[i]->cloneResized(1), 1);
}
block.setColumns(const_columns);
bool unmodified = true;
const auto & select = query->as<ASTSelectQuery &>();
if (!select.where() && !select.prewhere())
@ -135,10 +156,6 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block
else
condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone();
// Prepare a constant block with valid expressions
for (size_t i = 0; i < block.columns(); ++i)
block.getByPosition(i).column = block.getByPosition(i).type->createColumnConstWithDefaultValue(1);
// Provide input columns as constant columns to check if an expression is constant.
std::function<bool(const ASTPtr &)> is_constant = [&block, &context](const ASTPtr & node)
{
@ -169,6 +186,9 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block
void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr context, ASTPtr expression_ast)
{
if (block.rows() == 0)
return;
if (!expression_ast)
prepareFilterBlockWithQuery(query, context, block, expression_ast);

View File

@ -10,6 +10,8 @@ CREATE TABLE data_01515
Engine=MergeTree()
ORDER BY key;
INSERT INTO data_01515 VALUES (1, 2, 3);
SELECT * FROM data_01515;
SELECT * FROM data_01515 SETTINGS force_data_skipping_indices=''; -- { serverError 6 }
SELECT * FROM data_01515 SETTINGS force_data_skipping_indices='d1_idx'; -- { serverError 277 }

View File

@ -5,3 +5,13 @@
1 2
1 3
3
1
11
21
31
41
51
61
71
81
91

View File

@ -17,3 +17,15 @@ set max_rows_to_read = 1; -- one row for subquery
select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1));
drop table x;
drop table if exists mt;
create table mt (n UInt64) engine=MergeTree order by n partition by n % 10;
set max_rows_to_read = 200;
insert into mt select * from numbers(100);
select * from mt where toUInt64(substr(_part, 1, position(_part, '_') - 1)) = 1;
drop table mt;