RFC: Fix filtering by virtual columns with OR expression

Virtual columns did not supports queries with OR, for example query like
this (here `m` is the `Merge` table, see the test):

    select key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1');

Will always leads to:

    Cannot find column `value` in source stream, there are only columns ...

The reason for this is that it actually executes the following queries:

    SELECT key, value FROM default.d1 WHERE ((value = 10) AND ('v1' = 'v1')) OR ((value = 20) AND ('v1' = 'v1'));
    SELECT key FROM default.d2 WHERE 0;

And this kind of filtering is used not only for `Merge` table but also:
- `_table` for `Merge` (already mentioned)
- `_file` for `File`
- `_idx` for `S3`
- and as well as filtering `system.*` tables by `database`/`table`/...

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2023-07-26 20:25:48 +02:00
parent 5611b2fff4
commit 68aed0d16e
3 changed files with 99 additions and 9 deletions

View File

@ -30,6 +30,7 @@
#include <Storages/VirtualColumnUtils.h>
#include <IO/WriteHelpers.h>
#include <Common/typeid_cast.h>
#include <Parsers/makeASTForLogicalFunction.h>
#include <Columns/ColumnSet.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/ActionsVisitor.h>
@ -63,14 +64,31 @@ bool isValidFunction(const ASTPtr & expression, const std::function<bool(const A
bool extractFunctions(const ASTPtr & expression, const std::function<bool(const ASTPtr &)> & is_constant, ASTs & result)
{
const auto * function = expression->as<ASTFunction>();
if (function && (function->name == "and" || function->name == "indexHint"))
if (function)
{
bool ret = true;
for (const auto & child : function->arguments->children)
ret &= extractFunctions(child, is_constant, result);
return ret;
if (function->name == "and" || function->name == "indexHint")
{
bool ret = true;
for (const auto & child : function->arguments->children)
ret &= extractFunctions(child, is_constant, result);
return ret;
}
else if (function->name == "or")
{
bool ret = true;
ASTs or_args;
for (const auto & child : function->arguments->children)
ret &= extractFunctions(child, is_constant, or_args);
/// We can keep condition only if it still OR condition (i.e. we
/// have dependent conditions for columns at both sides)
if (or_args.size() == 2)
result.push_back(makeASTForLogicalOr(std::move(or_args)));
return ret;
}
}
else if (isValidFunction(expression, is_constant))
if (isValidFunction(expression, is_constant))
{
result.push_back(expression->clone());
return true;
@ -80,13 +98,13 @@ bool extractFunctions(const ASTPtr & expression, const std::function<bool(const
}
/// Construct a conjunction from given functions
ASTPtr buildWhereExpression(const ASTs & functions)
ASTPtr buildWhereExpression(ASTs && functions)
{
if (functions.empty())
return nullptr;
if (functions.size() == 1)
return functions[0];
return makeASTFunction("and", functions);
return makeASTForLogicalAnd(std::move(functions));
}
}
@ -171,7 +189,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block
if (select.prewhere())
unmodified &= extractFunctions(select.prewhere(), is_constant, functions);
expression_ast = buildWhereExpression(functions);
expression_ast = buildWhereExpression(std::move(functions));
return unmodified;
}

View File

@ -0,0 +1,38 @@
-- { echoOn }
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=0, convert_query_to_cnf=0;
v1 1
v1 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=0, convert_query_to_cnf=0;
v1 1
v2 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=0;
v1 1
select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=0;
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=0, convert_query_to_cnf=1;
v1 1
v1 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=0, convert_query_to_cnf=1;
v1 1
v2 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=1;
v1 1
select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=0, convert_query_to_cnf=1;
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=1, convert_query_to_cnf=0;
v1 1
v1 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=1, convert_query_to_cnf=0;
v1 1
v2 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=0;
v1 1
select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=0;
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings allow_experimental_analyzer=1, convert_query_to_cnf=1;
v1 1
v1 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings allow_experimental_analyzer=1, convert_query_to_cnf=1;
v1 1
v2 2
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=1;
v1 1
select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings allow_experimental_analyzer=1, convert_query_to_cnf=1;

View File

@ -0,0 +1,34 @@
drop table if exists m;
drop view if exists v1;
drop view if exists v2;
drop table if exists d1;
drop table if exists d2;
create table d1 (key Int, value Int) engine=Memory();
create table d2 (key Int, value Int) engine=Memory();
insert into d1 values (1, 10);
insert into d1 values (2, 20);
insert into d2 values (1, 10);
insert into d2 values (2, 20);
create view v1 as select * from d1;
create view v2 as select * from d2;
create table m as v1 engine=Merge(currentDatabase(), '^(v1|v2)$');
-- avoid reorder
set max_threads=1;
-- { echoOn }
{% for settings in [
'allow_experimental_analyzer=0, convert_query_to_cnf=0',
'allow_experimental_analyzer=0, convert_query_to_cnf=1',
'allow_experimental_analyzer=1, convert_query_to_cnf=0',
'allow_experimental_analyzer=1, convert_query_to_cnf=1'
] %}
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') settings {{ settings }};
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v2') settings {{ settings }};
select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v3') settings {{ settings }};
select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') settings {{ settings }};
{% endfor %}