Fix JOIN rewriters inconsistency (#9830)

* fix join rewrites: inconsistent visitors
This commit is contained in:
Artem Zuikov 2020-03-24 01:23:31 +03:00 committed by GitHub
parent 5b8824c065
commit 38cbf3e6f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 290 additions and 274 deletions

View File

@ -273,9 +273,6 @@ bool getTables(ASTSelectQuery & select, std::vector<JoinedElement> & joined_tabl
if (num_using && (num_tables - num_array_join) > 2)
throw Exception("Multiple CROSS/COMMA JOIN do not support USING", ErrorCodes::NOT_IMPLEMENTED);
if (num_comma && (num_comma != (joined_tables.size() - 1)))
throw Exception("Mix of COMMA and other JOINS is not supported", ErrorCodes::NOT_IMPLEMENTED);
return !(num_array_join || num_using);
}

View File

@ -639,6 +639,11 @@ std::shared_ptr<ASTExpressionList> subqueryExpressionList(
} /// namelesspace
/// Returns false when `node` is an ASTSubquery and true for any other node.
/// The second argument is ignored.
/// NOTE(review): presumably the visitor consults this before descending, so the
/// contents of subqueries are left untouched by this matcher - confirm against the visitor.
bool JoinToSubqueryTransformMatcher::needChildVisit(ASTPtr & node, const ASTPtr &)
{
    const bool is_subquery = node->as<ASTSubquery>() != nullptr;
    return !is_subquery;
}
void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data)
{
if (auto * t = ast->as<ASTSelectQuery>())

View File

@ -24,7 +24,7 @@ public:
bool done = false;
};
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
static bool needChildVisit(ASTPtr &, const ASTPtr &);
static void visit(ASTPtr & ast, Data & data);
private:

View File

@ -11,6 +11,7 @@ SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_0084
SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a)
SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n ) AS `--.s`\n CROSS JOIN t3_00849\n) AS `--.s`\nCROSS JOIN t4_00849
SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n ) AS `--.s`\n CROSS JOIN t3_00849\n) AS `--.s`\nCROSS JOIN t4_00849
SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n) AS `--.s`\nCROSS JOIN t3_00849
SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n) AS `--.s`\nCROSS JOIN t3_00849
SELECT * FROM t1, t2
1 1 1 1

View File

@ -28,7 +28,7 @@ ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, t3_00849, t4_00849 WHERE t1_0
ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849, t3_00849, t4_00849;
ANALYZE SELECT t1_00849.a FROM t1_00849 CROSS JOIN t2_00849 CROSS JOIN t3_00849 CROSS JOIN t4_00849;
ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849 CROSS JOIN t3_00849; -- { serverError 48 }
ANALYZE SELECT t1_00849.a FROM t1_00849, t2_00849 CROSS JOIN t3_00849;
ANALYZE SELECT t1_00849.a FROM t1_00849 JOIN t2_00849 USING a CROSS JOIN t3_00849; -- { serverError 48 }
ANALYZE SELECT t1_00849.a FROM t1_00849 JOIN t2_00849 ON t1_00849.a = t2_00849.a CROSS JOIN t3_00849;

View File

@ -11,6 +11,7 @@ SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`, \n `-
SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`, \n `--t2.a`, \n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`, \n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.a` = `--t2.a`\n ) AS `--.s`\n ALL INNER JOIN t3 ON `--t2.a` = `--t3.a`\n) AS `--.s`\nALL INNER JOIN t4 ON `--t3.a` = a\nWHERE (`--t1.a` = `--t2.a`) AND (`--t2.a` = `--t3.a`) AND (`--t3.a` = a)
SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT `--t1.a`\n FROM \n (\n SELECT a AS `--t1.a`\n FROM t1\n CROSS JOIN t2\n ) AS `--.s`\n CROSS JOIN t3\n) AS `--.s`\nCROSS JOIN t4
SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT `--t1.a`\n FROM \n (\n SELECT a AS `--t1.a`\n FROM t1\n CROSS JOIN t2\n ) AS `--.s`\n CROSS JOIN t3\n) AS `--.s`\nCROSS JOIN t4
SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT a AS `--t1.a`\n FROM t1\n CROSS JOIN t2\n) AS `--.s`\nCROSS JOIN t3
SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n a AS `--t1.a`, \n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.a` = `--t2.a`\n) AS `--.s`\nCROSS JOIN t3
SELECT * FROM t1, t2
1 1 1 1

View File

@ -28,7 +28,7 @@ ANALYZE SELECT t1.a FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.a = t3.a AND t3
ANALYZE SELECT t1.a FROM t1, t2, t3, t4;
ANALYZE SELECT t1.a FROM t1 CROSS JOIN t2 CROSS JOIN t3 CROSS JOIN t4;
ANALYZE SELECT t1.a FROM t1, t2 CROSS JOIN t3; -- { serverError 48 }
ANALYZE SELECT t1.a FROM t1, t2 CROSS JOIN t3;
ANALYZE SELECT t1.a FROM t1 JOIN t2 USING a CROSS JOIN t3; -- { serverError 48 }
ANALYZE SELECT t1.a FROM t1 JOIN t2 ON t1.a = t2.a CROSS JOIN t3;

View File

@ -1,14 +1,25 @@
1
2 fail: correlated subquery
3
4 fail: exists
5
6
0.0000
7
8
9
10
11
12
13 fail: join predicates
14
0.00000000
15 fail: correlated subquery
16
17 fail: correlated subquery
18
19
0.0000
20 fail: correlated subquery
21 fail: exists, not exists
22 fail: not exists

View File

@ -137,51 +137,51 @@ order by
l_returnflag,
l_linestatus;
-- select 2; -- rewrite fail
-- select
-- s_acctbal,
-- s_name,
-- n_name,
-- p_partkey,
-- p_mfgr,
-- s_address,
-- s_phone,
-- s_comment
-- from
-- part,
-- supplier,
-- partsupp,
-- nation,
-- region
-- where
-- p_partkey = ps_partkey
-- and s_suppkey = ps_suppkey
-- and p_size = 15
-- and p_type like '%BRASS'
-- and s_nationkey = n_nationkey
-- and n_regionkey = r_regionkey
-- and r_name = 'EUROPE'
-- and ps_supplycost = (
-- select
-- min(ps_supplycost)
-- from
-- partsupp,
-- supplier,
-- nation,
-- region
-- where
-- p_partkey = ps_partkey
-- and s_suppkey = ps_suppkey
-- and s_nationkey = n_nationkey
-- and n_regionkey = r_regionkey
-- and r_name = 'EUROPE'
-- )
-- order by
-- s_acctbal desc,
-- n_name,
-- s_name,
-- p_partkey
-- limit 100;
select 2, 'fail: correlated subquery'; -- TODO: Missing columns: 'p_partkey'
select
s_acctbal,
s_name,
n_name,
p_partkey,
p_mfgr,
s_address,
s_phone,
s_comment
from
part,
supplier,
partsupp,
nation,
region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and p_size = 15
and p_type like '%BRASS'
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE'
and ps_supplycost = (
select
min(ps_supplycost)
from
partsupp,
supplier,
nation,
region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE'
)
order by
s_acctbal desc,
n_name,
s_name,
p_partkey
limit 100; -- { serverError 47 }
select 3;
select
@ -208,7 +208,7 @@ order by
o_orderdate
limit 10;
-- select 4;
select 4, 'fail: exists'; -- TODO
-- select
-- o_orderpriority,
-- count(*) as order_count
@ -269,85 +269,85 @@ where
and toDecimal32(0.06, 2) + toDecimal32(0.01, 2)
and l_quantity < 24;
-- select 7;
-- select
-- supp_nation,
-- cust_nation,
-- l_year,
-- sum(volume) as revenue
-- from
-- (
-- select
-- n1.n_name as supp_nation,
-- n2.n_name as cust_nation,
-- extract(year from l_shipdate) as l_year,
-- l_extendedprice * (1 - l_discount) as volume
-- from
-- supplier,
-- lineitem,
-- orders,
-- customer,
-- nation n1,
-- nation n2
-- where
-- s_suppkey = l_suppkey
-- and o_orderkey = l_orderkey
-- and c_custkey = o_custkey
-- and s_nationkey = n1.n_nationkey
-- and c_nationkey = n2.n_nationkey
-- and (
-- (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
-- or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
-- )
-- and l_shipdate between date '1995-01-01' and date '1996-12-31'
-- ) as shipping
-- group by
-- supp_nation,
-- cust_nation,
-- l_year
-- order by
-- supp_nation,
-- cust_nation,
-- l_year;
select 7;
select
supp_nation,
cust_nation,
l_year,
sum(volume) as revenue
from
(
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
extract(year from l_shipdate) as l_year,
l_extendedprice * (1 - l_discount) as volume
from
supplier,
lineitem,
orders,
customer,
nation n1,
nation n2
where
s_suppkey = l_suppkey
and o_orderkey = l_orderkey
and c_custkey = o_custkey
and s_nationkey = n1.n_nationkey
and c_nationkey = n2.n_nationkey
and (
(n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
)
and l_shipdate between date '1995-01-01' and date '1996-12-31'
) as shipping
group by
supp_nation,
cust_nation,
l_year
order by
supp_nation,
cust_nation,
l_year;
-- select 8;
-- select
-- o_year,
-- sum(case
-- when nation = 'BRAZIL' then volume
-- else 0
-- end) / sum(volume) as mkt_share
-- from
-- (
-- select
-- extract(year from o_orderdate) as o_year,
-- l_extendedprice * (1 - l_discount) as volume,
-- n2.n_name as nation
-- from
-- part,
-- supplier,
-- lineitem,
-- orders,
-- customer,
-- nation n1,
-- nation n2,
-- region
-- where
-- p_partkey = l_partkey
-- and s_suppkey = l_suppkey
-- and l_orderkey = o_orderkey
-- and o_custkey = c_custkey
-- and c_nationkey = n1.n_nationkey
-- and n1.n_regionkey = r_regionkey
-- and r_name = 'AMERICA'
-- and s_nationkey = n2.n_nationkey
-- and o_orderdate between date '1995-01-01' and date '1996-12-31'
-- and p_type = 'ECONOMY ANODIZED STEEL'
-- ) as all_nations
-- group by
-- o_year
-- order by
-- o_year;
select 8;
select
o_year,
sum(case
when nation = 'BRAZIL' then volume
else 0
end) / sum(volume) as mkt_share
from
(
select
extract(year from o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) as volume,
n2.n_name as nation
from
part,
supplier,
lineitem,
orders,
customer,
nation n1,
nation n2,
region
where
p_partkey = l_partkey
and s_suppkey = l_suppkey
and l_orderkey = o_orderkey
and o_custkey = c_custkey
and c_nationkey = n1.n_nationkey
and n1.n_regionkey = r_regionkey
and r_name = 'AMERICA'
and s_nationkey = n2.n_nationkey
and o_orderdate between date '1995-01-01' and date '1996-12-31'
and p_type = 'ECONOMY ANODIZED STEEL'
) as all_nations
group by
o_year
order by
o_year;
select 9;
select
@ -417,37 +417,37 @@ order by
revenue desc
limit 20;
-- select 11; -- rewrite fail
-- select
-- ps_partkey,
-- sum(ps_supplycost * ps_availqty) as value
-- from
-- partsupp,
-- supplier,
-- nation
-- where
-- ps_suppkey = s_suppkey
-- and s_nationkey = n_nationkey
-- and n_name = 'GERMANY'
-- group by
-- ps_partkey having
-- sum(ps_supplycost * ps_availqty) > (
-- select
-- sum(ps_supplycost * ps_availqty) * 0.0100000000
-- -- ^^^^^^^^^^^^
-- -- The above constant needs to be adjusted according
-- -- to the scale factor (SF): constant = 0.0001 / SF.
-- from
-- partsupp,
-- supplier,
-- nation
-- where
-- ps_suppkey = s_suppkey
-- and s_nationkey = n_nationkey
-- and n_name = 'GERMANY'
-- )
-- order by
-- value desc;
select 11; -- TODO: remove toDecimal()
select
ps_partkey,
sum(ps_supplycost * ps_availqty) as value
from
partsupp,
supplier,
nation
where
ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by
ps_partkey having
sum(ps_supplycost * ps_availqty) > (
select
sum(ps_supplycost * ps_availqty) * toDecimal64('0.0100000000', 2)
-- ^^^^^^^^^^^^
-- The above constant needs to be adjusted according
-- to the scale factor (SF): constant = 0.0001 / SF.
from
partsupp,
supplier,
nation
where
ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
)
order by
value desc;
select 12;
select
@ -479,27 +479,27 @@ group by
order by
l_shipmode;
-- select 13; -- rewrite fail
-- select
-- c_count,
-- count(*) as custdist
-- from
-- (
-- select
-- c_custkey,
-- count(o_orderkey)
-- from
-- customer left outer join orders on
-- c_custkey = o_custkey
-- and o_comment not like '%special%requests%'
-- group by
-- c_custkey
-- ) as c_orders
-- group by
-- c_count
-- order by
-- custdist desc,
-- c_count desc;
select 13, 'fail: join predicates'; -- TODO: Invalid expression for JOIN ON
-- TPC-H Q13 (customer distribution). Expected to fail with server error 403
-- because of the extra non-equality predicate in the JOIN ON section.
-- NOTE(review): the outer query groups by c_count, but the inner select leaves
-- count(o_orderkey) unaliased (TPC-H names the columns via
-- "as c_orders (c_custkey, c_count)"). Once the JOIN ON limitation is lifted this
-- will presumably fail on a missing c_count column - confirm and add the alias.
select
c_count,
count(*) as custdist
from
(
select
c_custkey,
count(o_orderkey)
from
customer left outer join orders on
c_custkey = o_custkey
and o_comment not like '%special%requests%'
group by
c_custkey
) as c_orders
group by
c_count
order by
custdist desc,
c_count desc; -- { serverError 403 }
select 14;
select
@ -516,39 +516,40 @@ where
and l_shipdate >= date '1995-09-01'
and l_shipdate < date '1995-09-01' + interval '1' month;
-- select 15;
-- create view revenue0 as
-- select
-- l_suppkey,
-- sum(l_extendedprice * (1 - l_discount))
-- from
-- lineitem
-- where
-- l_shipdate >= date '1996-01-01'
-- and l_shipdate < date '1996-01-01' + interval '3' month
-- group by
-- l_suppkey;
-- select
-- s_suppkey,
-- s_name,
-- s_address,
-- s_phone,
-- total_revenue
-- from
-- supplier,
-- revenue0
-- where
-- s_suppkey = supplier_no
-- and total_revenue = (
-- select
-- max(total_revenue)
-- from
-- revenue0
-- )
-- order by
-- s_suppkey;
-- drop view revenue0;
select 15, 'fail: correlated subquery'; -- TODO: Missing columns: 'total_revenue'
drop table if exists revenue0;
-- Helper view for TPC-H Q15: revenue per supplier for shipments in the three
-- months starting 1996-01-01.
-- NOTE(review): the query that reads this view refers to supplier_no and
-- total_revenue, but neither name is defined here (TPC-H declares the view as
-- "revenue0 (supplier_no, total_revenue)"). The expected "Missing columns" error
-- may come from the absent aliases rather than from the subquery - confirm.
create view revenue0 as
select
l_suppkey,
sum(l_extendedprice * (1 - l_discount))
from
lineitem
where
l_shipdate >= date '1996-01-01'
and l_shipdate < date '1996-01-01' + interval '3' month
group by
l_suppkey;
select
s_suppkey,
s_name,
s_address,
s_phone,
total_revenue
from
supplier,
revenue0
where
s_suppkey = supplier_no
and total_revenue = (
select
max(total_revenue)
from
revenue0
)
order by
s_suppkey; -- { serverError 47 }
drop table revenue0; -- TODO: drop view
select 16;
select
p_brand,
@ -580,25 +581,25 @@ order by
p_brand,
p_type,
p_size;
-- select 17;
-- select
-- sum(l_extendedprice) / 7.0 as avg_yearly
-- from
-- lineitem,
-- part
-- where
-- p_partkey = l_partkey
-- and p_brand = 'Brand#23'
-- and p_container = 'MED BOX'
-- and l_quantity < (
-- select
-- 0.2 * avg(l_quantity)
-- from
-- lineitem
-- where
-- l_partkey = p_partkey
-- );
select 17, 'fail: correlated subquery'; -- TODO: Missing columns: 'p_partkey'
select
sum(l_extendedprice) / 7.0 as avg_yearly
from
lineitem,
part
where
p_partkey = l_partkey
and p_brand = 'Brand#23'
and p_container = 'MED BOX'
and l_quantity < (
select
0.2 * avg(l_quantity)
from
lineitem
where
l_partkey = p_partkey
); -- { serverError 47 }
select 18;
select
@ -672,46 +673,46 @@ where
and l_shipinstruct = 'DELIVER IN PERSON'
);
-- select 20;
-- select
-- s_name,
-- s_address
-- from
-- supplier,
-- nation
-- where
-- s_suppkey in (
-- select
-- ps_suppkey
-- from
-- partsupp
-- where
-- ps_partkey in (
-- select
-- p_partkey
-- from
-- part
-- where
-- p_name like 'forest%'
-- )
-- and ps_availqty > (
-- select
-- 0.5 * sum(l_quantity)
-- from
-- lineitem
-- where
-- l_partkey = ps_partkey
-- and l_suppkey = ps_suppkey
-- and l_shipdate >= date '1994-01-01'
-- and l_shipdate < date '1994-01-01' + interval '1' year
-- )
-- )
-- and s_nationkey = n_nationkey
-- and n_name = 'CANADA'
-- order by
-- s_name;
select 20, 'fail: correlated subquery'; -- TODO: Missing columns: 'ps_suppkey' 'ps_partkey'
select
s_name,
s_address
from
supplier,
nation
where
s_suppkey in (
select
ps_suppkey
from
partsupp
where
ps_partkey in (
select
p_partkey
from
part
where
p_name like 'forest%'
)
and ps_availqty > (
select
0.5 * sum(l_quantity)
from
lineitem
where
l_partkey = ps_partkey
and l_suppkey = ps_suppkey
and l_shipdate >= date '1994-01-01'
and l_shipdate < date '1994-01-01' + interval '1' year
)
)
and s_nationkey = n_nationkey
and n_name = 'CANADA'
order by
s_name; -- { serverError 47 }
-- select 21;
select 21, 'fail: exists, not exists'; -- TODO
-- select
-- s_name,
-- count(*) as numwait
@ -753,7 +754,7 @@ where
-- s_name
-- limit 100;
-- select 22;
select 22, 'fail: not exists'; -- TODO
-- select
-- cntrycode,
-- count(*) as numcust,