Merge pull request #44012 from ClickHouse/vdimir/storage_join_key_order

This commit is contained in:
Vladimir C 2022-12-08 12:10:13 +01:00 committed by GitHub
commit eb1fd99196
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 153 additions and 5 deletions

View File

@ -225,7 +225,8 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
, right_sample_block(right_sample_block_)
, log(&Poco::Logger::get("HashJoin"))
{
LOG_DEBUG(log, "HashJoin. Datatype: {}, kind: {}, strictness: {}", data->type, kind, strictness);
LOG_DEBUG(log, "Datatype: {}, kind: {}, strictness: {}", data->type, kind, strictness);
LOG_DEBUG(log, "Keys: {}", TableJoin::formatClauses(table_join->getClauses(), true));
if (isCrossOrComma(kind))
{
@ -1492,7 +1493,7 @@ void HashJoin::joinBlockImpl(
{
const auto & right_key = required_right_keys.getByPosition(i);
auto right_col_name = getTableJoin().renamedRightColumnName(right_key.name);
if (!block.findByName(right_col_name /*right_key.name*/))
if (!block.findByName(right_col_name))
{
const auto & left_name = required_right_keys_sources[i];
@ -1512,7 +1513,7 @@ void HashJoin::joinBlockImpl(
block.insert(std::move(right_col));
if constexpr (jf.need_replication)
right_keys_to_replicate.push_back(block.getPositionByName(right_key.name));
right_keys_to_replicate.push_back(block.getPositionByName(right_col_name));
}
}
}

View File

@ -329,6 +329,7 @@ public:
/// StorageJoin overrides the key names, because its names are qualified differently.
/// Replaces the right-side key names of the clause returned by getOnlyClause().
void setRightKeys(const Names & keys) { getOnlyClause().key_names_right = keys; }
/// Replaces the left-side key names of the clause returned by getOnlyClause().
void setLeftKeys(const Names & keys) { getOnlyClause().key_names_left = keys; }
Block getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const;

View File

@ -48,6 +48,7 @@
#include <IO/WriteHelpers.h>
#include <Storages/IStorage.h>
#include <Storages/StorageJoin.h>
#include <Common/checkStackSize.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
@ -60,6 +61,7 @@ namespace ErrorCodes
extern const int EMPTY_LIST_OF_COLUMNS_QUERIED;
extern const int EMPTY_NESTED_TABLE;
extern const int EXPECTED_ALL_OR_ANY;
extern const int INCOMPATIBLE_TYPE_OF_JOIN;
extern const int INVALID_JOIN_ON_EXPRESSION;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
@ -757,6 +759,10 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join,
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression));
if (const auto storage_join = analyzed_join.getStorageJoin())
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN,
"StorageJoin keys should match JOIN keys, expected JOIN ON [{}]", fmt::join(storage_join->getKeyNames(), ", "));
bool join_on_const_ok = tryJoinOnConst(analyzed_join, table_join.on_expression, context);
if (!join_on_const_ok)
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,

View File

@ -165,7 +165,7 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr<TableJoin> analyzed_join,
{
auto metadata_snapshot = getInMemoryMetadataPtr();
if (!analyzed_join->sameStrictnessAndKind(strictness, kind))
throw Exception("Table " + getStorageID().getNameForLogs() + " has incompatible type of JOIN.", ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN);
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "Table '{}' has incompatible type of JOIN", getStorageID().getNameForLogs());
if ((analyzed_join->forceNullableRight() && !use_nulls) ||
(!analyzed_join->forceNullableRight() && isLeftOrFull(analyzed_join->kind()) && use_nulls))
@ -174,12 +174,48 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr<TableJoin> analyzed_join,
"Table {} needs the same join_use_nulls setting as present in LEFT or FULL JOIN",
getStorageID().getNameForLogs());
/// TODO: check key columns
const auto & join_on = analyzed_join->getOnlyClause();
if (join_on.on_filter_condition_left || join_on.on_filter_condition_right)
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "ON section of JOIN with filter conditions is not implemented");
const auto & key_names_right = join_on.key_names_right;
const auto & key_names_left = join_on.key_names_left;
if (key_names.size() != key_names_right.size() || key_names.size() != key_names_left.size())
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN,
"Number of keys in JOIN ON section ({}) doesn't match number of keys in Join engine ({})",
key_names_right.size(), key_names.size());
/* Reorder the left keys according to the order of the right keys in StorageJoin.
* We can't change the order of keys in StorageJoin
* because the hash table was already built with tuples serialized in the order of key_names.
* If we try to use the same hash table with different order of keys,
* then calculated hashes and the result of the comparison will be wrong.
*
* Example:
* ```
* CREATE TABLE t_right (a UInt32, b UInt32) ENGINE = Join(ALL, INNER, a, b);
* SELECT * FROM t_left JOIN t_right ON t_left.y = t_right.b AND t_left.x = t_right.a;
* ```
* In that case the right keys must stay (a, b), so the left keys have to be reordered to (x, y).
*/
Names left_key_names_resorted;
for (const auto & key_name : key_names)
{
const auto & renamed_key = analyzed_join->renamedRightColumnName(key_name);
/// find position of renamed_key in key_names_right
auto it = std::find(key_names_right.begin(), key_names_right.end(), renamed_key);
if (it == key_names_right.end())
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN,
"Key '{}' not found in JOIN ON section. All Join engine keys '{}' have to be used", key_name, fmt::join(key_names, ", "));
const size_t key_position = std::distance(key_names_right.begin(), it);
left_key_names_resorted.push_back(key_names_left[key_position]);
}
/// Set names qualifiers: table.column -> column
/// It's required because storage join stores non-qualified names
/// Qualifiers will be added by the join implementation (HashJoin)
analyzed_join->setRightKeys(key_names);
analyzed_join->setLeftKeys(left_key_names_resorted);
HashJoinPtr join_clone = std::make_shared<HashJoin>(analyzed_join, getRightSampleBlock());

View File

@ -85,6 +85,8 @@ public:
/// Returns the stored use_nulls flag.
bool useNulls() const { return use_nulls; }
/// Returns the key column names (key_names) by const reference.
const Names & getKeyNames() const { return key_names; }
private:
Block sample_block;
const Names key_names;

View File

@ -0,0 +1,54 @@
21 22 23 2000
31 32 33 3000
41 42 43 4000
51 52 53 5000
21 22 23 2000
31 32 33 3000
41 42 43 4000
51 52 53 5000
21 22 23 2000
31 32 33 3000
41 42 43 4000
51 52 53 5000
21 22 23 2000
31 32 33 3000
41 42 43 4000
51 52 53 5000
21 22 23 22 21 23 2000
31 32 33 32 31 33 3000
41 42 43 42 41 43 4000
51 52 53 52 51 53 5000
21 22 23 22 21 23 2000
31 32 33 32 31 33 3000
41 42 43 42 41 43 4000
51 52 53 52 51 53 5000
21 22 23 22 21 23 2000
31 32 33 32 31 33 3000
41 42 43 42 41 43 4000
51 52 53 52 51 53 5000
21 22 23 22 21 23 2000
31 32 33 32 31 33 3000
41 42 43 42 41 43 4000
51 52 53 52 51 53 5000
23 21 22 22 21 23 2000
33 31 32 32 31 33 3000
43 41 42 42 41 43 4000
53 51 52 52 51 53 5000
23 21 22 22 21 23 2000
33 31 32 32 31 33 3000
43 41 42 42 41 43 4000
53 51 52 52 51 53 5000
23 21 22 22 21 23 2000
33 31 32 32 31 33 3000
43 41 42 42 41 43 4000
53 51 52 52 51 53 5000
11 12 13 11 11 11 1000
21 22 23 21 21 21 2000
31 32 33 31 31 31 3000
41 42 43 41 41 41 4000
51 52 53 51 51 51 5000
11 12 13 11 11 11 1000
21 22 23 21 21 21 2000
31 32 33 31 31 31 3000
41 42 43 41 41 41 4000
51 52 53 51 51 51 5000

View File

@ -0,0 +1,48 @@
-- Checks joins against a Join engine table when the keys in the query are listed
-- in a different order than in the engine definition.
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS tj;
DROP TABLE IF EXISTS tjj;
-- Left table: the row (11, 12, 13) has no counterpart in tj.
CREATE TABLE t1 (key1 UInt64, key2 UInt64, key3 UInt64) ENGINE = Memory;
INSERT INTO t1 VALUES (11, 12, 13), (21, 22, 23), (31, 32, 33), (41, 42, 43), (51, 52, 53);
-- Right table: columns are declared as (key2, key1, key3, attr), while the engine keys are (key3, key2, key1).
-- The row (62, 61, 63, 6000) has no counterpart in t1.
CREATE TABLE tj (key2 UInt64, key1 UInt64, key3 UInt64, attr UInt64) ENGINE = Join(ALL, INNER, key3, key2, key1);
INSERT INTO tj VALUES (22, 21, 23, 2000), (32, 31, 33, 3000), (42, 41, 43, 4000), (52, 51, 53, 5000), (62, 61, 63, 6000);
-- USING with all three keys listed in different orders.
SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3) ORDER BY key1;
SELECT * FROM t1 ALL INNER JOIN tj USING (key2, key3, key1) ORDER BY key1;
SELECT * FROM t1 ALL INNER JOIN tj USING (key3, key2, key1) ORDER BY key1;
SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key3, key2) ORDER BY key1;
-- ON with the three key equalities listed in different orders.
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND t1.key1 = tj.key1 ORDER BY key1;
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key2 = tj.key2 AND t1.key3 = tj.key3 AND t1.key1 = tj.key1 ORDER BY key1;
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key1 = tj.key1 AND t1.key2 = tj.key2 ORDER BY key1;
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 ORDER BY key1;
-- The left side is a subquery whose columns are renamed (a, b, c) and reordered.
SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.a = tj.key1 AND t1.c = tj.key3 AND t1.b = tj.key2 ORDER BY t1.a;
SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.a = tj.key1 AND t1.b = tj.key2 AND t1.c = tj.key3 ORDER BY t1.a;
SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.c = tj.key3 AND t1.a = tj.key1 AND t1.b = tj.key2 ORDER BY t1.a;
-- Constant ON expressions are expected to be rejected for a Join engine table.
SELECT * FROM t1 ALL INNER JOIN tj ON 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
SELECT * FROM t1 ALL INNER JOIN tj ON 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
SELECT * FROM t1 ALL INNER JOIN tj ON NULL; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
SELECT * FROM t1 ALL INNER JOIN tj ON 1 == 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
SELECT * FROM t1 ALL INNER JOIN tj ON 1 != 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
-- Key sets that differ from the engine keys (missing key, non-key column attr, extra column) are expected to be rejected.
SELECT * FROM t1 ALL INNER JOIN tj USING (key2, key3); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key2 = tj.key2 AND t1.key3 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key2 = tj.key2 AND t1.key3 = tj.key3 AND t1.key1 = tj.key1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
-- Second Join table where all three key columns hold the same value in every row.
-- The same left column (t1.key1) is compared with each of the three right keys.
CREATE TABLE tjj (key2 UInt64, key1 UInt64, key3 UInt64, attr UInt64) ENGINE = Join(ALL, INNER, key3, key2, key1);
INSERT INTO tjj VALUES (11, 11, 11, 1000), (21, 21, 21, 2000), (31, 31, 31, 3000), (41, 41, 41, 4000), (51, 51, 51, 5000), (61, 61, 61, 6000);
SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key2 AND t1.key1 = tjj.key3 ORDER BY key1;
SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key3 AND t1.key1 = tjj.key2 ORDER BY key1;
-- Cleanup.
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS tj;
DROP TABLE IF EXISTS tjj;