mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge pull request #44012 from ClickHouse/vdimir/storage_join_key_order
This commit is contained in:
commit
eb1fd99196
@ -225,7 +225,8 @@ HashJoin::HashJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_s
|
||||
, right_sample_block(right_sample_block_)
|
||||
, log(&Poco::Logger::get("HashJoin"))
|
||||
{
|
||||
LOG_DEBUG(log, "HashJoin. Datatype: {}, kind: {}, strictness: {}", data->type, kind, strictness);
|
||||
LOG_DEBUG(log, "Datatype: {}, kind: {}, strictness: {}", data->type, kind, strictness);
|
||||
LOG_DEBUG(log, "Keys: {}", TableJoin::formatClauses(table_join->getClauses(), true));
|
||||
|
||||
if (isCrossOrComma(kind))
|
||||
{
|
||||
@ -1492,7 +1493,7 @@ void HashJoin::joinBlockImpl(
|
||||
{
|
||||
const auto & right_key = required_right_keys.getByPosition(i);
|
||||
auto right_col_name = getTableJoin().renamedRightColumnName(right_key.name);
|
||||
if (!block.findByName(right_col_name /*right_key.name*/))
|
||||
if (!block.findByName(right_col_name))
|
||||
{
|
||||
const auto & left_name = required_right_keys_sources[i];
|
||||
|
||||
@ -1512,7 +1513,7 @@ void HashJoin::joinBlockImpl(
|
||||
block.insert(std::move(right_col));
|
||||
|
||||
if constexpr (jf.need_replication)
|
||||
right_keys_to_replicate.push_back(block.getPositionByName(right_key.name));
|
||||
right_keys_to_replicate.push_back(block.getPositionByName(right_col_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -329,6 +329,7 @@ public:
|
||||
|
||||
/// StorageJoin overrides key names (because of different name qualification)
|
||||
/// Overwrites key_names_right of the clause returned by getOnlyClause() with a copy of `keys`.
void setRightKeys(const Names & keys) { getOnlyClause().key_names_right = keys; }
|
||||
/// Overwrites key_names_left of the clause returned by getOnlyClause() with a copy of `keys`.
void setLeftKeys(const Names & keys) { getOnlyClause().key_names_left = keys; }
|
||||
|
||||
Block getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const;
|
||||
|
||||
|
@ -48,6 +48,7 @@
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/StorageJoin.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
@ -60,6 +61,7 @@ namespace ErrorCodes
|
||||
extern const int EMPTY_LIST_OF_COLUMNS_QUERIED;
|
||||
extern const int EMPTY_NESTED_TABLE;
|
||||
extern const int EXPECTED_ALL_OR_ANY;
|
||||
extern const int INCOMPATIBLE_TYPE_OF_JOIN;
|
||||
extern const int INVALID_JOIN_ON_EXPRESSION;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
@ -757,6 +759,10 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join,
|
||||
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
|
||||
"Cannot get JOIN keys from JOIN ON section: {}", queryToString(table_join.on_expression));
|
||||
|
||||
if (const auto storage_join = analyzed_join.getStorageJoin())
|
||||
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN,
|
||||
"StorageJoin keys should match JOIN keys, expected JOIN ON [{}]", fmt::join(storage_join->getKeyNames(), ", "));
|
||||
|
||||
bool join_on_const_ok = tryJoinOnConst(analyzed_join, table_join.on_expression, context);
|
||||
if (!join_on_const_ok)
|
||||
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
|
||||
|
@ -165,7 +165,7 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr<TableJoin> analyzed_join,
|
||||
{
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
if (!analyzed_join->sameStrictnessAndKind(strictness, kind))
|
||||
throw Exception("Table " + getStorageID().getNameForLogs() + " has incompatible type of JOIN.", ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN);
|
||||
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "Table '{}' has incompatible type of JOIN", getStorageID().getNameForLogs());
|
||||
|
||||
if ((analyzed_join->forceNullableRight() && !use_nulls) ||
|
||||
(!analyzed_join->forceNullableRight() && isLeftOrFull(analyzed_join->kind()) && use_nulls))
|
||||
@ -174,12 +174,48 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr<TableJoin> analyzed_join,
|
||||
"Table {} needs the same join_use_nulls setting as present in LEFT or FULL JOIN",
|
||||
getStorageID().getNameForLogs());
|
||||
|
||||
/// TODO: check key columns
|
||||
const auto & join_on = analyzed_join->getOnlyClause();
|
||||
if (join_on.on_filter_condition_left || join_on.on_filter_condition_right)
|
||||
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "ON section of JOIN with filter conditions is not implemented");
|
||||
|
||||
const auto & key_names_right = join_on.key_names_right;
|
||||
const auto & key_names_left = join_on.key_names_left;
|
||||
if (key_names.size() != key_names_right.size() || key_names.size() != key_names_left.size())
|
||||
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN,
|
||||
"Number of keys in JOIN ON section ({}) doesn't match number of keys in Join engine ({})",
|
||||
key_names_right.size(), key_names.size());
|
||||
|
||||
/* Resort left keys according to right keys order in StorageJoin
|
||||
* We can't change the order of keys in StorageJoin
|
||||
* because the hash table was already built with tuples serialized in the order of key_names.
|
||||
* If we try to use the same hash table with different order of keys,
|
||||
* then calculated hashes and the result of the comparison will be wrong.
|
||||
*
|
||||
* Example:
|
||||
* ```
|
||||
* CREATE TABLE t_right (a UInt32, b UInt32) ENGINE = Join(ALL, INNER, a, b);
|
||||
* SELECT * FROM t_left JOIN t_right ON t_left.y = t_right.b AND t_left.x = t_right.a;
|
||||
* ```
|
||||
* In that case right keys should still be (a, b), need to change the order of the left keys to (x, y).
|
||||
*/
|
||||
Names left_key_names_resorted;
|
||||
for (const auto & key_name : key_names)
|
||||
{
|
||||
const auto & renamed_key = analyzed_join->renamedRightColumnName(key_name);
|
||||
/// find position of renamed_key in key_names_right
|
||||
auto it = std::find(key_names_right.begin(), key_names_right.end(), renamed_key);
|
||||
if (it == key_names_right.end())
|
||||
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN,
|
||||
"Key '{}' not found in JOIN ON section. All Join engine keys '{}' have to be used", key_name, fmt::join(key_names, ", "));
|
||||
const size_t key_position = std::distance(key_names_right.begin(), it);
|
||||
left_key_names_resorted.push_back(key_names_left[key_position]);
|
||||
}
|
||||
|
||||
/// Strip name qualifiers: table.column -> column
|
||||
/// It's required because storage join stores non-qualified names
|
||||
/// Qualifiers will be added by join implementation (HashJoin)
|
||||
analyzed_join->setRightKeys(key_names);
|
||||
analyzed_join->setLeftKeys(left_key_names_resorted);
|
||||
|
||||
HashJoinPtr join_clone = std::make_shared<HashJoin>(analyzed_join, getRightSampleBlock());
|
||||
|
||||
|
@ -85,6 +85,8 @@ public:
|
||||
|
||||
/// Returns the value of the use_nulls member.
bool useNulls() const { return use_nulls; }
|
||||
|
||||
/// Returns a const reference to the key_names member.
/// NOTE(review): presumably the keys in the order given to ENGINE = Join(...) — confirm.
const Names & getKeyNames() const { return key_names; }
|
||||
|
||||
private:
|
||||
Block sample_block;
|
||||
const Names key_names;
|
||||
|
@ -0,0 +1,54 @@
|
||||
21 22 23 2000
|
||||
31 32 33 3000
|
||||
41 42 43 4000
|
||||
51 52 53 5000
|
||||
21 22 23 2000
|
||||
31 32 33 3000
|
||||
41 42 43 4000
|
||||
51 52 53 5000
|
||||
21 22 23 2000
|
||||
31 32 33 3000
|
||||
41 42 43 4000
|
||||
51 52 53 5000
|
||||
21 22 23 2000
|
||||
31 32 33 3000
|
||||
41 42 43 4000
|
||||
51 52 53 5000
|
||||
21 22 23 22 21 23 2000
|
||||
31 32 33 32 31 33 3000
|
||||
41 42 43 42 41 43 4000
|
||||
51 52 53 52 51 53 5000
|
||||
21 22 23 22 21 23 2000
|
||||
31 32 33 32 31 33 3000
|
||||
41 42 43 42 41 43 4000
|
||||
51 52 53 52 51 53 5000
|
||||
21 22 23 22 21 23 2000
|
||||
31 32 33 32 31 33 3000
|
||||
41 42 43 42 41 43 4000
|
||||
51 52 53 52 51 53 5000
|
||||
21 22 23 22 21 23 2000
|
||||
31 32 33 32 31 33 3000
|
||||
41 42 43 42 41 43 4000
|
||||
51 52 53 52 51 53 5000
|
||||
23 21 22 22 21 23 2000
|
||||
33 31 32 32 31 33 3000
|
||||
43 41 42 42 41 43 4000
|
||||
53 51 52 52 51 53 5000
|
||||
23 21 22 22 21 23 2000
|
||||
33 31 32 32 31 33 3000
|
||||
43 41 42 42 41 43 4000
|
||||
53 51 52 52 51 53 5000
|
||||
23 21 22 22 21 23 2000
|
||||
33 31 32 32 31 33 3000
|
||||
43 41 42 42 41 43 4000
|
||||
53 51 52 52 51 53 5000
|
||||
11 12 13 11 11 11 1000
|
||||
21 22 23 21 21 21 2000
|
||||
31 32 33 31 31 31 3000
|
||||
41 42 43 41 41 41 4000
|
||||
51 52 53 51 51 51 5000
|
||||
11 12 13 11 11 11 1000
|
||||
21 22 23 21 21 21 2000
|
||||
31 32 33 31 31 31 3000
|
||||
41 42 43 41 41 41 4000
|
||||
51 52 53 51 51 51 5000
|
@ -0,0 +1,48 @@
|
||||
DROP TABLE IF EXISTS t1;
|
||||
DROP TABLE IF EXISTS tj;
|
||||
DROP TABLE IF EXISTS tjj;
|
||||
|
||||
CREATE TABLE t1 (key1 UInt64, key2 UInt64, key3 UInt64) ENGINE = Memory;
|
||||
INSERT INTO t1 VALUES (11, 12, 13), (21, 22, 23), (31, 32, 33), (41, 42, 43), (51, 52, 53);
|
||||
|
||||
CREATE TABLE tj (key2 UInt64, key1 UInt64, key3 UInt64, attr UInt64) ENGINE = Join(ALL, INNER, key3, key2, key1);
|
||||
INSERT INTO tj VALUES (22, 21, 23, 2000), (32, 31, 33, 3000), (42, 41, 43, 4000), (52, 51, 53, 5000), (62, 61, 63, 6000);
|
||||
|
||||
SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3) ORDER BY key1;
|
||||
SELECT * FROM t1 ALL INNER JOIN tj USING (key2, key3, key1) ORDER BY key1;
|
||||
SELECT * FROM t1 ALL INNER JOIN tj USING (key3, key2, key1) ORDER BY key1;
|
||||
SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key3, key2) ORDER BY key1;
|
||||
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND t1.key1 = tj.key1 ORDER BY key1;
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key2 = tj.key2 AND t1.key3 = tj.key3 AND t1.key1 = tj.key1 ORDER BY key1;
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key1 = tj.key1 AND t1.key2 = tj.key2 ORDER BY key1;
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 ORDER BY key1;
|
||||
|
||||
SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.a = tj.key1 AND t1.c = tj.key3 AND t1.b = tj.key2 ORDER BY t1.a;
|
||||
SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.a = tj.key1 AND t1.b = tj.key2 AND t1.c = tj.key3 ORDER BY t1.a;
|
||||
SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.c = tj.key3 AND t1.a = tj.key1 AND t1.b = tj.key2 ORDER BY t1.a;
|
||||
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON NULL; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON 1 == 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON 1 != 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
|
||||
SELECT * FROM t1 ALL INNER JOIN tj USING (key2, key3); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key2 = tj.key2 AND t1.key3 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key2 = tj.key2 AND t1.key3 = tj.key3 AND t1.key1 = tj.key1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN }
|
||||
|
||||
|
||||
CREATE TABLE tjj (key2 UInt64, key1 UInt64, key3 UInt64, attr UInt64) ENGINE = Join(ALL, INNER, key3, key2, key1);
|
||||
INSERT INTO tjj VALUES (11, 11, 11, 1000), (21, 21, 21, 2000), (31, 31, 31, 3000), (41, 41, 41, 4000), (51, 51, 51, 5000), (61, 61, 61, 6000);
|
||||
|
||||
SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key2 AND t1.key1 = tjj.key3 ORDER BY key1;
|
||||
SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key3 AND t1.key1 = tjj.key2 ORDER BY key1;
|
||||
|
||||
DROP TABLE IF EXISTS t1;
|
||||
DROP TABLE IF EXISTS tj;
|
||||
DROP TABLE IF EXISTS tjj;
|
Loading…
Reference in New Issue
Block a user