mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
semi & anti join
This commit is contained in:
parent
0251f4e442
commit
5ce65546e9
@ -193,10 +193,10 @@ static const IColumn * extractAsofColumn(const ColumnRawPtrs & key_columns)
|
||||
return key_columns.back();
|
||||
}
|
||||
|
||||
template<typename KeyGetter, ASTTableJoin::Strictness STRICTNESS>
|
||||
template<typename KeyGetter, bool is_asof_join>
|
||||
static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes)
|
||||
{
|
||||
if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof)
|
||||
if constexpr (is_asof_join)
|
||||
{
|
||||
auto key_column_copy = key_columns;
|
||||
auto key_size_copy = key_sizes;
|
||||
@ -360,28 +360,19 @@ void Join::setSampleBlock(const Block & block)
|
||||
namespace
|
||||
{
|
||||
/// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN.
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename Map, typename KeyGetter>
|
||||
template <typename Map, typename KeyGetter>
|
||||
struct Inserter
|
||||
{
|
||||
static void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool);
|
||||
};
|
||||
|
||||
template <typename Map, typename KeyGetter>
|
||||
struct Inserter<ASTTableJoin::Strictness::RightAny, Map, KeyGetter>
|
||||
{
|
||||
static ALWAYS_INLINE void insert(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
|
||||
static ALWAYS_INLINE void insertOne(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i,
|
||||
Arena & pool)
|
||||
{
|
||||
auto emplace_result = key_getter.emplaceKey(map, i, pool);
|
||||
|
||||
if (emplace_result.isInserted() || join.anyTakeLastRow())
|
||||
new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Map, typename KeyGetter>
|
||||
struct Inserter<ASTTableJoin::Strictness::All, Map, KeyGetter>
|
||||
{
|
||||
static ALWAYS_INLINE void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
|
||||
static ALWAYS_INLINE void insertAll(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
|
||||
{
|
||||
auto emplace_result = key_getter.emplaceKey(map, i, pool);
|
||||
|
||||
@ -393,13 +384,9 @@ namespace
|
||||
emplace_result.getMapped().insert({stored_block, i}, pool);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Map, typename KeyGetter>
|
||||
struct Inserter<ASTTableJoin::Strictness::Asof, Map, KeyGetter>
|
||||
{
|
||||
static ALWAYS_INLINE void insert(Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool,
|
||||
const IColumn * asof_column)
|
||||
static ALWAYS_INLINE void insertAsof(Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool,
|
||||
const IColumn * asof_column)
|
||||
{
|
||||
auto emplace_result = key_getter.emplaceKey(map, i, pool);
|
||||
typename Map::mapped_type * time_series_map = &emplace_result.getMapped();
|
||||
@ -416,30 +403,27 @@ namespace
|
||||
Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool)
|
||||
{
|
||||
constexpr bool mapped_one = std::is_same_v<typename Map::mapped_type, JoinStuff::MappedOne> ||
|
||||
std::is_same_v<typename Map::mapped_type, JoinStuff::MappedOneFlagged>;
|
||||
constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
|
||||
|
||||
const IColumn * asof_column [[maybe_unused]] = nullptr;
|
||||
if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof)
|
||||
if constexpr (is_asof_join)
|
||||
asof_column = extractAsofColumn(key_columns);
|
||||
|
||||
auto key_getter = createKeyGetter<KeyGetter, STRICTNESS>(key_columns, key_sizes);
|
||||
auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(key_columns, key_sizes);
|
||||
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
if (has_null_map && (*null_map)[i])
|
||||
continue;
|
||||
|
||||
if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any)
|
||||
{
|
||||
constexpr bool mapped_one = std::is_same_v<typename Map::mapped_type, JoinStuff::MappedOne> ||
|
||||
std::is_same_v<typename Map::mapped_type, JoinStuff::MappedOneFlagged>;
|
||||
if constexpr (mapped_one)
|
||||
Inserter<ASTTableJoin::Strictness::RightAny, Map, KeyGetter>::insert(join, map, key_getter, stored_block, i, pool);
|
||||
else
|
||||
Inserter<ASTTableJoin::Strictness::All, Map, KeyGetter>::insert(join, map, key_getter, stored_block, i, pool);
|
||||
}
|
||||
else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof)
|
||||
Inserter<STRICTNESS, Map, KeyGetter>::insert(join, map, key_getter, stored_block, i, pool, asof_column);
|
||||
if constexpr (is_asof_join)
|
||||
Inserter<Map, KeyGetter>::insertAsof(join, map, key_getter, stored_block, i, pool, asof_column);
|
||||
else if constexpr (mapped_one)
|
||||
Inserter<Map, KeyGetter>::insertOne(join, map, key_getter, stored_block, i, pool);
|
||||
else
|
||||
Inserter<STRICTNESS, Map, KeyGetter>::insert(join, map, key_getter, stored_block, i, pool);
|
||||
Inserter<Map, KeyGetter>::insertAll(join, map, key_getter, stored_block, i, pool);
|
||||
}
|
||||
}
|
||||
|
||||
@ -706,11 +690,14 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
|
||||
constexpr bool is_any_join = STRICTNESS == ASTTableJoin::Strictness::Any;
|
||||
constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All;
|
||||
constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
|
||||
constexpr bool left_or_full = static_in_v<KIND, ASTTableJoin::Kind::Left, ASTTableJoin::Kind::Full>;
|
||||
constexpr bool is_semi_join = STRICTNESS == ASTTableJoin::Strictness::Semi;
|
||||
constexpr bool is_anti_join = STRICTNESS == ASTTableJoin::Strictness::Anti;
|
||||
constexpr bool left = KIND == ASTTableJoin::Kind::Left;
|
||||
constexpr bool right = KIND == ASTTableJoin::Kind::Right;
|
||||
constexpr bool full = KIND == ASTTableJoin::Kind::Full;
|
||||
|
||||
constexpr bool add_missing = left_or_full;
|
||||
constexpr bool need_replication = is_all_join || (is_any_join && right);
|
||||
constexpr bool add_missing = (left || full) && !is_semi_join;
|
||||
constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right);
|
||||
|
||||
size_t rows = added_columns.rows_to_add;
|
||||
IColumn::Filter filter(rows, 0);
|
||||
@ -723,7 +710,7 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
|
||||
if constexpr (is_asof_join)
|
||||
asof_column = extractAsofColumn(added_columns.key_columns);
|
||||
|
||||
auto key_getter = createKeyGetter<KeyGetter, STRICTNESS>(added_columns.key_columns, added_columns.key_sizes);
|
||||
auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(added_columns.key_columns, added_columns.key_sizes);
|
||||
|
||||
IColumn::Offset current_offset = 0;
|
||||
|
||||
@ -768,7 +755,7 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
|
||||
mapped.setUsed();
|
||||
addFoundRowAll<Map, add_missing>(mapped, added_columns, current_offset);
|
||||
}
|
||||
else if constexpr (is_any_join && right)
|
||||
else if constexpr ((is_any_join || is_semi_join) && right)
|
||||
{
|
||||
/// Use first appeared left key + it needs left columns replication
|
||||
if (mapped.setUsedOnce())
|
||||
@ -786,11 +773,16 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
|
||||
added_columns.appendFromBlock<add_missing>(*mapped.block, mapped.row_num);
|
||||
}
|
||||
}
|
||||
else if constexpr (is_any_join && KIND == ASTTableJoin::Kind::Full)
|
||||
else if constexpr (is_any_join && full)
|
||||
{
|
||||
/// TODO
|
||||
}
|
||||
else /// ANY LEFT + old ANY (RightAny)
|
||||
else if constexpr (is_anti_join)
|
||||
{
|
||||
if constexpr (right)
|
||||
mapped.setUsed();
|
||||
}
|
||||
else /// ANY LEFT, SEMI LEFT, old ANY (RightAny)
|
||||
{
|
||||
filter[i] = 1;
|
||||
mapped.setUsed();
|
||||
@ -798,7 +790,11 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (is_anti_join && left)
|
||||
filter[i] = 1;
|
||||
addNotFoundRow<add_missing>(added_columns, current_offset);
|
||||
}
|
||||
|
||||
if constexpr (need_replication)
|
||||
(*added_columns.offsets_to_replicate)[i] = current_offset;
|
||||
@ -849,12 +845,16 @@ void Join::joinBlockImpl(
|
||||
constexpr bool is_any_join = STRICTNESS == ASTTableJoin::Strictness::Any;
|
||||
constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All;
|
||||
constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof;
|
||||
constexpr bool right = KIND == ASTTableJoin::Kind::Right;
|
||||
constexpr bool inner_or_right = static_in_v<KIND, ASTTableJoin::Kind::Inner, ASTTableJoin::Kind::Right>;
|
||||
constexpr bool right_or_full = static_in_v<KIND, ASTTableJoin::Kind::Right, ASTTableJoin::Kind::Full>;
|
||||
constexpr bool is_semi_join = STRICTNESS == ASTTableJoin::Strictness::Semi;
|
||||
constexpr bool is_anti_join = STRICTNESS == ASTTableJoin::Strictness::Anti;
|
||||
|
||||
constexpr bool need_filter = (!is_all_join && inner_or_right) && !(is_any_join && right);
|
||||
constexpr bool need_replication = is_all_join || (is_any_join && right);
|
||||
constexpr bool left = KIND == ASTTableJoin::Kind::Left;
|
||||
constexpr bool right = KIND == ASTTableJoin::Kind::Right;
|
||||
constexpr bool inner = KIND == ASTTableJoin::Kind::Inner;
|
||||
constexpr bool full = KIND == ASTTableJoin::Kind::Full;
|
||||
|
||||
constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right);
|
||||
constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left));
|
||||
|
||||
/// Rare case, when keys are constant. To avoid code bloat, simply materialize them.
|
||||
Columns materialized_columns;
|
||||
@ -870,7 +870,7 @@ void Join::joinBlockImpl(
|
||||
* Because if they are constants, then in the "not joined" rows, they may have different values
|
||||
* - default values, which can differ from the values of these constants.
|
||||
*/
|
||||
if constexpr (right_or_full)
|
||||
if constexpr (right || full)
|
||||
{
|
||||
materializeBlockInplace(block);
|
||||
|
||||
@ -1085,61 +1085,44 @@ void Join::joinTotals(Block & block) const
|
||||
}
|
||||
|
||||
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename Mapped>
|
||||
struct AdderNonJoined;
|
||||
|
||||
template <typename Mapped>
|
||||
struct AdderNonJoined<ASTTableJoin::Strictness::RightAny, Mapped>
|
||||
{
|
||||
static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right)
|
||||
{
|
||||
for (size_t j = 0; j < columns_right.size(); ++j)
|
||||
{
|
||||
const auto & mapped_column = mapped.block->getByPosition(j).column;
|
||||
columns_right[j]->insertFrom(*mapped_column, mapped.row_num);
|
||||
}
|
||||
|
||||
++rows_added;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Mapped>
|
||||
struct AdderNonJoined<ASTTableJoin::Strictness::Any, Mapped>
|
||||
struct AdderNonJoined
|
||||
{
|
||||
static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right)
|
||||
{
|
||||
constexpr bool mapped_asof = std::is_same_v<Mapped, JoinStuff::MappedAsof>;
|
||||
constexpr bool mapped_one = std::is_same_v<Mapped, JoinStuff::MappedOne> || std::is_same_v<Mapped, JoinStuff::MappedOneFlagged>;
|
||||
if constexpr (!mapped_one)
|
||||
AdderNonJoined<ASTTableJoin::Strictness::All, Mapped>::add(mapped, rows_added, columns_right);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Mapped>
|
||||
struct AdderNonJoined<ASTTableJoin::Strictness::All, Mapped>
|
||||
{
|
||||
static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right)
|
||||
{
|
||||
for (auto it = mapped.begin(); it.ok(); ++it)
|
||||
if constexpr (mapped_asof)
|
||||
{
|
||||
/// Do nothing
|
||||
}
|
||||
else if constexpr (mapped_one)
|
||||
{
|
||||
for (size_t j = 0; j < columns_right.size(); ++j)
|
||||
{
|
||||
const auto & mapped_column = it->block->getByPosition(j).column;
|
||||
columns_right[j]->insertFrom(*mapped_column, it->row_num);
|
||||
const auto & mapped_column = mapped.block->getByPosition(j).column;
|
||||
columns_right[j]->insertFrom(*mapped_column, mapped.row_num);
|
||||
}
|
||||
|
||||
++rows_added;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto it = mapped.begin(); it.ok(); ++it)
|
||||
{
|
||||
for (size_t j = 0; j < columns_right.size(); ++j)
|
||||
{
|
||||
const auto & mapped_column = it->block->getByPosition(j).column;
|
||||
columns_right[j]->insertFrom(*mapped_column, it->row_num);
|
||||
}
|
||||
|
||||
++rows_added;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Mapped>
|
||||
struct AdderNonJoined<ASTTableJoin::Strictness::Asof, Mapped>
|
||||
{
|
||||
static void add(const Mapped & /*mapped*/, size_t & /*rows_added*/, MutableColumns & /*columns_right*/)
|
||||
{
|
||||
// If we have a leftover match in the right hand side, not required to join because we are only support asof left/inner
|
||||
}
|
||||
};
|
||||
|
||||
/// Stream from not joined earlier rows of the right table.
|
||||
class NonJoinedBlockInputStream : public IBlockInputStream
|
||||
@ -1348,10 +1331,11 @@ private:
|
||||
for (; it != end; ++it)
|
||||
{
|
||||
const Mapped & mapped = it->getMapped();
|
||||
|
||||
if (mapped.getUsed())
|
||||
continue;
|
||||
|
||||
AdderNonJoined<STRICTNESS, Mapped>::add(mapped, rows_added, columns_keys_and_right);
|
||||
AdderNonJoined<Mapped>::add(mapped, rows_added, columns_keys_and_right);
|
||||
|
||||
if (rows_added >= max_block_size)
|
||||
{
|
||||
@ -1391,6 +1375,10 @@ private:
|
||||
|
||||
BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const
|
||||
{
|
||||
if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
|
||||
table_join->strictness() == ASTTableJoin::Strictness::Semi)
|
||||
return {};
|
||||
|
||||
if (isRightOrFull(table_join->kind()))
|
||||
return std::make_shared<NonJoinedBlockInputStream>(*this, result_sample_block, max_block_size);
|
||||
return {};
|
||||
|
@ -80,11 +80,23 @@ using MappedAsof = WithFlags<AsofRowRefs, false>;
|
||||
* It is just a hash table: keys -> rows of joined ("right") table.
|
||||
* Additionally, CROSS JOIN is supported: instead of hash table, it use just set of blocks without keys.
|
||||
*
|
||||
* JOIN-s could be of nine types: ANY/ALL × LEFT/INNER/RIGHT/FULL, and also CROSS.
|
||||
* JOIN-s could be of these types:
|
||||
* - ALL × LEFT/INNER/RIGHT/FULL
|
||||
* - ANY × LEFT/INNER/RIGHT
|
||||
* - SEMI/ANTI x LEFT/RIGHT
|
||||
* - ASOF x LEFT/INNER
|
||||
* - CROSS
|
||||
*
|
||||
* If ANY is specified - then select only one row from the "right" table, (first encountered row), even if there was more matching rows.
|
||||
* If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table.
|
||||
* ANY is more efficient.
|
||||
* ALL means usual JOIN, when rows are multiplied by number of matching rows from the "right" table.
|
||||
* ANY uses one line per unique key from right table. For LEFT JOIN it would be any row (with needed joined key) from the right table,
|
||||
* for RIGHT JOIN it would be any row from the left table and for INNER one it would be any row from right and any row from left.
|
||||
* SEMI JOIN filters the left table by keys that are present in the right table for LEFT JOIN, and filters the right table by keys from the left table
|
||||
* for RIGHT JOIN. In other words, SEMI JOIN returns only rows whose joining keys are present in the other table.
|
||||
* ANTI JOIN is the same as SEMI JOIN but returns rows with joining keys that are NOT present in another table.
|
||||
* SEMI/ANTI JOINs allow getting values from both tables. For the filtering table it gets any row with the same joining key. For ANTI JOIN it returns
|
||||
* default values for the other table's columns.
|
||||
* ASOF JOIN is a non-equi join. For one key column it finds the nearest value to join according to the join inequality.
|
||||
* It's expected that ANY|SEMI LEFT JOIN is more efficient than the ALL one.
|
||||
*
|
||||
* If INNER is specified - leave only rows that have matching rows from "right" table.
|
||||
* If LEFT is specified - in case when there is no matching row in "right" table, fill it with default values instead.
|
||||
|
@ -30,6 +30,18 @@ template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness
|
||||
template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::All> { using Map = Join::MapsAllFlagged; };
|
||||
template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::All> { using Map = Join::MapsAllFlagged; };
|
||||
|
||||
/// Only SEMI LEFT and SEMI RIGHT are valid
|
||||
template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::Semi> { using Map = Join::MapsOne; };
|
||||
template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::Semi> { using Map = Join::MapsOne; };
|
||||
template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::Semi> { using Map = Join::MapsAll; };
|
||||
template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::Semi> { using Map = Join::MapsOne; };
|
||||
|
||||
/// Only ANTI LEFT and ANTI RIGHT are valid
|
||||
template <> struct MapGetter<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::Anti> { using Map = Join::MapsOne; };
|
||||
template <> struct MapGetter<ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::Anti> { using Map = Join::MapsOne; };
|
||||
template <> struct MapGetter<ASTTableJoin::Kind::Right, ASTTableJoin::Strictness::Anti> { using Map = Join::MapsAllFlagged; };
|
||||
template <> struct MapGetter<ASTTableJoin::Kind::Full, ASTTableJoin::Strictness::Anti> { using Map = Join::MapsOne; };
|
||||
|
||||
template <ASTTableJoin::Kind kind>
|
||||
struct MapGetter<kind, ASTTableJoin::Strictness::Asof>
|
||||
{
|
||||
@ -37,11 +49,13 @@ struct MapGetter<kind, ASTTableJoin::Strictness::Asof>
|
||||
};
|
||||
|
||||
|
||||
static constexpr std::array<ASTTableJoin::Strictness, 4> STRICTNESSES = {
|
||||
static constexpr std::array<ASTTableJoin::Strictness, 6> STRICTNESSES = {
|
||||
ASTTableJoin::Strictness::RightAny,
|
||||
ASTTableJoin::Strictness::Any,
|
||||
ASTTableJoin::Strictness::All,
|
||||
ASTTableJoin::Strictness::Asof
|
||||
ASTTableJoin::Strictness::Asof,
|
||||
ASTTableJoin::Strictness::Semi,
|
||||
ASTTableJoin::Strictness::Anti,
|
||||
};
|
||||
|
||||
static constexpr std::array<ASTTableJoin::Kind, 4> KINDS = {
|
||||
|
@ -150,6 +150,12 @@ void ASTTableJoin::formatImplBeforeTable(const FormatSettings & settings, Format
|
||||
case Strictness::Asof:
|
||||
settings.ostr << "ASOF ";
|
||||
break;
|
||||
case Strictness::Semi:
|
||||
settings.ostr << "SEMI ";
|
||||
break;
|
||||
case Strictness::Anti:
|
||||
settings.ostr << "ANTI ";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -25,7 +25,7 @@ namespace DB
|
||||
* SAMPLE 1000000
|
||||
*
|
||||
* Table expressions may be combined with JOINs of following kinds:
|
||||
* [GLOBAL] [ANY|ALL|] INNER|LEFT|RIGHT|FULL [OUTER] JOIN table_expr
|
||||
* [GLOBAL] [ANY|ALL|ASOF|SEMI|ANTI] [INNER|LEFT|RIGHT|FULL] [OUTER] JOIN table_expr
|
||||
* CROSS JOIN
|
||||
* , (comma)
|
||||
*
|
||||
@ -74,10 +74,12 @@ struct ASTTableJoin : public IAST
|
||||
enum class Strictness
|
||||
{
|
||||
Unspecified,
|
||||
RightAny, /// Right ANY. If there are many suitable rows in right table, use any from them to join.
|
||||
RightAny, /// Old ANY JOIN. If there are many suitable rows in right table, use any from them to join.
|
||||
Any, /// Semi Join with any value from the filtering table. For LEFT JOIN, Any and RightAny are the same.
|
||||
All, /// If there are many suitable rows to join, use all of them and replicate rows of "left" table (usual semantic of JOIN).
|
||||
Asof, /// For the last JOIN column, pick the latest value
|
||||
Semi, /// LEFT or RIGHT. SEMI LEFT JOIN filters the left table by values that exist in the right table. SEMI RIGHT - vice versa.
|
||||
Anti, /// LEFT or RIGHT. Same as SEMI JOIN, but filters by values that do NOT exist in the other table.
|
||||
};
|
||||
|
||||
/// Join method.
|
||||
|
@ -1134,11 +1134,14 @@ const char * ParserAlias::restricted_keywords[] =
|
||||
"INNER",
|
||||
"FULL",
|
||||
"CROSS",
|
||||
"ASOF",
|
||||
"JOIN",
|
||||
"GLOBAL",
|
||||
"ANY",
|
||||
"ALL",
|
||||
"ASOF",
|
||||
"SEMI",
|
||||
"ANTI",
|
||||
"ONLY", /// YQL synonym for ANTI
|
||||
"ON",
|
||||
"USING",
|
||||
"PREWHERE",
|
||||
|
@ -137,6 +137,10 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec
|
||||
table_join->strictness = ASTTableJoin::Strictness::All;
|
||||
else if (ParserKeyword("ASOF").ignore(pos))
|
||||
table_join->strictness = ASTTableJoin::Strictness::Asof;
|
||||
else if (ParserKeyword("SEMI").ignore(pos))
|
||||
table_join->strictness = ASTTableJoin::Strictness::Semi;
|
||||
else if (ParserKeyword("ANTI").ignore(pos) || ParserKeyword("ONLY").ignore(pos))
|
||||
table_join->strictness = ASTTableJoin::Strictness::Anti;
|
||||
else
|
||||
table_join->strictness = ASTTableJoin::Strictness::Unspecified;
|
||||
|
||||
@ -153,13 +157,21 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec
|
||||
else
|
||||
{
|
||||
/// Use INNER by default as in another DBMS.
|
||||
table_join->kind = ASTTableJoin::Kind::Inner;
|
||||
if (table_join->strictness == ASTTableJoin::Strictness::Semi ||
|
||||
table_join->strictness == ASTTableJoin::Strictness::Anti)
|
||||
table_join->kind = ASTTableJoin::Kind::Left;
|
||||
else
|
||||
table_join->kind = ASTTableJoin::Kind::Inner;
|
||||
}
|
||||
|
||||
if (table_join->strictness != ASTTableJoin::Strictness::Unspecified
|
||||
&& table_join->kind == ASTTableJoin::Kind::Cross)
|
||||
throw Exception("You must not specify ANY or ALL for CROSS JOIN.", ErrorCodes::SYNTAX_ERROR);
|
||||
|
||||
if ((table_join->strictness == ASTTableJoin::Strictness::Semi || table_join->strictness == ASTTableJoin::Strictness::Anti) &&
|
||||
(table_join->kind != ASTTableJoin::Kind::Left && table_join->kind != ASTTableJoin::Kind::Right))
|
||||
throw Exception("SEMI|ANTI JOIN should be LEFT or RIGHT.", ErrorCodes::SYNTAX_ERROR);
|
||||
|
||||
/// Optional OUTER keyword for outer joins.
|
||||
if (table_join->kind == ASTTableJoin::Kind::Left
|
||||
|| table_join->kind == ASTTableJoin::Kind::Right
|
||||
|
@ -343,9 +343,11 @@ private:
|
||||
throw Exception("New ANY join storage is not implemented yet (set any_join_distinct_right_table_keys=1 to use old one)",
|
||||
ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof)
|
||||
else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof ||
|
||||
STRICTNESS == ASTTableJoin::Strictness::Semi ||
|
||||
STRICTNESS == ASTTableJoin::Strictness::Anti)
|
||||
{
|
||||
throw Exception("ASOF join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED);
|
||||
throw Exception("ASOF|SEMI|ANTI join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
else
|
||||
for (auto ref_it = it->getMapped().begin(); ref_it.ok(); ++ref_it)
|
||||
|
@ -20,7 +20,7 @@ FROM
|
||||
learnerHash,
|
||||
passed - eventTime AS diff
|
||||
FROM statements
|
||||
GLOBAL ANY INNER JOIN
|
||||
GLOBAL SEMI LEFT JOIN
|
||||
(
|
||||
SELECT
|
||||
learnerHash,
|
||||
|
@ -1,7 +1,6 @@
|
||||
set any_join_distinct_right_table_keys = 1;
|
||||
SET joined_subquery_requires_alias = 0;
|
||||
|
||||
SELECT * FROM (SELECT 1 AS id, 2 AS value);
|
||||
SELECT * FROM (SELECT 1 AS id, 2 AS value, 3 AS A) ANY INNER JOIN (SELECT 1 AS id, 4 AS values, 5 AS D) USING id;
|
||||
SELECT *, d.* FROM ( SELECT 1 AS id, 2 AS value ) ANY INNER JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id;
|
||||
SELECT *, d.*, d.values FROM ( SELECT 1 AS id, 2 AS value ) ANY INNER JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id;
|
||||
SELECT * FROM (SELECT 1 AS id, 2 AS value, 3 AS A) SEMI LEFT JOIN (SELECT 1 AS id, 4 AS values, 5 AS D) USING id;
|
||||
SELECT *, d.* FROM ( SELECT 1 AS id, 2 AS value ) SEMI LEFT JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id;
|
||||
SELECT *, d.*, d.values FROM ( SELECT 1 AS id, 2 AS value ) SEMI LEFT JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id;
|
||||
|
@ -12,8 +12,7 @@ SYSTEM SYNC REPLICA byte_identical_r2;
|
||||
ALTER TABLE byte_identical_r1 ADD COLUMN y DEFAULT rand();
|
||||
OPTIMIZE TABLE byte_identical_r1 PARTITION tuple() FINAL;
|
||||
|
||||
SET any_join_distinct_right_table_keys = 1;
|
||||
SELECT x, t1.y - t2.y FROM byte_identical_r1 t1 ANY INNER JOIN byte_identical_r2 t2 USING x ORDER BY x;
|
||||
SELECT x, t1.y - t2.y FROM byte_identical_r1 t1 SEMI LEFT JOIN byte_identical_r2 t2 USING x ORDER BY x;
|
||||
|
||||
DROP TABLE byte_identical_r1;
|
||||
DROP TABLE byte_identical_r2;
|
||||
|
@ -1,5 +1,3 @@
|
||||
set any_join_distinct_right_table_keys = 1;
|
||||
|
||||
drop table if exists fooL;
|
||||
drop table if exists fooR;
|
||||
create table fooL (a Int32, v String) engine = Memory;
|
||||
@ -9,7 +7,7 @@ insert into fooL select number, 'L' || toString(number) from numbers(2);
|
||||
insert into fooL select number, 'LL' || toString(number) from numbers(2);
|
||||
insert into fooR select number, 'R' || toString(number) from numbers(2);
|
||||
|
||||
select distinct a from fooL any join fooR using(a) order by a;
|
||||
select distinct a from fooL semi left join fooR using(a) order by a;
|
||||
|
||||
drop table fooL;
|
||||
drop table fooR;
|
||||
|
@ -1,4 +1,3 @@
|
||||
SET any_join_distinct_right_table_keys = 1;
|
||||
SET join_use_nulls = 1;
|
||||
SELECT number FROM system.numbers ANY INNER JOIN (SELECT number, ['test'] FROM system.numbers LIMIT 1) js2 USING (number) LIMIT 1;
|
||||
SELECT number FROM system.numbers SEMI LEFT JOIN (SELECT number, ['test'] FROM system.numbers LIMIT 1) js2 USING (number) LIMIT 1;
|
||||
SELECT number FROM system.numbers ANY LEFT JOIN (SELECT number, ['test'] FROM system.numbers LIMIT 1) js2 USING (number) LIMIT 1;
|
||||
|
@ -1,5 +1,5 @@
|
||||
drop table if exists tab;
|
||||
create table tab (x UInt64) engine = MergeTree order by tuple();
|
||||
|
||||
insert into tab select number as n from numbers(20) any inner join (select number * 10 as n from numbers(2)) using(n) settings any_join_distinct_right_table_keys = 1, max_block_size = 5;
|
||||
insert into tab select number as n from numbers(20) semi left join (select number * 10 as n from numbers(2)) using(n) settings max_block_size = 5;
|
||||
select * from tab order by x;
|
||||
|
@ -1,4 +1,3 @@
|
||||
Set any_join_distinct_right_table_keys=1;
|
||||
DROP TABLE IF EXISTS test_insert_t1;
|
||||
DROP TABLE IF EXISTS test_insert_t2;
|
||||
DROP TABLE IF EXISTS test_insert_t3;
|
||||
@ -15,7 +14,7 @@ INSERT INTO test_insert_t2 SELECT '2019-09-01',toString(number) FROM system.numb
|
||||
INSERT INTO test_insert_t2 SELECT '2019-09-01',toString(number) FROM system.numbers WHERE number >=700000 limit 200;
|
||||
INSERT INTO test_insert_t2 SELECT '2019-09-01',toString(number) FROM system.numbers WHERE number >=900000 limit 200;
|
||||
|
||||
INSERT INTO test_insert_t3 SELECT '2019-09-01', uid, name, city FROM ( SELECT dt, uid, name, city FROM test_insert_t1 WHERE dt = '2019-09-01') t1 GLOBAL ANY INNER JOIN (SELECT uid FROM test_insert_t2 WHERE dt = '2019-09-01') t2 ON t1.uid=t2.uid;
|
||||
INSERT INTO test_insert_t3 SELECT '2019-09-01', uid, name, city FROM ( SELECT dt, uid, name, city FROM test_insert_t1 WHERE dt = '2019-09-01') t1 GLOBAL SEMI LEFT JOIN (SELECT uid FROM test_insert_t2 WHERE dt = '2019-09-01') t2 ON t1.uid=t2.uid;
|
||||
|
||||
SELECT count(*) FROM test_insert_t3;
|
||||
|
||||
|
@ -5,12 +5,12 @@ any left
|
||||
3 a4 0
|
||||
4 a5 4 b3
|
||||
any left (rev)
|
||||
0 5 b6
|
||||
2 a3 2 b1
|
||||
2 a3 2 b2
|
||||
4 a5 4 b3
|
||||
4 a5 4 b4
|
||||
4 a5 4 b5
|
||||
4 a5 4 b6
|
||||
any inner
|
||||
2 a3 2 b1
|
||||
4 a5 4 b3
|
||||
@ -18,12 +18,12 @@ any inner (rev)
|
||||
2 a3 2 b1
|
||||
4 a5 4 b3
|
||||
any right
|
||||
0 5 b6
|
||||
2 a3 2 b1
|
||||
2 a3 2 b2
|
||||
4 a5 4 b3
|
||||
4 a5 4 b4
|
||||
4 a5 4 b5
|
||||
4 a5 4 b6
|
||||
any right (rev)
|
||||
0 a1 0
|
||||
1 a2 0
|
||||
|
@ -5,7 +5,7 @@ CREATE TABLE t1 (x UInt32, s String) engine = Memory;
|
||||
CREATE TABLE t2 (x UInt32, s String) engine = Memory;
|
||||
|
||||
INSERT INTO t1 (x, s) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5');
|
||||
INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (4, 'b6');
|
||||
INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6');
|
||||
|
||||
SET join_use_nulls = 0;
|
||||
SET any_join_distinct_right_table_keys = 0;
|
||||
|
@ -0,0 +1,15 @@
|
||||
semi left
|
||||
2 a3 2 b1
|
||||
4 a5 4 b3
|
||||
semi right
|
||||
2 a3 2 b1
|
||||
2 a3 2 b2
|
||||
4 a5 4 b3
|
||||
4 a5 4 b4
|
||||
4 a5 4 b5
|
||||
anti left
|
||||
0 a1 0
|
||||
1 a2 1
|
||||
3 a4 3
|
||||
anti right
|
||||
0 5 b6
|
25
dbms/tests/queries/0_stateless/01031_semi_anti_join.sql
Normal file
25
dbms/tests/queries/0_stateless/01031_semi_anti_join.sql
Normal file
@ -0,0 +1,25 @@
|
||||
DROP TABLE IF EXISTS t1;
|
||||
DROP TABLE IF EXISTS t2;
|
||||
|
||||
CREATE TABLE t1 (x UInt32, s String) engine = Memory;
|
||||
CREATE TABLE t2 (x UInt32, s String) engine = Memory;
|
||||
|
||||
INSERT INTO t1 (x, s) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5');
|
||||
INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6');
|
||||
|
||||
SET join_use_nulls = 0;
|
||||
|
||||
SELECT 'semi left';
|
||||
SELECT t1.*, t2.* FROM t1 SEMI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
|
||||
|
||||
SELECT 'semi right';
|
||||
SELECT t1.*, t2.* FROM t1 SEMI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
|
||||
|
||||
SELECT 'anti left';
|
||||
SELECT t1.*, t2.* FROM t1 ANTI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
|
||||
|
||||
SELECT 'anti right';
|
||||
SELECT t1.*, t2.* FROM t1 ANTI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
|
||||
|
||||
DROP TABLE t1;
|
||||
DROP TABLE t2;
|
Loading…
Reference in New Issue
Block a user