Mirror of https://github.com/ClickHouse/ClickHouse.git
Merge pull request #55051 from ClickHouse/vdimir/full_sorting_join_improvements
Support ASOF JOIN for full_sorting_merge algorithm
Commit 846922aeae
@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column:

- Must contain an ordered sequence.
- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md).
- Can’t be the only column in the `JOIN` clause.
- For `hash` join algorithm it can’t be the only column in the `JOIN` clause.

Syntax `ASOF JOIN ... ON`:
@ -337,7 +337,8 @@ For example, consider the following tables:

`ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can’t be joined.

:::note
`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
`ASOF JOIN` is supported only by `hash` and `full_sorting_merge` join algorithms.
It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine.
:::

## PASTE JOIN Usage
@ -6,12 +6,17 @@ namespace DB
{

String getRandomASCIIString(size_t length)
{
return getRandomASCIIString(length, thread_local_rng);
}

String getRandomASCIIString(size_t length, pcg64 & rng)
{
std::uniform_int_distribution<int> distribution('a', 'z');
String res;
res.resize(length);
for (auto & c : res)
c = distribution(thread_local_rng);
c = distribution(rng);
return res;
}
@ -2,11 +2,14 @@

#include <Core/Types.h>

#include <pcg_random.hpp>

namespace DB
{

/// Slow random string. Useful for random names and things like this. Not for generating data.
String getRandomASCIIString(size_t length);
String getRandomASCIIString(size_t length, pcg64 & rng);

}
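The new `pcg64 &` overload exists so that callers can inject their own seeded generator and get reproducible strings, while the single-argument form keeps drawing from `thread_local_rng`. A minimal usage sketch, assuming it is compiled inside the ClickHouse source tree; the seed and variable names are illustrative, not taken from this commit:

```cpp
#include <iostream>
#include <string>

#include <pcg_random.hpp>
#include <Common/getRandomASCIIString.h>

int main()
{
    /// Fixed seed -> the same sequence of strings on every run.
    pcg64 rng(42);

    /// Each call returns `length` characters drawn uniformly from 'a'..'z'.
    std::string a = DB::getRandomASCIIString(10, rng);
    std::string b = DB::getRandomASCIIString(10, rng);

    /// The legacy overload stays non-deterministic (thread_local_rng).
    std::string c = DB::getRandomASCIIString(10);

    std::cout << a << ' ' << b << ' ' << c << '\n';
}
```

The gtest added at the bottom of this diff relies on exactly this property: it logs `TEST_RANDOM_SEED` and can regenerate identical random inputs from it.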
@ -1726,7 +1726,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
const auto & join_clause = table_join.getOnlyClause();

auto join_kind = table_join.kind();
bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind);
auto join_strictness = table_join.strictness();

bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));

auto has_non_const = [](const Block & block, const auto & keys)
{
@ -1745,7 +1748,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
bool has_non_const_keys = has_non_const(query_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(joined_plan->getCurrentDataStream().header, join_clause.key_names_right);

if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys)
if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
{
auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left);
auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right);
@ -77,7 +77,6 @@ namespace ErrorCodes
extern const int INVALID_JOIN_ON_EXPRESSION;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int SYNTAX_ERROR;
extern const int ACCESS_DENIED;
extern const int PARAMETER_OUT_OF_BOUND;
extern const int TOO_MANY_COLUMNS;
@ -1397,12 +1396,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
{
if (!join_clause.hasASOF())
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"JOIN {} no inequality in ASOF JOIN ON section.",
join_node.formatASTForErrorMessage());

if (table_join_clause.key_names_left.size() <= 1)
throw Exception(ErrorCodes::SYNTAX_ERROR,
"JOIN {} ASOF join needs at least one equi-join column",
"JOIN {} no inequality in ASOF JOIN ON section",
join_node.formatASTForErrorMessage());
}

@ -1524,7 +1518,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
{
const auto & join_clause = table_join->getOnlyClause();

bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind);
bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any)
&& (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind));

auto has_non_const = [](const Block & block, const auto & keys)
{
@ -1544,7 +1540,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left)
&& has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right);

if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys)
if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys)
{
auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left);
auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right);
@ -34,13 +34,20 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns)
|
||||
FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, JoinStrictness strictness)
|
||||
{
|
||||
SortDescription desc;
|
||||
desc.reserve(columns.size());
|
||||
for (const auto & name : columns)
|
||||
desc.emplace_back(name);
|
||||
return std::make_unique<FullMergeJoinCursor>(block, desc);
|
||||
return std::make_unique<FullMergeJoinCursor>(block, desc, strictness == JoinStrictness::Asof);
|
||||
}
|
||||
|
||||
bool ALWAYS_INLINE isNullAt(const IColumn & column, size_t row)
|
||||
{
|
||||
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(&column))
|
||||
return nullable_column->isNullAt(row);
|
||||
return false;
|
||||
}
|
||||
|
||||
template <bool has_left_nulls, bool has_right_nulls>
|
||||
@ -54,7 +61,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
|
||||
if (left_nullable && right_nullable)
|
||||
{
|
||||
int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint);
|
||||
if (res)
|
||||
if (res != 0)
|
||||
return res;
|
||||
|
||||
/// NULL != NULL case
|
||||
@ -90,9 +97,10 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column,
|
||||
|
||||
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
|
||||
const SortCursorImpl & rhs, size_t rpos,
|
||||
size_t key_length,
|
||||
int null_direction_hint)
|
||||
{
|
||||
for (size_t i = 0; i < lhs.sort_columns_size; ++i)
|
||||
for (size_t i = 0; i < key_length; ++i)
|
||||
{
|
||||
/// TODO(@vdimir): use nullableCompareAt only if there's nullable columns
|
||||
int cmp = nullableCompareAt<true, true>(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint);
|
||||
@ -104,13 +112,18 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos,
|
||||
|
||||
int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint)
|
||||
{
|
||||
return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), null_direction_hint);
|
||||
return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
|
||||
}
|
||||
|
||||
int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs, int null_direction_hint)
|
||||
{
|
||||
return nullableCompareAt<true, true>(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow(), null_direction_hint);
|
||||
}
|
||||
|
||||
bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint)
|
||||
{
|
||||
/// The last row of left cursor is less than the current row of the right cursor.
|
||||
int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), null_direction_hint);
|
||||
int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint);
|
||||
return cmp < 0;
|
||||
}
|
||||
|
||||
@ -222,25 +235,136 @@ Chunk getRowFromChunk(const Chunk & chunk, size_t pos)
|
||||
return result;
|
||||
}
|
||||
|
||||
void inline addRange(PaddedPODArray<UInt64> & left_map, size_t start, size_t end)
|
||||
void inline addRange(PaddedPODArray<UInt64> & values, UInt64 start, UInt64 end)
|
||||
{
|
||||
assert(end > start);
|
||||
for (size_t i = start; i < end; ++i)
|
||||
left_map.push_back(i);
|
||||
for (UInt64 i = start; i < end; ++i)
|
||||
values.push_back(i);
|
||||
}
|
||||
|
||||
void inline addMany(PaddedPODArray<UInt64> & left_or_right_map, size_t idx, size_t num)
|
||||
void inline addMany(PaddedPODArray<UInt64> & values, UInt64 value, size_t num)
|
||||
{
|
||||
for (size_t i = 0; i < num; ++i)
|
||||
left_or_right_map.push_back(idx);
|
||||
values.resize_fill(values.size() + num, value);
|
||||
}
|
||||
}
|
||||
|
||||
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_)
|
||||
: sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_)
|
||||
JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos)
|
||||
{
|
||||
row.reserve(cursor->sort_columns.size());
|
||||
for (const auto & col : cursor->sort_columns)
|
||||
{
|
||||
auto new_col = col->cloneEmpty();
|
||||
new_col->insertFrom(*col, pos);
|
||||
row.push_back(std::move(new_col));
|
||||
}
|
||||
if (const IColumn * asof_column = cursor.getAsofColumn())
|
||||
{
|
||||
if (const auto * nullable_asof_column = checkAndGetColumn<ColumnNullable>(asof_column))
|
||||
{
|
||||
/// We save matched column, and since NULL do not match anything, we can't use it as a key
|
||||
chassert(!nullable_asof_column->isNullAt(pos));
|
||||
asof_column = nullable_asof_column->getNestedColumnPtr().get();
|
||||
}
|
||||
auto new_col = asof_column->cloneEmpty();
|
||||
new_col->insertFrom(*asof_column, pos);
|
||||
row.push_back(std::move(new_col));
|
||||
}
|
||||
}
|
||||
|
||||
void JoinKeyRow::reset()
|
||||
{
|
||||
row.clear();
|
||||
}
|
||||
|
||||
bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const
|
||||
{
|
||||
if (row.empty())
|
||||
return false;
|
||||
|
||||
for (size_t i = 0; i < cursor->sort_columns_size; ++i)
|
||||
{
|
||||
// int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction);
|
||||
int cmp = nullableCompareAt<true, true>(*this->row[i], *cursor->sort_columns[i], 0, cursor->getRow(), cursor->desc[i].nulls_direction);
|
||||
if (cmp != 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
|
||||
{
|
||||
chassert(this->row.size() == cursor->sort_columns_size + 1);
|
||||
if (!equals(cursor))
|
||||
return false;
|
||||
|
||||
const auto & asof_row = row.back();
|
||||
if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow()))
|
||||
return false;
|
||||
|
||||
int cmp = 0;
|
||||
if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(cursor.getAsofColumn()))
|
||||
cmp = nullable_column->getNestedColumn().compareAt(cursor->getRow(), 0, *asof_row, 1);
|
||||
else
|
||||
cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1);
|
||||
|
||||
return (asof_inequality == ASOFJoinInequality::Less && cmp < 0)
|
||||
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0)
|
||||
|| (asof_inequality == ASOFJoinInequality::Greater && cmp > 0)
|
||||
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0);
|
||||
}
|
||||
|
||||
void AnyJoinState::set(size_t source_num, const FullMergeJoinCursor & cursor)
|
||||
{
|
||||
assert(cursor->rows);
|
||||
keys[source_num] = JoinKeyRow(cursor, cursor->rows - 1);
|
||||
}
|
||||
|
||||
void AnyJoinState::reset(size_t source_num)
|
||||
{
|
||||
keys[source_num].reset();
|
||||
value.clear();
|
||||
}
|
||||
|
||||
void AnyJoinState::setValue(Chunk value_)
|
||||
{
|
||||
value = std::move(value_);
|
||||
}
|
||||
|
||||
bool AnyJoinState::empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
|
||||
|
||||
|
||||
void AsofJoinState::set(const FullMergeJoinCursor & rcursor, size_t rpos)
|
||||
{
|
||||
key = JoinKeyRow(rcursor, rpos);
|
||||
value = rcursor.getCurrent().clone();
|
||||
value_row = rpos;
|
||||
}
|
||||
|
||||
void AsofJoinState::reset()
|
||||
{
|
||||
key.reset();
|
||||
value.clear();
|
||||
}
|
||||
|
||||
FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof)
|
||||
: sample_block(materializeBlock(sample_block_).cloneEmpty())
|
||||
, desc(description_)
|
||||
{
|
||||
if (desc.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor");
|
||||
|
||||
if (is_asof)
|
||||
{
|
||||
/// For ASOF join prefix of sort description is used for equality comparison
|
||||
/// and the last column is used for inequality comparison and is handled separately
|
||||
|
||||
auto asof_column_description = desc.back();
|
||||
desc.pop_back();
|
||||
|
||||
chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1);
|
||||
asof_column_position = sample_block.getPositionByName(asof_column_description.column_name);
|
||||
}
|
||||
}
|
||||
|
||||
const Chunk & FullMergeJoinCursor::getCurrent() const
|
||||
{
|
||||
@ -278,48 +402,103 @@ bool FullMergeJoinCursor::fullyCompleted() const
|
||||
return !cursor.isValid() && recieved_all_blocks;
|
||||
}
|
||||
|
||||
String FullMergeJoinCursor::dump() const
|
||||
{
|
||||
Strings row_dump;
|
||||
if (cursor.isValid())
|
||||
{
|
||||
Field val;
|
||||
for (size_t i = 0; i < cursor.sort_columns_size; ++i)
|
||||
{
|
||||
cursor.sort_columns[i]->get(cursor.getRow(), val);
|
||||
row_dump.push_back(val.dump());
|
||||
}
|
||||
|
||||
if (const auto * asof_column = getAsofColumn())
|
||||
{
|
||||
asof_column->get(cursor.getRow(), val);
|
||||
row_dump.push_back(val.dump());
|
||||
}
|
||||
}
|
||||
|
||||
return fmt::format("<{}/{}{}>[{}]",
|
||||
cursor.getRow(), cursor.rows,
|
||||
recieved_all_blocks ? "(finished)" : "",
|
||||
fmt::join(row_dump, ", "));
|
||||
}
|
||||
|
||||
MergeJoinAlgorithm::MergeJoinAlgorithm(
|
||||
JoinPtr table_join_,
|
||||
JoinKind kind_,
|
||||
JoinStrictness strictness_,
|
||||
const TableJoin::JoinOnClause & on_clause_,
|
||||
const Blocks & input_headers,
|
||||
size_t max_block_size_)
|
||||
: table_join(table_join_)
|
||||
: kind(kind_)
|
||||
, strictness(strictness_)
|
||||
, max_block_size(max_block_size_)
|
||||
, log(getLogger("MergeJoinAlgorithm"))
|
||||
{
|
||||
if (input_headers.size() != 2)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs");
|
||||
|
||||
auto strictness = table_join->getTableJoin().strictness();
|
||||
if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All)
|
||||
if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All && strictness != JoinStrictness::Asof)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness);
|
||||
|
||||
auto kind = table_join->getTableJoin().kind();
|
||||
if (strictness == JoinStrictness::Asof)
|
||||
{
|
||||
if (kind != JoinKind::Left && kind != JoinKind::Inner)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not implement ASOF {} join", kind);
|
||||
}
|
||||
|
||||
if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind))
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind);
|
||||
|
||||
const auto & join_on = table_join->getTableJoin().getOnlyClause();
|
||||
|
||||
if (join_on.on_filter_condition_left || join_on.on_filter_condition_right)
|
||||
if (on_clause_.on_filter_condition_left || on_clause_.on_filter_condition_right)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions");
|
||||
|
||||
cursors = {
|
||||
createCursor(input_headers[0], join_on.key_names_left),
|
||||
createCursor(input_headers[1], join_on.key_names_right)
|
||||
createCursor(input_headers[0], on_clause_.key_names_left, strictness),
|
||||
createCursor(input_headers[1], on_clause_.key_names_right, strictness),
|
||||
};
|
||||
}
|
||||
|
||||
for (const auto & [left_key, right_key] : table_join->getTableJoin().leftToRightKeyRemap())
|
||||
MergeJoinAlgorithm::MergeJoinAlgorithm(
|
||||
JoinPtr join_ptr,
|
||||
const Blocks & input_headers,
|
||||
size_t max_block_size_)
|
||||
: MergeJoinAlgorithm(
|
||||
join_ptr->getTableJoin().kind(),
|
||||
join_ptr->getTableJoin().strictness(),
|
||||
join_ptr->getTableJoin().getOnlyClause(),
|
||||
input_headers,
|
||||
max_block_size_)
|
||||
{
|
||||
for (const auto & [left_key, right_key] : join_ptr->getTableJoin().leftToRightKeyRemap())
|
||||
{
|
||||
size_t left_idx = input_headers[0].getPositionByName(left_key);
|
||||
size_t right_idx = input_headers[1].getPositionByName(right_key);
|
||||
left_to_right_key_remap[left_idx] = right_idx;
|
||||
}
|
||||
|
||||
const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(table_join.get());
|
||||
const auto *smjPtr = typeid_cast<const FullSortingMergeJoin *>(join_ptr.get());
|
||||
if (smjPtr)
|
||||
{
|
||||
null_direction_hint = smjPtr->getNullDirection();
|
||||
}
|
||||
|
||||
if (strictness == JoinStrictness::Asof)
|
||||
setAsofInequality(join_ptr->getTableJoin().getAsofInequality());
|
||||
}
|
||||
|
||||
void MergeJoinAlgorithm::setAsofInequality(ASOFJoinInequality asof_inequality_)
|
||||
{
|
||||
if (strictness != JoinStrictness::Asof)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "setAsofInequality is only supported for ASOF joins");
|
||||
|
||||
if (asof_inequality_ == ASOFJoinInequality::None)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "ASOF inequality cannot be None");
|
||||
|
||||
asof_inequality = asof_inequality_;
|
||||
}
|
||||
|
||||
void MergeJoinAlgorithm::logElapsed(double seconds)
|
||||
@ -407,7 +586,7 @@ struct AllJoinImpl
|
||||
size_t lnum = nextDistinct(left_cursor.cursor);
|
||||
size_t rnum = nextDistinct(right_cursor.cursor);
|
||||
|
||||
bool all_fit_in_block = std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size;
|
||||
bool all_fit_in_block = !max_block_size || std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size;
|
||||
bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid();
|
||||
if (all_fit_in_block && have_all_ranges)
|
||||
{
|
||||
@ -421,7 +600,7 @@ struct AllJoinImpl
|
||||
else
|
||||
{
|
||||
assert(state == nullptr);
|
||||
state = std::make_unique<AllJoinState>(left_cursor.cursor, lpos, right_cursor.cursor, rpos);
|
||||
state = std::make_unique<AllJoinState>(left_cursor, lpos, right_cursor, rpos);
|
||||
state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum);
|
||||
state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum);
|
||||
return;
|
||||
@ -466,6 +645,17 @@ void dispatchKind(JoinKind kind, Args && ... args)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind);
|
||||
}
|
||||
|
||||
MutableColumns MergeJoinAlgorithm::getEmptyResultColumns() const
|
||||
{
|
||||
MutableColumns result_cols;
|
||||
for (size_t i = 0; i < 2; ++i)
|
||||
{
|
||||
for (const auto & col : cursors[i]->sampleColumns())
|
||||
result_cols.push_back(col->cloneEmpty());
|
||||
}
|
||||
return result_cols;
|
||||
}
|
||||
|
||||
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState()
|
||||
{
|
||||
if (all_join_state && all_join_state->finished())
|
||||
@ -479,7 +669,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
|
||||
/// Accumulate blocks with same key in all_join_state
|
||||
for (size_t i = 0; i < 2; ++i)
|
||||
{
|
||||
if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(cursors[i]->cursor))
|
||||
if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(*cursors[i]))
|
||||
{
|
||||
size_t pos = cursors[i]->cursor.getRow();
|
||||
size_t num = nextDistinct(cursors[i]->cursor);
|
||||
@ -499,15 +689,10 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
|
||||
stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored());
|
||||
|
||||
/// join all rows with current key
|
||||
MutableColumns result_cols;
|
||||
for (size_t i = 0; i < 2; ++i)
|
||||
{
|
||||
for (const auto & col : cursors[i]->sampleColumns())
|
||||
result_cols.push_back(col->cloneEmpty());
|
||||
}
|
||||
MutableColumns result_cols = getEmptyResultColumns();
|
||||
|
||||
size_t total_rows = 0;
|
||||
while (total_rows < max_block_size)
|
||||
while (!max_block_size || total_rows < max_block_size)
|
||||
{
|
||||
const auto & left_range = all_join_state->getLeft();
|
||||
const auto & right_range = all_join_state->getRight();
|
||||
@ -532,7 +717,52 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAllJoinState
|
||||
return {};
|
||||
}
|
||||
|
||||
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin(JoinKind kind)
|
||||
std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAsofJoinState()
|
||||
{
|
||||
if (strictness != JoinStrictness::Asof)
|
||||
return {};
|
||||
|
||||
if (!cursors[1]->fullyCompleted())
|
||||
return {};
|
||||
|
||||
auto & left_cursor = *cursors[0];
|
||||
const auto & left_columns = left_cursor.getCurrent().getColumns();
|
||||
|
||||
MutableColumns result_cols = getEmptyResultColumns();
|
||||
|
||||
while (left_cursor->isValid() && asof_join_state.hasMatch(left_cursor, asof_inequality))
|
||||
{
|
||||
size_t i = 0;
|
||||
for (const auto & col : left_columns)
|
||||
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
|
||||
for (const auto & col : asof_join_state.value.getColumns())
|
||||
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
|
||||
chassert(i == result_cols.size());
|
||||
left_cursor->next();
|
||||
}
|
||||
|
||||
while (isLeft(kind) && left_cursor->isValid())
|
||||
{
|
||||
/// return row with default values at right side
|
||||
size_t i = 0;
|
||||
for (const auto & col : left_columns)
|
||||
result_cols[i++]->insertFrom(*col, left_cursor->getRow());
|
||||
for (; i < result_cols.size(); ++i)
|
||||
result_cols[i]->insertDefault();
|
||||
chassert(i == result_cols.size());
|
||||
|
||||
left_cursor->next();
|
||||
}
|
||||
|
||||
size_t result_rows = result_cols.empty() ? 0 : result_cols.front()->size();
|
||||
if (result_rows)
|
||||
return Status(Chunk(std::move(result_cols), result_rows));
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin()
|
||||
{
|
||||
PaddedPODArray<UInt64> idx_map[2];
|
||||
|
||||
@ -595,7 +825,7 @@ struct AnyJoinImpl
|
||||
FullMergeJoinCursor & right_cursor,
|
||||
PaddedPODArray<UInt64> & left_map,
|
||||
PaddedPODArray<UInt64> & right_map,
|
||||
AnyJoinState & state,
|
||||
AnyJoinState & any_join_state,
|
||||
int null_direction_hint)
|
||||
{
|
||||
assert(enabled);
|
||||
@ -656,21 +886,21 @@ struct AnyJoinImpl
|
||||
}
|
||||
}
|
||||
|
||||
/// Remember index of last joined row to propagate it to next block
|
||||
/// Remember last joined row to propagate it to next block
|
||||
|
||||
state.setValue({});
|
||||
any_join_state.setValue({});
|
||||
if (!left_cursor->isValid())
|
||||
{
|
||||
state.set(0, left_cursor.cursor);
|
||||
any_join_state.set(0, left_cursor);
|
||||
if (cmp == 0 && isLeft(kind))
|
||||
state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos));
|
||||
any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos));
|
||||
}
|
||||
|
||||
if (!right_cursor->isValid())
|
||||
{
|
||||
state.set(1, right_cursor.cursor);
|
||||
any_join_state.set(1, right_cursor);
|
||||
if (cmp == 0 && isRight(kind))
|
||||
state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos));
|
||||
any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos));
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -680,40 +910,34 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
|
||||
if (any_join_state.empty())
|
||||
return {};
|
||||
|
||||
auto kind = table_join->getTableJoin().kind();
|
||||
|
||||
Chunk result;
|
||||
|
||||
for (size_t source_num = 0; source_num < 2; ++source_num)
|
||||
{
|
||||
auto & current = *cursors[source_num];
|
||||
auto & state = any_join_state;
|
||||
if (any_join_state.keys[source_num].equals(current.cursor))
|
||||
if (any_join_state.keys[source_num].equals(current))
|
||||
{
|
||||
size_t start_pos = current->getRow();
|
||||
size_t length = nextDistinct(current.cursor);
|
||||
|
||||
if (length && isLeft(kind) && source_num == 0)
|
||||
{
|
||||
if (state.value)
|
||||
result = copyChunkResized(current.getCurrent(), state.value, start_pos, length);
|
||||
if (any_join_state.value)
|
||||
result = copyChunkResized(current.getCurrent(), any_join_state.value, start_pos, length);
|
||||
else
|
||||
result = createBlockWithDefaults(source_num, start_pos, length);
|
||||
}
|
||||
|
||||
if (length && isRight(kind) && source_num == 1)
|
||||
{
|
||||
if (state.value)
|
||||
result = copyChunkResized(state.value, current.getCurrent(), start_pos, length);
|
||||
if (any_join_state.value)
|
||||
result = copyChunkResized(any_join_state.value, current.getCurrent(), start_pos, length);
|
||||
else
|
||||
result = createBlockWithDefaults(source_num, start_pos, length);
|
||||
}
|
||||
|
||||
/// We've found row with other key, no need to skip more rows with current key
|
||||
if (current->isValid())
|
||||
{
|
||||
state.keys[source_num].reset();
|
||||
}
|
||||
any_join_state.keys[source_num].reset();
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -726,7 +950,7 @@ std::optional<MergeJoinAlgorithm::Status> MergeJoinAlgorithm::handleAnyJoinState
|
||||
return {};
|
||||
}
|
||||
|
||||
MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind)
|
||||
MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin()
|
||||
{
|
||||
if (auto result = handleAnyJoinState())
|
||||
return std::move(*result);
|
||||
@ -771,10 +995,151 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind)
|
||||
return Status(std::move(result));
|
||||
}
|
||||
|
||||
|
||||
MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin()
|
||||
{
|
||||
auto & left_cursor = *cursors[0];
|
||||
if (!left_cursor->isValid())
|
||||
return Status(0);
|
||||
|
||||
auto & right_cursor = *cursors[1];
|
||||
if (!right_cursor->isValid())
|
||||
return Status(1);
|
||||
|
||||
const auto & left_columns = left_cursor.getCurrent().getColumns();
|
||||
const auto & right_columns = right_cursor.getCurrent().getColumns();
|
||||
|
||||
MutableColumns result_cols = getEmptyResultColumns();
|
||||
|
||||
while (left_cursor->isValid() && right_cursor->isValid())
|
||||
{
|
||||
auto lpos = left_cursor->getRow();
|
||||
auto rpos = right_cursor->getRow();
|
||||
auto cmp = compareCursors(*left_cursor, *right_cursor, null_direction_hint);
|
||||
if (cmp == 0)
|
||||
{
|
||||
if (isNullAt(*left_cursor.getAsofColumn(), lpos))
|
||||
cmp = -1;
|
||||
if (isNullAt(*right_cursor.getAsofColumn(), rpos))
|
||||
cmp = 1;
|
||||
}
|
||||
|
||||
if (cmp == 0)
|
||||
{
|
||||
auto asof_cmp = compareAsofCursors(left_cursor, right_cursor, null_direction_hint);
|
||||
|
||||
if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1)
|
||||
|| (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0))
|
||||
{
|
||||
/// First row in right table that is greater (or equal) than current row in left table
|
||||
/// matches asof join condition the best
|
||||
size_t i = 0;
|
||||
for (const auto & col : left_columns)
|
||||
result_cols[i++]->insertFrom(*col, lpos);
|
||||
for (const auto & col : right_columns)
|
||||
result_cols[i++]->insertFrom(*col, rpos);
|
||||
chassert(i == result_cols.size());
|
||||
|
||||
left_cursor->next();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (asof_inequality == ASOFJoinInequality::Less || asof_inequality == ASOFJoinInequality::LessOrEquals)
|
||||
{
|
||||
/// Asof condition is not (yet) satisfied, skip row in right table
|
||||
right_cursor->next();
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((asof_inequality == ASOFJoinInequality::Greater && asof_cmp >= 1)
|
||||
|| (asof_inequality == ASOFJoinInequality::GreaterOrEquals && asof_cmp >= 0))
|
||||
{
|
||||
/// condition is satisfied, remember this row and move next to try to find better match
|
||||
asof_join_state.set(right_cursor, rpos);
|
||||
right_cursor->next();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (asof_inequality == ASOFJoinInequality::Greater || asof_inequality == ASOFJoinInequality::GreaterOrEquals)
|
||||
{
|
||||
/// Asof condition is not satisfied anymore, use last matched row from right table
|
||||
if (asof_join_state.hasMatch(left_cursor, asof_inequality))
|
||||
{
|
||||
size_t i = 0;
|
||||
for (const auto & col : left_columns)
|
||||
result_cols[i++]->insertFrom(*col, lpos);
|
||||
for (const auto & col : asof_join_state.value.getColumns())
|
||||
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
|
||||
chassert(i == result_cols.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
asof_join_state.reset();
|
||||
if (isLeft(kind))
|
||||
{
|
||||
/// return row with default values at right side
|
||||
size_t i = 0;
|
||||
for (const auto & col : left_columns)
|
||||
result_cols[i++]->insertFrom(*col, lpos);
|
||||
for (; i < result_cols.size(); ++i)
|
||||
result_cols[i]->insertDefault();
|
||||
chassert(i == result_cols.size());
|
||||
}
|
||||
}
|
||||
left_cursor->next();
|
||||
continue;
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join");
|
||||
}
|
||||
else if (cmp < 0)
|
||||
{
|
||||
if (asof_join_state.hasMatch(left_cursor, asof_inequality))
|
||||
{
|
||||
size_t i = 0;
|
||||
for (const auto & col : left_columns)
|
||||
result_cols[i++]->insertFrom(*col, lpos);
|
||||
for (const auto & col : asof_join_state.value.getColumns())
|
||||
result_cols[i++]->insertFrom(*col, asof_join_state.value_row);
|
||||
chassert(i == result_cols.size());
|
||||
left_cursor->next();
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
asof_join_state.reset();
|
||||
}
|
||||
|
||||
/// no matches for rows in left table, just pass them through
|
||||
size_t num = nextDistinct(*left_cursor);
|
||||
|
||||
if (isLeft(kind) && num)
|
||||
{
|
||||
/// return them with default values at right side
|
||||
size_t i = 0;
|
||||
for (const auto & col : left_columns)
|
||||
result_cols[i++]->insertRangeFrom(*col, lpos, num);
|
||||
for (; i < result_cols.size(); ++i)
|
||||
result_cols[i]->insertManyDefaults(num);
|
||||
chassert(i == result_cols.size());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/// skip rows in right table until we find match for current row in left table
|
||||
nextDistinct(*right_cursor);
|
||||
}
|
||||
}
|
||||
size_t num_rows = result_cols.empty() ? 0 : result_cols.front()->size();
|
||||
return Status(Chunk(std::move(result_cols), num_rows));
|
||||
}
|
||||
|
||||
|
||||
/// if `source_num == 0` get data from left cursor and fill defaults at right
|
||||
/// otherwise - vice versa
|
||||
Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const
|
||||
{
|
||||
|
||||
ColumnRawPtrs cols;
|
||||
{
|
||||
const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns();
|
||||
@ -797,7 +1162,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t star
|
||||
cols.push_back(col.get());
|
||||
}
|
||||
}
|
||||
|
||||
Chunk result_chunk;
|
||||
copyColumnsResized(cols, start, num_rows, result_chunk);
|
||||
return result_chunk;
|
||||
@ -813,7 +1177,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num)
|
||||
|
||||
IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
|
||||
{
|
||||
auto kind = table_join->getTableJoin().kind();
|
||||
|
||||
if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted())
|
||||
return Status(0);
|
||||
@ -821,11 +1184,11 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
|
||||
if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted())
|
||||
return Status(1);
|
||||
|
||||
|
||||
if (auto result = handleAllJoinState())
|
||||
{
|
||||
return std::move(*result);
|
||||
}
|
||||
|
||||
if (auto result = handleAsofJoinState())
|
||||
return std::move(*result);
|
||||
|
||||
if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted())
|
||||
{
|
||||
@ -839,7 +1202,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
|
||||
}
|
||||
|
||||
/// check if blocks are not intersecting at all
|
||||
if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0)
|
||||
if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0 && strictness != JoinStrictness::Asof)
|
||||
{
|
||||
if (cmp < 0)
|
||||
{
|
||||
@ -858,13 +1221,14 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge()
|
||||
}
|
||||
}
|
||||
|
||||
auto strictness = table_join->getTableJoin().strictness();
|
||||
|
||||
if (strictness == JoinStrictness::Any)
|
||||
return anyJoin(kind);
|
||||
return anyJoin();
|
||||
|
||||
if (strictness == JoinStrictness::All)
|
||||
return allJoin(kind);
|
||||
return allJoin();
|
||||
|
||||
if (strictness == JoinStrictness::Asof)
|
||||
return asofJoin();
|
||||
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness);
|
||||
}
|
||||
@ -883,9 +1247,26 @@ MergeJoinTransform::MergeJoinTransform(
|
||||
/* always_read_till_end_= */ false,
|
||||
/* empty_chunk_on_finish_= */ true,
|
||||
table_join, input_headers, max_block_size)
|
||||
, log(getLogger("MergeJoinTransform"))
|
||||
{
|
||||
LOG_TRACE(log, "Use MergeJoinTransform");
|
||||
}
|
||||
|
||||
MergeJoinTransform::MergeJoinTransform(
|
||||
JoinKind kind_,
|
||||
JoinStrictness strictness_,
|
||||
const TableJoin::JoinOnClause & on_clause_,
|
||||
const Blocks & input_headers,
|
||||
const Block & output_header,
|
||||
size_t max_block_size,
|
||||
UInt64 limit_hint_)
|
||||
: IMergingTransform<MergeJoinAlgorithm>(
|
||||
input_headers,
|
||||
output_header,
|
||||
/* have_all_inputs_= */ true,
|
||||
limit_hint_,
|
||||
/* always_read_till_end_= */ false,
|
||||
/* empty_chunk_on_finish_= */ true,
|
||||
kind_, strictness_, on_clause_, input_headers, max_block_size)
|
||||
{
|
||||
}
|
||||
|
||||
void MergeJoinTransform::onFinish()
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
@ -19,6 +20,7 @@
|
||||
#include <Processors/Chunk.h>
|
||||
#include <Processors/Merges/Algorithms/IMergingAlgorithm.h>
|
||||
#include <Processors/Merges/IMergingTransform.h>
|
||||
#include <Interpreters/TableJoin.h>
|
||||
|
||||
namespace Poco { class Logger; }
|
||||
|
||||
@ -35,57 +37,28 @@ using FullMergeJoinCursorPtr = std::unique_ptr<FullMergeJoinCursor>;
|
||||
/// Used instead of storing previous block
|
||||
struct JoinKeyRow
|
||||
{
|
||||
std::vector<ColumnPtr> row;
|
||||
|
||||
JoinKeyRow() = default;
|
||||
|
||||
explicit JoinKeyRow(const SortCursorImpl & impl_, size_t pos)
|
||||
{
|
||||
row.reserve(impl_.sort_columns.size());
|
||||
for (const auto & col : impl_.sort_columns)
|
||||
{
|
||||
auto new_col = col->cloneEmpty();
|
||||
new_col->insertFrom(*col, pos);
|
||||
row.push_back(std::move(new_col));
|
||||
}
|
||||
}
|
||||
JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos);
|
||||
|
||||
void reset()
|
||||
{
|
||||
row.clear();
|
||||
}
|
||||
bool equals(const FullMergeJoinCursor & cursor) const;
|
||||
bool asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const;
|
||||
|
||||
bool equals(const SortCursorImpl & impl) const
|
||||
{
|
||||
if (row.empty())
|
||||
return false;
|
||||
void reset();
|
||||
|
||||
assert(this->row.size() == impl.sort_columns_size);
|
||||
for (size_t i = 0; i < impl.sort_columns_size; ++i)
|
||||
{
|
||||
int cmp = this->row[i]->compareAt(0, impl.getRow(), *impl.sort_columns[i], impl.desc[i].nulls_direction);
|
||||
if (cmp != 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
std::vector<ColumnPtr> row;
|
||||
};
|
||||
|
||||
/// Remembers previous key if it was joined in previous block
|
||||
class AnyJoinState : boost::noncopyable
|
||||
{
|
||||
public:
|
||||
AnyJoinState() = default;
|
||||
void set(size_t source_num, const FullMergeJoinCursor & cursor);
|
||||
void setValue(Chunk value_);
|
||||
|
||||
void set(size_t source_num, const SortCursorImpl & cursor)
|
||||
{
|
||||
assert(cursor.rows);
|
||||
keys[source_num] = JoinKeyRow(cursor, cursor.rows - 1);
|
||||
}
|
||||
void reset(size_t source_num);
|
||||
|
||||
void setValue(Chunk value_) { value = std::move(value_); }
|
||||
|
||||
bool empty() const { return keys[0].row.empty() && keys[1].row.empty(); }
|
||||
bool empty() const;
|
||||
|
||||
/// current keys
|
||||
JoinKeyRow keys[2];
|
||||
@ -118,8 +91,8 @@ public:
|
||||
Chunk chunk;
|
||||
};
|
||||
|
||||
AllJoinState(const SortCursorImpl & lcursor, size_t lpos,
|
||||
const SortCursorImpl & rcursor, size_t rpos)
|
||||
AllJoinState(const FullMergeJoinCursor & lcursor, size_t lpos,
|
||||
const FullMergeJoinCursor & rcursor, size_t rpos)
|
||||
: keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)}
|
||||
{
|
||||
}
|
||||
@ -187,13 +160,32 @@ private:
|
||||
size_t ridx = 0;
|
||||
};
|
||||
|
||||
|
||||
class AsofJoinState : boost::noncopyable
|
||||
{
|
||||
public:
|
||||
void set(const FullMergeJoinCursor & rcursor, size_t rpos);
|
||||
void reset();
|
||||
|
||||
bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const
|
||||
{
|
||||
if (value.empty())
|
||||
return false;
|
||||
return key.asofMatch(cursor, asof_inequality);
|
||||
}
|
||||
|
||||
JoinKeyRow key;
|
||||
Chunk value;
|
||||
size_t value_row = 0;
|
||||
};
|
||||
|
||||
/*
|
||||
* Wrapper for SortCursorImpl
|
||||
*/
|
||||
class FullMergeJoinCursor : boost::noncopyable
|
||||
{
|
||||
public:
|
||||
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_);
|
||||
explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof = false);
|
||||
|
||||
bool fullyCompleted() const;
|
||||
void setChunk(Chunk && chunk);
|
||||
@ -203,17 +195,31 @@ public:
|
||||
SortCursorImpl * operator-> () { return &cursor; }
|
||||
const SortCursorImpl * operator-> () const { return &cursor; }
|
||||
|
||||
SortCursorImpl & operator* () { return cursor; }
|
||||
const SortCursorImpl & operator* () const { return cursor; }
|
||||
|
||||
SortCursorImpl cursor;
|
||||
|
||||
const Block & sampleBlock() const { return sample_block; }
|
||||
Columns sampleColumns() const { return sample_block.getColumns(); }
|
||||
|
||||
const IColumn * getAsofColumn() const
|
||||
{
|
||||
if (!asof_column_position)
|
||||
return nullptr;
|
||||
return cursor.all_columns[*asof_column_position];
|
||||
}
|
||||
|
||||
String dump() const;
|
||||
|
||||
private:
|
||||
Block sample_block;
|
||||
SortDescription desc;
|
||||
|
||||
Chunk current_chunk;
|
||||
bool recieved_all_blocks = false;
|
||||
|
||||
std::optional<size_t> asof_column_position;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -223,22 +229,33 @@ private:
|
||||
class MergeJoinAlgorithm final : public IMergingAlgorithm
|
||||
{
|
||||
public:
|
||||
explicit MergeJoinAlgorithm(JoinPtr table_join, const Blocks & input_headers, size_t max_block_size_);
|
||||
MergeJoinAlgorithm(JoinKind kind_,
|
||||
JoinStrictness strictness_,
|
||||
const TableJoin::JoinOnClause & on_clause_,
|
||||
const Blocks & input_headers,
|
||||
size_t max_block_size_);
|
||||
|
||||
MergeJoinAlgorithm(JoinPtr join_ptr, const Blocks & input_headers, size_t max_block_size_);
|
||||
|
||||
const char * getName() const override { return "MergeJoinAlgorithm"; }
|
||||
void initialize(Inputs inputs) override;
|
||||
void consume(Input & input, size_t source_num) override;
|
||||
Status merge() override;
|
||||
|
||||
void logElapsed(double seconds);
|
||||
void setAsofInequality(ASOFJoinInequality asof_inequality_);
|
||||
|
||||
void logElapsed(double seconds);
|
||||
private:
|
||||
std::optional<Status> handleAnyJoinState();
|
||||
Status anyJoin(JoinKind kind);
|
||||
Status anyJoin();
|
||||
|
||||
std::optional<Status> handleAllJoinState();
|
||||
Status allJoin(JoinKind kind);
|
||||
Status allJoin();
|
||||
|
||||
std::optional<Status> handleAsofJoinState();
|
||||
Status asofJoin();
|
||||
|
||||
MutableColumns getEmptyResultColumns() const;
|
||||
Chunk createBlockWithDefaults(size_t source_num);
|
||||
Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const;
|
||||
|
||||
@ -246,12 +263,15 @@ private:
|
||||
std::unordered_map<size_t, size_t> left_to_right_key_remap;
|
||||
|
||||
std::array<FullMergeJoinCursorPtr, 2> cursors;
|
||||
ASOFJoinInequality asof_inequality = ASOFJoinInequality::None;
|
||||
|
||||
/// Keep some state to make connection between data in different blocks
|
||||
/// Keep some state to handle data from different blocks
|
||||
AnyJoinState any_join_state;
|
||||
std::unique_ptr<AllJoinState> all_join_state;
|
||||
AsofJoinState asof_join_state;
|
||||
|
||||
JoinPtr table_join;
|
||||
JoinKind kind;
|
||||
JoinStrictness strictness;
|
||||
|
||||
size_t max_block_size;
|
||||
int null_direction_hint = 1;
|
||||
@ -281,12 +301,21 @@ public:
|
||||
size_t max_block_size,
|
||||
UInt64 limit_hint = 0);
|
||||
|
||||
MergeJoinTransform(
|
||||
JoinKind kind_,
|
||||
JoinStrictness strictness_,
|
||||
const TableJoin::JoinOnClause & on_clause_,
|
||||
const Blocks & input_headers,
|
||||
const Block & output_header,
|
||||
size_t max_block_size,
|
||||
UInt64 limit_hint_ = 0);
|
||||
|
||||
String getName() const override { return "MergeJoinTransform"; }
|
||||
|
||||
void setAsofInequality(ASOFJoinInequality asof_inequality_) { algorithm.setAsofInequality(asof_inequality_); }
|
||||
|
||||
protected:
|
||||
void onFinish() override;
|
||||
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
}
|
||||
|
src/Processors/tests/gtest_full_sorting_join.cpp (new file, 768 lines)
@ -0,0 +1,768 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <pcg_random.hpp>
|
||||
#include <random>
|
||||
#include <Poco/ConsoleChannel.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/AutoPtr.h>
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/getRandomASCIIString.h>
|
||||
#include <Common/randomSeed.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <Interpreters/TableJoin.h>
|
||||
|
||||
#include <Processors/Executors/PipelineExecutor.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
#include <Processors/Sinks/NullSink.h>
|
||||
#include <Processors/Sources/SourceFromChunks.h>
|
||||
#include <Processors/Sources/SourceFromSingleChunk.h>
|
||||
#include <Processors/Transforms/MergeJoinTransform.h>
|
||||
|
||||
#include <Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h>
|
||||
#include <Processors/Executors/CompletedPipelineExecutor.h>
|
||||
|
||||
|
||||
#include <QueryPipeline/QueryPipeline.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
QueryPipeline buildJoinPipeline(
|
||||
std::shared_ptr<ISource> left_source,
|
||||
std::shared_ptr<ISource> right_source,
|
||||
size_t key_length = 1,
|
||||
JoinKind kind = JoinKind::Inner,
|
||||
JoinStrictness strictness = JoinStrictness::All,
|
||||
ASOFJoinInequality asof_inequality = ASOFJoinInequality::None)
|
||||
{
|
||||
Blocks inputs;
|
||||
inputs.emplace_back(left_source->getPort().getHeader());
|
||||
inputs.emplace_back(right_source->getPort().getHeader());
|
||||
|
||||
Block out_header;
|
||||
for (const auto & input : inputs)
|
||||
{
|
||||
for (ColumnWithTypeAndName column : input)
|
||||
{
|
||||
if (&input == &inputs.front())
|
||||
column.name = "t1." + column.name;
|
||||
else
|
||||
column.name = "t2." + column.name;
|
||||
out_header.insert(column);
|
||||
}
|
||||
}
|
||||
|
||||
TableJoin::JoinOnClause on_clause;
|
||||
for (size_t i = 0; i < key_length; ++i)
|
||||
{
|
||||
on_clause.key_names_left.emplace_back(inputs[0].getByPosition(i).name);
|
||||
on_clause.key_names_right.emplace_back(inputs[1].getByPosition(i).name);
|
||||
}
|
||||
|
||||
auto joining = std::make_shared<MergeJoinTransform>(
|
||||
kind,
|
||||
strictness,
|
||||
on_clause,
|
||||
inputs, out_header, /* max_block_size = */ 0);
|
||||
|
||||
if (asof_inequality != ASOFJoinInequality::None)
|
||||
joining->setAsofInequality(asof_inequality);
|
||||
|
||||
chassert(joining->getInputs().size() == 2);
|
||||
|
||||
connect(left_source->getPort(), joining->getInputs().front());
|
||||
connect(right_source->getPort(), joining->getInputs().back());
|
||||
|
||||
auto * output_port = &joining->getOutputPort();
|
||||
|
||||
auto processors = std::make_shared<Processors>();
|
||||
processors->emplace_back(std::move(left_source));
|
||||
processors->emplace_back(std::move(right_source));
|
||||
processors->emplace_back(std::move(joining));
|
||||
|
||||
QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port);
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
|
||||
std::shared_ptr<ISource> oneColumnSource(const std::vector<std::vector<UInt64>> & values)
|
||||
{
|
||||
Block header = {
|
||||
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "key"),
|
||||
ColumnWithTypeAndName(std::make_shared<DataTypeUInt64>(), "idx"),
|
||||
};
|
||||
|
||||
UInt64 idx = 0;
|
||||
Chunks chunks;
|
||||
for (const auto & chunk_values : values)
|
||||
{
|
||||
auto key_column = ColumnUInt64::create();
|
||||
auto idx_column = ColumnUInt64::create();
|
||||
|
||||
for (auto n : chunk_values)
|
||||
{
|
||||
key_column->insertValue(n);
|
||||
idx_column->insertValue(idx);
|
||||
++idx;
|
||||
}
|
||||
chunks.emplace_back(Chunk(Columns{std::move(key_column), std::move(idx_column)}, chunk_values.size()));
|
||||
}
|
||||
return std::make_shared<SourceFromChunks>(header, std::move(chunks));
|
||||
}
|
||||
|
||||
class SourceChunksBuilder
|
||||
{
|
||||
public:
|
||||
|
||||
explicit SourceChunksBuilder(const Block & header_)
|
||||
: header(header_)
|
||||
{
|
||||
current_chunk = header.cloneEmptyColumns();
|
||||
chassert(!current_chunk.empty());
|
||||
}
|
||||
|
||||
void setBreakProbability(pcg64 & rng_)
|
||||
{
|
||||
/// random probability with possibility to have exact 0.0 and 1.0 values
|
||||
break_prob = std::uniform_int_distribution<size_t>(0, 5)(rng_) / static_cast<double>(5);
|
||||
rng = &rng_;
|
||||
}
|
||||
|
||||
void addRow(const std::vector<Field> & row)
|
||||
{
|
||||
chassert(row.size() == current_chunk.size());
|
||||
for (size_t i = 0; i < current_chunk.size(); ++i)
|
||||
current_chunk[i]->insert(row[i]);
|
||||
|
||||
if (rng && std::uniform_real_distribution<>(0.0, 1.0)(*rng) < break_prob)
|
||||
addChunk();
|
||||
}
|
||||
|
||||
void addChunk()
|
||||
{
|
||||
if (current_chunk.front()->empty())
|
||||
return;
|
||||
|
||||
size_t rows = current_chunk.front()->size();
|
||||
chunks.emplace_back(std::move(current_chunk), rows);
|
||||
current_chunk = header.cloneEmptyColumns();
|
||||
}
|
||||
|
||||
std::shared_ptr<ISource> getSource()
|
||||
{
|
||||
addChunk();
|
||||
|
||||
/// copy chunk to allow reusing same builder
|
||||
Chunks chunks_copy;
|
||||
chunks_copy.reserve(chunks.size());
|
||||
for (const auto & chunk : chunks)
|
||||
chunks_copy.emplace_back(chunk.clone());
|
||||
return std::make_shared<SourceFromChunks>(header, std::move(chunks_copy));
|
||||
}
|
||||
|
||||
private:
|
||||
Block header;
|
||||
Chunks chunks;
|
||||
MutableColumns current_chunk;
|
||||
|
||||
pcg64 * rng = nullptr;
|
||||
double break_prob = 0.0;
|
||||
};
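For orientation, a minimal sketch of how this test helper is meant to be driven. The column layout and values below are made up for illustration; only the members shown above (`addRow`, `addChunk`, `setBreakProbability`, `getSource`) and the same includes as the surrounding test file are assumed:

```cpp
/// Describe the source header once; rows are appended as vectors of Fields.
SourceChunksBuilder builder({
    {std::make_shared<DataTypeUInt64>(), "key"},
    {std::make_shared<DataTypeString>(), "attr"},
});

pcg64 rng(randomSeed());
builder.setBreakProbability(rng);   /// chunk boundaries are inserted at random rows

builder.addRow({UInt64(1), std::string("a")});
builder.addRow({UInt64(2), std::string("b")});
builder.addRow({UInt64(3), std::string("c")});

/// getSource() flushes the pending rows and clones the accumulated chunks,
/// so the same builder can hand out several identical sources for one test.
std::shared_ptr<ISource> source = builder.getSource();
```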
|
||||
|
||||
|
||||
std::vector<std::vector<Field>> getValuesFromBlock(const Block & block, const Names & names)
|
||||
{
|
||||
std::vector<std::vector<Field>> result;
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
auto & row = result.emplace_back();
|
||||
for (const auto & name : names)
|
||||
block.getByName(name).column->get(i, row.emplace_back());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
Block executePipeline(QueryPipeline && pipeline)
|
||||
{
|
||||
PullingPipelineExecutor executor(pipeline);
|
||||
|
||||
Blocks result_blocks;
|
||||
while (true)
|
||||
{
|
||||
Block block;
|
||||
bool is_ok = executor.pull(block);
|
||||
if (!is_ok)
|
||||
break;
|
||||
result_blocks.emplace_back(std::move(block));
|
||||
}
|
||||
|
||||
return concatenateBlocks(result_blocks);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected, const Block & block, const std::string & name)
|
||||
{
|
||||
const auto * actual = typeid_cast<const ColumnVector<T> *>(block.getByName(name).column.get());
|
||||
ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector<T>).name();
|
||||
|
||||
auto get_first_diff = [&]() -> String
|
||||
{
|
||||
const auto & actual_data = actual->getData();
|
||||
size_t num_rows = std::min(expected.size(), actual_data.size());
|
||||
for (size_t i = 0; i < num_rows; ++i)
|
||||
{
|
||||
if (expected[i] != actual_data[i])
|
||||
return fmt::format(", expected: {}, actual: {} at row {}", expected[i], actual_data[i], i);
|
||||
}
|
||||
return "";
|
||||
};
|
||||
|
||||
EXPECT_EQ(actual->getData().size(), expected.size());
|
||||
ASSERT_EQ(actual->getData(), expected) << "column name: " << name << get_first_diff();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name)
|
||||
{
|
||||
const ColumnPtr & actual = block.getByName(name).column;
|
||||
ASSERT_TRUE(checkColumn<T>(*actual));
|
||||
ASSERT_TRUE(checkColumn<T>(expected));
|
||||
EXPECT_EQ(actual->size(), expected.size());
|
||||
|
||||
auto dump_val = [](const IColumn & col, size_t i) -> String
|
||||
{
|
||||
Field value;
|
||||
col.get(i, value);
|
||||
return value.dump();
|
||||
};
|
||||
|
||||
size_t num_rows = std::min(actual->size(), expected.size());
|
||||
for (size_t i = 0; i < num_rows; ++i)
|
||||
ASSERT_EQ(actual->compareAt(i, i, expected, 1), 0) << dump_val(*actual, i) << " != " << dump_val(expected, i) << " at row " << i;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T getRandomFrom(pcg64 & rng, const std::initializer_list<T> & opts)
|
||||
{
|
||||
std::vector<T> options(opts.begin(), opts.end());
|
||||
size_t idx = std::uniform_int_distribution<size_t>(0, options.size() - 1)(rng);
|
||||
return options[idx];
|
||||
}
|
||||
|
||||
void generateNextKey(pcg64 & rng, UInt64 & k1, String & k2)
|
||||
{
|
||||
size_t str_len = std::uniform_int_distribution<>(1, 10)(rng);
|
||||
String new_k2 = getRandomASCIIString(str_len, rng);
|
||||
if (new_k2.compare(k2) <= 0)
|
||||
++k1;
|
||||
k2 = new_k2;
|
||||
}
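`generateNextKey` always produces a pair that sorts strictly after the previous one: when the fresh random string does not compare greater than the old `k2`, it bumps `k1` instead, so the generated stream of `(k1, k2)` keys is valid sorted input for the merge join. A small sketch of that invariant, written as an illustrative check that would live next to the helpers in this test file (it is not part of the commit):

```cpp
#include <tuple>   /// for std::tie; the rest comes from the includes above

pcg64 rng(42);
UInt64 k1 = 1;
String k2;

UInt64 prev_k1 = k1;
String prev_k2 = k2;
for (size_t i = 0; i < 1000; ++i)
{
    generateNextKey(rng, k1, k2);
    /// The pair must grow strictly in (k1, k2) lexicographic order.
    chassert(std::tie(prev_k1, prev_k2) < std::tie(k1, k2));
    prev_k1 = k1;
    prev_k2 = k2;
}
```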
|
||||
|
||||
bool isStrict(ASOFJoinInequality inequality)
|
||||
{
|
||||
return inequality == ASOFJoinInequality::Less || inequality == ASOFJoinInequality::Greater;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class FullSortingJoinTest : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
FullSortingJoinTest() = default;
|
||||
|
||||
void SetUp() override
|
||||
{
|
||||
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
|
||||
Poco::Logger::root().setChannel(channel);
|
||||
if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe)
|
||||
Poco::Logger::root().setLevel(test_log_level);
|
||||
else
|
||||
Poco::Logger::root().setLevel("none");
|
||||
|
||||
|
||||
UInt64 seed = randomSeed();
|
||||
if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe)
|
||||
seed = std::stoull(random_seed);
|
||||
std::cout << "TEST_RANDOM_SEED=" << seed << std::endl;
|
||||
rng = pcg64(seed);
|
||||
}
|
||||
|
||||
void TearDown() override
|
||||
{
|
||||
}
|
||||
|
||||
pcg64 rng;
|
||||
};
|
||||
|
||||
TEST_F(FullSortingJoinTest, AllAnyOneKey)
|
||||
try
|
||||
{
|
||||
{
|
||||
SCOPED_TRACE("Inner All");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {1, 2, 3, 4, 5} }),
|
||||
oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
|
||||
1, JoinKind::Inner, JoinStrictness::All));
|
||||
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Inner Any");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {1, 2, 3, 4, 5} }),
|
||||
oneColumnSource({ {1}, {2}, {3}, {4}, {5} }),
|
||||
1, JoinKind::Inner, JoinStrictness::Any));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Inner All");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
|
||||
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
|
||||
1, JoinKind::Inner, JoinStrictness::All));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 0, 1, 2, 3, 3, 4, 5}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 4, 4, 4, 3, 4, 5, 5}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Inner Any");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
|
||||
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
|
||||
1, JoinKind::Inner, JoinStrictness::Any));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Inner Any");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
|
||||
oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
|
||||
1, JoinKind::Inner, JoinStrictness::Any));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 4}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 5}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
|
||||
SCOPED_TRACE("Left Any");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }),
|
||||
oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }),
|
||||
1, JoinKind::Left, JoinStrictness::Any));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("Left Any");
|
||||
Block result = executePipeline(buildJoinPipeline(
|
||||
oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }),
|
||||
oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }),
|
||||
1, JoinKind::Left, JoinStrictness::Any));
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx");
|
||||
assertColumnVectorEq<UInt64>(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx");
|
||||
}
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
|
||||
TEST_F(FullSortingJoinTest, AnySimple)
|
||||
try
|
||||
{
|
||||
JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right});
|
||||
|
||||
SourceChunksBuilder left_source({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeString>(), "attr"},
|
||||
});
|
||||
|
||||
SourceChunksBuilder right_source({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeString>(), "attr"},
|
||||
});
|
||||
|
||||
left_source.setBreakProbability(rng);
|
||||
right_source.setBreakProbability(rng);
|
||||
|
||||
size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng);
|
||||
|
||||
auto expected_left = ColumnString::create();
|
||||
auto expected_right = ColumnString::create();
|
||||
|
||||
UInt64 k1 = 1;
|
||||
String k2;
|
||||
|
||||
auto get_attr = [&](const String & side, size_t idx) -> String
|
||||
{
|
||||
return toString(k1) + "_" + k2 + "_" + side + "_" + toString(idx);
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < num_keys; ++i)
|
||||
{
|
||||
generateNextKey(rng, k1, k2);
|
||||
|
||||
/// Key is present in the left table, the right table, or both. Presence in both tables is the most probable.
|
||||
size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng);
|
||||
|
||||
size_t num_rows_left = key_presence == 0 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
|
||||
for (size_t j = 0; j < num_rows_left; ++j)
|
||||
left_source.addRow({k1, k2, get_attr("left", j)});
|
||||
|
||||
size_t num_rows_right = key_presence == 1 ? 0 : std::uniform_int_distribution<>(1, 10)(rng);
|
||||
for (size_t j = 0; j < num_rows_right; ++j)
|
||||
right_source.addRow({k1, k2, get_attr("right", j)});
|
||||
|
||||
String left_attr = num_rows_left ? get_attr("left", 0) : "";
|
||||
String right_attr = num_rows_right ? get_attr("right", 0) : "";
|
||||
|
||||
if (kind == JoinKind::Inner && num_rows_left && num_rows_right)
|
||||
{
|
||||
expected_left->insert(left_attr);
|
||||
expected_right->insert(right_attr);
|
||||
}
|
||||
else if (kind == JoinKind::Left)
|
||||
{
|
||||
for (size_t j = 0; j < num_rows_left; ++j)
|
||||
{
|
||||
expected_left->insert(get_attr("left", j));
|
||||
expected_right->insert(right_attr);
|
||||
}
|
||||
}
|
||||
else if (kind == JoinKind::Right)
|
||||
{
|
||||
for (size_t j = 0; j < num_rows_right; ++j)
|
||||
{
|
||||
expected_left->insert(left_attr);
|
||||
expected_right->insert(get_attr("right", j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Block result_block = executePipeline(buildJoinPipeline(
|
||||
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
|
||||
kind, JoinStrictness::Any));
|
||||
assertColumnEq<ColumnString>(*expected_left, result_block, "t1.attr");
|
||||
assertColumnEq<ColumnString>(*expected_right, result_block, "t2.attr");
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
TEST_F(FullSortingJoinTest, AsofSimple)
|
||||
try
|
||||
{
|
||||
SourceChunksBuilder left_source({
|
||||
{std::make_shared<DataTypeString>(), "key"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
});
|
||||
left_source.addRow({"AMZN", 3});
|
||||
left_source.addRow({"AMZN", 4});
|
||||
left_source.addRow({"AMZN", 6});
|
||||
left_source.addRow({"SBUX", 10});
|
||||
|
||||
SourceChunksBuilder right_source({
|
||||
{std::make_shared<DataTypeString>(), "key"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeUInt64>(), "value"},
|
||||
});
|
||||
right_source.addRow({"AAPL", 1, 97});
|
||||
right_source.addChunk();
|
||||
right_source.addRow({"AAPL", 2, 98});
|
||||
right_source.addRow({"AAPL", 3, 99});
|
||||
right_source.addRow({"AMZN", 1, 100});
|
||||
right_source.addRow({"AMZN", 2, 110});
|
||||
right_source.addChunk();
|
||||
right_source.addRow({"AMZN", 2, 110});
|
||||
right_source.addChunk();
|
||||
right_source.addRow({"AMZN", 4, 130});
|
||||
right_source.addRow({"AMZN", 5, 140});
|
||||
right_source.addRow({"SBUX", 8, 180});
|
||||
right_source.addChunk();
|
||||
right_source.addRow({"SBUX", 9, 190});
|
||||
|
||||
{
|
||||
Block result_block = executePipeline(buildJoinPipeline(
|
||||
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
|
||||
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals));
|
||||
auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});
|
||||
|
||||
ASSERT_EQ(values, (std::vector<std::vector<Field>>{
|
||||
{"AMZN", 3u, 4u, 130u},
|
||||
{"AMZN", 4u, 4u, 130u},
|
||||
}));
|
||||
}
|
||||
|
||||
{
|
||||
Block result_block = executePipeline(buildJoinPipeline(
|
||||
left_source.getSource(), right_source.getSource(), /* key_length = */ 2,
|
||||
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals));
|
||||
auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"});
|
||||
|
||||
ASSERT_EQ(values, (std::vector<std::vector<Field>>{
|
||||
{"AMZN", 3u, 2u, 110u},
|
||||
{"AMZN", 4u, 4u, 130u},
|
||||
{"AMZN", 6u, 5u, 140u},
|
||||
{"SBUX", 10u, 9u, 190u},
|
||||
}));
|
||||
}
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
|
||||
TEST_F(FullSortingJoinTest, AsofOnlyColumn)
|
||||
try
|
||||
{
|
||||
auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} });
|
||||
|
||||
SourceChunksBuilder right_source_builder({
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeUInt64>(), "value"},
|
||||
});
|
||||
|
||||
right_source_builder.setBreakProbability(rng);
|
||||
|
||||
for (const auto & row : std::vector<std::vector<Field>>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} })
|
||||
right_source_builder.addRow(row);
|
||||
|
||||
auto right_source = right_source_builder.getSource();
|
||||
|
||||
auto pipeline = buildJoinPipeline(
|
||||
left_source, right_source, /* key_length = */ 1,
|
||||
JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals);
|
||||
|
||||
Block result_block = executePipeline(std::move(pipeline));
|
||||
|
||||
ASSERT_EQ(
|
||||
assert_cast<const ColumnUInt64 *>(result_block.getByName("t1.key").column.get())->getData(),
|
||||
(ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10})
|
||||
);
|
||||
|
||||
ASSERT_EQ(
|
||||
assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.t").column.get())->getData(),
|
||||
(ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 11})
|
||||
);
|
||||
|
||||
ASSERT_EQ(
|
||||
assert_cast<const ColumnUInt64 *>(result_block.getByName("t2.value").column.get())->getData(),
|
||||
(ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 111})
|
||||
);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData)
|
||||
try
|
||||
{
|
||||
/// Generate random data and build the expected result at the same time.
|
||||
|
||||
/// Test a specific combination of join kind and inequality on each run
|
||||
auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
|
||||
auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals });
|
||||
|
||||
SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));
|
||||
|
||||
/// The key is composite: `k1, k2` for equality and `t` for the ASOF condition
|
||||
SourceChunksBuilder left_source_builder({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeInt64>(), "attr"},
|
||||
});
|
||||
|
||||
SourceChunksBuilder right_source_builder({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeInt64>(), "attr"},
|
||||
});
|
||||
|
||||
/// Controls how small the generated blocks should be
|
||||
left_source_builder.setBreakProbability(rng);
|
||||
right_source_builder.setBreakProbability(rng);
|
||||
|
||||
/// We are going to generate sorted data and remember the expected result
|
||||
ColumnInt64::Container expected;
|
||||
|
||||
UInt64 k1 = 1;
|
||||
String k2;
|
||||
auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
|
||||
for (size_t key_num = 0; key_num < key_num_total; ++key_num)
|
||||
{
|
||||
/// Generate a new key greater than the previous one
|
||||
generateNextKey(rng, k1, k2);
|
||||
|
||||
Int64 left_t = 0;
|
||||
/// Generate several rows for the key
|
||||
size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng);
|
||||
for (size_t i = 0; i < num_left_rows; ++i)
|
||||
{
|
||||
/// t is strictly greater than the previous value
|
||||
left_t += std::uniform_int_distribution<>(1, 10)(rng);
|
||||
|
||||
auto left_attribute_value = 10 * left_t;
|
||||
left_source_builder.addRow({k1, k2, left_t, left_attribute_value});
|
||||
expected.push_back(left_attribute_value);
|
||||
|
||||
auto num_matches = 1 + std::poisson_distribution<>(4)(rng);
|
||||
/// Generate several matches in the right table
|
||||
auto right_t = left_t;
|
||||
for (size_t j = 0; j < num_matches; ++j)
|
||||
{
|
||||
int min_step = isStrict(asof_inequality) ? 1 : 0;
|
||||
right_t += std::uniform_int_distribution<>(min_step, 3)(rng);
|
||||
|
||||
/// First row should match
|
||||
bool is_match = j == 0;
|
||||
right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * left_attribute_value : -1});
|
||||
}
|
||||
/// The next left_t should be greater than right_t so that it does not match the previous rows
|
||||
left_t = right_t;
|
||||
}
|
||||
|
||||
/// Generate some rows with greater left_t to check that they are not matched
|
||||
num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
|
||||
for (size_t i = 0; i < num_left_rows; ++i)
|
||||
{
|
||||
left_t += std::uniform_int_distribution<>(1, 10)(rng);
|
||||
left_source_builder.addRow({k1, k2, left_t, -10 * left_t});
|
||||
|
||||
if (join_kind == JoinKind::Left)
|
||||
expected.push_back(-10 * left_t);
|
||||
}
|
||||
}
|
||||
|
||||
Block result_block = executePipeline(buildJoinPipeline(
|
||||
left_source_builder.getSource(), right_source_builder.getSource(),
|
||||
/* key_length = */ 3,
|
||||
join_kind, JoinStrictness::Asof, asof_inequality));
|
||||
|
||||
assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");
|
||||
|
||||
for (auto & e : expected)
|
||||
/// Non-matched rows from the left table have a negative attr
|
||||
/// The value of the attribute in the right table is 10 times greater than in the left table
|
||||
e = e < 0 ? 0 : 10 * e;
|
||||
|
||||
assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData)
|
||||
try
|
||||
{
|
||||
/// Generate random data and build the expected result at the same time.
|
||||
|
||||
/// Test a specific combination of join kind and inequality on each run
|
||||
auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left });
|
||||
auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals });
|
||||
|
||||
SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality));
|
||||
|
||||
SourceChunksBuilder left_source_builder({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeInt64>(), "attr"},
|
||||
});
|
||||
|
||||
SourceChunksBuilder right_source_builder({
|
||||
{std::make_shared<DataTypeUInt64>(), "k1"},
|
||||
{std::make_shared<DataTypeString>(), "k2"},
|
||||
{std::make_shared<DataTypeUInt64>(), "t"},
|
||||
{std::make_shared<DataTypeInt64>(), "attr"},
|
||||
});
|
||||
|
||||
left_source_builder.setBreakProbability(rng);
|
||||
right_source_builder.setBreakProbability(rng);
|
||||
|
||||
ColumnInt64::Container expected;
|
||||
|
||||
UInt64 k1 = 1;
|
||||
String k2;
|
||||
UInt64 left_t = 0;
|
||||
|
||||
auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng);
|
||||
for (size_t key_num = 0; key_num < key_num_total; ++key_num)
|
||||
{
|
||||
/// Generate a new key greater than the previous one
|
||||
generateNextKey(rng, k1, k2);
|
||||
|
||||
/// Generate some rows with smaller left_t to check that they are not matched
|
||||
size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0;
|
||||
for (size_t i = 0; i < num_left_rows; ++i)
|
||||
{
|
||||
left_t += std::uniform_int_distribution<>(1, 10)(rng);
|
||||
left_source_builder.addRow({k1, k2, left_t, -10 * left_t});
|
||||
|
||||
if (join_kind == JoinKind::Left)
|
||||
expected.push_back(-10 * left_t);
|
||||
}
|
||||
|
||||
if (std::bernoulli_distribution(0.1)(rng))
|
||||
continue;
|
||||
|
||||
size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng);
|
||||
auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng);
|
||||
auto attribute_value = 10 * right_t;
|
||||
for (size_t j = 0; j < num_right_matches; ++j)
|
||||
{
|
||||
right_t += std::uniform_int_distribution<>(0, 3)(rng);
|
||||
bool is_match = j == num_right_matches - 1;
|
||||
right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * attribute_value : -1});
|
||||
}
|
||||
|
||||
/// The next left_t should be greater than (or equal to) right_t to match the previous rows
|
||||
left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng);
|
||||
size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng);
|
||||
for (size_t j = 0; j < num_left_matches; ++j)
|
||||
{
|
||||
left_t += std::uniform_int_distribution<>(0, 3)(rng);
|
||||
left_source_builder.addRow({k1, k2, left_t, attribute_value});
|
||||
expected.push_back(attribute_value);
|
||||
}
|
||||
}
|
||||
|
||||
Block result_block = executePipeline(buildJoinPipeline(
|
||||
left_source_builder.getSource(), right_source_builder.getSource(),
|
||||
/* key_length = */ 3,
|
||||
join_kind, JoinStrictness::Asof, asof_inequality));
|
||||
|
||||
assertColumnVectorEq<Int64>(expected, result_block, "t1.attr");
|
||||
|
||||
for (auto & e : expected)
|
||||
/// Non-matched rows from the left table have a negative attr
|
||||
/// The value of the attribute in the right table is 10 times greater than in the left table
|
||||
e = e < 0 ? 0 : 10 * e;
|
||||
|
||||
assertColumnVectorEq<Int64>(expected, result_block, "t2.attr");
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
std::cout << e.getStackTraceString() << std::endl;
|
||||
throw;
|
||||
}
|
@ -1,13 +1,36 @@
|
||||
-- { echoOn }
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
1 104 4 4 104 1
|
||||
1 105 5 4 104 1
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
1 104 4 4 104 1
|
||||
1 105 5 4 104 1
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
1 104 4 4 104 1
|
||||
1 105 5 4 104 1
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
1 104 4 4 104 1
|
||||
1 105 5 4 104 1
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
1 104 4 4 104 1
|
||||
1 105 5 4 104 1
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
1 101 1 0 0 0
|
||||
1 102 2 2 102 1
|
||||
1 103 3 2 102 1
|
||||
|
@ -4,20 +4,29 @@ DROP TABLE IF EXISTS B;
|
||||
CREATE TABLE A(k UInt32, t UInt32, a UInt64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO A(k,t,a) VALUES (1,101,1),(1,102,2),(1,103,3),(1,104,4),(1,105,5);
|
||||
|
||||
CREATE TABLE B(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
DROP TABLE B;
|
||||
CREATE TABLE B1(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B1(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
|
||||
CREATE TABLE B2(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B2(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
|
||||
CREATE TABLE B(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
DROP TABLE B;
|
||||
CREATE TABLE B3(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B3(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
|
||||
CREATE TABLE B(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
DROP TABLE B;
|
||||
-- { echoOn }
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t);
|
||||
|
||||
-- { echoOff }
|
||||
|
||||
DROP TABLE B1;
|
||||
DROP TABLE B2;
|
||||
DROP TABLE B3;
|
||||
|
||||
DROP TABLE A;
|
||||
|
@ -1 +1,2 @@
|
||||
3000000
|
||||
3000000
|
||||
|
@ -2,15 +2,28 @@
|
||||
|
||||
DROP TABLE IF EXISTS tvs;
|
||||
|
||||
-- to use different algorithms for the subqueries
|
||||
SET allow_experimental_analyzer = 1;
|
||||
|
||||
CREATE TABLE tvs(k UInt32, t UInt32, tv UInt64) ENGINE = Memory;
|
||||
INSERT INTO tvs(k,t,tv) SELECT k, t, t
|
||||
FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
|
||||
CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times;
|
||||
CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times
|
||||
SETTINGS join_algorithm = 'hash';
|
||||
|
||||
SELECT SUM(trades.price - tvs.tv) FROM
|
||||
(SELECT k, t, t as price
|
||||
FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
|
||||
CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times) trades
|
||||
CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times
|
||||
SETTINGS join_algorithm = 'hash') trades
|
||||
ASOF LEFT JOIN tvs USING(k,t);
|
||||
|
||||
SELECT SUM(trades.price - tvs.tv) FROM
|
||||
(SELECT k, t, t as price
|
||||
FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys
|
||||
CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times
|
||||
SETTINGS join_algorithm = 'hash') trades
|
||||
ASOF LEFT JOIN tvs USING(k,t)
|
||||
SETTINGS join_algorithm = 'full_sorting_merge';
|
||||
|
||||
DROP TABLE tvs;
|
||||
|
@ -27,3 +27,32 @@
|
||||
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
|
||||
1 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
|
||||
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
|
||||
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
|
||||
2 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
|
||||
2 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0
|
||||
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
|
||||
3 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0
|
||||
3 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0
|
||||
3 1970-01-01 00:00:03 3 0 1970-01-01 00:00:00 0
|
||||
3 1970-01-01 00:00:04 4 0 1970-01-01 00:00:00 0
|
||||
3 1970-01-01 00:00:05 5 0 1970-01-01 00:00:00 0
|
||||
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
|
||||
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
|
||||
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
|
||||
1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1
|
||||
1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1
|
||||
1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1
|
||||
2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2
|
||||
2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2
|
||||
|
@ -11,9 +11,12 @@ INSERT INTO B(k,t,b) VALUES (1,2,2),(1,4,4);
|
||||
INSERT INTO B(k,t,b) VALUES (2,3,3);
|
||||
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t);
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t);
|
||||
SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t);
|
||||
|
||||
DROP TABLE A;
|
||||
|
@ -1,27 +1,72 @@
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1970-01-01 02:00:01 1 0
|
||||
2 1970-01-01 02:00:03 3 3
|
||||
2 1970-01-01 02:00:05 5 3
|
||||
-
|
||||
2 1970-01-01 02:00:01 1 0
|
||||
2 1970-01-01 02:00:03 3 3
|
||||
2 1970-01-01 02:00:05 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1 1 0
|
||||
2 3 3 3
|
||||
2 5 5 3
|
||||
-
|
||||
2 1970-01-01 02:00:00.001 1 0
|
||||
2 1970-01-01 02:00:00.003 3 3
|
||||
2 1970-01-01 02:00:00.005 5 3
|
||||
-
|
||||
2 1970-01-01 02:00:00.001 1 0
|
||||
2 1970-01-01 02:00:00.003 3 3
|
||||
2 1970-01-01 02:00:00.005 5 3
|
||||
|
@ -1,27 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Asia/Istanbul')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Asia/Istanbul')"
|
||||
do
|
||||
$CLICKHOUSE_CLIENT -mn <<EOF
|
||||
DROP TABLE IF EXISTS A;
|
||||
DROP TABLE IF EXISTS B;
|
||||
|
||||
CREATE TABLE A(k UInt32, t ${typename}, a Float64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5);
|
||||
|
||||
CREATE TABLE B(k UInt32, t ${typename}, b Float64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B(k,t,b) VALUES (2,3,3);
|
||||
|
||||
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t);
|
||||
|
||||
DROP TABLE A;
|
||||
DROP TABLE B;
|
||||
EOF
|
||||
|
||||
done
|
27
tests/queries/0_stateless/00927_asof_join_other_types.sql.j2
Normal file
@ -0,0 +1,27 @@
|
||||
|
||||
{% for typename in ["UInt32", "UInt64", "Float64", "Float32", "DateTime('Asia/Istanbul')", "Decimal32(5)", "Decimal64(5)", "Decimal128(5)", "DateTime64(3, 'Asia/Istanbul')"] -%}
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS A;
|
||||
DROP TABLE IF EXISTS B;
|
||||
|
||||
CREATE TABLE A(k UInt32, t {{ typename }}, a Float64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5);
|
||||
|
||||
CREATE TABLE B(k UInt32, t {{ typename }}, b Float64) ENGINE = MergeTree() ORDER BY (k, t);
|
||||
INSERT INTO B(k,t,b) VALUES (2,3,3);
|
||||
|
||||
SELECT '-';
|
||||
|
||||
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t)
|
||||
SETTINGS join_algorithm = 'full_sorting_merge';
|
||||
|
||||
SELECT '-';
|
||||
|
||||
SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t)
|
||||
SETTINGS join_algorithm = 'hash';
|
||||
|
||||
DROP TABLE A;
|
||||
DROP TABLE B;
|
||||
|
||||
{% endfor %}
|
@ -12,3 +12,18 @@
|
||||
2 1970-01-01 00:00:15 5 6.5 6
|
||||
2 1970-01-01 00:00:16 5 5.6 6
|
||||
2 1970-01-01 00:00:20 17 8.5 18
|
||||
-
|
||||
1 1970-01-01 00:00:05 1 1.5 2
|
||||
1 1970-01-01 00:00:06 1 1.51 2
|
||||
1 1970-01-01 00:00:10 11 11.5 12
|
||||
1 1970-01-01 00:00:11 11 11.51 12
|
||||
1 1970-01-01 00:00:15 5 5.5 6
|
||||
1 1970-01-01 00:00:16 5 5.6 6
|
||||
1 1970-01-01 00:00:20 7 7.5 8
|
||||
2 1970-01-01 00:00:05 11 2.5 12
|
||||
2 1970-01-01 00:00:06 11 2.51 12
|
||||
2 1970-01-01 00:00:10 21 12.5 22
|
||||
2 1970-01-01 00:00:11 21 12.51 22
|
||||
2 1970-01-01 00:00:15 5 6.5 6
|
||||
2 1970-01-01 00:00:16 5 5.6 6
|
||||
2 1970-01-01 00:00:20 17 8.5 18
|
||||
|
@ -9,7 +9,13 @@ CREATE TABLE tv(key UInt32, t DateTime, tv Float64) ENGINE = MergeTree() ORDER B
|
||||
INSERT INTO tv(key,t,tv) VALUES (1,5,1.5),(1,6,1.51),(1,10,11.5),(1,11,11.51),(1,15,5.5),(1,16,5.6),(1,20,7.5);
|
||||
INSERT INTO tv(key,t,tv) VALUES (2,5,2.5),(2,6,2.51),(2,10,12.5),(2,11,12.51),(2,15,6.5),(2,16,5.6),(2,20,8.5);
|
||||
|
||||
SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t);
|
||||
SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t)
|
||||
;
|
||||
|
||||
SELECT '-';
|
||||
|
||||
SELECT tv.key, toString(tv.t, 'UTC'), md.bid, tv.tv, md.ask FROM tv ASOF LEFT JOIN md USING(key,t) ORDER BY (tv.key, tv.t)
|
||||
SETTINGS join_algorithm = 'full_sorting_merge';
|
||||
|
||||
DROP TABLE md;
|
||||
DROP TABLE tv;
|
||||
|
@ -1,3 +1,4 @@
|
||||
- default / join_use_nulls = 0 -
|
||||
1 1 0 0
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
@ -34,3 +35,114 @@
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
1 2 1 2
|
||||
- full_sorting_merge / join_use_nulls = 0 -
|
||||
1 1 0 0
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 1 0 0
|
||||
2 2 0 0
|
||||
2 3 2 3
|
||||
3 1 0 0
|
||||
3 2 0 0
|
||||
3 3 0 0
|
||||
9
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 3 1 2
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 4
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
1 2 1 2
|
||||
- default / join_use_nulls = 1 -
|
||||
1 1 \N \N
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 1 \N \N
|
||||
2 2 \N \N
|
||||
2 3 2 3
|
||||
3 1 \N \N
|
||||
3 2 \N \N
|
||||
3 3 \N \N
|
||||
9
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 3 1 2
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 4
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
1 2 1 2
|
||||
- full_sorting_merge / join_use_nulls = 1 -
|
||||
1 1 \N \N
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 1 \N \N
|
||||
2 2 \N \N
|
||||
2 3 2 3
|
||||
3 1 \N \N
|
||||
3 2 \N \N
|
||||
3 3 \N \N
|
||||
9
|
||||
1 2 1 2
|
||||
1 3 1 2
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 2
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
2 3 2 3
|
||||
-
|
||||
1 3 1 2
|
||||
-
|
||||
1 1 1 2
|
||||
1 2 1 4
|
||||
1 3 1 4
|
||||
2 1 2 3
|
||||
2 2 2 3
|
||||
1 2 1 2
|
||||
|
@ -7,6 +7,14 @@ CREATE TABLE B(b UInt32, t UInt32) ENGINE = Memory;
|
||||
INSERT INTO A (a,t) VALUES (1,1),(1,2),(1,3), (2,1),(2,2),(2,3), (3,1),(3,2),(3,3);
|
||||
INSERT INTO B (b,t) VALUES (1,2),(1,4),(2,3);
|
||||
|
||||
{% for join_use_nulls in [0, 1] -%}
|
||||
{% for join_algorithm in ['default', 'full_sorting_merge'] -%}
|
||||
|
||||
SET join_algorithm = '{{ join_algorithm }}';
|
||||
|
||||
SELECT '- {{ join_algorithm }} / join_use_nulls = {{ join_use_nulls }} -';
|
||||
set join_use_nulls = {{ join_use_nulls }};
|
||||
|
||||
SELECT A.a, A.t, B.b, B.t FROM A ASOF LEFT JOIN B ON A.a == B.b AND A.t >= B.t ORDER BY (A.a, A.t);
|
||||
SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t;
|
||||
SELECT A.a, A.t, B.b, B.t FROM A ASOF INNER JOIN B ON B.t <= A.t AND A.a == B.b ORDER BY (A.a, A.t);
|
||||
@ -28,5 +36,8 @@ ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A
|
||||
WHERE B.t != 3 ORDER BY (A.a, A.t)
|
||||
;
|
||||
|
||||
{% endfor -%}
|
||||
{% endfor -%}
|
||||
|
||||
DROP TABLE A;
|
||||
DROP TABLE B;
|
@ -1,3 +1,6 @@
|
||||
v1 o1 ['s2','s1']
|
||||
v1 o2 ['s4']
|
||||
v2 o3 ['s5','s3']
|
||||
v1 o1 ['s2','s1']
|
||||
v1 o2 ['s4']
|
||||
v2 o3 ['s5','s3']
|
||||
|
@ -16,3 +16,17 @@ GROUP BY
|
||||
ORDER BY
|
||||
visitorId ASC,
|
||||
orderId ASC;
|
||||
|
||||
SELECT
|
||||
visitorId,
|
||||
orderId,
|
||||
groupUniqArray(sessionId)
|
||||
FROM sessions
|
||||
ASOF INNER JOIN orders ON (sessions.visitorId = orders.visitorId) AND (sessions.date <= orders.date)
|
||||
GROUP BY
|
||||
visitorId,
|
||||
orderId
|
||||
ORDER BY
|
||||
visitorId ASC,
|
||||
orderId ASC
|
||||
SETTINGS join_algorithm = 'full_sorting_merge';
|
||||
|
@ -2,3 +2,7 @@
|
||||
0 340282366920938463463374607431768211457
|
||||
0 18446744073709551617
|
||||
0 340282366920938463463374607431768211457
|
||||
0 18446744073709551617
|
||||
0 340282366920938463463374607431768211457
|
||||
0 18446744073709551617
|
||||
0 340282366920938463463374607431768211457
|
||||
|
@ -3,3 +3,11 @@ select * from (select 0 as k, toInt256('340282366920938463463374607431768211457'
|
||||
|
||||
select * from (select 0 as k, toUInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toUInt128('18446744073709551616') as v) t2 using(k, v);
|
||||
select * from (select 0 as k, toUInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toUInt256('340282366920938463463374607431768211456') as v) t2 using(k, v);
|
||||
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
|
||||
select * from (select 0 as k, toInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toInt128('18446744073709551616') as v) t2 using(k, v);
|
||||
select * from (select 0 as k, toInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toInt256('340282366920938463463374607431768211456') as v) t2 using(k, v);
|
||||
|
||||
select * from (select 0 as k, toUInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toUInt128('18446744073709551616') as v) t2 using(k, v);
|
||||
select * from (select 0 as k, toUInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toUInt256('340282366920938463463374607431768211456') as v) t2 using(k, v);
|
||||
|
@ -19,8 +19,6 @@ SELECT * FROM t1 ANTI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENT
|
||||
|
||||
SELECT * FROM t1 SEMI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENTED }
|
||||
|
||||
SELECT * FROM t1 ASOF JOIN t2 ON t1.key = t2.key AND t1.val > t2.val; -- { serverError NOT_IMPLEMENTED }
|
||||
|
||||
SELECT * FROM t1 ANY JOIN t2 ON t1.key = t2.key SETTINGS any_join_distinct_right_table_keys = 1; -- { serverError NOT_IMPLEMENTED }
|
||||
|
||||
SELECT * FROM t1 JOIN t2 USING (key) SETTINGS join_use_nulls = 1; -- { serverError NOT_IMPLEMENTED }
|
||||
|
@ -1 +1,2 @@
|
||||
1
|
||||
1
|
||||
|
@ -6,3 +6,15 @@ ASOF LEFT JOIN (
|
||||
select 1 as session_id, 4 as id
|
||||
) as visitors
|
||||
ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id
|
||||
;
|
||||
|
||||
select count(*)
|
||||
from (
|
||||
select 1 as id, [1, 2, 3] as arr
|
||||
) as sessions
|
||||
ASOF LEFT JOIN (
|
||||
select 1 as session_id, 4 as id
|
||||
) as visitors
|
||||
ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id
|
||||
SETTINGS join_algorithm = 'full_sorting_merge'
|
||||
;
|
||||
|
@ -0,0 +1,2 @@
|
||||
49999983751397 10000032
|
||||
49999983751397 10000032
|
50
tests/queries/0_stateless/03143_asof_join_ddb_long.sql
Normal file
@ -0,0 +1,50 @@
|
||||
-- Tags: long
|
||||
|
||||
DROP TABLE IF EXISTS build;
|
||||
DROP TABLE IF EXISTS skewed_probe;
|
||||
|
||||
SET session_timezone = 'UTC';
|
||||
|
||||
CREATE TABLE build ENGINE = MergeTree ORDER BY (key, begin)
|
||||
AS
|
||||
SELECT
|
||||
toDateTime('1990-03-21 13:00:00') + INTERVAL number MINUTE AS begin,
|
||||
number % 4 AS key,
|
||||
number AS value
|
||||
FROM numbers(0, 10000000);
|
||||
|
||||
CREATE TABLE skewed_probe ENGINE = MergeTree ORDER BY (key, begin)
|
||||
AS
|
||||
SELECT
|
||||
toDateTime('1990-04-21 13:00:01') + INTERVAL number MINUTE AS begin,
|
||||
0 AS key
|
||||
FROM numbers(0, 5)
|
||||
UNION ALL
|
||||
SELECT
|
||||
toDateTime('1990-05-21 13:00:01') + INTERVAL number MINUTE AS begin,
|
||||
1 AS key
|
||||
FROM numbers(0, 10)
|
||||
UNION ALL
|
||||
SELECT
|
||||
toDateTime('1990-06-21 13:00:01') + INTERVAL number MINUTE AS begin,
|
||||
2 AS key
|
||||
FROM numbers(0, 20)
|
||||
UNION ALL
|
||||
SELECT
|
||||
toDateTime('1990-03-21 13:00:01') + INTERVAL number MINUTE AS begin,
|
||||
3 AS key
|
||||
FROM numbers(0, 10000000);
|
||||
|
||||
|
||||
SELECT SUM(value), COUNT(*)
|
||||
FROM skewed_probe
|
||||
ASOF JOIN build
|
||||
USING (key, begin)
|
||||
;
|
||||
|
||||
SELECT SUM(value), COUNT(*)
|
||||
FROM skewed_probe
|
||||
ASOF JOIN build
|
||||
USING (key, begin)
|
||||
SETTINGS join_algorithm = 'full_sorting_merge'
|
||||
;
|
@ -0,0 +1,58 @@
|
||||
1 0
|
||||
2 0
|
||||
3 1
|
||||
4 1
|
||||
5 1
|
||||
6 2
|
||||
7 2
|
||||
8 3
|
||||
9 3
|
||||
0 0
|
||||
1 0
|
||||
2 0
|
||||
3 1
|
||||
4 1
|
||||
5 1
|
||||
6 2
|
||||
7 2
|
||||
8 3
|
||||
9 3
|
||||
1 1 0
|
||||
1 2 0
|
||||
1 3 1
|
||||
1 4 1
|
||||
1 5 1
|
||||
1 6 2
|
||||
1 7 2
|
||||
1 8 3
|
||||
1 9 3
|
||||
2 0 10
|
||||
2 1 10
|
||||
2 2 10
|
||||
2 3 10
|
||||
2 4 10
|
||||
2 5 10
|
||||
2 6 10
|
||||
2 7 20
|
||||
2 8 20
|
||||
2 9 20
|
||||
1 0 0
|
||||
1 1 0
|
||||
1 2 0
|
||||
1 3 1
|
||||
1 4 1
|
||||
1 5 1
|
||||
1 6 2
|
||||
1 7 2
|
||||
1 8 3
|
||||
1 9 3
|
||||
2 0 10
|
||||
2 1 10
|
||||
2 2 10
|
||||
2 3 10
|
||||
2 4 10
|
||||
2 5 10
|
||||
2 6 10
|
||||
2 7 20
|
||||
2 8 20
|
||||
2 9 20
|
65
tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql
Normal file
@ -0,0 +1,65 @@
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
SET allow_experimental_analyzer = 1;
|
||||
|
||||
DROP TABLE IF EXISTS events0;
|
||||
|
||||
CREATE TABLE events0 (
|
||||
begin Float64,
|
||||
value Int32
|
||||
) ENGINE = MergeTree ORDER BY begin;
|
||||
|
||||
INSERT INTO events0 VALUES (1.0, 0), (3.0, 1), (6.0, 2), (8.0, 3);
|
||||
|
||||
SELECT p.ts, e.value
|
||||
FROM
|
||||
(SELECT number :: Float64 AS ts FROM numbers(10)) p
|
||||
ASOF JOIN events0 e
|
||||
ON p.ts >= e.begin
|
||||
ORDER BY p.ts ASC;
|
||||
|
||||
SELECT p.ts, e.value
|
||||
FROM
|
||||
(SELECT number :: Float64 AS ts FROM numbers(10)) p
|
||||
ASOF LEFT JOIN events0 e
|
||||
ON p.ts >= e.begin
|
||||
ORDER BY p.ts ASC
|
||||
-- SETTINGS join_use_nulls = 1
|
||||
;
|
||||
|
||||
DROP TABLE IF EXISTS events0;
|
||||
|
||||
DROP TABLE IF EXISTS events;
|
||||
DROP TABLE IF EXISTS probes;
|
||||
|
||||
CREATE TABLE events (
|
||||
key Int32,
|
||||
begin Float64,
|
||||
value Int32
|
||||
) ENGINE = MergeTree ORDER BY (key, begin);
|
||||
|
||||
INSERT INTO events VALUES (1, 1.0, 0), (1, 3.0, 1), (1, 6.0, 2), (1, 8.0, 3), (2, 0.0, 10), (2, 7.0, 20), (2, 11.0, 30);
|
||||
|
||||
CREATE TABLE probes (
|
||||
key Int32,
|
||||
ts Float64
|
||||
) ENGINE = MergeTree ORDER BY (key, ts) AS
|
||||
SELECT
|
||||
key.number,
|
||||
ts.number
|
||||
FROM
|
||||
numbers(1, 2) as key,
|
||||
numbers(10) as ts
|
||||
SETTINGS join_algorithm = 'hash';
|
||||
|
||||
SELECT p.key, p.ts, e.value
|
||||
FROM probes p
|
||||
ASOF JOIN events e
|
||||
ON p.key = e.key AND p.ts >= e.begin
|
||||
ORDER BY p.key, p.ts ASC;
|
||||
|
||||
SELECT p.key, p.ts, e.value
|
||||
FROM probes p
|
||||
ASOF LEFT JOIN events e
|
||||
ON p.key = e.key AND p.ts >= e.begin
|
||||
ORDER BY p.key, p.ts ASC NULLS FIRST;
|
||||
|
@ -0,0 +1,73 @@
|
||||
-
|
||||
2023-03-21 12:00:00 1970-01-01 00:00:00 -1
|
||||
2023-03-21 13:00:00 1970-01-01 00:00:00 -1
|
||||
2023-03-21 14:00:00 2023-03-21 13:00:00 0
|
||||
2023-03-21 15:00:00 2023-03-21 14:00:00 1
|
||||
2023-03-21 16:00:00 2023-03-21 15:00:00 2
|
||||
2023-03-21 17:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 18:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 19:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 20:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 21:00:00 2023-03-21 16:00:00 3
|
||||
2027-10-18 11:03:27 2023-03-21 16:00:00 3
|
||||
-
|
||||
2023-03-21 12:00:00 1970-01-01 00:00:00 -1
|
||||
2023-03-21 13:00:00 1970-01-01 00:00:00 -1
|
||||
2023-03-21 14:00:00 2023-03-21 13:00:00 0
|
||||
2023-03-21 15:00:00 2023-03-21 14:00:00 1
|
||||
2023-03-21 16:00:00 2023-03-21 15:00:00 2
|
||||
2023-03-21 17:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 18:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 19:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 20:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 21:00:00 2023-03-21 16:00:00 3
|
||||
2027-10-18 11:03:27 2023-03-21 16:00:00 3
|
||||
\N \N \N
|
||||
2023-03-21 12:00:00 2023-03-21 13:00:00 0
|
||||
2023-03-21 13:00:00 2023-03-21 13:00:00 0
|
||||
2023-03-21 14:00:00 2023-03-21 14:00:00 1
|
||||
2023-03-21 15:00:00 2023-03-21 15:00:00 2
|
||||
2023-03-21 16:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 17:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 18:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 19:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 20:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 21:00:00 2027-10-18 11:03:27 9
|
||||
2027-10-18 11:03:27 2027-10-18 11:03:27 9
|
||||
-
|
||||
2023-03-21 12:00:00 2023-03-21 13:00:00 0
|
||||
2023-03-21 13:00:00 2023-03-21 13:00:00 0
|
||||
2023-03-21 14:00:00 2023-03-21 14:00:00 1
|
||||
2023-03-21 15:00:00 2023-03-21 15:00:00 2
|
||||
2023-03-21 16:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 17:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 18:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 19:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 20:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 21:00:00 2027-10-18 11:03:27 9
|
||||
2027-10-18 11:03:27 2027-10-18 11:03:27 9
|
||||
\N \N \N
|
||||
-
|
||||
2023-03-21 12:00:00 2023-03-21 13:00:00 0
|
||||
2023-03-21 13:00:00 2023-03-21 14:00:00 1
|
||||
2023-03-21 14:00:00 2023-03-21 15:00:00 2
|
||||
2023-03-21 15:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 16:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 17:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 18:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 19:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 20:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 21:00:00 2027-10-18 11:03:27 9
|
||||
-
|
||||
2023-03-21 12:00:00 2023-03-21 13:00:00 0
|
||||
2023-03-21 13:00:00 2023-03-21 14:00:00 1
|
||||
2023-03-21 14:00:00 2023-03-21 15:00:00 2
|
||||
2023-03-21 15:00:00 2023-03-21 16:00:00 3
|
||||
2023-03-21 16:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 17:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 18:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 19:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 20:00:00 2027-10-18 11:03:27 9
|
||||
2023-03-21 21:00:00 2027-10-18 11:03:27 9
|
||||
2027-10-18 11:03:27 \N \N
|
||||
\N \N \N
|
@ -0,0 +1,66 @@
|
||||
DROP TABLE IF EXISTS events0;
|
||||
DROP TABLE IF EXISTS probe0;
|
||||
|
||||
SET allow_experimental_analyzer = 1;
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
|
||||
CREATE TABLE events0 (
|
||||
begin Nullable(DateTime('UTC')),
|
||||
value Int32
|
||||
) ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
INSERT INTO events0 SELECT toDateTime('2023-03-21 13:00:00', 'UTC') + INTERVAL number HOUR, number FROM numbers(4);
|
||||
INSERT INTO events0 VALUES (NULL, -10),('0000-01-01 00:00:00', -1), ('9999-12-31 23:59:59', 9);
|
||||
|
||||
CREATE TABLE probe0 (
|
||||
begin Nullable(DateTime('UTC'))
|
||||
) ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
INSERT INTO probe0 SELECT toDateTime('2023-03-21 12:00:00', 'UTC') + INTERVAL number HOUR FROM numbers(10);
|
||||
INSERT INTO probe0 VALUES (NULL),('9999-12-31 23:59:59');
|
||||
|
||||
SET join_use_nulls = 1;
|
||||
|
||||
SELECT '-';
|
||||
SELECT p.begin, e.begin, e.value
|
||||
FROM probe0 p
|
||||
ASOF JOIN events0 e
|
||||
ON p.begin > e.begin
|
||||
ORDER BY p.begin ASC;
|
||||
|
||||
SELECT '-';
|
||||
SELECT p.begin, e.begin, e.value
|
||||
FROM probe0 p
|
||||
ASOF LEFT JOIN events0 e
|
||||
ON p.begin > e.begin
|
||||
ORDER BY p.begin ASC;
|
||||
|
||||
SELECT p.begin, e.begin, e.value
|
||||
FROM probe0 p
|
||||
ASOF JOIN events0 e
|
||||
ON p.begin <= e.begin
|
||||
ORDER BY p.begin ASC;
|
||||
|
||||
SELECT '-';
|
||||
SELECT p.begin, e.begin, e.value
|
||||
FROM probe0 p
|
||||
ASOF LEFT JOIN events0 e
|
||||
ON p.begin <= e.begin
|
||||
ORDER BY p.begin ASC;
|
||||
|
||||
SELECT '-';
|
||||
SELECT p.begin, e.begin, e.value
|
||||
FROM probe0 p
|
||||
ASOF JOIN events0 e
|
||||
ON p.begin < e.begin
|
||||
ORDER BY p.begin ASC;
|
||||
|
||||
SELECT '-';
|
||||
SELECT p.begin, e.begin, e.value
|
||||
FROM probe0 p
|
||||
ASOF LEFT JOIN events0 e
|
||||
ON p.begin < e.begin
|
||||
ORDER BY p.begin ASC;
|
||||
|
||||
DROP TABLE IF EXISTS events0;
|
||||
DROP TABLE IF EXISTS probe0;
|
@ -0,0 +1,2 @@
|
||||
26790 1488
|
||||
26790 1488
|
@ -0,0 +1,39 @@
|
||||
-- Tags: long
|
||||
|
||||
SET allow_experimental_analyzer=1;
|
||||
|
||||
SET session_timezone = 'UTC';
|
||||
|
||||
{% for join_algorithm in ['default', 'full_sorting_merge'] -%}
|
||||
|
||||
SET join_algorithm = '{{ join_algorithm }}';
|
||||
|
||||
-- TODO: enable once USING together with `join_use_nulls` is supported by `full_sorting_merge`
|
||||
-- SET join_use_nulls = 1;
|
||||
|
||||
WITH build AS (
|
||||
SELECT
|
||||
tk.number AS k,
|
||||
toDateTime('2021-01-01 00:00:00') + INTERVAL i.number SECONDS AS t,
|
||||
i.number % 37 AS v
|
||||
FROM numbers(3000000) AS i
|
||||
CROSS JOIN numbers(2) AS tk
|
||||
SETTINGS join_algorithm = 'hash', join_use_nulls = 0
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
tk.number AS k,
|
||||
toDateTime('2021-01-01 00:00:30') + INTERVAL tt.number HOUR AS t
|
||||
FROM numbers(2) AS tk
|
||||
CROSS JOIN numbers(toUInt32((toDateTime('2021-02-01 00:00:30') - toDateTime('2021-01-01 00:00:30')) / 3600)) AS tt
|
||||
SETTINGS join_algorithm = 'hash', join_use_nulls = 0
|
||||
)
|
||||
SELECT
|
||||
SUM(v) AS v,
|
||||
COUNT(*) AS n
|
||||
FROM probe
|
||||
ASOF LEFT JOIN build
|
||||
USING (k, t)
|
||||
;
|
||||
|
||||
{% endfor -%}
|
@ -0,0 +1,10 @@
|
||||
108
|
||||
108 27
|
||||
513
|
||||
1218
|
||||
3528
|
||||
14553
|
||||
121275
|
||||
1495503
|
||||
12462525
|
||||
1249625025
|
186
tests/queries/0_stateless/03147_asof_join_ddb_missing.sql
Normal file
@ -0,0 +1,186 @@
|
||||
SET allow_experimental_analyzer=1;
|
||||
|
||||
SET session_timezone = 'UTC';
|
||||
SET joined_subquery_requires_alias = 0;
|
||||
SET allow_experimental_analyzer = 1;
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
|
||||
-- # 10 dates, 5 keys
|
||||
WITH build AS (
|
||||
SELECT
|
||||
k,
|
||||
toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
|
||||
number AS v
|
||||
FROM numbers(10), (SELECT number AS k FROM numbers(5))
|
||||
SETTINGS join_algorithm = 'default'
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
k * 2 AS k,
|
||||
t - INTERVAL 30 SECOND AS t
|
||||
FROM build
|
||||
)
|
||||
SELECT SUM(v)
|
||||
FROM probe ASOF JOIN build USING (k, t);
|
||||
|
||||
-- # Coverage: Missing right side bin
|
||||
WITH build AS (
|
||||
SELECT
|
||||
k * 2 AS k,
|
||||
toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
|
||||
number AS v
|
||||
FROM numbers(10), (SELECT number AS k FROM numbers(5))
|
||||
SETTINGS join_algorithm = 'default'
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
intDiv(k, 2) AS k,
|
||||
t - INTERVAL 30 SECOND AS t
|
||||
FROM build
|
||||
)
|
||||
SELECT SUM(v), COUNT(*)
|
||||
FROM probe ASOF JOIN build USING (k, t);
|
||||
|
||||
-- # 20 dates, 5 keys
|
||||
WITH build AS (
|
||||
SELECT
|
||||
k,
|
||||
toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
|
||||
number AS v
|
||||
FROM numbers(20), (SELECT number AS k FROM numbers(5))
|
||||
SETTINGS join_algorithm = 'default'
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
k * 2 AS k,
|
||||
t - INTERVAL 30 SECOND AS t
|
||||
FROM build
|
||||
)
|
||||
SELECT SUM(v)
|
||||
FROM probe ASOF JOIN build USING (k, t);
|
||||
|
||||
-- # 30 dates, 5 keys
|
||||
WITH build AS (
|
||||
SELECT
|
||||
k,
|
||||
toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
|
||||
number AS v
|
||||
FROM numbers(30), (SELECT number AS k FROM numbers(5))
|
||||
SETTINGS join_algorithm = 'default'
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
k * 2 AS k,
|
||||
t - INTERVAL 30 SECOND AS t
|
||||
FROM build
|
||||
)
|
||||
SELECT SUM(v)
|
||||
FROM probe ASOF JOIN build USING (k, t);
|
||||
|
||||
-- # 50 dates, 5 keys
|
||||
WITH build AS (
|
||||
SELECT
|
||||
k,
|
||||
toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
|
||||
number AS v
|
||||
FROM numbers(50), (SELECT number AS k FROM numbers(5))
|
||||
SETTINGS join_algorithm = 'default'
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
k * 2 AS k,
|
||||
t - INTERVAL 30 SECOND AS t
|
||||
FROM build
|
||||
)
|
||||
SELECT SUM(v)
|
||||
FROM probe ASOF JOIN build USING (k, t);
|
||||
|
||||
-- # 100 dates, 5 keys
|
||||
WITH build AS (
|
||||
SELECT
|
||||
k,
|
||||
toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
|
||||
number AS v
|
||||
FROM numbers(100), (SELECT number AS k FROM numbers(5))
|
||||
SETTINGS join_algorithm = 'default'
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
k * 2 AS k,
|
||||
t - INTERVAL 30 SECOND AS t
|
||||
FROM build
|
||||
)
|
||||
SELECT SUM(v)
|
||||
FROM probe ASOF JOIN build USING (k, t);
|
||||
|
||||
-- # 100 dates, 50 keys
|
||||
WITH build AS (
|
||||
SELECT
|
||||
k,
|
||||
toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
|
||||
number AS v
|
||||
FROM numbers(100), (SELECT number AS k FROM numbers(50))
|
||||
SETTINGS join_algorithm = 'default'
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
k * 2 AS k,
|
||||
t - INTERVAL 30 SECOND AS t
|
||||
FROM build
|
||||
)
|
||||
SELECT SUM(v)
|
||||
FROM probe ASOF JOIN build USING (k, t);
|
||||
|
||||
-- # 1000 dates, 5 keys
|
||||
WITH build AS (
|
||||
SELECT
|
||||
k,
|
||||
toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
|
||||
number AS v
|
||||
FROM numbers(1000), (SELECT number AS k FROM numbers(5))
|
||||
SETTINGS join_algorithm = 'default'
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
k * 2 AS k,
|
||||
t - INTERVAL 30 SECOND AS t
|
||||
FROM build
|
||||
)
|
||||
SELECT SUM(v)
|
||||
FROM probe ASOF JOIN build USING (k, t);
|
||||
|
||||
-- # 1000 dates, 50 keys
|
||||
WITH build AS (
|
||||
SELECT
|
||||
k,
|
||||
toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
|
||||
number AS v
|
||||
FROM numbers(1000), (SELECT number AS k FROM numbers(50))
|
||||
SETTINGS join_algorithm = 'default'
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
k * 2 AS k,
|
||||
t - INTERVAL 30 SECOND AS t
|
||||
FROM build
|
||||
)
|
||||
SELECT SUM(v)
|
||||
FROM probe ASOF JOIN build USING (k, t);
|
||||
|
||||
-- # 10000 dates, 50 keys
|
||||
WITH build AS (
|
||||
SELECT
|
||||
k,
|
||||
toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t,
|
||||
number AS v
|
||||
FROM numbers(10000), (SELECT number AS k FROM numbers(50))
|
||||
SETTINGS join_algorithm = 'default'
|
||||
),
|
||||
probe AS (
|
||||
SELECT
|
||||
k * 2 AS k,
|
||||
t - INTERVAL 30 SECOND AS t
|
||||
FROM build
|
||||
)
|
||||
SELECT SUM(v)
|
||||
FROM probe ASOF JOIN build USING (k, t);
|
@ -0,0 +1,4 @@
|
||||
1 1
|
||||
3 1
|
||||
6 1
|
||||
8 1
|
29
tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql
Normal file
@ -0,0 +1,29 @@
|
||||
DROP TABLE IF EXISTS events;
|
||||
CREATE TABLE events (begin Float64, value Int32) ENGINE = MergeTree() ORDER BY begin;
|
||||
|
||||
INSERT INTO events VALUES (1, 0), (3, 1), (6, 2), (8, 3);
|
||||
|
||||
SET allow_experimental_analyzer = 1;
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
SET joined_subquery_requires_alias = 0;
|
||||
|
||||
SELECT
|
||||
begin,
|
||||
value IN (
|
||||
SELECT e1.value
|
||||
FROM (
|
||||
SELECT *
|
||||
FROM events e1
|
||||
WHERE e1.value = events.value
|
||||
) AS e1
|
||||
ASOF JOIN (
|
||||
SELECT number :: Float64 AS begin
|
||||
FROM numbers(10)
|
||||
WHERE number >= 1 AND number < 10
|
||||
)
|
||||
USING (begin)
|
||||
)
|
||||
FROM events
|
||||
ORDER BY begin ASC;
|
||||
|
||||
DROP TABLE IF EXISTS events;
|
@ -0,0 +1,56 @@
|
||||
2023-03-21 13:00:00 0
|
||||
2023-03-21 14:00:00 1
|
||||
2023-03-21 15:00:00 2
|
||||
2023-03-21 16:00:00 3
|
||||
2023-03-21 17:00:00 3
|
||||
2023-03-21 18:00:00 3
|
||||
2023-03-21 19:00:00 3
|
||||
2023-03-21 20:00:00 3
|
||||
2023-03-21 21:00:00 3
|
||||
2106-02-07 06:28:15 9
|
||||
2023-03-21 13:00:00 0
|
||||
2023-03-21 14:00:00 1
|
||||
2023-03-21 15:00:00 2
|
||||
2023-03-21 16:00:00 3
|
||||
2023-03-21 17:00:00 3
|
||||
2023-03-21 18:00:00 3
|
||||
2023-03-21 19:00:00 3
|
||||
2023-03-21 20:00:00 3
|
||||
2023-03-21 21:00:00 3
|
||||
2106-02-07 06:28:15 9
|
||||
2023-03-21 12:00:00 \N
|
||||
2023-03-21 13:00:00 0
|
||||
2023-03-21 14:00:00 1
|
||||
2023-03-21 15:00:00 2
|
||||
2023-03-21 16:00:00 3
|
||||
2023-03-21 17:00:00 3
|
||||
2023-03-21 18:00:00 3
|
||||
2023-03-21 19:00:00 3
|
||||
2023-03-21 20:00:00 3
|
||||
2023-03-21 21:00:00 3
|
||||
2106-02-07 06:28:15 9
|
||||
\N \N
|
||||
2023-03-21 12:00:00 0
|
||||
2023-03-21 13:00:00 0
|
||||
2023-03-21 14:00:00 1
|
||||
2023-03-21 15:00:00 2
|
||||
2023-03-21 16:00:00 3
|
||||
2023-03-21 17:00:00 3
|
||||
2023-03-21 18:00:00 3
|
||||
2023-03-21 19:00:00 3
|
||||
2023-03-21 20:00:00 3
|
||||
2023-03-21 21:00:00 3
|
||||
2106-02-07 06:28:15 9
|
||||
\N 0
|
||||
2023-03-21 12:00:00 \N
|
||||
2023-03-21 13:00:00 \N
|
||||
2023-03-21 14:00:00 \N
|
||||
2023-03-21 15:00:00 \N
|
||||
2023-03-21 16:00:00 \N
|
||||
2023-03-21 17:00:00 \N
|
||||
2023-03-21 18:00:00 \N
|
||||
2023-03-21 19:00:00 \N
|
||||
2023-03-21 20:00:00 \N
|
||||
2023-03-21 21:00:00 \N
|
||||
2106-02-07 06:28:15 \N
|
||||
\N \N
|
95
tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql
Normal file
@ -0,0 +1,95 @@
|
||||
DROP TABLE IF EXISTS events0;
|
||||
DROP TABLE IF EXISTS probe0;
|
||||
|
||||
SET session_timezone = 'UTC';
|
||||
SET allow_experimental_analyzer = 1;
|
||||
SET join_algorithm = 'full_sorting_merge';
|
||||
SET join_use_nulls = 1;
|
||||
|
||||
CREATE TABLE events0
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY COALESCE(begin, toDateTime('9999-12-31 23:59:59'))
|
||||
AS
|
||||
SELECT
|
||||
toNullable(toDateTime('2023-03-21 13:00:00') + INTERVAL number HOUR) AS begin,
|
||||
number AS value
|
||||
FROM numbers(4);
|
||||
|
||||
INSERT INTO events0 VALUES (NULL, -1), (toDateTime('9999-12-31 23:59:59'), 9);
|
||||
|
||||
CREATE TABLE probe0
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY COALESCE(begin, toDateTime('9999-12-31 23:59:59'))
|
||||
AS
|
||||
SELECT
|
||||
toNullable(toDateTime('2023-03-21 12:00:00') + INTERVAL number HOUR) AS begin
|
||||
FROM numbers(10);
|
||||
|
||||
INSERT INTO probe0 VALUES (NULL), (toDateTime('9999-12-31 23:59:59'));
|
||||
|
||||
SELECT
|
||||
p.begin,
|
||||
e.value
|
||||
FROM
|
||||
probe0 p
|
||||
ASOF JOIN events0 e ON p.begin >= e.begin
|
||||
ORDER BY p.begin ASC;
|
||||
|
||||
SELECT
|
||||
p.begin,
|
||||
e.value
|
||||
FROM
|
||||
probe0 p
|
||||
ASOF JOIN events0 e USING (begin)
|
||||
ORDER BY p.begin ASC
|
||||
SETTINGS join_use_nulls = 0
|
||||
;
|
||||
|
||||
SELECT
|
||||
p.begin,
|
||||
e.value
|
||||
FROM
|
||||
probe0 p
|
||||
ASOF LEFT JOIN events0 e ON p.begin >= e.begin
|
||||
ORDER BY p.begin ASC;
|
||||
|
||||
SELECT
|
||||
p.begin,
|
||||
e.value
|
||||
FROM
|
||||
probe0 p
|
||||
ASOF LEFT JOIN events0 e USING (begin)
|
||||
ORDER BY p.begin ASC
|
||||
SETTINGS join_use_nulls = 0
|
||||
;
|
||||
|
||||
SELECT
|
||||
p.begin,
|
||||
e.value
|
||||
FROM
|
||||
probe0 p
|
||||
ASOF RIGHT JOIN events0 e ON p.begin >= e.begin
|
||||
ORDER BY e.begin ASC; -- { serverError NOT_IMPLEMENTED}
|
||||
|
||||
SELECT
|
||||
p.begin,
|
||||
e.value
|
||||
FROM
|
||||
probe0 p
|
||||
ASOF RIGHT JOIN events0 e USING (begin)
|
||||
ORDER BY e.begin ASC; -- { serverError NOT_IMPLEMENTED}
|
||||
|
||||
|
||||
SELECT
|
||||
p.begin,
|
||||
e.value
|
||||
FROM
|
||||
probe0 p
|
||||
ASOF LEFT JOIN (
|
||||
SELECT * FROM events0 WHERE log(value + 5) > 10
|
||||
) e ON p.begin + INTERVAL 2 HOUR >= e.begin + INTERVAL 1 HOUR
|
||||
ORDER BY p.begin ASC;
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS events0;
|
||||
DROP TABLE IF EXISTS probe0;
|