mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Support unaligned array join
For LEFT ARRAY JOIN, expand all arrays to the greatest size. If all sizes are zero, resize to one, which is the same as the old behavior (i.e. emptyArrayToSingle). For non-LEFT ARRAY JOIN, expand to the greatest size, but keep the result empty if all sizes are zero.
This commit is contained in:
parent
1cc69100f1
commit
86f462acff
@ -143,8 +143,15 @@ ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_column
|
||||
a.type = ARRAY_JOIN;
|
||||
a.array_joined_columns = array_joined_columns;
|
||||
a.array_join_is_left = array_join_is_left;
|
||||
a.unaligned_array_join = context.getSettingsRef().enable_unaligned_array_join;
|
||||
|
||||
if (array_join_is_left)
|
||||
if (a.unaligned_array_join)
|
||||
{
|
||||
a.function_length = FunctionFactory::instance().get("length", context);
|
||||
a.function_greatest = FunctionFactory::instance().get("greatest", context);
|
||||
a.function_arrayResize = FunctionFactory::instance().get("arrayResize", context);
|
||||
}
|
||||
else if (array_join_is_left)
|
||||
a.function_builder = FunctionFactory::instance().get("emptyArrayToSingle", context);
|
||||
|
||||
return a;
|
||||
@ -375,7 +382,44 @@ void ExpressionAction::execute(Block & block) const
|
||||
|
||||
/// If LEFT ARRAY JOIN, then we create columns in which empty arrays are replaced by arrays with one element - the default value.
|
||||
std::map<String, ColumnPtr> non_empty_array_columns;
|
||||
|
||||
if (unaligned_array_join)
|
||||
{
|
||||
/// Resize all array joined columns to the longest one, (at least 1 if LEFT ARRAY JOIN), padded with default values.
|
||||
auto rows = block.rows();
|
||||
auto uint64 = std::make_shared<DataTypeUInt64>();
|
||||
ColumnWithTypeAndName column_of_max_length;
|
||||
if (array_join_is_left)
|
||||
column_of_max_length = ColumnWithTypeAndName(uint64->createColumnConst(rows, 1u), uint64, {});
|
||||
else
|
||||
column_of_max_length = ColumnWithTypeAndName(uint64->createColumnConst(rows, 0u), uint64, {});
|
||||
|
||||
for (const auto & name : array_joined_columns)
|
||||
{
|
||||
auto & src_col = block.getByName(name);
|
||||
|
||||
Block tmp_block{src_col, {{}, uint64, {}}};
|
||||
function_length->build({src_col})->execute(tmp_block, {0}, 1, rows);
|
||||
|
||||
Block tmp_block2{
|
||||
column_of_max_length, tmp_block.safeGetByPosition(1), {{}, uint64, {}}};
|
||||
function_greatest->build({column_of_max_length, tmp_block.safeGetByPosition(1)})->execute(tmp_block2, {0, 1}, 2, rows);
|
||||
column_of_max_length = tmp_block2.safeGetByPosition(2);
|
||||
}
|
||||
|
||||
for (const auto & name : array_joined_columns)
|
||||
{
|
||||
auto & src_col = block.getByName(name);
|
||||
|
||||
Block tmp_block{src_col, column_of_max_length, {{}, src_col.type, {}}};
|
||||
function_arrayResize->build({src_col, column_of_max_length})->execute(tmp_block, {0, 1}, 2, rows);
|
||||
any_array_ptr = src_col.column = tmp_block.safeGetByPosition(2).column;
|
||||
}
|
||||
if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst())
|
||||
any_array_ptr = converted;
|
||||
any_array = typeid_cast<const ColumnArray *>(&*any_array_ptr);
|
||||
}
|
||||
else if (array_join_is_left && !unaligned_array_join)
|
||||
{
|
||||
for (const auto & name : array_joined_columns)
|
||||
{
|
||||
@ -404,13 +448,13 @@ void ExpressionAction::execute(Block & block) const
|
||||
if (!typeid_cast<const DataTypeArray *>(&*current.type))
|
||||
throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH);
|
||||
|
||||
ColumnPtr array_ptr = array_join_is_left ? non_empty_array_columns[current.name] : current.column;
|
||||
ColumnPtr array_ptr = (array_join_is_left && !unaligned_array_join) ? non_empty_array_columns[current.name] : current.column;
|
||||
|
||||
if (ColumnPtr converted = array_ptr->convertToFullColumnIfConst())
|
||||
array_ptr = converted;
|
||||
|
||||
const ColumnArray & array = typeid_cast<const ColumnArray &>(*array_ptr);
|
||||
if (!array.hasEqualOffsets(typeid_cast<const ColumnArray &>(*any_array_ptr)))
|
||||
if (!unaligned_array_join && !array.hasEqualOffsets(typeid_cast<const ColumnArray &>(*any_array_ptr)))
|
||||
throw Exception("Sizes of ARRAY-JOIN-ed arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
|
||||
|
||||
current.column = typeid_cast<const ColumnArray &>(*array_ptr).getDataPtr();
|
||||
|
@ -87,6 +87,12 @@ public:
|
||||
/// For APPLY_FUNCTION and LEFT ARRAY JOIN.
|
||||
/// FunctionBuilder is used before action was added to ExpressionActions (when we don't know types of arguments).
|
||||
FunctionBuilderPtr function_builder;
|
||||
|
||||
/// For unaligned [LEFT] ARRAY JOIN
|
||||
FunctionBuilderPtr function_length;
|
||||
FunctionBuilderPtr function_greatest;
|
||||
FunctionBuilderPtr function_arrayResize;
|
||||
|
||||
/// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity.
|
||||
FunctionBasePtr function_base;
|
||||
/// Prepared function which is used in function execution.
|
||||
@ -97,6 +103,7 @@ public:
|
||||
/// For ARRAY_JOIN
|
||||
NameSet array_joined_columns;
|
||||
bool array_join_is_left = false;
|
||||
bool unaligned_array_join = false;
|
||||
|
||||
/// For JOIN
|
||||
std::shared_ptr<const Join> join;
|
||||
|
@ -292,6 +292,7 @@ struct Settings
|
||||
M(SettingBool, allow_ddl, true, "If it is set to true, then a user is allowed to executed DDL queries.") \
|
||||
M(SettingBool, parallel_view_processing, false, "Enables pushing to attached views concurrently instead of sequentially.") \
|
||||
M(SettingBool, enable_debug_queries, false, "Enables debug queries such as AST.") \
|
||||
M(SettingBool, enable_unaligned_array_join, false, "Enables unaligned array join.") \
|
||||
|
||||
|
||||
#define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \
|
||||
|
26
dbms/tests/performance/array/array_join.xml
Normal file
26
dbms/tests/performance/array/array_join.xml
Normal file
@ -0,0 +1,26 @@
|
||||
<test>
|
||||
<name>array_join</name>
|
||||
<type>once</type>
|
||||
|
||||
<stop_conditions>
|
||||
<any_of>
|
||||
<average_speed_not_changing_for_ms>10000</average_speed_not_changing_for_ms>
|
||||
<total_time_ms>1000</total_time_ms>
|
||||
</any_of>
|
||||
</stop_conditions>
|
||||
|
||||
<metrics>
|
||||
<max_rows_per_second />
|
||||
</metrics>
|
||||
|
||||
<main_metric>
|
||||
<max_rows_per_second />
|
||||
</main_metric>
|
||||
|
||||
<query>SELECT count() FROM (SELECT [number] a, [number * 2] b FROM system.numbers) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b)</query>
|
||||
<query>SELECT count() FROM (SELECT [number] a, [number * 2] b FROM system.numbers) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b)</query>
|
||||
<query>SELECT count() FROM (SELECT [number] a, [number * 2] b FROM system.numbers) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1</query>
|
||||
<query>SELECT count() FROM (SELECT [number] a, [number * 2] b FROM system.numbers) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1</query>
|
||||
<query>SELECT count() FROM (SELECT [number] a, [number * 2, number] b FROM system.numbers) AS t ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1</query>
|
||||
<query>SELECT count() FROM (SELECT [number] a, [number * 2, number] b FROM system.numbers) AS t LEFT ARRAY JOIN a, b WHERE NOT ignore(a + b) SETTINGS enable_unaligned_array_join = 1</query>
|
||||
</test>
|
@ -0,0 +1,24 @@
|
||||
1 [0] [0] 0 0
|
||||
2 [] [0,1] 0 0
|
||||
2 [] [0,1] 0 1
|
||||
3 [0] [] 0 0
|
||||
4 [] [0] 0 0
|
||||
5 [0] [0,1] 0 0
|
||||
5 [0] [0,1] 0 1
|
||||
7 [0] [0] 0 0
|
||||
8 [] [0,1] 0 0
|
||||
8 [] [0,1] 0 1
|
||||
9 [0] [] 0 0
|
||||
0 [] [] 0 0
|
||||
1 [0] [0] 0 0
|
||||
2 [] [0,1] 0 0
|
||||
2 [] [0,1] 0 1
|
||||
3 [0] [] 0 0
|
||||
4 [] [0] 0 0
|
||||
5 [0] [0,1] 0 0
|
||||
5 [0] [0,1] 0 1
|
||||
6 [] [] 0 0
|
||||
7 [0] [0] 0 0
|
||||
8 [] [0,1] 0 0
|
||||
8 [] [0,1] 0 1
|
||||
9 [0] [] 0 0
|
@ -0,0 +1,2 @@
|
||||
SELECT number, arr1, arr2, x, y FROM (SELECT number, range(number % 2) AS arr1, range(number % 3) arr2 FROM system.numbers LIMIT 10) ARRAY JOIN arr1 AS x, arr2 AS y SETTINGS enable_unaligned_array_join = 1;
|
||||
SELECT number, arr1, arr2, x, y FROM (SELECT number, range(number % 2) AS arr1, range(number % 3) arr2 FROM system.numbers LIMIT 10) LEFT ARRAY JOIN arr1 AS x, arr2 AS y SETTINGS enable_unaligned_array_join = 1;
|
Loading…
Reference in New Issue
Block a user