mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
rebase and resolve conflict
This commit is contained in:
parent
cfa4ca6fb1
commit
add486b62a
@ -94,7 +94,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
|||||||
{"type_json_skip_duplicated_paths", false, false, "Allow to skip duplicated paths during JSON parsing"},
|
{"type_json_skip_duplicated_paths", false, false, "Allow to skip duplicated paths during JSON parsing"},
|
||||||
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
|
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
|
||||||
{"allow_experimental_vector_similarity_index", false, false, "Added new setting to allow experimental vector similarity indexes"},
|
{"allow_experimental_vector_similarity_index", false, false, "Added new setting to allow experimental vector similarity indexes"},
|
||||||
{"input_format_try_infer_datetimes_only_datetime64", true, false, "Allow to infer DateTime instead of DateTime64 in data formats"}
|
{"input_format_try_infer_datetimes_only_datetime64", true, false, "Allow to infer DateTime instead of DateTime64 in data formats"},
|
||||||
{"join_to_sort_perkey_rows_threshold", 0, 40, "The lower limit of per-key average rows in the right table to determine whether to sort it in hash join."},
|
{"join_to_sort_perkey_rows_threshold", 0, 40, "The lower limit of per-key average rows in the right table to determine whether to sort it in hash join."},
|
||||||
{"join_to_sort_table_rows_threshold", 0, 10000, "The upper limit of rows in the right table to determine whether to sort it in hash join."},
|
{"join_to_sort_table_rows_threshold", 0, 10000, "The upper limit of rows in the right table to determine whether to sort it in hash join."},
|
||||||
}
|
}
|
||||||
|
@ -20,13 +20,10 @@ void AddedColumns<false>::buildOutput() {}
|
|||||||
|
|
||||||
template<>
|
template<>
|
||||||
void AddedColumns<false>::buildJoinGetOutput() {}
|
void AddedColumns<false>::buildJoinGetOutput() {}
|
||||||
<<<<<<< HEAD
|
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
template<bool from_row_list>
|
template<bool from_row_list>
|
||||||
void AddedColumns<false>::buildOutputFromBlocks() {}
|
void AddedColumns<false>::buildOutputFromBlocks() {}
|
||||||
=======
|
|
||||||
>>>>>>> add threshold for table rows
|
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
void AddedColumns<true>::buildOutput()
|
void AddedColumns<true>::buildOutput()
|
||||||
@ -35,15 +32,9 @@ void AddedColumns<true>::buildOutput()
|
|||||||
buildOutputFromBlocks<false>();
|
buildOutputFromBlocks<false>();
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
<<<<<<< HEAD
|
|
||||||
if (join_data_avg_perkey_rows < output_by_row_list_threshold)
|
if (join_data_avg_perkey_rows < output_by_row_list_threshold)
|
||||||
buildOutputFromBlocks<true>();
|
buildOutputFromBlocks<true>();
|
||||||
else
|
|
||||||
=======
|
|
||||||
if (join_data_avg_perkey_rows < sort_right_perkey_rows_threshold)
|
|
||||||
buildOutputFromBlocks<true>();
|
|
||||||
else if (join_data_sorted)
|
else if (join_data_sorted)
|
||||||
>>>>>>> add threshold for table rows
|
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < this->size(); ++i)
|
for (size_t i = 0; i < this->size(); ++i)
|
||||||
{
|
{
|
||||||
@ -53,19 +44,31 @@ void AddedColumns<true>::buildOutput()
|
|||||||
if (row_ref_i)
|
if (row_ref_i)
|
||||||
{
|
{
|
||||||
const RowRefList * row_ref_list = reinterpret_cast<const RowRefList *>(row_ref_i);
|
const RowRefList * row_ref_list = reinterpret_cast<const RowRefList *>(row_ref_i);
|
||||||
<<<<<<< HEAD
|
|
||||||
for (auto it = row_ref_list->begin(); it.ok(); ++it)
|
|
||||||
col->insertFrom(*it->block->getByPosition(right_indexes[i]).column, it->row_num);
|
|
||||||
=======
|
|
||||||
col->insertRangeFrom(*row_ref_list->block->getByPosition(right_indexes[i]).column, row_ref_list->row_num, row_ref_list->rows);
|
col->insertRangeFrom(*row_ref_list->block->getByPosition(right_indexes[i]).column, row_ref_list->row_num, row_ref_list->rows);
|
||||||
>>>>>>> add threshold for table rows
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
type_name[i].type->insertDefaultInto(*col);
|
type_name[i].type->insertDefaultInto(*col);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
<<<<<<< HEAD
|
else
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < this->size(); ++i)
|
||||||
|
{
|
||||||
|
auto & col = columns[i];
|
||||||
|
for (auto row_ref_i : lazy_output.row_refs)
|
||||||
|
{
|
||||||
|
if (row_ref_i)
|
||||||
|
{
|
||||||
|
const RowRefList * row_ref_list = reinterpret_cast<const RowRefList *>(row_ref_i);
|
||||||
|
for (auto it = row_ref_list->begin(); it.ok(); ++it)
|
||||||
|
col->insertFrom(*it->block->getByPosition(right_indexes[i]).column, it->row_num);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
type_name[i].type->insertDefaultInto(*col);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,25 +91,6 @@ void AddedColumns<true>::buildJoinGetOutput()
|
|||||||
nullable_col->insertFromNotNullable(*column_from_block.column, row_ref->row_num);
|
nullable_col->insertFromNotNullable(*column_from_block.column, row_ref->row_num);
|
||||||
else
|
else
|
||||||
col->insertFrom(*column_from_block.column, row_ref->row_num);
|
col->insertFrom(*column_from_block.column, row_ref->row_num);
|
||||||
=======
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (size_t i = 0; i < this->size(); ++i)
|
|
||||||
{
|
|
||||||
auto & col = columns[i];
|
|
||||||
for (auto row_ref_i : lazy_output.row_refs)
|
|
||||||
{
|
|
||||||
if (row_ref_i)
|
|
||||||
{
|
|
||||||
const RowRefList * row_ref_list = reinterpret_cast<const RowRefList *>(row_ref_i);
|
|
||||||
for (auto it = row_ref_list->begin(); it.ok(); ++it)
|
|
||||||
col->insertFrom(*it->block->getByPosition(right_indexes[i]).column, it->row_num);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
type_name[i].type->insertDefaultInto(*col);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
>>>>>>> add threshold for table rows
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -115,11 +99,7 @@ template<>
|
|||||||
template<bool from_row_list>
|
template<bool from_row_list>
|
||||||
void AddedColumns<true>::buildOutputFromBlocks()
|
void AddedColumns<true>::buildOutputFromBlocks()
|
||||||
{
|
{
|
||||||
<<<<<<< HEAD
|
|
||||||
if (this->size() == 0)
|
if (this->size() == 0)
|
||||||
=======
|
|
||||||
if (this->size() == 0)
|
|
||||||
>>>>>>> add threshold for table rows
|
|
||||||
return;
|
return;
|
||||||
std::vector<const Block *> blocks;
|
std::vector<const Block *> blocks;
|
||||||
std::vector<UInt32> row_nums;
|
std::vector<UInt32> row_nums;
|
||||||
@ -160,32 +140,6 @@ void AddedColumns<true>::buildOutputFromBlocks()
|
|||||||
col->insertFrom(*blocks[j]->getByPosition(right_indexes[i]).column, row_nums[j]);
|
col->insertFrom(*blocks[j]->getByPosition(right_indexes[i]).column, row_nums[j]);
|
||||||
else
|
else
|
||||||
type_name[i].type->insertDefaultInto(*col);
|
type_name[i].type->insertDefaultInto(*col);
|
||||||
<<<<<<< HEAD
|
|
||||||
=======
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<>
|
|
||||||
void AddedColumns<true>::buildJoinGetOutput()
|
|
||||||
{
|
|
||||||
for (size_t i = 0; i < this->size(); ++i)
|
|
||||||
{
|
|
||||||
auto & col = columns[i];
|
|
||||||
for (auto row_ref_i : lazy_output.row_refs)
|
|
||||||
{
|
|
||||||
if (!row_ref_i)
|
|
||||||
{
|
|
||||||
type_name[i].type->insertDefaultInto(*col);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const auto * row_ref = reinterpret_cast<const RowRef *>(row_ref_i);
|
|
||||||
const auto & column_from_block = row_ref->block->getByPosition(right_indexes[i]);
|
|
||||||
if (auto * nullable_col = typeid_cast<ColumnNullable *>(col.get()); nullable_col && !column_from_block.column->isNullable())
|
|
||||||
nullable_col->insertFromNotNullable(*column_from_block.column, row_ref->row_num);
|
|
||||||
else
|
|
||||||
col->insertFrom(*column_from_block.column, row_ref->row_num);
|
|
||||||
>>>>>>> add threshold for table rows
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -115,6 +115,7 @@ public:
|
|||||||
}
|
}
|
||||||
join_data_avg_perkey_rows = join.getJoinedData()->avgPerKeyRows();
|
join_data_avg_perkey_rows = join.getJoinedData()->avgPerKeyRows();
|
||||||
output_by_row_list_threshold = join.getTableJoin().outputByRowListPerkeyRowsThreshold();
|
output_by_row_list_threshold = join.getTableJoin().outputByRowListPerkeyRowsThreshold();
|
||||||
|
join_data_sorted = join.getJoinedData()->sorted;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t size() const { return columns.size(); }
|
size_t size() const { return columns.size(); }
|
||||||
@ -147,6 +148,7 @@ public:
|
|||||||
std::unique_ptr<IColumn::Offsets> offsets_to_replicate;
|
std::unique_ptr<IColumn::Offsets> offsets_to_replicate;
|
||||||
bool need_filter = false;
|
bool need_filter = false;
|
||||||
bool output_by_row_list = false;
|
bool output_by_row_list = false;
|
||||||
|
bool join_data_sorted = false;
|
||||||
size_t join_data_avg_perkey_rows = 0;
|
size_t join_data_avg_perkey_rows = 0;
|
||||||
size_t output_by_row_list_threshold = 0;
|
size_t output_by_row_list_threshold = 0;
|
||||||
IColumn::Filter filter;
|
IColumn::Filter filter;
|
||||||
@ -196,12 +198,6 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Build output from the blocks that extract from `RowRef` or `RowRefList`, to avoid block cache miss which may cause performance slow down.
|
|
||||||
* This problem would happen if we directly built the output from `RowRef` or `RowRefList`.
|
|
||||||
*/
|
|
||||||
template<bool from_row_list>
|
|
||||||
void buildOutputFromBlocks();
|
|
||||||
|
|
||||||
MutableColumns columns;
|
MutableColumns columns;
|
||||||
bool is_join_get;
|
bool is_join_get;
|
||||||
std::vector<size_t> right_indexes;
|
std::vector<size_t> right_indexes;
|
||||||
|
@ -1422,12 +1422,12 @@ void HashJoin::tryRerangeRightTableData()
|
|||||||
if ((kind != JoinKind::Inner && kind != JoinKind::Left) || strictness != JoinStrictness::All || table_join->getMixedJoinExpression())
|
if ((kind != JoinKind::Inner && kind != JoinKind::Left) || strictness != JoinStrictness::All || table_join->getMixedJoinExpression())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!data || data->sorted || data->blocks.empty() || data->maps.size() > 1)
|
if (!data || data->sorted || data->blocks.empty() || data->maps.size() > 1 || data->rows_to_join > table_join->sortRightTableRowsThreshold() || data->avgPerKeyRows() < table_join->sortRightPerkeyRowsThreshold())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (data->keys_to_join == 0)
|
if (data->keys_to_join == 0)
|
||||||
data->keys_to_join = getTotalRowCount();
|
data->keys_to_join = getTotalRowCount();
|
||||||
if (sample_block_with_columns_to_add.columns() == 0 || data->rows_to_join > table_join->sortRightTableRowsThreshold() || data->avgPerKeyRows() < table_join->sortRightPerkeyRowsThreshold())
|
if (sample_block_with_columns_to_add.columns() == 0)
|
||||||
{
|
{
|
||||||
LOG_DEBUG(log, "The joined right table total rows :{}, total keys :{}, columns added:{}",
|
LOG_DEBUG(log, "The joined right table total rows :{}, total keys :{}, columns added:{}",
|
||||||
data->rows_to_join, data->keys_to_join, sample_block_with_columns_to_add.columns());
|
data->rows_to_join, data->keys_to_join, sample_block_with_columns_to_add.columns());
|
||||||
|
@ -116,6 +116,8 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_, Temporary
|
|||||||
, max_files_to_merge(settings.join_on_disk_max_files_to_merge)
|
, max_files_to_merge(settings.join_on_disk_max_files_to_merge)
|
||||||
, temporary_files_codec(settings.temporary_files_codec)
|
, temporary_files_codec(settings.temporary_files_codec)
|
||||||
, output_by_rowlist_perkey_rows_threshold(settings.join_output_by_rowlist_perkey_rows_threshold)
|
, output_by_rowlist_perkey_rows_threshold(settings.join_output_by_rowlist_perkey_rows_threshold)
|
||||||
|
, sort_right_perkey_rows_threshold(settings.join_to_sort_perkey_rows_threshold)
|
||||||
|
, sort_right_table_rows_threshold(settings.join_to_sort_table_rows_threshold)
|
||||||
, max_memory_usage(settings.max_memory_usage)
|
, max_memory_usage(settings.max_memory_usage)
|
||||||
, tmp_volume(tmp_volume_)
|
, tmp_volume(tmp_volume_)
|
||||||
, tmp_data(tmp_data_)
|
, tmp_data(tmp_data_)
|
||||||
|
@ -149,6 +149,8 @@ private:
|
|||||||
const size_t max_files_to_merge = 0;
|
const size_t max_files_to_merge = 0;
|
||||||
const String temporary_files_codec = "LZ4";
|
const String temporary_files_codec = "LZ4";
|
||||||
const size_t output_by_rowlist_perkey_rows_threshold = 0;
|
const size_t output_by_rowlist_perkey_rows_threshold = 0;
|
||||||
|
const size_t sort_right_perkey_rows_threshold = 0;
|
||||||
|
const size_t sort_right_table_rows_threshold = 0;
|
||||||
|
|
||||||
/// Value if setting max_memory_usage for query, can be used when max_bytes_in_join is not specified.
|
/// Value if setting max_memory_usage for query, can be used when max_bytes_in_join is not specified.
|
||||||
size_t max_memory_usage = 0;
|
size_t max_memory_usage = 0;
|
||||||
@ -297,6 +299,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t outputByRowListPerkeyRowsThreshold() const { return output_by_rowlist_perkey_rows_threshold; }
|
size_t outputByRowListPerkeyRowsThreshold() const { return output_by_rowlist_perkey_rows_threshold; }
|
||||||
|
size_t sortRightPerkeyRowsThreshold() const { return sort_right_perkey_rows_threshold; }
|
||||||
|
size_t sortRightTableRowsThreshold() const { return sort_right_table_rows_threshold; }
|
||||||
size_t defaultMaxBytes() const { return default_max_bytes; }
|
size_t defaultMaxBytes() const { return default_max_bytes; }
|
||||||
size_t maxJoinedBlockRows() const { return max_joined_block_rows; }
|
size_t maxJoinedBlockRows() const { return max_joined_block_rows; }
|
||||||
size_t maxRowsInRightBlock() const { return partial_merge_join_rows_in_right_blocks; }
|
size_t maxRowsInRightBlock() const { return partial_merge_join_rows_in_right_blocks; }
|
||||||
|
@ -5,10 +5,10 @@
|
|||||||
<fill_query>INSERT INTO test SELECT number % 10000, number % 10000, number % 10000 FROM numbers(10000000)</fill_query>
|
<fill_query>INSERT INTO test SELECT number % 10000, number % 10000, number % 10000 FROM numbers(10000000)</fill_query>
|
||||||
<fill_query>INSERT INTO test1 SELECT number % 1000 , number % 1000, number % 1000 FROM numbers(100000)</fill_query>
|
<fill_query>INSERT INTO test1 SELECT number % 1000 , number % 1000, number % 1000 FROM numbers(100000)</fill_query>
|
||||||
|
|
||||||
<query tag='INNER'>SELECT MAX(test1.a) FROM test INNER JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000</query>
|
<query tag='INNER'>SELECT MAX(test1.a) FROM test INNER JOIN test1 on test.b = test1.b</query>
|
||||||
<query tag='LEFT'>SELECT MAX(test1.a) FROM test LEFT JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000</query>
|
<query tag='LEFT'>SELECT MAX(test1.a) FROM test LEFT JOIN test1 on test.b = test1.b</query>
|
||||||
<query tag='RIGHT'>SELECT MAX(test1.a) FROM test RIGHT JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000</query>
|
<query tag='RIGHT'>SELECT MAX(test1.a) FROM test RIGHT JOIN test1 on test.b = test1.b</query>
|
||||||
<query tag='FULL'>SELECT MAX(test1.a) FROM test FULL JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000</query>
|
<query tag='FULL'>SELECT MAX(test1.a) FROM test FULL JOIN test1 on test.b = test1.b</query>
|
||||||
|
|
||||||
<drop_query>DROP TABLE IF EXISTS test</drop_query>
|
<drop_query>DROP TABLE IF EXISTS test</drop_query>
|
||||||
<drop_query>DROP TABLE IF EXISTS test1</drop_query>
|
<drop_query>DROP TABLE IF EXISTS test1</drop_query>
|
||||||
|
Loading…
Reference in New Issue
Block a user