dbms: improved performance of INSERT SELECT [#METR-19025].

This commit is contained in:
Alexey Milovidov 2015-11-21 18:47:32 +03:00
parent 2c67747a0d
commit 8314861690
3 changed files with 55 additions and 2 deletions

View File

@ -23,4 +23,10 @@ void stableSortBlock(Block & block, const SortDescription & description);
*/
void stableGetPermutation(const Block & block, const SortDescription & description, IColumn::Permutation & out_permutation);
/** Quickly check whether the block is already sorted. If the block is not sorted, returns false as fast as possible.
* Collations are not supported.
*/
bool isAlreadySorted(const Block & block, const SortDescription & description);
}

View File

@ -172,6 +172,50 @@ void stableGetPermutation(const Block & block, const SortDescription & descripti
}
bool isAlreadySorted(const Block & block, const SortDescription & description)
{
if (!block)
return true;
size_t rows = block.rows();
ColumnsWithSortDescriptions columns_with_sort_desc;
for (size_t i = 0, size = description.size(); i < size; ++i)
{
const IColumn * column = !description[i].column_name.empty()
? block.getByName(description[i].column_name).column
: block.getByPosition(description[i].column_number).column;
columns_with_sort_desc.push_back(std::make_pair(column, description[i]));
}
PartialSortingLess less(columns_with_sort_desc);
/** Если строк не слишком мало, то предпримем быструю попытку проверить, что блок не сортирован.
* Константы - наугад.
*/
static constexpr size_t num_rows_to_try = 10;
if (rows > num_rows_to_try * 5)
{
for (size_t i = 1; i < num_rows_to_try; ++i)
{
size_t prev_position = rows * (i - 1) / num_rows_to_try;
size_t curr_position = rows * i / num_rows_to_try;
if (less(curr_position, prev_position))
return false;
}
}
for (size_t i = 1; i < rows; ++i)
if (less(i, i - 1))
return false;
return true;
}
void stableSortBlock(Block & block, const SortDescription & description)
{
if (!block)

View File

@ -108,10 +108,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa
IColumn::Permutation * perm_ptr = nullptr;
IColumn::Permutation perm;
if (data.mode != MergeTreeData::Unsorted)
{
if (!isAlreadySorted(block, sort_descr))
{
stableGetPermutation(block, sort_descr, perm);
perm_ptr = &perm;
}
}
NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames());
MergedBlockOutputStream out(data, part_tmp_path, columns, CompressionMethod::LZ4);