wip on finish sorting

This commit is contained in:
CurtizJ 2018-10-04 17:55:02 +03:00
parent dd9516a810
commit 8c2060b48a
4 changed files with 63 additions and 50 deletions

View File

@ -319,17 +319,28 @@ FinishMergeSortingBlockInputStream::FinishMergeSortingBlockInputStream(
removeConstantsFromSortDescription(header, description_to_sort); removeConstantsFromSortDescription(header, description_to_sort);
} }
static bool equalKeysAt(const ColumnsWithSortDescriptions & lhs, const ColumnsWithSortDescriptions & rhs, size_t n, size_t m)
{
for (auto it = lhs.begin(), jt = rhs.begin(); it != lhs.end(); ++it, ++jt) struct Less
{
const ColumnsWithSortDescriptions & left_columns;
const ColumnsWithSortDescriptions & right_columns;
Less(const ColumnsWithSortDescriptions & left_columns_, const ColumnsWithSortDescriptions & right_columns_) :
left_columns(left_columns_), right_columns(right_columns_) {}
bool operator() (size_t a, size_t b) const
{ {
int res = it->first->compareAt(n, m, *jt->first, it->second.nulls_direction); for (auto it = left_columns.begin(), jt = right_columns.begin(); it != left_columns.end(); ++it, ++jt)
if (res != 0) {
return false; int res = it->second.direction * it->first->compareAt(a, b, *jt->first, it->second.nulls_direction);
if (res < 0)
return true;
else if (res > 0)
return false;
}
return false;
} }
return true; };
}
Block FinishMergeSortingBlockInputStream::readImpl() Block FinishMergeSortingBlockInputStream::readImpl()
{ {
@ -356,7 +367,7 @@ Block FinishMergeSortingBlockInputStream::readImpl()
{ {
block = children.back()->read(); block = children.back()->read();
/// End of input stream, but we can`t returns immediatly, we need to merge already read blocks. /// End of input stream, but we can`t return immediatly, we need to merge already read blocks.
/// Check it later, when get end of stream from impl. /// Check it later, when get end of stream from impl.
if (!block) if (!block)
{ {
@ -373,35 +384,36 @@ Block FinishMergeSortingBlockInputStream::readImpl()
if (size == 0) if (size == 0)
continue; continue;
auto columns_with_sort_desc = getColumnsWithSortDescription(block, description_sorted);
removeConstantsFromBlock(block); removeConstantsFromBlock(block);
/// May be new block starts with new key. /// Find the position of last already read key in current block.
if (!blocks.empty()) if (!blocks.empty())
{ {
const Block & last_block = blocks.back(); const Block & last_block = blocks.back();
if (!equalKeysAt(getColumnsWithSortDescription(last_block, description_sorted), columns_with_sort_desc, last_block.rows() - 1, 0)) auto last_columns = getColumnsWithSortDescription(last_block, description_sorted);
auto current_columns = getColumnsWithSortDescription(block, description_sorted);
Less less(last_columns, current_columns);
IColumn::Permutation perm(size);
for (size_t i = 0; i < size; ++i)
perm[i] = i;
auto it = std::upper_bound(perm.begin(), perm.end(), last_block.rows() - 1, less);
if (it != perm.end())
{
tail_pos = it - perm.begin();
break; break;
}
} }
IColumn::Permutation perm(size);
for (size_t i = 0; i < size; ++i)
perm[i] = i;
PartialSortingLess less(columns_with_sort_desc);
/// We need to save tail of block, because next block may starts with the same key as in tail
/// and we should sort these rows in one chunk.
tail_pos = *std::lower_bound(perm.begin(), perm.end(), size - 1, less);
if (tail_pos != 0)
break;
/// If we reach here, that means that current block has all rows with the same key as tail of a previous block. /// If we reach here, that means that current block is first in chunk
/// or it all consists of rows with the same key as tail of a previous block.
blocks.push_back(block); blocks.push_back(block);
} }
/// We need to save tail of block, because next block may starts with the same key as in tail
/// and we should sort these rows in one chunk.
if (block) if (block)
{ {
Block head_block = block.cloneEmpty(); Block head_block = block.cloneEmpty();
@ -411,8 +423,8 @@ Block FinishMergeSortingBlockInputStream::readImpl()
head_block.getByPosition(i).column = block.getByPosition(i).column->cut(0, tail_pos); head_block.getByPosition(i).column = block.getByPosition(i).column->cut(0, tail_pos);
tail_block.getByPosition(i).column = block.getByPosition(i).column->cut(tail_pos, block.rows() - tail_pos); tail_block.getByPosition(i).column = block.getByPosition(i).column->cut(tail_pos, block.rows() - tail_pos);
} }
if (head_block.rows())
blocks.push_back(head_block); blocks.push_back(head_block);
} }
impl = std::make_unique<MergeSortingBlocksBlockInputStream>(blocks, description_to_sort, max_merged_block_size, limit); impl = std::make_unique<MergeSortingBlocksBlockInputStream>(blocks, description_to_sort, max_merged_block_size, limit);

View File

@ -133,7 +133,7 @@ private:
/** Takes stream already sorted by `x` and finishes sorting it by (`x`, `y`). /** Takes stream already sorted by `x` and finishes sorting it by (`x`, `y`).
* During sorting only blocks with rows equal by `x` saved in RAM. * During sorting only blocks with rows that equal by `x` saved in RAM.
* */ * */
class FinishMergeSortingBlockInputStream : public IProfilingBlockInputStream class FinishMergeSortingBlockInputStream : public IProfilingBlockInputStream
{ {

View File

@ -44,6 +44,27 @@ ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, c
} }
struct PartialSortingLess
{
const ColumnsWithSortDescriptions & columns;
explicit PartialSortingLess(const ColumnsWithSortDescriptions & columns_) : columns(columns_) {}
bool operator() (size_t a, size_t b) const
{
for (ColumnsWithSortDescriptions::const_iterator it = columns.begin(); it != columns.end(); ++it)
{
int res = it->second.direction * it->first->compareAt(a, b, *it->first, it->second.nulls_direction);
if (res < 0)
return true;
else if (res > 0)
return false;
}
return false;
}
};
struct PartialSortingLessWithCollation struct PartialSortingLessWithCollation
{ {
const ColumnsWithSortDescriptions & columns; const ColumnsWithSortDescriptions & columns;

View File

@ -31,26 +31,6 @@ bool isAlreadySorted(const Block & block, const SortDescription & description);
using ColumnsWithSortDescriptions = std::vector<std::pair<const IColumn *, SortColumnDescription>>; using ColumnsWithSortDescriptions = std::vector<std::pair<const IColumn *, SortColumnDescription>>;
struct PartialSortingLess
{
const ColumnsWithSortDescriptions & columns;
explicit PartialSortingLess(const ColumnsWithSortDescriptions & columns_) : columns(columns_) {}
bool operator() (size_t a, size_t b) const
{
for (ColumnsWithSortDescriptions::const_iterator it = columns.begin(); it != columns.end(); ++it)
{
int res = it->second.direction * it->first->compareAt(a, b, *it->first, it->second.nulls_direction);
if (res < 0)
return true;
else if (res > 0)
return false;
}
return false;
}
};
ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description); ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description);
} }