mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-17 13:13:36 +00:00
wip on finish sorting
This commit is contained in:
parent
dd9516a810
commit
8c2060b48a
@ -319,17 +319,28 @@ FinishMergeSortingBlockInputStream::FinishMergeSortingBlockInputStream(
|
|||||||
removeConstantsFromSortDescription(header, description_to_sort);
|
removeConstantsFromSortDescription(header, description_to_sort);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool equalKeysAt(const ColumnsWithSortDescriptions & lhs, const ColumnsWithSortDescriptions & rhs, size_t n, size_t m)
|
|
||||||
{
|
|
||||||
|
|
||||||
for (auto it = lhs.begin(), jt = rhs.begin(); it != lhs.end(); ++it, ++jt)
|
struct Less
|
||||||
|
{
|
||||||
|
const ColumnsWithSortDescriptions & left_columns;
|
||||||
|
const ColumnsWithSortDescriptions & right_columns;
|
||||||
|
|
||||||
|
Less(const ColumnsWithSortDescriptions & left_columns_, const ColumnsWithSortDescriptions & right_columns_) :
|
||||||
|
left_columns(left_columns_), right_columns(right_columns_) {}
|
||||||
|
|
||||||
|
bool operator() (size_t a, size_t b) const
|
||||||
{
|
{
|
||||||
int res = it->first->compareAt(n, m, *jt->first, it->second.nulls_direction);
|
for (auto it = left_columns.begin(), jt = right_columns.begin(); it != left_columns.end(); ++it, ++jt)
|
||||||
if (res != 0)
|
{
|
||||||
|
int res = it->second.direction * it->first->compareAt(a, b, *jt->first, it->second.nulls_direction);
|
||||||
|
if (res < 0)
|
||||||
|
return true;
|
||||||
|
else if (res > 0)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return false;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
Block FinishMergeSortingBlockInputStream::readImpl()
|
Block FinishMergeSortingBlockInputStream::readImpl()
|
||||||
{
|
{
|
||||||
@ -356,7 +367,7 @@ Block FinishMergeSortingBlockInputStream::readImpl()
|
|||||||
{
|
{
|
||||||
block = children.back()->read();
|
block = children.back()->read();
|
||||||
|
|
||||||
/// End of input stream, but we can`t returns immediatly, we need to merge already read blocks.
|
/// End of input stream, but we can't return immediately, we need to merge already read blocks.
|
||||||
/// Check it later, when get end of stream from impl.
|
/// Check it later, when get end of stream from impl.
|
||||||
if (!block)
|
if (!block)
|
||||||
{
|
{
|
||||||
@ -373,35 +384,36 @@ Block FinishMergeSortingBlockInputStream::readImpl()
|
|||||||
if (size == 0)
|
if (size == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
auto columns_with_sort_desc = getColumnsWithSortDescription(block, description_sorted);
|
|
||||||
|
|
||||||
removeConstantsFromBlock(block);
|
removeConstantsFromBlock(block);
|
||||||
|
|
||||||
/// May be new block starts with new key.
|
/// Find the position of last already read key in current block.
|
||||||
if (!blocks.empty())
|
if (!blocks.empty())
|
||||||
{
|
{
|
||||||
const Block & last_block = blocks.back();
|
const Block & last_block = blocks.back();
|
||||||
if (!equalKeysAt(getColumnsWithSortDescription(last_block, description_sorted), columns_with_sort_desc, last_block.rows() - 1, 0))
|
auto last_columns = getColumnsWithSortDescription(last_block, description_sorted);
|
||||||
break;
|
auto current_columns = getColumnsWithSortDescription(block, description_sorted);
|
||||||
}
|
|
||||||
|
Less less(last_columns, current_columns);
|
||||||
|
|
||||||
IColumn::Permutation perm(size);
|
IColumn::Permutation perm(size);
|
||||||
for (size_t i = 0; i < size; ++i)
|
for (size_t i = 0; i < size; ++i)
|
||||||
perm[i] = i;
|
perm[i] = i;
|
||||||
|
|
||||||
PartialSortingLess less(columns_with_sort_desc);
|
auto it = std::upper_bound(perm.begin(), perm.end(), last_block.rows() - 1, less);
|
||||||
|
if (it != perm.end())
|
||||||
/// We need to save tail of block, because next block may starts with the same key as in tail
|
{
|
||||||
/// and we should sort these rows in one chunk.
|
tail_pos = it - perm.begin();
|
||||||
tail_pos = *std::lower_bound(perm.begin(), perm.end(), size - 1, less);
|
|
||||||
|
|
||||||
if (tail_pos != 0)
|
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// If we reach here, that means that current block has all rows with the same key as tail of a previous block.
|
/// If we reach here, that means that current block is first in chunk
|
||||||
|
/// or it all consists of rows with the same key as tail of a previous block.
|
||||||
blocks.push_back(block);
|
blocks.push_back(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// We need to save tail of block, because next block may start with the same key as in tail
|
||||||
|
/// and we should sort these rows in one chunk.
|
||||||
if (block)
|
if (block)
|
||||||
{
|
{
|
||||||
Block head_block = block.cloneEmpty();
|
Block head_block = block.cloneEmpty();
|
||||||
@ -411,7 +423,7 @@ Block FinishMergeSortingBlockInputStream::readImpl()
|
|||||||
head_block.getByPosition(i).column = block.getByPosition(i).column->cut(0, tail_pos);
|
head_block.getByPosition(i).column = block.getByPosition(i).column->cut(0, tail_pos);
|
||||||
tail_block.getByPosition(i).column = block.getByPosition(i).column->cut(tail_pos, block.rows() - tail_pos);
|
tail_block.getByPosition(i).column = block.getByPosition(i).column->cut(tail_pos, block.rows() - tail_pos);
|
||||||
}
|
}
|
||||||
|
if (head_block.rows())
|
||||||
blocks.push_back(head_block);
|
blocks.push_back(head_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,7 +133,7 @@ private:
|
|||||||
|
|
||||||
|
|
||||||
/** Takes stream already sorted by `x` and finishes sorting it by (`x`, `y`).
|
/** Takes stream already sorted by `x` and finishes sorting it by (`x`, `y`).
|
||||||
* During sorting only blocks with rows equal by `x` saved in RAM.
|
* During sorting, only blocks with rows that are equal by `x` are kept in RAM.
|
||||||
* */
|
* */
|
||||||
class FinishMergeSortingBlockInputStream : public IProfilingBlockInputStream
|
class FinishMergeSortingBlockInputStream : public IProfilingBlockInputStream
|
||||||
{
|
{
|
||||||
|
@ -44,6 +44,27 @@ ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, c
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct PartialSortingLess
|
||||||
|
{
|
||||||
|
const ColumnsWithSortDescriptions & columns;
|
||||||
|
|
||||||
|
explicit PartialSortingLess(const ColumnsWithSortDescriptions & columns_) : columns(columns_) {}
|
||||||
|
|
||||||
|
bool operator() (size_t a, size_t b) const
|
||||||
|
{
|
||||||
|
for (ColumnsWithSortDescriptions::const_iterator it = columns.begin(); it != columns.end(); ++it)
|
||||||
|
{
|
||||||
|
int res = it->second.direction * it->first->compareAt(a, b, *it->first, it->second.nulls_direction);
|
||||||
|
if (res < 0)
|
||||||
|
return true;
|
||||||
|
else if (res > 0)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct PartialSortingLessWithCollation
|
struct PartialSortingLessWithCollation
|
||||||
{
|
{
|
||||||
const ColumnsWithSortDescriptions & columns;
|
const ColumnsWithSortDescriptions & columns;
|
||||||
|
@ -31,26 +31,6 @@ bool isAlreadySorted(const Block & block, const SortDescription & description);
|
|||||||
|
|
||||||
using ColumnsWithSortDescriptions = std::vector<std::pair<const IColumn *, SortColumnDescription>>;
|
using ColumnsWithSortDescriptions = std::vector<std::pair<const IColumn *, SortColumnDescription>>;
|
||||||
|
|
||||||
/// Comparator over row indices of a single set of columns.
/// Both indices refer to rows of the same columns (each column is compared with itself).
struct PartialSortingLess
|
|
||||||
{
|
|
||||||
/// Columns paired with their sort descriptions; held by reference, not copied.
const ColumnsWithSortDescriptions & columns;
|
|
||||||
|
|
||||||
explicit PartialSortingLess(const ColumnsWithSortDescriptions & columns_) : columns(columns_) {}
|
|
||||||
|
|
||||||
/// Returns true if row `a` goes strictly before row `b`.
bool operator() (size_t a, size_t b) const
|
|
||||||
{
|
|
||||||
/// Columns are examined in stored order; the first one whose rows differ decides the result.
for (ColumnsWithSortDescriptions::const_iterator it = columns.begin(); it != columns.end(); ++it)
|
|
||||||
{
|
|
||||||
/// Multiplying by `direction` flips the sign of the raw comparison.
int res = it->second.direction * it->first->compareAt(a, b, *it->first, it->second.nulls_direction);
|
|
||||||
if (res < 0)
|
|
||||||
return true;
|
|
||||||
else if (res > 0)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
/// Rows are equal in every column, so neither goes before the other.
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description);
|
ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user