2011-09-04 01:42:14 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include <sstream>
#include <utility>
#include <vector>

#include <DB/Core/Types.h>
#include <DB/Core/Block.h>
#include <DB/Columns/IColumn.h>
#include <DB/Columns/ColumnString.h>
#include <DB/Common/Collator.h>
|
2011-09-04 05:14:52 +00:00
|
|
|
|
|
2011-09-04 01:42:14 +00:00
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
/// Описание правила сортировки по одному столбцу.
|
|
|
|
|
struct SortColumnDescription
|
|
|
|
|
{
|
2013-05-28 16:56:05 +00:00
|
|
|
|
String column_name; /// Имя столбца.
|
|
|
|
|
size_t column_number; /// Номер столбца (используется, если не задано имя).
|
|
|
|
|
int direction; /// 1 - по возрастанию, -1 - по убыванию.
|
2016-05-28 14:14:18 +00:00
|
|
|
|
std::shared_ptr<Collator> collator; /// Collator для locale-specific сортировки строк
|
2011-09-04 01:42:14 +00:00
|
|
|
|
|
2016-05-28 14:14:18 +00:00
|
|
|
|
SortColumnDescription(size_t column_number_, int direction_, const std::shared_ptr<Collator> & collator_ = nullptr)
|
2013-05-28 16:56:05 +00:00
|
|
|
|
: column_number(column_number_), direction(direction_), collator(collator_) {}
|
2011-09-04 05:14:52 +00:00
|
|
|
|
|
2016-05-28 14:14:18 +00:00
|
|
|
|
SortColumnDescription(String column_name_, int direction_, const std::shared_ptr<Collator> & collator_ = nullptr)
|
2013-05-28 16:56:05 +00:00
|
|
|
|
: column_name(column_name_), column_number(0), direction(direction_), collator(collator_) {}
|
2013-05-03 10:20:53 +00:00
|
|
|
|
|
|
|
|
|
/// Для IBlockInputStream.
|
|
|
|
|
String getID() const
|
|
|
|
|
{
|
|
|
|
|
std::stringstream res;
|
|
|
|
|
res << column_name << ", " << column_number << ", " << direction;
|
2016-05-28 14:14:18 +00:00
|
|
|
|
if (collator)
|
2013-05-28 16:56:05 +00:00
|
|
|
|
res << ", collation locale: " << collator->getLocale();
|
2013-05-03 10:20:53 +00:00
|
|
|
|
return res.str();
|
|
|
|
|
}
|
2011-09-04 01:42:14 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/// Description of the sorting rule for several columns.
|
2016-05-28 10:35:44 +00:00
|
|
|
|
/// Entries are listed in priority order: the sort cursors in this file compare by the first entry
/// and consult the next one only when the previous comparison was a tie.
using SortDescription = std::vector<SortColumnDescription>;
|
2011-09-04 01:42:14 +00:00
|
|
|
|
|
2012-07-25 19:53:43 +00:00
|
|
|
|
|
|
|
|
|
/** Курсор, позволяющий сравнивать соответствующие строки в разных блоках.
|
2012-07-27 20:19:15 +00:00
|
|
|
|
* Курсор двигается по одному блоку.
|
2012-07-25 19:53:43 +00:00
|
|
|
|
* Для использования в priority queue.
|
|
|
|
|
*/
|
2012-07-27 20:19:15 +00:00
|
|
|
|
struct SortCursorImpl
|
2012-07-25 19:53:43 +00:00
|
|
|
|
{
|
2012-07-27 20:19:15 +00:00
|
|
|
|
ConstColumnPlainPtrs all_columns;
|
|
|
|
|
ConstColumnPlainPtrs sort_columns;
|
|
|
|
|
SortDescription desc;
|
2015-01-07 15:30:11 +00:00
|
|
|
|
size_t sort_columns_size = 0;
|
|
|
|
|
size_t pos = 0;
|
|
|
|
|
size_t rows = 0;
|
2012-08-14 20:33:37 +00:00
|
|
|
|
|
|
|
|
|
/** Порядок (что сравнивается), если сравниваемые столбцы равны.
|
|
|
|
|
* Даёт возможность предпочитать строки из нужного курсора.
|
|
|
|
|
*/
|
|
|
|
|
size_t order;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2016-05-28 10:35:44 +00:00
|
|
|
|
using NeedCollationFlags = std::vector<UInt8>;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-05-28 16:56:05 +00:00
|
|
|
|
/** Нужно ли использовать Collator для сортировки столбца */
|
|
|
|
|
NeedCollationFlags need_collation;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-05-28 16:56:05 +00:00
|
|
|
|
/** Есть ли хотя бы один столбец с Collator. */
|
2015-01-07 15:30:11 +00:00
|
|
|
|
bool has_collation = false;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2015-01-07 15:30:11 +00:00
|
|
|
|
SortCursorImpl() {}
|
2012-07-25 19:53:43 +00:00
|
|
|
|
|
2012-08-14 20:33:37 +00:00
|
|
|
|
SortCursorImpl(const Block & block, const SortDescription & desc_, size_t order_ = 0)
|
2015-01-07 15:30:11 +00:00
|
|
|
|
: desc(desc_), sort_columns_size(desc.size()), order(order_), need_collation(desc.size())
|
2012-07-25 19:53:43 +00:00
|
|
|
|
{
|
2012-07-27 20:19:15 +00:00
|
|
|
|
reset(block);
|
|
|
|
|
}
|
|
|
|
|
|
2013-09-15 03:14:29 +00:00
|
|
|
|
bool empty() const { return rows == 0; }
|
|
|
|
|
|
2012-07-27 20:19:15 +00:00
|
|
|
|
/// Установить курсор в начало нового блока.
|
|
|
|
|
void reset(const Block & block)
|
|
|
|
|
{
|
|
|
|
|
all_columns.clear();
|
|
|
|
|
sort_columns.clear();
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2012-07-27 20:19:15 +00:00
|
|
|
|
size_t num_columns = block.columns();
|
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < num_columns; ++j)
|
|
|
|
|
all_columns.push_back(&*block.getByPosition(j).column);
|
|
|
|
|
|
|
|
|
|
for (size_t j = 0, size = desc.size(); j < size; ++j)
|
|
|
|
|
{
|
|
|
|
|
size_t column_number = !desc[j].column_name.empty()
|
|
|
|
|
? block.getPositionByName(desc[j].column_name)
|
|
|
|
|
: desc[j].column_number;
|
|
|
|
|
|
|
|
|
|
sort_columns.push_back(&*block.getByPosition(column_number).column);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2016-05-28 14:14:18 +00:00
|
|
|
|
need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->getName() == "ColumnString";
|
2013-05-28 16:56:05 +00:00
|
|
|
|
has_collation |= need_collation[j];
|
2012-07-27 20:19:15 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pos = 0;
|
|
|
|
|
rows = all_columns[0]->size();
|
2012-07-25 19:53:43 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-29 13:18:12 +00:00
|
|
|
|
bool isFirst() const { return pos == 0; }
|
2012-07-27 20:19:15 +00:00
|
|
|
|
bool isLast() const { return pos + 1 >= rows; }
|
|
|
|
|
void next() { ++pos; }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Для лёгкости копирования.
|
|
|
|
|
struct SortCursor
|
|
|
|
|
{
|
|
|
|
|
SortCursorImpl * impl;
|
|
|
|
|
|
|
|
|
|
SortCursor(SortCursorImpl * impl_) : impl(impl_) {}
|
|
|
|
|
SortCursorImpl * operator-> () { return impl; }
|
|
|
|
|
const SortCursorImpl * operator-> () const { return impl; }
|
|
|
|
|
|
2015-11-29 13:18:12 +00:00
|
|
|
|
/// Указанная строка данного курсора больше указанной строки другого курсора.
|
|
|
|
|
bool greaterAt(const SortCursor & rhs, size_t lhs_pos, size_t rhs_pos) const
|
2012-07-25 19:53:43 +00:00
|
|
|
|
{
|
2012-07-27 20:19:15 +00:00
|
|
|
|
for (size_t i = 0; i < impl->sort_columns_size; ++i)
|
2012-07-25 19:53:43 +00:00
|
|
|
|
{
|
2013-11-01 20:10:43 +00:00
|
|
|
|
int direction = impl->desc[i].direction;
|
2015-11-29 13:18:12 +00:00
|
|
|
|
int res = direction * impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), direction);
|
2012-07-25 19:53:43 +00:00
|
|
|
|
if (res > 0)
|
|
|
|
|
return true;
|
|
|
|
|
if (res < 0)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2012-08-14 20:33:37 +00:00
|
|
|
|
return impl->order > rhs.impl->order;
|
2012-07-25 19:53:43 +00:00
|
|
|
|
}
|
2015-11-29 13:18:12 +00:00
|
|
|
|
|
|
|
|
|
/// Проверяет, что все строки в текущем блоке данного курсора меньше или равны, чем все строки текущего блока другого курсора.
|
|
|
|
|
bool totallyLessOrEquals(const SortCursor & rhs) const
|
|
|
|
|
{
|
|
|
|
|
if (impl->rows == 0 || rhs.impl->rows == 0)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/// Последняя строка данного курсора не больше первой строки другого.
|
|
|
|
|
return !greaterAt(rhs, impl->rows - 1, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool greater(const SortCursor & rhs) const
|
|
|
|
|
{
|
|
|
|
|
return greaterAt(rhs, impl->pos, rhs.impl->pos);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Инвертировано, чтобы из priority queue элементы вынимались в порядке по возрастанию.
|
|
|
|
|
bool operator< (const SortCursor & rhs) const
|
|
|
|
|
{
|
|
|
|
|
return greater(rhs);
|
|
|
|
|
}
|
2012-07-25 19:53:43 +00:00
|
|
|
|
};
|
|
|
|
|
|
2013-05-28 16:56:05 +00:00
|
|
|
|
|
|
|
|
|
/// Отдельный компаратор для locale-sensitive сравнения строк
|
|
|
|
|
struct SortCursorWithCollation
|
|
|
|
|
{
|
|
|
|
|
SortCursorImpl * impl;
|
|
|
|
|
|
|
|
|
|
SortCursorWithCollation(SortCursorImpl * impl_) : impl(impl_) {}
|
|
|
|
|
SortCursorImpl * operator-> () { return impl; }
|
|
|
|
|
const SortCursorImpl * operator-> () const { return impl; }
|
|
|
|
|
|
2015-11-29 13:18:12 +00:00
|
|
|
|
bool greaterAt(const SortCursorWithCollation & rhs, size_t lhs_pos, size_t rhs_pos) const
|
2013-05-28 16:56:05 +00:00
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < impl->sort_columns_size; ++i)
|
|
|
|
|
{
|
2013-11-01 20:10:43 +00:00
|
|
|
|
int direction = impl->desc[i].direction;
|
2013-05-28 16:56:05 +00:00
|
|
|
|
int res;
|
|
|
|
|
if (impl->need_collation[i])
|
|
|
|
|
{
|
2014-06-26 00:58:14 +00:00
|
|
|
|
const ColumnString & column_string = typeid_cast<const ColumnString &>(*impl->sort_columns[i]);
|
2015-11-29 13:18:12 +00:00
|
|
|
|
res = column_string.compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), *impl->desc[i].collator);
|
2013-05-28 16:56:05 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
2015-11-29 13:18:12 +00:00
|
|
|
|
res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), direction);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-11-01 20:10:43 +00:00
|
|
|
|
res *= direction;
|
2013-05-28 16:56:05 +00:00
|
|
|
|
if (res > 0)
|
|
|
|
|
return true;
|
|
|
|
|
if (res < 0)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return impl->order > rhs.impl->order;
|
|
|
|
|
}
|
2015-11-29 13:18:12 +00:00
|
|
|
|
|
|
|
|
|
bool totallyLessOrEquals(const SortCursorWithCollation & rhs) const
|
|
|
|
|
{
|
|
|
|
|
if (impl->rows == 0 || rhs.impl->rows == 0)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/// Последняя строка данного курсора не больше первой строки другого.
|
|
|
|
|
return !greaterAt(rhs, impl->rows - 1, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool greater(const SortCursorWithCollation & rhs) const
|
|
|
|
|
{
|
|
|
|
|
return greaterAt(rhs, impl->pos, rhs.impl->pos);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool operator< (const SortCursorWithCollation & rhs) const
|
|
|
|
|
{
|
|
|
|
|
return greater(rhs);
|
|
|
|
|
}
|
2013-05-28 16:56:05 +00:00
|
|
|
|
};
|
|
|
|
|
|
2011-09-04 01:42:14 +00:00
|
|
|
|
}
|
|
|
|
|
|