dbms: development [#CONV-2944].

This commit is contained in:
Alexey Milovidov 2011-09-04 05:14:52 +00:00
parent ad3ecadcf9
commit 2215bc9626
19 changed files with 221 additions and 56 deletions

View File

@ -42,6 +42,8 @@ public:
void insert(size_t position, const ColumnWithNameAndType & elem);
/// вставить столбец в конец
void insert(const ColumnWithNameAndType & elem);
/// вставить столбец в конец, если столбца с таким именем ещё нет
void insertUnique(const ColumnWithNameAndType & elem);
/// удалить столбец в заданной позиции
void erase(size_t position);

View File

@ -2,6 +2,8 @@
#include <vector>
#include <DB/Core/Types.h>
namespace DB
{
@ -9,11 +11,15 @@ namespace DB
/// Описание правила сортировки по одному столбцу.
struct SortColumnDescription
{
size_t column_number; /// Номер столбца
String column_name; /// Имя столбца.
size_t column_number; /// Номер столбца (используется, если не задано имя).
int direction; /// 1 - по возрастанию, -1 - по убыванию.
SortColumnDescription(size_t column_number_, int direction_)
: column_number(column_number_), direction(direction_) {}
SortColumnDescription(String column_name_, int direction_)
: column_name(column_name_), column_number(0), direction(direction_) {}
};
/// Описание правила сортировки по нескольким столбцам.

View File

@ -18,8 +18,12 @@ using Poco::SharedPtr;
class ProjectionBlockInputStream : public IBlockInputStream
{
public:
ProjectionBlockInputStream(BlockInputStreamPtr input_, SharedPtr<Expression> expression_, unsigned part_id_ = 0)
: input(input_), expression(expression_), part_id(part_id_) {}
ProjectionBlockInputStream(
BlockInputStreamPtr input_,
SharedPtr<Expression> expression_,
bool without_duplicates_ = false,
unsigned part_id_ = 0)
: input(input_), expression(expression_), without_duplicates(without_duplicates_), part_id(part_id_) {}
Block read()
{
@ -27,12 +31,13 @@ public:
if (!res)
return res;
return expression->projectResult(res, part_id);
return expression->projectResult(res, without_duplicates, part_id);
}
private:
BlockInputStreamPtr input;
SharedPtr<Expression> expression;
bool without_duplicates;
unsigned part_id;
};

View File

@ -37,7 +37,7 @@ public:
/** Взять из блока с промежуточными результатами вычислений только столбцы, представляющие собой конечный результат.
* Вернуть новый блок, в котором эти столбцы расположены в правильном порядке.
*/
Block projectResult(Block & block, unsigned part_id = 0);
Block projectResult(Block & block, bool without_duplicates = false, unsigned part_id = 0);
/** Получить список типов столбцов результата.
*/
@ -49,7 +49,7 @@ private:
typedef std::set<String> NamesSet;
NamesSet required_columns;
/** Для узлов - звёздочек - раскрыть их в список всех столбцов.
* Для узлов - литералов - прописать их типы данных.
@ -79,7 +79,7 @@ private:
void executeImpl(ASTPtr ast, Block & block, unsigned part_id);
void collectFinalColumns(ASTPtr ast, Block & src, Block & dst, unsigned part_id);
void collectFinalColumns(ASTPtr ast, Block & src, Block & dst, bool without_duplicates, unsigned part_id);
void getReturnTypesImpl(ASTPtr ast, DataTypes & res);
};

View File

@ -29,10 +29,11 @@ private:
enum PartID
{
PART_OTHER = 0,
PART_SELECT = 1,
PART_WHERE = 2,
PART_HAVING = 3,
PART_OTHER = 1,
PART_SELECT = 2,
PART_WHERE = 4,
PART_HAVING = 8,
PART_ORDER = 16,
};

View File

@ -0,0 +1,26 @@
#pragma once
#include <DB/Parsers/IAST.h>
namespace DB
{
using Poco::SharedPtr;
/** An expression element that is followed by ASC or DESC.
  * The sorted expression itself is not stored here; only the direction is.
  */
class ASTOrderByElement : public IAST
{
public:
    int direction;  /// 1 for ASC, -1 for DESC

    /// Defaults to ascending so that `direction` never holds an indeterminate value.
    ASTOrderByElement() : direction(1) {}

    /// range_ is forwarded to the IAST base; direction_ is stored as given.
    ASTOrderByElement(StringRange range_, int direction_) : IAST(range_), direction(direction_) {}

    /** Get the text that identifies this element. */
    String getID() { return "OrderByElement"; }
};
}

View File

@ -1,5 +1,4 @@
#ifndef DBMS_PARSERS_EXPRESSIONELEMENTPARSERS_H
#define DBMS_PARSERS_EXPRESSIONELEMENTPARSERS_H
#pragma once
#include <DB/Parsers/IParserBase.h>
@ -97,6 +96,14 @@ protected:
};
}
/** An element of an ORDER BY expression: the same as an expression element,
* but it may additionally be followed by ASC[ENDING] | DESC[ENDING].
*/
class ParserOrderByElement : public IParserBase
{
protected:
/// Name of what this parser parses (presumably used in diagnostics - confirm against IParserBase).
String getName() { return "element of ORDER BY expression"; }
/// Parsing routine; only declared here, the definition lives in the corresponding source file.
bool parseImpl(Pos & pos, Pos end, ASTPtr & node, String & expected);
};
#endif
}

View File

@ -1,5 +1,4 @@
#ifndef DBMS_PARSERS_EXPRESSIONLISTPARSERS_H
#define DBMS_PARSERS_EXPRESSIONLISTPARSERS_H
#pragma once
#include <list>
@ -294,7 +293,12 @@ protected:
};
/** Parser for the expression list of an ORDER BY clause.
* Only the declaration is given here; parseImpl is defined in the corresponding source file.
*/
class ParserOrderByExpressionList : public IParserBase
{
protected:
/// Name of what this parser parses (presumably used in diagnostics - confirm against IParserBase).
String getName() { return "order by expression"; }
/// Parsing routine; declared only, defined out of line.
bool parseImpl(Pos & pos, Pos end, ASTPtr & node, String & expected);
};
}
#endif

View File

@ -1,5 +1,4 @@
#ifndef DBMS_PARSERS_FORMATAST_H
#define DBMS_PARSERS_FORMATAST_H
#pragma once
#include <ostream>
@ -12,6 +11,7 @@
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTNameTypePair.h>
#include <DB/Parsers/ASTAsterisk.h>
#include <DB/Parsers/ASTOrderByElement.h>
namespace DB
@ -29,8 +29,6 @@ void formatAST(const ASTIdentifier & ast, std::ostream & s);
void formatAST(const ASTLiteral & ast, std::ostream & s);
void formatAST(const ASTNameTypePair & ast, std::ostream & s);
void formatAST(const ASTAsterisk & ast, std::ostream & s);
void formatAST(const ASTOrderByElement & ast, std::ostream & s);
}
#endif

View File

@ -58,7 +58,14 @@ void Block::insert(const ColumnWithNameAndType & elem)
{
Container_t::iterator it = data.insert(data.end(), elem);
rebuildIndexByPosition();
index_by_name[elem.name] = it;
index_by_name[elem.name] = it;
}
/// Append the column to the end of the block, unless a column with the same name is already indexed.
void Block::insertUnique(const ColumnWithNameAndType & elem)
{
    /// The name is already present in the name index - leave the block untouched.
    if (index_by_name.find(elem.name) != index_by_name.end())
        return;

    insert(elem);
}

View File

@ -76,8 +76,12 @@ void MergeSortingBlockInputStream::merge(Block & left, Block & right)
for (size_t i = 0, size = description.size(); i < size; ++i)
{
left_sort_columns.push_back(&*left.getByPosition(description[i].column_number).column);
right_sort_columns.push_back(&*right.getByPosition(description[i].column_number).column);
size_t column_number = !description[i].column_name.empty()
? left.getPositionByName(description[i].column_name)
: description[i].column_number;
left_sort_columns.push_back(&*left.getByPosition(column_number).column);
right_sort_columns.push_back(&*right.getByPosition(column_number).column);
}
/// Объединяем.

View File

@ -12,7 +12,11 @@ struct PartialSortingLess
PartialSortingLess(const Block & block, const SortDescription & description)
{
for (size_t i = 0, size = description.size(); i < size; ++i)
columns.push_back(std::make_pair(&*block.getByPosition(description[i].column_number).column, description[i].direction));
columns.push_back(std::make_pair(
!description[i].column_name.empty()
? &*block.getByName(description[i].column_name).column
: &*block.getByPosition(description[i].column_number).column,
description[i].direction));
}
bool operator() (size_t a, size_t b) const

View File

@ -150,10 +150,10 @@ int main(int argc, char ** argv)
;
DB::SortDescription sort_columns;
// sort_columns.push_back(DB::SortColumnDescription(1, -1));
// sort_columns.push_back(DB::SortColumnDescription(2, 1));
sort_columns.push_back(DB::SortColumnDescription(1, -1));
sort_columns.push_back(DB::SortColumnDescription(2, 1));
sort_columns.push_back(DB::SortColumnDescription(0, 1));
// sort_columns.push_back(DB::SortColumnDescription(3, 1));
sort_columns.push_back(DB::SortColumnDescription(3, 1));
Poco::SharedPtr<DB::IBlockInputStream> in = table.read(column_names, 0, argc == 2 ? atoi(argv[1]) : 1048576);
Poco::SharedPtr<DB::ProfilingBlockInputStream> profiling1 = new DB::ProfilingBlockInputStream(in);

View File

@ -128,7 +128,7 @@ Names Expression::getRequiredColumns()
void Expression::setNotCalculated(ASTPtr ast, unsigned part_id)
{
if (ast->part_id == part_id)
if ((ast->part_id & part_id) || (ast->part_id == 0 && part_id == 0))
ast->calculated = false;
for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
setNotCalculated(*it, part_id);
@ -149,7 +149,7 @@ void Expression::executeImpl(ASTPtr ast, Block & block, unsigned part_id)
for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
executeImpl(*it, block, part_id);
if (ast->calculated || ast->part_id != part_id)
if (ast->calculated || !((ast->part_id & part_id) || (ast->part_id == 0 && part_id == 0)))
return;
/** Столбцы из таблицы уже загружены в блок.
@ -201,38 +201,42 @@ void Expression::executeImpl(ASTPtr ast, Block & block, unsigned part_id)
}
Block Expression::projectResult(Block & block, unsigned part_id)
Block Expression::projectResult(Block & block, bool without_duplicates, unsigned part_id)
{
Block res;
collectFinalColumns(ast, block, res, part_id);
collectFinalColumns(ast, block, res, without_duplicates, part_id);
return res;
}
void Expression::collectFinalColumns(ASTPtr ast, Block & src, Block & dst, unsigned part_id)
void Expression::collectFinalColumns(ASTPtr ast, Block & src, Block & dst, bool without_duplicates, unsigned part_id)
{
/// Обход в глубину, который не заходит внутрь функций.
if (ast->part_id != part_id)
if (!((ast->part_id & part_id) || (ast->part_id == 0 && part_id == 0)))
{
if (!dynamic_cast<ASTFunction *>(&*ast))
for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
collectFinalColumns(*it, src, dst, part_id);
collectFinalColumns(*it, src, dst, without_duplicates, part_id);
return;
}
if (ASTIdentifier * ident = dynamic_cast<ASTIdentifier *>(&*ast))
{
if (ident->kind == ASTIdentifier::Column)
dst.insert(src.getByName(ident->name));
without_duplicates ? dst.insertUnique(src.getByName(ident->name)) : dst.insert(src.getByName(ident->name));
}
else if (dynamic_cast<ASTLiteral *>(&*ast))
dst.insert(src.getByName(ast->getTreeID()));
without_duplicates ? dst.insertUnique(src.getByName(ast->getTreeID())) : dst.insert(src.getByName(ast->getTreeID()));
else if (ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast))
for (ColumnNumbers::const_iterator jt = func->return_column_numbers.begin(); jt != func->return_column_numbers.end(); ++jt)
dst.insert(src.getByPosition(*jt));
{
for (size_t i = 0, size = func->return_types.size(); i != size; ++i)
without_duplicates
? dst.insertUnique(src.getByName(ast->getTreeID() + "_" + Poco::NumberFormatter::format(i)))
: dst.insert(src.getByName(ast->getTreeID() + "_" + Poco::NumberFormatter::format(i)));
}
else
for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
collectFinalColumns(*it, src, dst, part_id);
collectFinalColumns(*it, src, dst, without_duplicates, part_id);
}

View File

@ -2,10 +2,14 @@
#include <DB/DataStreams/ProjectionBlockInputStream.h>
#include <DB/DataStreams/FilterBlockInputStream.h>
#include <DB/DataStreams/LimitBlockInputStream.h>
#include <DB/DataStreams/PartialSortingBlockInputStream.h>
#include <DB/DataStreams/MergeSortingBlockInputStream.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTOrderByElement.h>
#include <DB/Interpreters/Expression.h>
#include <DB/Interpreters/InterpreterSelectQuery.h>
@ -67,7 +71,7 @@ BlockInputStreamPtr InterpreterSelectQuery::execute()
ASTSelectQuery & query = dynamic_cast<ASTSelectQuery &>(*query_ptr);
StoragePtr table = getTable();
/// Какие столбцы читать из этой таблицы
context.columns = table->getColumns();
@ -78,7 +82,26 @@ BlockInputStreamPtr InterpreterSelectQuery::execute()
if (required_columns.empty())
required_columns.push_back(table->getColumns().begin()->first);
BlockInputStreamPtr stream = table->read(required_columns, query_ptr, max_block_size);
size_t limit_length = 0;
size_t limit_offset = 0;
if (query.limit_length)
{
limit_length = boost::get<UInt64>(dynamic_cast<ASTLiteral &>(*query.limit_length).value);
if (query.limit_offset)
limit_offset = boost::get<UInt64>(dynamic_cast<ASTLiteral &>(*query.limit_offset).value);
}
/** Оптимизация - если не указаны WHERE, GROUP, HAVING, ORDER, но указан LIMIT, и limit + offset < max_block_size,
* то в качестве размера блока будем использовать limit + offset (чтобы не читать из таблицы больше, чем запрошено).
*/
size_t block_size = max_block_size;
if (!query.where_expression && !query.group_expression_list && !query.having_expression && !query.order_expression_list
&& query.limit_length && limit_length + limit_offset < block_size)
{
block_size = limit_length + limit_offset;
}
BlockInputStreamPtr stream = table->read(required_columns, query_ptr, block_size);
/// Если есть условие WHERE - сначала выполним часть выражения, необходимую для его вычисления
if (query.where_expression)
@ -90,17 +113,41 @@ BlockInputStreamPtr InterpreterSelectQuery::execute()
/// Выполним оставшуюся часть выражения
setPartID(query.select_expression_list, PART_SELECT);
stream = new ExpressionBlockInputStream(stream, expression, PART_SELECT);
stream = new ProjectionBlockInputStream(stream, expression, PART_SELECT);
if (query.order_expression_list)
setPartID(query.order_expression_list, PART_ORDER);
stream = new ExpressionBlockInputStream(stream, expression, PART_SELECT | PART_ORDER);
stream = new ProjectionBlockInputStream(stream, expression, true, PART_SELECT | PART_ORDER);
/// Если есть ORDER BY
if (query.order_expression_list)
{
SortDescription order_descr;
order_descr.reserve(query.order_expression_list->children.size());
for (ASTs::iterator it = query.order_expression_list->children.begin();
it != query.order_expression_list->children.end();
++it)
{
ASTPtr elem = (*it)->children.front();
ASTIdentifier * id_elem = dynamic_cast<ASTIdentifier *>(&*elem);
ASTFunction * id_func = dynamic_cast<ASTFunction *>(&*elem);
String name = id_elem ? id_elem->name : elem->getTreeID();
if (id_func)
name += "_0";
order_descr.push_back(SortColumnDescription(name, dynamic_cast<ASTOrderByElement &>(**it).direction));
}
stream = new PartialSortingBlockInputStream(stream, order_descr);
stream = new MergeSortingBlockInputStream(stream, order_descr);
}
/// Удалим ненужные больше столбцы
stream = new ProjectionBlockInputStream(stream, expression, false, PART_SELECT);
/// Если есть LIMIT
if (query.limit_length)
{
size_t limit_length = boost::get<UInt64>(dynamic_cast<ASTLiteral &>(*query.limit_length).value);
size_t limit_offset = 0;
if (query.limit_offset)
limit_offset = boost::get<UInt64>(dynamic_cast<ASTLiteral &>(*query.limit_offset).value);
stream = new LimitBlockInputStream(stream, limit_length, limit_offset);
}
@ -110,7 +157,7 @@ BlockInputStreamPtr InterpreterSelectQuery::execute()
void InterpreterSelectQuery::setPartID(ASTPtr ast, unsigned part_id)
{
ast->part_id = part_id;
ast->part_id |= part_id;
for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
setPartID(*it, part_id);

View File

@ -7,6 +7,7 @@
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTAsterisk.h>
#include <DB/Parsers/ASTOrderByElement.h>
#include <DB/Parsers/CommonParsers.h>
#include <DB/Parsers/ExpressionListParsers.h>
@ -336,5 +337,34 @@ bool ParserExpressionElement::parseImpl(Pos & pos, Pos end, ASTPtr & node, Strin
}
/** Parse one ORDER BY element: an expression optionally followed by a direction keyword.
  * Returns false if the expression itself cannot be parsed.
  * On success, node becomes an ASTOrderByElement whose single child is the parsed expression
  * and whose direction is -1 when DESCENDING/DESC was consumed, 1 otherwise.
  */
bool ParserOrderByElement::parseImpl(Pos & pos, Pos end, ASTPtr & node, String & expected)
{
    Pos start = pos;

    ParserWhiteSpaceOrComments whitespace;
    ParserLogicalOrExpression expression_parser;
    ParserString kw_ascending("ASCENDING", true, true);
    ParserString kw_descending("DESCENDING", true, true);
    ParserString kw_asc("ASC", true, true);
    ParserString kw_desc("DESC", true, true);

    ASTPtr expression;
    if (!expression_parser.parse(pos, end, expression, expected))
        return false;

    whitespace.ignore(pos, end);

    /// The direction keyword is optional. The long spellings are attempted before the short ones.
    int direction = 1;
    if (kw_descending.ignore(pos, end) || kw_desc.ignore(pos, end))
        direction = -1;
    else if (!kw_ascending.ignore(pos, end))
        kw_asc.ignore(pos, end);

    node = new ASTOrderByElement(StringRange(start, pos), direction);
    node->children.push_back(expression);
    return true;
}
}

View File

@ -208,4 +208,10 @@ bool ParserNotEmptyExpressionList::parseImpl(Pos & pos, Pos end, ASTPtr & node,
}
/// Parse a list of ORDER BY elements separated by ",", delegating the work to ParserList.
bool ParserOrderByExpressionList::parseImpl(Pos & pos, Pos end, ASTPtr & node, String & expected)
{
    ParserList list_parser(new ParserOrderByElement, new ParserString(","));
    return list_parser.parse(pos, end, node, expected);
}
}

View File

@ -29,6 +29,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, String & ex
ParserString s_limit("LIMIT", true, true);
ParserNotEmptyExpressionList exp_list;
ParserLogicalOrExpression exp_elem;
ParserOrderByExpressionList order_list;
ws.ignore(pos, end);
@ -107,14 +108,14 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, String & ex
ws.ignore(pos, end);
}
/// ORDER BY expr list TODO ASC, DESC
/// ORDER BY expr ASC|DESC list
if (s_order.ignore(pos, end, expected))
{
ws.ignore(pos, end);
if (!s_by.ignore(pos, end, expected))
return false;
if (!exp_list.parse(pos, end, select_query->order_expression_list, expected))
if (!order_list.parse(pos, end, select_query->order_expression_list, expected))
return false;
ws.ignore(pos, end);

View File

@ -74,6 +74,13 @@ void formatAST(const IAST & ast, std::ostream & s)
return;
}
const ASTOrderByElement * order_by_elem = dynamic_cast<const ASTOrderByElement *>(&ast);
if (order_by_elem)
{
formatAST(*order_by_elem, s);
return;
}
throw DB::Exception("Unknown element in AST", ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
}
@ -179,5 +186,11 @@ void formatAST(const ASTAsterisk & ast, std::ostream & s)
s << "*";
}
/// Write an ORDER BY element: its first child, followed by " DESC" when direction is -1 and " ASC" otherwise.
void formatAST(const ASTOrderByElement & ast, std::ostream & s)
{
    formatAST(*ast.children.front(), s);

    if (ast.direction == -1)
        s << " DESC";
    else
        s << " ASC";
}
}