dbms: preparation [#METR-2944].

This commit is contained in:
Alexey Milovidov 2015-03-02 04:10:58 +03:00
parent 7276d5f0e9
commit 0828dba841
14 changed files with 173 additions and 129 deletions

View File

@ -1,7 +1,6 @@
#pragma once
#include <DB/Columns/IColumn.h>
#include <DB/Interpreters/Set.h>
namespace DB

View File

@ -185,6 +185,24 @@ public:
}
/** Insert key x if it is not in the table yet, but refuse to insert when the table is full.
  *
  * @param x         key to look up / insert.
  * @param it        receives the iterator built from the cell returned by findCell(x):
  *                  the existing cell if the key is present, the freshly constructed cell
  *                  after a successful insert, or the end position if the table is full.
  * @param inserted  set to true only if a new cell was actually constructed by this call.
  * @return false if the key is absent and there is no free slot left; true otherwise.
  */
bool ALWAYS_INLINE tryEmplace(Key x, iterator & it, bool & inserted)
{
    Cell * res = findCell(x);
    it = iteratorTo(res);

    /// The code treats "res points just past the last used cell" as "key not found".
    if (res != buf + m_size)
    {
        inserted = false;
        return true;
    }

    /// Key is absent, but every slot is already used: report failure and
    /// do not claim that anything was inserted.
    if (res == buf + capacity)
    {
        inserted = false;
        return false;
    }

    new(res) Cell(x, *this);
    ++m_size;
    inserted = true;
    return true;
}
/// Скопировать ячейку из другой хэш-таблицы. Предполагается, что такого ключа в таблице ещё не было.
void ALWAYS_INLINE insertUnique(const Cell * cell)
{
@ -192,6 +210,12 @@ public:
++m_size;
}
/// Construct a cell for key x in the first unused slot and grow the size by one.
/// No lookup and no capacity check are performed here.
/// NOTE(review): presumably the caller guarantees the key is absent and a free slot exists - confirm.
void ALWAYS_INLINE insertUnique(Key x)
{
    Cell * const slot = buf + m_size;
    new(slot) Cell(x, *this);
    ++m_size;
}
iterator ALWAYS_INLINE find(Key x) { return iteratorTo(findCell(x)); }
const_iterator ALWAYS_INLINE find(Key x) const { return iteratorTo(findCell(x)); }

View File

@ -2,8 +2,6 @@
#include <DB/DataStreams/IProfilingBlockInputStream.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>
#include <DB/Interpreters/Set.h>
#include <DB/Interpreters/Join.h>
namespace DB

View File

@ -5,7 +5,6 @@
#include <DB/Parsers/ASTJoin.h>
#include <DB/Interpreters/AggregationCommon.h>
#include <DB/Interpreters/Set.h>
#include <DB/Common/Arena.h>
#include <DB/Common/HashTable/HashMap.h>
@ -67,7 +66,7 @@ public:
{
}
bool empty() { return type == Set::EMPTY; }
bool empty() { return type == Type::EMPTY; }
/** Добавить в отображение для соединения блок "правой" таблицы.
* Возвращает false, если превышено какое-нибудь ограничение, и больше не нужно вставлять.
@ -155,7 +154,17 @@ private:
/// Дополнительные данные - строки, а также продолжения односвязных списков строк.
Arena pool;
Set::Type type = Set::EMPTY;
/// Which key-lookup structure the join uses; the value is picked by chooseMethod.
enum class Type
{
/// Nothing chosen yet: the initial value of `type`, and the state for which empty() returns true.
EMPTY,
/// Chosen by chooseMethod for a single numeric key column; backed by Maps::MapUInt64.
KEY_64,
/// Chosen by chooseMethod for a single string key column; backed by Maps::MapString.
KEY_STRING,
/// Fallback chosen by chooseMethod for every other key combination; backed by Maps::MapHashed.
HASHED,
};
Type type = Type::EMPTY;
static Type chooseMethod(const ConstColumnPlainPtrs & key_columns, bool & keys_fit_128_bits, Sizes & key_sizes);
bool keys_fit_128_bits;
Sizes key_sizes;
@ -174,7 +183,7 @@ private:
*/
mutable Poco::RWLock rwlock;
void init(Set::Type type_);
void init(Type type_);
template <ASTJoin::Strictness STRICTNESS, typename Maps>
void insertFromBlockImpl(Maps & maps, size_t rows, const ConstColumnPlainPtrs & key_columns, size_t keys_size, Block * stored_block);

View File

@ -2,32 +2,8 @@
#include <ostream>
#include <DB/Core/NamesAndTypes.h>
#include <DB/Parsers/IAST.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Parsers/ASTCreateQuery.h>
#include <DB/Parsers/ASTDropQuery.h>
#include <DB/Parsers/ASTInsertQuery.h>
#include <DB/Parsers/ASTRenameQuery.h>
#include <DB/Parsers/ASTShowTablesQuery.h>
#include <DB/Parsers/ASTUseQuery.h>
#include <DB/Parsers/ASTSetQuery.h>
#include <DB/Parsers/ASTOptimizeQuery.h>
#include <DB/Parsers/TablePropertiesQueriesASTs.h>
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTNameTypePair.h>
#include <DB/Parsers/ASTColumnDeclaration.h>
#include <DB/Parsers/ASTAsterisk.h>
#include <DB/Parsers/ASTOrderByElement.h>
#include <DB/Parsers/ASTSubquery.h>
#include <DB/Parsers/ASTAlterQuery.h>
#include <DB/Parsers/ASTShowProcesslistQuery.h>
#include <DB/Parsers/ASTSet.h>
#include <DB/Parsers/ASTJoin.h>
#include <DB/Parsers/ASTCheckQuery.h>
//#include <DB/Parsers/ASTMultiQuery.h>
namespace DB
@ -38,39 +14,6 @@ namespace DB
*/
void formatAST(const IAST & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTSelectQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTCreateQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTDropQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTInsertQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTRenameQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTShowTablesQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTUseQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTSetQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTOptimizeQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTExistsQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTDescribeQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTShowCreateQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTExpressionList & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTFunction & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTIdentifier & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTLiteral & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTNameTypePair & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTColumnDeclaration & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTAsterisk & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTOrderByElement & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTSubquery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTAlterQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTSet & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTJoin & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTCheckQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
//void formatAST(const ASTMultiQuery & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTQueryWithTableAndOutput & ast, std::string name, std::ostream & s,
size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
void formatAST(const ASTShowProcesslistQuery & ast, std::ostream & s,
size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false);
String formatColumnsForCreateQuery(NamesAndTypesList & columns);
String backQuoteIfNeed(const String & x);

View File

@ -1,3 +1,5 @@
#include <DB/Interpreters/Set.h>
#include <DB/Interpreters/Join.h>
#include <DB/DataStreams/CreatingSetsBlockInputStream.h>
#include <iomanip>

View File

@ -5,6 +5,7 @@
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTNameTypePair.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ParserCreateQuery.h>
#include <DB/IO/copyData.h>

View File

@ -8,6 +8,7 @@
#include <DB/Parsers/ASTSetQuery.h>
#include <DB/Parsers/ASTOptimizeQuery.h>
#include <DB/Parsers/ASTAlterQuery.h>
#include <DB/Parsers/ASTShowProcesslistQuery.h>
#include <DB/Parsers/TablePropertiesQueriesASTs.h>
#include <DB/Parsers/ASTCheckQuery.h>

View File

@ -8,6 +8,7 @@
#include <DB/IO/copyData.h>
#include <DB/Parsers/ASTRenameQuery.h>
#include <DB/Parsers/ASTCreateQuery.h>
#include <DB/Parsers/ParserCreateQuery.h>
#include <DB/Parsers/formatAST.h>

View File

@ -9,15 +9,53 @@ namespace DB
{
/** Decide which lookup structure to use for the given JOIN key columns.
  *
  * @param key_columns        the key columns of the block being inserted.
  * @param keys_fit_128_bits  out: true if every key column is fixed-size and their
  *                           field sizes sum to at most 16 bytes.
  * @param key_sizes          out: resized to the number of keys; filled with sizeOfField()
  *                           for each key up to (not including) the first non-fixed one.
  * @return KEY_64 for a single numeric key, KEY_STRING for a single string key,
  *         HASHED for everything else.
  */
Join::Type Join::chooseMethod(const ConstColumnPlainPtrs & key_columns, bool & keys_fit_128_bits, Sizes & key_sizes)
{
    const size_t num_keys = key_columns.size();

    keys_fit_128_bits = true;
    key_sizes.resize(num_keys);

    /// Accumulate the widths of the fixed-size keys; stop at the first key that is not fixed-size.
    size_t total_bytes = 0;
    for (size_t i = 0; i < num_keys && keys_fit_128_bits; ++i)
    {
        if (key_columns[i]->isFixed())
        {
            key_sizes[i] = key_columns[i]->sizeOfField();
            total_bytes += key_sizes[i];
        }
        else
            keys_fit_128_bits = false;
    }

    if (total_bytes > 16)
        keys_fit_128_bits = false;

    if (num_keys == 1)
    {
        /// A single numeric key that fits in 64 bits.
        if (key_columns[0]->isNumeric())
            return Type::KEY_64;

        /// A single string key: use a hash table keyed by the string itself.
        /// A fixed string only qualifies when it does not fit into 128 bits.
        const bool is_string = typeid_cast<const ColumnString *>(key_columns[0])
            || typeid_cast<const ColumnConstString *>(key_columns[0])
            || (typeid_cast<const ColumnFixedString *>(key_columns[0]) && !keys_fit_128_bits);

        if (is_string)
            return Type::KEY_STRING;
    }

    /// Several keys (or any other case): build the table on hashes of the keys.
    return Type::HASHED;
}
template <typename Maps>
static void initImpl(Maps & maps, Set::Type type)
static void initImpl(Maps & maps, Join::Type type)
{
switch (type)
{
case Set::EMPTY: break;
case Set::KEY_64: maps.key64 .reset(new typename Maps::MapUInt64); break;
case Set::KEY_STRING: maps.key_string .reset(new typename Maps::MapString); break;
case Set::HASHED: maps.hashed .reset(new typename Maps::MapHashed); break;
case Join::Type::EMPTY: break;
case Join::Type::KEY_64: maps.key64 .reset(new typename Maps::MapUInt64); break;
case Join::Type::KEY_STRING: maps.key_string .reset(new typename Maps::MapString); break;
case Join::Type::HASHED: maps.hashed .reset(new typename Maps::MapHashed); break;
default:
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
@ -51,7 +89,7 @@ static size_t getTotalByteCountImpl(const Maps & maps)
}
void Join::init(Set::Type type_)
void Join::init(Type type_)
{
type = type_;
@ -190,7 +228,7 @@ struct Inserter<ASTJoin::All, Join::MapsAll::MapString>
template <ASTJoin::Strictness STRICTNESS, typename Maps>
void Join::insertFromBlockImpl(Maps & maps, size_t rows, const ConstColumnPlainPtrs & key_columns, size_t keys_size, Block * stored_block)
{
if (type == Set::KEY_64)
if (type == Type::KEY_64)
{
typedef typename Maps::MapUInt64 Map;
Map & res = *maps.key64;
@ -204,7 +242,7 @@ void Join::insertFromBlockImpl(Maps & maps, size_t rows, const ConstColumnPlainP
Inserter<STRICTNESS, Map>::insert(res, key, stored_block, i, pool);
}
}
else if (type == Set::KEY_STRING)
else if (type == Type::KEY_STRING)
{
typedef typename Maps::MapString Map;
Map & res = *maps.key_string;
@ -239,7 +277,7 @@ void Join::insertFromBlockImpl(Maps & maps, size_t rows, const ConstColumnPlainP
else
throw Exception("Illegal type of column when creating join with string key: " + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
else if (type == Set::HASHED)
else if (type == Type::HASHED)
{
typedef typename Maps::MapHashed Map;
Map & res = *maps.hashed;
@ -274,7 +312,7 @@ bool Join::insertFromBlock(const Block & block)
/// Какую структуру данных для множества использовать?
if (empty())
init(Set::chooseMethod(key_columns, keys_fit_128_bits, key_sizes));
init(chooseMethod(key_columns, keys_fit_128_bits, key_sizes));
blocks.push_back(block);
Block * stored_block = &blocks.back();
@ -441,7 +479,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const
if (strictness == ASTJoin::All)
offsets_to_replicate.reset(new IColumn::Offsets_t(rows));
if (type == Set::KEY_64)
if (type == Type::KEY_64)
{
typedef typename Maps::MapUInt64 Map;
const Map & map = *maps.key64;
@ -455,7 +493,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const
Adder<KIND, STRICTNESS, Map>::add(map, key, num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get());
}
}
else if (type == Set::KEY_STRING)
else if (type == Type::KEY_STRING)
{
typedef typename Maps::MapString Map;
const Map & map = *maps.key_string;
@ -490,7 +528,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const
else
throw Exception("Illegal type of column when creating set with string key: " + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
else if (type == Set::HASHED)
else if (type == Type::HASHED)
{
typedef typename Maps::MapHashed Map;
Map & map = *maps.hashed;

View File

@ -3,6 +3,8 @@
#include <DB/Parsers/formatAST.h>
#include <DB/DataStreams/BlockIO.h>
#include <DB/Parsers/ASTInsertQuery.h>
#include <DB/Parsers/ASTShowProcesslistQuery.h>
#include <DB/Interpreters/executeQuery.h>

View File

@ -1,7 +1,5 @@
#include <sstream>
#include <boost/variant/static_visitor.hpp>
#include <mysqlxx/Manip.h>
#include <DB/IO/WriteBufferFromOStream.h>
@ -12,6 +10,32 @@
#include <DB/Core/ErrorCodes.h>
#include <DB/Core/NamesAndTypes.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Parsers/ASTCreateQuery.h>
#include <DB/Parsers/ASTDropQuery.h>
#include <DB/Parsers/ASTInsertQuery.h>
#include <DB/Parsers/ASTRenameQuery.h>
#include <DB/Parsers/ASTShowTablesQuery.h>
#include <DB/Parsers/ASTUseQuery.h>
#include <DB/Parsers/ASTSetQuery.h>
#include <DB/Parsers/ASTOptimizeQuery.h>
#include <DB/Parsers/TablePropertiesQueriesASTs.h>
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTNameTypePair.h>
#include <DB/Parsers/ASTColumnDeclaration.h>
#include <DB/Parsers/ASTAsterisk.h>
#include <DB/Parsers/ASTOrderByElement.h>
#include <DB/Parsers/ASTSubquery.h>
#include <DB/Parsers/ASTAlterQuery.h>
#include <DB/Parsers/ASTShowProcesslistQuery.h>
#include <DB/Parsers/ASTSet.h>
#include <DB/Parsers/ASTJoin.h>
#include <DB/Parsers/ASTCheckQuery.h>
//#include <DB/Parsers/ASTMultiQuery.h>
#include <DB/Parsers/formatAST.h>
@ -45,52 +69,6 @@ String hightlight(const String & keyword, const String & color_sequence, const b
}
/** Format a generic AST node by forwarding to the formatAST overload for its concrete type.
  *
  * The node is tested against each AST type listed below, in that order; the first
  * successful typeid_cast wins and all arguments are passed through unchanged.
  * If no listed type matches, throws Exception with ErrorCodes::UNKNOWN_ELEMENT_IN_AST;
  * the message contains ast.getID() and, when ast.range is set and non-empty,
  * the text that the range covers, in single quotes.
  */
void formatAST(const IAST & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)
{
/// Each DISPATCH(NAME) expands to one `else if` branch that tries AST<NAME>.
#define DISPATCH(NAME) \
else if (const AST ## NAME * concrete = typeid_cast<const AST ## NAME *>(&ast)) \
formatAST(*concrete, s, indent, hilite, one_line, need_parens);
/// Never-taken `if`, present only so that every DISPATCH can start with `else if`.
if (false) {}
DISPATCH(SelectQuery)
DISPATCH(InsertQuery)
DISPATCH(CreateQuery)
DISPATCH(DropQuery)
DISPATCH(RenameQuery)
DISPATCH(ShowTablesQuery)
DISPATCH(UseQuery)
DISPATCH(SetQuery)
DISPATCH(OptimizeQuery)
DISPATCH(ExistsQuery)
DISPATCH(ShowCreateQuery)
DISPATCH(DescribeQuery)
DISPATCH(ExpressionList)
DISPATCH(Function)
DISPATCH(Identifier)
DISPATCH(Literal)
DISPATCH(NameTypePair)
DISPATCH(ColumnDeclaration)
DISPATCH(Asterisk)
DISPATCH(OrderByElement)
DISPATCH(Subquery)
DISPATCH(AlterQuery)
DISPATCH(ShowProcesslistQuery)
DISPATCH(Set)
DISPATCH(Join)
DISPATCH(CheckQuery)
// DISPATCH(MultiQuery)
/// No listed type matched: report the node ID and, if available, its source text.
else
throw Exception("Unknown element in AST: " + ast.getID()
+ ((ast.range.first && (ast.range.second > ast.range.first))
? " '" + std::string(ast.range.first, ast.range.second - ast.range.first) + "'"
: ""),
ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
#undef DISPATCH
}
void formatAST(const ASTExpressionList & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)
{
for (ASTs::const_iterator it = ast.children.begin(); it != ast.children.end(); ++it)
@ -354,17 +332,17 @@ void formatAST(const ASTQueryWithTableAndOutput & ast, std::string name, std::os
void formatAST(const ASTExistsQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)
{
formatAST(static_cast<const ASTQueryWithTableAndOutput &>(ast), "EXISTS TABLE", s, indent, hilite, one_line);
formatAST(static_cast<const ASTQueryWithTableAndOutput &>(ast), "EXISTS TABLE", s, indent, hilite, one_line, false);
}
void formatAST(const ASTDescribeQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)
{
formatAST(static_cast<const ASTQueryWithTableAndOutput &>(ast), "DESCRIBE TABLE", s, indent, hilite, one_line);
formatAST(static_cast<const ASTQueryWithTableAndOutput &>(ast), "DESCRIBE TABLE", s, indent, hilite, one_line, false);
}
void formatAST(const ASTShowCreateQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)
{
formatAST(static_cast<const ASTQueryWithTableAndOutput &>(ast), "SHOW CREATE TABLE", s, indent, hilite, one_line);
formatAST(static_cast<const ASTQueryWithTableAndOutput &>(ast), "SHOW CREATE TABLE", s, indent, hilite, one_line, false);
}
void formatAST(const ASTRenameQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)
@ -888,6 +866,52 @@ void formatAST(const ASTMultiQuery & ast, std::ostream & s, size_t indent, bool
}*/
/** Format a generic AST node by forwarding to the formatAST overload for its concrete type.
  *
  * The node is tested against each AST type listed below, in that order; the first
  * successful typeid_cast wins and all arguments are passed through unchanged.
  * If no listed type matches, throws Exception with ErrorCodes::UNKNOWN_ELEMENT_IN_AST;
  * the message contains ast.getID() and, when ast.range is set and non-empty,
  * the text that the range covers, in single quotes.
  */
void formatAST(const IAST & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)
{
/// Each DISPATCH(NAME) expands to one `else if` branch that tries AST<NAME>.
#define DISPATCH(NAME) \
else if (const AST ## NAME * concrete = typeid_cast<const AST ## NAME *>(&ast)) \
formatAST(*concrete, s, indent, hilite, one_line, need_parens);
/// Never-taken `if`, present only so that every DISPATCH can start with `else if`.
if (false) {}
DISPATCH(SelectQuery)
DISPATCH(InsertQuery)
DISPATCH(CreateQuery)
DISPATCH(DropQuery)
DISPATCH(RenameQuery)
DISPATCH(ShowTablesQuery)
DISPATCH(UseQuery)
DISPATCH(SetQuery)
DISPATCH(OptimizeQuery)
DISPATCH(ExistsQuery)
DISPATCH(ShowCreateQuery)
DISPATCH(DescribeQuery)
DISPATCH(ExpressionList)
DISPATCH(Function)
DISPATCH(Identifier)
DISPATCH(Literal)
DISPATCH(NameTypePair)
DISPATCH(ColumnDeclaration)
DISPATCH(Asterisk)
DISPATCH(OrderByElement)
DISPATCH(Subquery)
DISPATCH(AlterQuery)
DISPATCH(ShowProcesslistQuery)
DISPATCH(Set)
DISPATCH(Join)
DISPATCH(CheckQuery)
// DISPATCH(MultiQuery)
/// No listed type matched: report the node ID and, if available, its source text.
else
throw Exception("Unknown element in AST: " + ast.getID()
+ ((ast.range.first && (ast.range.second > ast.range.first))
? " '" + std::string(ast.range.first, ast.range.second - ast.range.first) + "'"
: ""),
ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
#undef DISPATCH
}
String formatColumnsForCreateQuery(NamesAndTypesList & columns)
{
std::string res;

View File

@ -6,6 +6,7 @@
#include <DB/Storages/Distributed/DistributedBlockOutputStream.h>
#include <DB/Storages/Distributed/DirectoryMonitor.h>
#include <DB/Common/escapeForFileName.h>
#include <DB/Parsers/ASTInsertQuery.h>
#include <DB/Interpreters/InterpreterSelectQuery.h>
#include <DB/Interpreters/InterpreterAlterQuery.h>

View File

@ -8,6 +8,7 @@
#include <DB/IO/ReadBufferFromString.h>
#include <DB/Interpreters/InterpreterAlterQuery.h>
#include <DB/Common/VirtualColumnUtils.h>
#include <DB/Parsers/ASTInsertQuery.h>
#include <DB/DataStreams/AddingConstColumnBlockInputStream.h>
#include <time.h>