mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
dbms: JOINs: development [#METR-11370].
This commit is contained in:
parent
1ba07d65ad
commit
bfff8ab715
@ -21,6 +21,8 @@ typedef std::vector<NameWithAlias> NamesWithAliases;
|
|||||||
typedef std::unordered_set<String> NameSet;
|
typedef std::unordered_set<String> NameSet;
|
||||||
typedef std::unordered_map<String, String> NameToNameMap;
|
typedef std::unordered_map<String, String> NameToNameMap;
|
||||||
|
|
||||||
|
class Join;
|
||||||
|
|
||||||
|
|
||||||
/** Действие над блоком.
|
/** Действие над блоком.
|
||||||
*/
|
*/
|
||||||
@ -67,7 +69,8 @@ public:
|
|||||||
NameSet array_joined_columns;
|
NameSet array_joined_columns;
|
||||||
|
|
||||||
/// Для JOIN
|
/// Для JOIN
|
||||||
//JoinPtr join;
|
Join * join = nullptr;
|
||||||
|
NamesAndTypesList columns_added_by_join;
|
||||||
|
|
||||||
/// Для PROJECT.
|
/// Для PROJECT.
|
||||||
NamesWithAliases projection;
|
NamesWithAliases projection;
|
||||||
@ -130,6 +133,15 @@ public:
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ExpressionAction ordinaryJoin(Join * join_, const NamesAndTypesList & columns_added_by_join_)
|
||||||
|
{
|
||||||
|
ExpressionAction a;
|
||||||
|
a.type = JOIN;
|
||||||
|
a.join = join_;
|
||||||
|
a.columns_added_by_join = columns_added_by_join_;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
/// Какие столбцы нужны, чтобы выполнить это действие.
|
/// Какие столбцы нужны, чтобы выполнить это действие.
|
||||||
/// Если этот Action еще не добавлен в ExpressionActions, возвращаемый список может быть неполным, потому что не учтены prerequisites.
|
/// Если этот Action еще не добавлен в ExpressionActions, возвращаемый список может быть неполным, потому что не учтены prerequisites.
|
||||||
Names getNeededColumns() const;
|
Names getNeededColumns() const;
|
||||||
|
@ -71,6 +71,7 @@ public:
|
|||||||
|
|
||||||
/// До агрегации:
|
/// До агрегации:
|
||||||
bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types);
|
bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types);
|
||||||
|
bool appendJoin(ExpressionActionsChain & chain, bool only_types);
|
||||||
bool appendWhere(ExpressionActionsChain & chain, bool only_types);
|
bool appendWhere(ExpressionActionsChain & chain, bool only_types);
|
||||||
bool appendGroupBy(ExpressionActionsChain & chain, bool only_types);
|
bool appendGroupBy(ExpressionActionsChain & chain, bool only_types);
|
||||||
void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
|
void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
|
||||||
@ -136,6 +137,7 @@ private:
|
|||||||
|
|
||||||
std::unordered_map<String, SetPtr> sets_with_subqueries;
|
std::unordered_map<String, SetPtr> sets_with_subqueries;
|
||||||
Joins joins;
|
Joins joins;
|
||||||
|
NamesAndTypesList columns_added_by_join;
|
||||||
|
|
||||||
typedef std::unordered_map<String, ASTPtr> Aliases;
|
typedef std::unordered_map<String, ASTPtr> Aliases;
|
||||||
Aliases aliases;
|
Aliases aliases;
|
||||||
@ -194,6 +196,8 @@ private:
|
|||||||
void getArrayJoinedColumnsImpl(ASTPtr ast);
|
void getArrayJoinedColumnsImpl(ASTPtr ast);
|
||||||
void addMultipleArrayJoinAction(ExpressionActions & actions);
|
void addMultipleArrayJoinAction(ExpressionActions & actions);
|
||||||
|
|
||||||
|
void addJoinAction(ExpressionActions & actions);
|
||||||
|
|
||||||
struct ScopeStack;
|
struct ScopeStack;
|
||||||
void getActionsImpl(ASTPtr ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack);
|
void getActionsImpl(ASTPtr ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack);
|
||||||
|
|
||||||
@ -210,6 +214,7 @@ private:
|
|||||||
/// Получить таблицу, из которой идет запрос
|
/// Получить таблицу, из которой идет запрос
|
||||||
StoragePtr getTable();
|
StoragePtr getTable();
|
||||||
|
|
||||||
|
/// columns - столбцы, присутствующие до начала преобразований.
|
||||||
void initChain(ExpressionActionsChain & chain, NamesAndTypesList & columns);
|
void initChain(ExpressionActionsChain & chain, NamesAndTypesList & columns);
|
||||||
|
|
||||||
void assertSelect();
|
void assertSelect();
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include <DB/Common/ProfileEvents.h>
|
#include <DB/Common/ProfileEvents.h>
|
||||||
#include <DB/Interpreters/ExpressionActions.h>
|
#include <DB/Interpreters/ExpressionActions.h>
|
||||||
|
#include <DB/Interpreters/Join.h>
|
||||||
#include <DB/Columns/ColumnsNumber.h>
|
#include <DB/Columns/ColumnsNumber.h>
|
||||||
#include <DB/Columns/ColumnArray.h>
|
#include <DB/Columns/ColumnArray.h>
|
||||||
#include <DB/DataTypes/DataTypeNested.h>
|
#include <DB/DataTypes/DataTypeNested.h>
|
||||||
@ -28,8 +29,8 @@ Names ExpressionAction::getNeededColumns() const
|
|||||||
}
|
}
|
||||||
|
|
||||||
ExpressionAction ExpressionAction::applyFunction(FunctionPtr function_,
|
ExpressionAction ExpressionAction::applyFunction(FunctionPtr function_,
|
||||||
const std::vector<std::string> & argument_names_,
|
const std::vector<std::string> & argument_names_,
|
||||||
std::string result_name_)
|
std::string result_name_)
|
||||||
{
|
{
|
||||||
if (result_name_ == "")
|
if (result_name_ == "")
|
||||||
{
|
{
|
||||||
@ -82,6 +83,8 @@ ExpressionActions::Actions ExpressionAction::getPrerequisites(Block & sample_blo
|
|||||||
|
|
||||||
void ExpressionAction::prepare(Block & sample_block)
|
void ExpressionAction::prepare(Block & sample_block)
|
||||||
{
|
{
|
||||||
|
//std::cerr << "preparing: " << toString() << std::endl;
|
||||||
|
|
||||||
if (type == APPLY_FUNCTION)
|
if (type == APPLY_FUNCTION)
|
||||||
{
|
{
|
||||||
if (sample_block.has(result_name))
|
if (sample_block.has(result_name))
|
||||||
@ -144,6 +147,13 @@ void ExpressionAction::prepare(Block & sample_block)
|
|||||||
current.column = nullptr;
|
current.column = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (type == JOIN)
|
||||||
|
{
|
||||||
|
for (const auto & col : columns_added_by_join)
|
||||||
|
sample_block.insert(ColumnWithNameAndType(col.second->createColumn(), col.second, col.first));
|
||||||
|
|
||||||
|
std::cerr << sample_block.dumpNames() << std::endl;
|
||||||
|
}
|
||||||
else if (type == ADD_COLUMN)
|
else if (type == ADD_COLUMN)
|
||||||
{
|
{
|
||||||
if (sample_block.has(result_name))
|
if (sample_block.has(result_name))
|
||||||
@ -162,6 +172,8 @@ void ExpressionAction::prepare(Block & sample_block)
|
|||||||
|
|
||||||
void ExpressionAction::execute(Block & block) const
|
void ExpressionAction::execute(Block & block) const
|
||||||
{
|
{
|
||||||
|
//std::cerr << "executing: " << toString() << std::endl;
|
||||||
|
|
||||||
if (type == REMOVE_COLUMN || type == COPY_COLUMN)
|
if (type == REMOVE_COLUMN || type == COPY_COLUMN)
|
||||||
if (!block.has(source_name))
|
if (!block.has(source_name))
|
||||||
throw Exception("Not found column '" + source_name + "'. There are columns: " + block.dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
throw Exception("Not found column '" + source_name + "'. There are columns: " + block.dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||||
@ -242,10 +254,20 @@ void ExpressionAction::execute(Block & block) const
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case JOIN:
|
||||||
|
{
|
||||||
|
/// TODO Другие виды JOIN-ов.
|
||||||
|
join->anyLeftJoinBlock(block);
|
||||||
|
std::cerr << block.dumpStructure() << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case PROJECT:
|
case PROJECT:
|
||||||
{
|
{
|
||||||
Block new_block;
|
Block new_block;
|
||||||
|
|
||||||
|
//std::cerr << block.dumpNames() << std::endl;
|
||||||
|
|
||||||
for (size_t i = 0; i < projection.size(); ++i)
|
for (size_t i = 0; i < projection.size(); ++i)
|
||||||
{
|
{
|
||||||
const std::string & name = projection[i].first;
|
const std::string & name = projection[i].first;
|
||||||
@ -284,24 +306,28 @@ std::string ExpressionAction::toString() const
|
|||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case ADD_COLUMN:
|
case ADD_COLUMN:
|
||||||
ss << "+" << result_name << "(" << result_type->getName() << ")" << "[" << added_column->getName() << "]";
|
ss << "ADD " << result_name << " " << result_type->getName() << " " << added_column->getName();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case REMOVE_COLUMN:
|
case REMOVE_COLUMN:
|
||||||
ss << "-" << source_name;
|
ss << "REMOVE " << source_name;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case COPY_COLUMN:
|
case COPY_COLUMN:
|
||||||
ss << result_name << "(" << result_type->getName() << ")" << "=" << source_name;
|
ss << "COPY " << result_name << " " << result_type->getName() << " = " << source_name;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case APPLY_FUNCTION:
|
case APPLY_FUNCTION:
|
||||||
ss << result_name << "(" << result_type->getName() << ")" << "= " << function->getName() << " ( ";
|
ss << "FUNCTION " << result_name << " " << result_type->getName() << " = " << function->getName() << "(";
|
||||||
for (size_t i = 0; i < argument_names.size(); ++i)
|
for (size_t i = 0; i < argument_names.size(); ++i)
|
||||||
{
|
{
|
||||||
if (i)
|
if (i)
|
||||||
ss << " , ";
|
ss << ", ";
|
||||||
ss << argument_names[i];
|
ss << argument_names[i];
|
||||||
}
|
}
|
||||||
ss << " )";
|
ss << ")";
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ARRAY_JOIN:
|
case ARRAY_JOIN:
|
||||||
ss << "ARRAY JOIN ";
|
ss << "ARRAY JOIN ";
|
||||||
for (NameSet::const_iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
|
for (NameSet::const_iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
|
||||||
@ -311,18 +337,29 @@ std::string ExpressionAction::toString() const
|
|||||||
ss << *it;
|
ss << *it;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case JOIN:
|
||||||
|
ss << "JOIN ";
|
||||||
|
for (NamesAndTypesList::const_iterator it = columns_added_by_join.begin(); it != columns_added_by_join.end(); ++it)
|
||||||
|
{
|
||||||
|
if (it != columns_added_by_join.begin())
|
||||||
|
ss << ", ";
|
||||||
|
ss << it->first;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case PROJECT:
|
case PROJECT:
|
||||||
ss << "{";
|
ss << "PROJECT ";
|
||||||
for (size_t i = 0; i < projection.size(); ++i)
|
for (size_t i = 0; i < projection.size(); ++i)
|
||||||
{
|
{
|
||||||
if (i)
|
if (i)
|
||||||
ss << ", ";
|
ss << ", ";
|
||||||
ss << projection[i].first;
|
ss << projection[i].first;
|
||||||
if (projection[i].second != "" && projection[i].second != projection[i].first)
|
if (projection[i].second != "" && projection[i].second != projection[i].first)
|
||||||
ss << "=>" << projection[i].second;
|
ss << " AS " << projection[i].second;
|
||||||
}
|
}
|
||||||
ss << "}";
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
throw Exception("Unexpected Action type", ErrorCodes::LOGICAL_ERROR);
|
throw Exception("Unexpected Action type", ErrorCodes::LOGICAL_ERROR);
|
||||||
}
|
}
|
||||||
@ -628,6 +665,8 @@ std::string ExpressionActions::getID() const
|
|||||||
}
|
}
|
||||||
ss << "}";
|
ss << "}";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// TODO JOIN
|
||||||
}
|
}
|
||||||
|
|
||||||
ss << ": {";
|
ss << ": {";
|
||||||
|
@ -101,6 +101,12 @@ void ExpressionAnalyzer::init()
|
|||||||
addMultipleArrayJoinAction(temp_actions);
|
addMultipleArrayJoinAction(temp_actions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (select_query && select_query->join)
|
||||||
|
{
|
||||||
|
getRootActionsImpl(dynamic_cast<ASTJoin &>(*select_query->join).using_expr_list, true, false, temp_actions);
|
||||||
|
addJoinAction(temp_actions);
|
||||||
|
}
|
||||||
|
|
||||||
getAggregatesImpl(ast, temp_actions);
|
getAggregatesImpl(ast, temp_actions);
|
||||||
|
|
||||||
if (has_aggregation)
|
if (has_aggregation)
|
||||||
@ -1229,6 +1235,28 @@ bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool on
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ExpressionAnalyzer::addJoinAction(ExpressionActions & actions)
|
||||||
|
{
|
||||||
|
actions.add(ExpressionAction::ordinaryJoin(joins[0], columns_added_by_join));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
|
||||||
|
{
|
||||||
|
assertSelect();
|
||||||
|
|
||||||
|
if (!select_query->join)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
initChain(chain, columns);
|
||||||
|
ExpressionActionsChain::Step & step = chain.steps.back();
|
||||||
|
|
||||||
|
getRootActionsImpl(dynamic_cast<ASTJoin &>(*select_query->join).using_expr_list, only_types, false, *step.actions);
|
||||||
|
|
||||||
|
addJoinAction(*step.actions);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types)
|
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types)
|
||||||
{
|
{
|
||||||
assertSelect();
|
assertSelect();
|
||||||
@ -1576,6 +1604,8 @@ void ExpressionAnalyzer::findJoins(NameSet & required_columns)
|
|||||||
String name = join_keys_expr_list.children[i]->getColumnName();
|
String name = join_keys_expr_list.children[i]->getColumnName();
|
||||||
join_key_names[i] = name;
|
join_key_names[i] = name;
|
||||||
|
|
||||||
|
std::cerr << "USING " << name << std::endl;
|
||||||
|
|
||||||
if (!join_key_names_set.insert(name).second)
|
if (!join_key_names_set.insert(name).second)
|
||||||
throw Exception("Duplicate column in USING list", ErrorCodes::DUPLICATE_COLUMN);
|
throw Exception("Duplicate column in USING list", ErrorCodes::DUPLICATE_COLUMN);
|
||||||
}
|
}
|
||||||
@ -1606,11 +1636,23 @@ void ExpressionAnalyzer::findJoins(NameSet & required_columns)
|
|||||||
|
|
||||||
joins.push_back(join);
|
joins.push_back(join);
|
||||||
|
|
||||||
|
std::cerr << right_table_sample.dumpNames() << std::endl;
|
||||||
|
|
||||||
|
for (const auto & x : required_columns)
|
||||||
|
std::cerr << "Required column: " << x << std::endl;
|
||||||
|
|
||||||
/// Удаляем из required_columns столбцы, которые есть в подзапросе, но нет в USING-е.
|
/// Удаляем из required_columns столбцы, которые есть в подзапросе, но нет в USING-е.
|
||||||
for (NameSet::iterator it = required_columns.begin(); it != required_columns.end();)
|
for (NameSet::iterator it = required_columns.begin(); it != required_columns.end();)
|
||||||
{
|
{
|
||||||
if (right_table_sample.has(*it) && !join_key_names_set.count(*it))
|
if (right_table_sample.has(*it) && !join_key_names_set.count(*it))
|
||||||
|
{
|
||||||
|
ColumnWithNameAndType & added_col = right_table_sample.getByName(*it);
|
||||||
|
columns_added_by_join.emplace_back(added_col.name, added_col.type);
|
||||||
|
|
||||||
|
std::cerr << "Column added by JOIN: " << *it << std::endl;
|
||||||
|
|
||||||
required_columns.erase(it++);
|
required_columns.erase(it++);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
|
@ -243,6 +243,7 @@ BlockInputStreamPtr InterpreterSelectQuery::execute()
|
|||||||
need_aggregate = query_analyzer->hasAggregation();
|
need_aggregate = query_analyzer->hasAggregation();
|
||||||
|
|
||||||
query_analyzer->appendArrayJoin(chain, !first_stage);
|
query_analyzer->appendArrayJoin(chain, !first_stage);
|
||||||
|
query_analyzer->appendJoin(chain, !first_stage);
|
||||||
|
|
||||||
if (query_analyzer->appendWhere(chain, !first_stage))
|
if (query_analyzer->appendWhere(chain, !first_stage))
|
||||||
{
|
{
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
size_t Join::getTotalRowCount() const
|
size_t Join::getTotalRowCount() const
|
||||||
{
|
{
|
||||||
size_t rows = 0;
|
size_t rows = 0;
|
||||||
@ -32,8 +32,8 @@ size_t Join::getTotalByteCount() const
|
|||||||
bytes += pool.size();
|
bytes += pool.size();
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool Join::checkSizeLimits() const
|
bool Join::checkSizeLimits() const
|
||||||
{
|
{
|
||||||
if (max_rows && getTotalRowCount() > max_rows)
|
if (max_rows && getTotalRowCount() > max_rows)
|
||||||
@ -42,7 +42,7 @@ bool Join::checkSizeLimits() const
|
|||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool Join::checkExternalSizeLimits() const
|
bool Join::checkExternalSizeLimits() const
|
||||||
{
|
{
|
||||||
@ -104,8 +104,12 @@ bool Join::insertFromBlock(const Block & block)
|
|||||||
init(Set::chooseMethod(key_columns, keys_fit_128_bits, key_sizes));
|
init(Set::chooseMethod(key_columns, keys_fit_128_bits, key_sizes));
|
||||||
|
|
||||||
blocks.push_back(block);
|
blocks.push_back(block);
|
||||||
const Block * stored_block = &blocks.back();
|
Block * stored_block = &blocks.back();
|
||||||
/// TODO Удалить из stored_block ключевые столбцы, так как они не нужны.
|
|
||||||
|
/// Удаляем из stored_block ключевые столбцы, так как они не нужны.
|
||||||
|
for (const auto & name : key_names)
|
||||||
|
stored_block->erase(stored_block->getPositionByName(name));
|
||||||
|
|
||||||
|
|
||||||
if (type == Set::KEY_64)
|
if (type == Set::KEY_64)
|
||||||
{
|
{
|
||||||
@ -225,6 +229,8 @@ void Join::anyLeftJoinBlock(Block & block)
|
|||||||
if (blocks.empty())
|
if (blocks.empty())
|
||||||
throw Exception("Attempt to JOIN with empty table", ErrorCodes::EMPTY_DATA_PASSED);
|
throw Exception("Attempt to JOIN with empty table", ErrorCodes::EMPTY_DATA_PASSED);
|
||||||
|
|
||||||
|
std::cerr << "!!! " << block.dumpNames() << std::endl;
|
||||||
|
|
||||||
size_t keys_size = key_names.size();
|
size_t keys_size = key_names.size();
|
||||||
ConstColumnPlainPtrs key_columns(keys_size);
|
ConstColumnPlainPtrs key_columns(keys_size);
|
||||||
|
|
||||||
@ -251,7 +257,9 @@ void Join::anyLeftJoinBlock(Block & block)
|
|||||||
added_columns[i]->reserve(src_column.column->size());
|
added_columns[i]->reserve(src_column.column->size());
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t rows = block.rows();
|
std::cerr << "??? " << block.dumpNames() << std::endl;
|
||||||
|
|
||||||
|
size_t rows = block.rowsInFirstColumn();
|
||||||
|
|
||||||
if (type == Set::KEY_64)
|
if (type == Set::KEY_64)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user