Merge pull request #4099 from yandex/mutations-fix-in

Get rid of IAST::range
This commit is contained in:
alexey-milovidov 2019-01-24 20:29:13 +03:00 committed by GitHub
commit 18f1266dab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 273 additions and 253 deletions

View File

@ -427,6 +427,18 @@ Names Block::getNames() const
}
/// Collects the data type of every column held in `data`, preserving the iteration order of `data`.
DataTypes Block::getDataTypes() const
{
    DataTypes types;
    /// One allocation up front: the result gets one entry per column.
    types.reserve(columns());

    for (const auto & column : data)
        types.emplace_back(column.type);

    return types;
}
template <typename ReturnType>
static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description)
{

View File

@ -82,6 +82,7 @@ public:
const ColumnsWithTypeAndName & getColumnsWithTypeAndName() const;
NamesAndTypesList getNamesAndTypesList() const;
Names getNames() const;
DataTypes getDataTypes() const;
/// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0.
size_t rows() const;

View File

@ -54,8 +54,9 @@ NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList &
[&](const NamesAndTypesList::value_type & val) { return val.name == name; });
}
void makeExplicitSet(const ASTFunction * node, const Block & sample_block, bool create_ordered_set,
const Context & context, const SizeLimits & size_limits, PreparedSets & prepared_sets)
SetPtr makeExplicitSet(
const ASTFunction * node, const Block & sample_block, bool create_ordered_set,
const Context & context, const SizeLimits & size_limits, PreparedSets & prepared_sets)
{
const IAST & args = *node->arguments;
@ -65,6 +66,21 @@ void makeExplicitSet(const ASTFunction * node, const Block & sample_block, bool
const ASTPtr & left_arg = args.children.at(0);
const ASTPtr & right_arg = args.children.at(1);
const DataTypePtr & left_arg_type = sample_block.getByName(left_arg->getColumnName()).type;
DataTypes set_element_types = {left_arg_type};
auto left_tuple_type = typeid_cast<const DataTypeTuple *>(left_arg_type.get());
if (left_tuple_type && left_tuple_type->getElements().size() != 1)
set_element_types = left_tuple_type->getElements();
for (auto & element_type : set_element_types)
if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(element_type.get()))
element_type = low_cardinality_type->getDictionaryType();
auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types);
if (prepared_sets.count(set_key))
return prepared_sets.at(set_key); /// Already prepared.
auto getTupleTypeFromAst = [&context](const ASTPtr & tuple_ast) -> DataTypePtr
{
auto ast_function = typeid_cast<const ASTFunction *>(tuple_ast.get());
@ -79,7 +95,6 @@ void makeExplicitSet(const ASTFunction * node, const Block & sample_block, bool
return evaluateConstantExpression(tuple_ast, context).second;
};
const DataTypePtr & left_arg_type = sample_block.getByName(left_arg->getColumnName()).type;
const DataTypePtr & right_arg_type = getTupleTypeFromAst(right_arg);
std::function<size_t(const DataTypePtr &)> getTupleDepth;
@ -94,15 +109,6 @@ void makeExplicitSet(const ASTFunction * node, const Block & sample_block, bool
size_t left_tuple_depth = getTupleDepth(left_arg_type);
size_t right_tuple_depth = getTupleDepth(right_arg_type);
DataTypes set_element_types = {left_arg_type};
auto left_tuple_type = typeid_cast<const DataTypeTuple *>(left_arg_type.get());
if (left_tuple_type && left_tuple_type->getElements().size() != 1)
set_element_types = left_tuple_type->getElements();
for (auto & element_type : set_element_types)
if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(element_type.get()))
element_type = low_cardinality_type->getDictionaryType();
ASTPtr elements_ast = nullptr;
/// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc.
@ -131,7 +137,8 @@ void makeExplicitSet(const ASTFunction * node, const Block & sample_block, bool
SetPtr set = std::make_shared<Set>(size_limits, create_ordered_set);
set->createFromAST(set_element_types, elements_ast, context);
prepared_sets[right_arg->range] = std::move(set);
prepared_sets[set_key] = set;
return set;
}
static String getUniqueName(const Block & block, const String & prefix)
@ -305,6 +312,7 @@ void ActionsVisitor::visit(const ASTPtr & ast)
return;
}
SetPtr prepared_set;
if (functionIsInOrGlobalInOperator(node->name))
{
/// Let's find the type of the first argument (then getActionsImpl will be called again and will not affect anything).
@ -313,7 +321,7 @@ void ActionsVisitor::visit(const ASTPtr & ast)
if (!no_subqueries)
{
/// Transform tuple or subquery into a set.
makeSet(node, actions_stack.getSampleBlock());
prepared_set = makeSet(node, actions_stack.getSampleBlock());
}
else
{
@ -380,23 +388,21 @@ void ActionsVisitor::visit(const ASTPtr & ast)
/// Select the name in the next cycle.
argument_names.emplace_back();
}
else if (prepared_sets.count(child->range) && functionIsInOrGlobalInOperator(node->name) && arg == 1)
else if (functionIsInOrGlobalInOperator(node->name) && arg == 1 && prepared_set)
{
ColumnWithTypeAndName column;
column.type = std::make_shared<DataTypeSet>();
const SetPtr & set = prepared_sets[child->range];
/// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
/// so that sets with the same literal representation do not fuse together (they can have different types).
if (!set->empty())
if (!prepared_set->empty())
column.name = getUniqueName(actions_stack.getSampleBlock(), "__set");
else
column.name = child_column_name;
if (!actions_stack.getSampleBlock().has(column.name))
{
column.column = ColumnSet::create(1, set);
column.column = ColumnSet::create(1, prepared_set);
actions_stack.addAction(ExpressionAction::addColumn(column));
}
@ -522,7 +528,7 @@ void ActionsVisitor::visit(const ASTPtr & ast)
}
}
void ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_block)
SetPtr ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_block)
{
/** You need to convert the right argument to a set.
* This can be a table name, a value, a value enumeration, or a subquery.
@ -531,14 +537,14 @@ void ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_bloc
const IAST & args = *node->arguments;
const ASTPtr & arg = args.children.at(1);
/// Already converted.
if (prepared_sets.count(arg->range))
return;
/// If the subquery or table name for SELECT.
const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(arg.get());
if (typeid_cast<const ASTSubquery *>(arg.get()) || identifier)
{
auto set_key = PreparedSetKey::forSubquery(*arg);
if (prepared_sets.count(set_key))
return prepared_sets.at(set_key);
/// A special case is if the name of the table is specified on the right side of the IN statement,
/// and the table has the type Set (a previously prepared set).
if (identifier)
@ -549,11 +555,10 @@ void ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_bloc
if (table)
{
StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get());
if (storage_set)
{
prepared_sets[arg->range] = storage_set->getSet();
return;
prepared_sets[set_key] = storage_set->getSet();
return storage_set->getSet();
}
}
}
@ -566,8 +571,8 @@ void ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_bloc
/// If you already created a Set with the same subquery / table.
if (subquery_for_set.set)
{
prepared_sets[arg->range] = subquery_for_set.set;
return;
prepared_sets[set_key] = subquery_for_set.set;
return subquery_for_set.set;
}
SetPtr set = std::make_shared<Set>(set_size_limit, false);
@ -612,12 +617,13 @@ void ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_bloc
}
subquery_for_set.set = set;
prepared_sets[arg->range] = set;
prepared_sets[set_key] = set;
return set;
}
else
{
/// An explicit enumeration of values in parentheses.
makeExplicitSet(node, sample_block, false, context, set_size_limit, prepared_sets);
return makeExplicitSet(node, sample_block, false, context, set_size_limit, prepared_sets);
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Parsers/StringRange.h>
#include <Parsers/IAST.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/ExpressionActions.h>
@ -10,13 +11,6 @@ namespace DB
class Context;
class ASTFunction;
class Set;
using SetPtr = std::shared_ptr<Set>;
/// Will compare sets by their position in query string. It's possible because IAST::clone() doesn't chane IAST::range.
/// It should be taken into account when we want to change AST part which contains sets.
using PreparedSets = std::unordered_map<StringRange, SetPtr, StringRangePointersHash, StringRangePointersEqualTo>;
class Join;
using JoinPtr = std::shared_ptr<Join>;
@ -43,9 +37,10 @@ struct SubqueryForSet
using SubqueriesForSets = std::unordered_map<String, SubqueryForSet>;
/// The case of an explicit enumeration of values.
void makeExplicitSet(const ASTFunction * node, const Block & sample_block, bool create_ordered_set,
const Context & context, const SizeLimits & limits, PreparedSets & prepared_sets);
/// The case of an explicit enumeration of values.
SetPtr makeExplicitSet(
const ASTFunction * node, const Block & sample_block, bool create_ordered_set,
const Context & context, const SizeLimits & limits, PreparedSets & prepared_sets);
/** For ActionsVisitor
@ -111,7 +106,7 @@ private:
std::ostream * ostr;
ScopeStack actions_stack;
void makeSet(const ASTFunction * node, const Block & sample_block);
SetPtr makeSet(const ASTFunction * node, const Block & sample_block);
};
}

View File

@ -258,20 +258,25 @@ void ExpressionAnalyzer::makeSetsForIndex()
if (storage && select_query && storage->supportsIndexForIn())
{
if (select_query->where_expression)
makeSetsForIndexImpl(select_query->where_expression, storage->getSampleBlock());
makeSetsForIndexImpl(select_query->where_expression);
if (select_query->prewhere_expression)
makeSetsForIndexImpl(select_query->prewhere_expression, storage->getSampleBlock());
makeSetsForIndexImpl(select_query->prewhere_expression);
}
}
void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
{
BlockIO res = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {})->execute();
auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
if (prepared_sets.count(set_key))
return; /// Already prepared.
auto interpreter_subquery = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {});
BlockIO res = interpreter_subquery->execute();
SetPtr set = std::make_shared<Set>(settings.size_limits_for_set, true);
set->setHeader(res.in->getHeader());
while (Block block = res.in->read())
{
/// If the limits have been exceeded, give up and let the default subquery processing actions take place.
@ -279,24 +284,24 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_
return;
}
prepared_sets[subquery_or_table_name->range] = std::move(set);
prepared_sets[set_key] = std::move(set);
}
void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block)
void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node)
{
for (auto & child : node->children)
{
/// Don't descent into subqueries.
/// Don't descend into subqueries.
if (typeid_cast<ASTSubquery *>(child.get()))
continue;
/// Don't dive into lambda functions
/// Don't descend into lambda functions
const ASTFunction * func = typeid_cast<const ASTFunction *>(child.get());
if (func && func->name == "lambda")
continue;
makeSetsForIndexImpl(child, sample_block);
makeSetsForIndexImpl(child);
}
const ASTFunction * func = typeid_cast<const ASTFunction *>(node.get());
@ -307,28 +312,24 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block &
if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0)))
{
const ASTPtr & arg = args.children.at(1);
if (!prepared_sets.count(arg->range)) /// Not already prepared.
if (typeid_cast<ASTSubquery *>(arg.get()) || isIdentifier(arg))
{
if (typeid_cast<ASTSubquery *>(arg.get()) || isIdentifier(arg))
{
if (settings.use_index_for_in_with_subqueries)
tryMakeSetForIndexFromSubquery(arg);
}
else
{
NamesAndTypesList temp_columns = source_columns;
temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
for (const auto & joined_column : columns_added_by_join)
temp_columns.push_back(joined_column.name_and_type);
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context);
getRootActions(func->arguments->children.at(0), true, temp_actions);
if (settings.use_index_for_in_with_subqueries)
tryMakeSetForIndexFromSubquery(arg);
}
else
{
NamesAndTypesList temp_columns = source_columns;
temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
for (const auto & joined_column : columns_added_by_join)
temp_columns.push_back(joined_column.name_and_type);
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context);
getRootActions(func->arguments->children.at(0), true, temp_actions);
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
if (sample_block_with_calculated_columns.has(args.children.at(0)->getColumnName()))
makeExplicitSet(func, sample_block_with_calculated_columns, true, context,
settings.size_limits_for_set, prepared_sets);
}
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
if (sample_block_with_calculated_columns.has(args.children.at(0)->getColumnName()))
makeExplicitSet(func, sample_block_with_calculated_columns, true, context,
settings.size_limits_for_set, prepared_sets);
}
}
}

View File

@ -271,7 +271,7 @@ private:
*/
void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name);
void makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block);
void makeSetsForIndexImpl(const ASTPtr & node);
bool isRemoteStorage() const;
};

View File

@ -339,13 +339,11 @@ ASTPtr InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns)
const auto column_declaration = std::make_shared<ASTColumnDeclaration>();
column_declaration->name = column.name;
StringPtr type_name = std::make_shared<String>(column.type->getName());
auto pos = type_name->data();
const auto end = pos + type_name->size();
ParserIdentifierWithOptionalParameters storage_p;
String type_name = column.type->getName();
auto pos = type_name.data();
const auto end = pos + type_name.size();
column_declaration->type = parseQuery(storage_p, pos, end, "data type", 0);
column_declaration->type->owned_string = type_name;
columns_list->children.emplace_back(column_declaration);
}
@ -363,13 +361,11 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns)
column_declaration->name = column.name;
StringPtr type_name = std::make_shared<String>(column.type->getName());
auto type_name_pos = type_name->data();
const auto type_name_end = type_name_pos + type_name->size();
ParserIdentifierWithOptionalParameters storage_p;
String type_name = column.type->getName();
auto type_name_pos = type_name.data();
const auto type_name_end = type_name_pos + type_name.size();
column_declaration->type = parseQuery(storage_p, type_name_pos, type_name_end, "data type", 0);
column_declaration->type->owned_string = type_name;
const auto defaults_it = columns.defaults.find(column.name);
if (defaults_it != std::end(columns.defaults))

View File

@ -228,17 +228,10 @@ void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain
/// Construct a list of literals `x1, ..., xN` from the string `expr = x1 OR ... OR expr = xN`
ASTPtr value_list = std::make_shared<ASTExpressionList>();
const char * min_range_first = nullptr;
const char * max_range_second = nullptr;
for (const auto function : equality_functions)
{
const auto & operands = getFunctionOperands(function);
value_list->children.push_back(operands[1]);
/// Get range min/max from all literals x1...xN, which will be used as tuple_functions' range
if (min_range_first == nullptr || min_range_first > operands[1]->range.first)
min_range_first = operands[1]->range.first;
if (max_range_second == nullptr || max_range_second < operands[1]->range.second)
max_range_second = operands[1]->range.second;
}
/// Sort the literals so that they are specified in the same order in the IN expression.
@ -260,7 +253,6 @@ void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain
auto tuple_function = std::make_shared<ASTFunction>();
tuple_function->name = "tuple";
tuple_function->range = StringRange(min_range_first, max_range_second);
tuple_function->arguments = value_list;
tuple_function->children.push_back(tuple_function->arguments);

View File

@ -53,7 +53,7 @@ bool MutationsInterpreter::isStorageTouchedByMutations() const
select->select_expression_list->children.push_back(count_func);
if (commands.size() == 1)
select->where_expression = commands[0].predicate;
select->where_expression = commands[0].predicate->clone();
else
{
auto coalesced_predicates = std::make_shared<ASTFunction>();
@ -62,7 +62,7 @@ bool MutationsInterpreter::isStorageTouchedByMutations() const
coalesced_predicates->children.push_back(coalesced_predicates->arguments);
for (const MutationCommand & command : commands)
coalesced_predicates->arguments->children.push_back(command.predicate);
coalesced_predicates->arguments->children.push_back(command.predicate->clone());
select->where_expression = std::move(coalesced_predicates);
}

View File

@ -0,0 +1,64 @@
#pragma once
#include <Parsers/IAST.h>
#include <DataTypes/IDataType.h>
#include <memory>
#include <unordered_map>
namespace DB
{
/// Key under which a prepared Set is stored in PreparedSets.
struct PreparedSetKey
{
    /// Builds a key for a tuple literal.
    /// Such sets are identified by the hash of the tree contents together with the desired data types
    /// of the set elements: two tuples with identical contents may still need two different Sets
    /// when the left hand sides of their IN operators have different types.
    static PreparedSetKey forLiteral(const IAST & ast, DataTypes types_)
    {
        PreparedSetKey result;
        result.ast_hash = ast.getTreeHash();
        result.types = std::move(types_);
        return result;
    }

    /// Builds a key for a subquery.
    /// Only the AST contents are used, because the type of the resulting set
    /// is fully determined by the subquery; `types` stays empty.
    static PreparedSetKey forSubquery(const IAST & ast)
    {
        PreparedSetKey result;
        result.ast_hash = ast.getTreeHash();
        return result;
    }

    /// Hash of the AST the key was built from.
    IAST::Hash ast_hash;
    /// Desired set element types. Empty for subqueries.
    DataTypes types;

    /// Keys are equal when the AST hashes match and the type lists have the same length
    /// with pairwise equal types.
    bool operator==(const PreparedSetKey & other) const
    {
        if (ast_hash != other.ast_hash || types.size() != other.types.size())
            return false;

        for (size_t idx = 0; idx != types.size(); ++idx)
            if (!types[idx]->equals(*other.types[idx]))
                return false;

        return true;
    }

    /// Hash functor for unordered containers.
    /// Only the first component of the AST hash is used; `types` does not take part in hashing
    /// and is compared in operator== instead.
    struct Hash
    {
        UInt64 operator()(const PreparedSetKey & key) const
        {
            return key.ast_hash.first;
        }
    };
};
/// Forward declaration: this header only refers to Set through a shared pointer.
class Set;
/// Shared-ownership handle to a Set.
using SetPtr = std::shared_ptr<Set>;
/// Prepared sets indexed by PreparedSetKey; keys are hashed with PreparedSetKey::Hash
/// and compared with PreparedSetKey::operator==.
using PreparedSets = std::unordered_map<PreparedSetKey, SetPtr, PreparedSetKey::Hash>;
}

View File

@ -3,6 +3,7 @@
#include <Core/Names.h>
#include <Parsers/IAST.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <map>
namespace DB
{

View File

@ -150,7 +150,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
ParserQuery parser(end, settings.enable_debug_queries);
ASTPtr ast;
size_t query_size;
const char * query_end;
/// Don't limit the size of internal queries.
size_t max_query_size = 0;
@ -162,10 +162,11 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// TODO Parser should fail early when max_query_size limit is reached.
ast = parseQuery(parser, begin, end, "", max_query_size);
/// Copy query into string. It will be written to log and presented in processlist. If an INSERT query, string will not include data to insertion.
if (!(begin <= ast->range.first && ast->range.second <= end))
throw Exception("Unexpected behavior: AST chars range is not inside source range", ErrorCodes::LOGICAL_ERROR);
query_size = ast->range.second - begin;
const auto * insert_query = dynamic_cast<const ASTInsertQuery *>(ast.get());
if (insert_query && insert_query->data)
query_end = insert_query->data;
else
query_end = end;
}
catch (...)
{
@ -180,7 +181,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
throw;
}
String query(begin, query_size);
/// Copy query into string. It will be written to log and presented in processlist. For an INSERT query, the string will not include the data to be inserted.
String query(begin, query_end);
BlockIO res;
try

View File

@ -24,12 +24,22 @@ std::string getClusterName(const IAST & node)
if (const ASTLiteral * ast_lit = typeid_cast<const ASTLiteral *>(&node))
return ast_lit->value.safeGet<String>();
/// A hack to support hyphens in cluster names.
if (const ASTFunction * ast_func = typeid_cast<const ASTFunction *>(&node))
{
if (!ast_func->range.first || !ast_func->range.second)
if (ast_func->name != "minus" || !ast_func->arguments || ast_func->arguments->children.size() < 2)
throw Exception("Illegal expression instead of cluster name.", ErrorCodes::BAD_ARGUMENTS);
return String(ast_func->range.first, ast_func->range.second);
String name;
for (const auto & arg : ast_func->arguments->children)
{
if (name.empty())
name += getClusterName(*arg);
else
name += "-" + getClusterName(*arg);
}
return name;
}
throw Exception("Illegal expression instead of cluster name.", ErrorCodes::BAD_ARGUMENTS);

View File

@ -22,7 +22,6 @@ public:
, name_parts(name_parts_)
, special(false)
{
range = StringRange(name.data(), name.data() + name.size());
}
/** Get the text that identifies this element. */

View File

@ -7,6 +7,13 @@
namespace DB
{
/// Contributes this literal to the tree hash: first the fixed "Literal_" marker,
/// then `value`, which is fed into the same hash state through FieldVisitorHash.
void ASTLiteral::updateTreeHashImpl(SipHash & hash_state) const
{
    const char * const marker = "Literal_";
    const size_t marker_length = strlen(marker);
    hash_state.update(marker, marker_length);

    applyVisitor(FieldVisitorHash(hash_state), value);
}
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
{
/// Special case for very large arrays. Instead of listing all elements, will use hash of them.

View File

@ -22,6 +22,8 @@ public:
ASTPtr clone() const override { return std::make_shared<ASTLiteral>(*this); }
void updateTreeHashImpl(SipHash & hash_state) const override;
protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override
{

View File

@ -672,8 +672,6 @@ bool ParserRightExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
auto begin = pos;
if (!ParserKeyword("EXTRACT").ignore(pos, expected))
return false;
@ -734,14 +732,10 @@ bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp
auto function = std::make_shared<ASTFunction>();
auto exp_list = std::make_shared<ASTExpressionList>();
function->range.first = begin->begin;
function->range.second = pos->begin;
function->name = function_name; //"toYear";
function->arguments = exp_list;
function->children.push_back(exp_list);
exp_list->children.push_back(expr);
exp_list->range.first = begin->begin;
exp_list->range.second = pos->begin;
node = function;
return true;

View File

@ -138,7 +138,6 @@ static bool parseOperator(IParser::Pos & pos, const char * op, Expected & expect
bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
bool first = true;
Pos begin = pos;
while (1)
{
@ -174,16 +173,12 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node
return false;
/// the first argument of the function is the previous element, the second is the next one
function->range.first = begin->begin;
function->range.second = pos->begin;
function->name = it[1];
function->arguments = exp_list;
function->children.push_back(exp_list);
exp_list->children.push_back(node);
exp_list->children.push_back(elem);
exp_list->range.first = begin->begin;
exp_list->range.second = pos->begin;
/** special exception for the access operator to the element of the array `x[y]`, which
* contains the infix part '[' and the suffix ']' (specified as '[')
@ -243,8 +238,6 @@ bool ParserBetweenExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp
ASTPtr left;
ASTPtr right;
Pos begin = pos;
if (!elem_parser.parse(pos, subject, expected))
return false;
@ -279,14 +272,10 @@ bool ParserBetweenExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp
args_le->children.emplace_back(subject);
args_le->children.emplace_back(right);
f_ge->range.first = begin->begin;
f_ge->range.second = pos->begin;
f_ge->name = "greaterOrEquals";
f_ge->arguments = args_ge;
f_ge->children.emplace_back(f_ge->arguments);
f_le->range.first = begin->begin;
f_le->range.second = pos->begin;
f_le->name = "lessOrEquals";
f_le->arguments = args_le;
f_le->children.emplace_back(f_le->arguments);
@ -294,8 +283,6 @@ bool ParserBetweenExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp
args_and->children.emplace_back(f_ge);
args_and->children.emplace_back(f_le);
f_and->range.first = begin->begin;
f_and->range.second = pos->begin;
f_and->name = "and";
f_and->arguments = args_and;
f_and->children.emplace_back(f_and->arguments);
@ -315,8 +302,6 @@ bool ParserTernaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expect
ASTPtr elem_then;
ASTPtr elem_else;
Pos begin = pos;
if (!elem_parser.parse(pos, elem_cond, expected))
return false;
@ -339,8 +324,6 @@ bool ParserTernaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expect
/// function arguments
auto exp_list = std::make_shared<ASTExpressionList>();
function->range.first = begin->begin;
function->range.second = pos->begin;
function->name = "if";
function->arguments = exp_list;
function->children.push_back(exp_list);
@ -348,8 +331,6 @@ bool ParserTernaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expect
exp_list->children.push_back(elem_cond);
exp_list->children.push_back(elem_then);
exp_list->children.push_back(elem_else);
exp_list->range.first = begin->begin;
exp_list->range.second = pos->begin;
node = function;
}
@ -423,7 +404,6 @@ bool ParserLambdaExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
bool ParserPrefixUnaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
/// try to find any of the valid operators
Pos begin = pos;
const char ** it;
for (it = operators; *it; it += 2)
{
@ -471,15 +451,11 @@ bool ParserPrefixUnaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Ex
/// function arguments
auto exp_list = std::make_shared<ASTExpressionList>();
function->range.first = begin->begin;
function->range.second = pos->begin;
function->name = it[1];
function->arguments = exp_list;
function->children.push_back(exp_list);
exp_list->children.push_back(elem);
exp_list->range.first = begin->begin;
exp_list->range.second = pos->begin;
node = function;
}
@ -595,8 +571,6 @@ bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
Pos begin = pos;
/// If no INTERVAL keyword, go to nested parser.
if (!ParserKeyword("INTERVAL").ignore(pos, expected))
return next_parser.parse(pos, node, expected);
@ -620,15 +594,11 @@ bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expec
auto exp_list = std::make_shared<ASTExpressionList>();
/// the first argument of the function is the previous element, the second is the next one
function->range.first = begin->begin;
function->range.second = pos->begin;
function->name = function_name;
function->arguments = exp_list;
function->children.push_back(exp_list);
exp_list->children.push_back(expr);
exp_list->range.first = begin->begin;
exp_list->range.second = pos->begin;
node = function;
return true;

View File

@ -53,20 +53,26 @@ size_t IAST::checkSize(size_t max_size) const
IAST::Hash IAST::getTreeHash() const
{
SipHash hash_state;
getTreeHashImpl(hash_state);
updateTreeHash(hash_state);
IAST::Hash res;
hash_state.get128(res.first, res.second);
return res;
}
void IAST::getTreeHashImpl(SipHash & hash_state) const
void IAST::updateTreeHash(SipHash & hash_state) const
{
updateTreeHashImpl(hash_state);
hash_state.update(children.size());
for (const auto & child : children)
child->updateTreeHash(hash_state);
}
void IAST::updateTreeHashImpl(SipHash & hash_state) const
{
auto id = getID();
hash_state.update(id.data(), id.size());
hash_state.update(children.size());
for (const auto & child : children)
child->getTreeHashImpl(hash_state);
}

View File

@ -7,7 +7,6 @@
#include <Core/Types.h>
#include <Common/Exception.h>
#include <Parsers/StringRange.h>
#include <Parsers/IdentifierQuotingStyle.h>
@ -40,10 +39,6 @@ class IAST : public std::enable_shared_from_this<IAST>
{
public:
ASTs children;
StringRange range;
/// This pointer does not allow it to be deleted while the range refers to it.
StringPtr owned_string;
virtual ~IAST() = default;
IAST() = default;
@ -81,7 +76,8 @@ public:
*/
using Hash = std::pair<UInt64, UInt64>;
Hash getTreeHash() const;
void getTreeHashImpl(SipHash & hash_state) const;
void updateTreeHash(SipHash & hash_state) const;
virtual void updateTreeHashImpl(SipHash & hash_state) const;
void dumpTree(std::ostream & ostr, size_t indent = 0) const
{
@ -196,11 +192,7 @@ public:
virtual void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const
{
throw Exception("Unknown element in AST: " + getID()
+ ((range.first && (range.second > range.first))
? " '" + std::string(range.first, range.second - range.first) + "'"
: ""),
ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
throw Exception("Unknown element in AST: " + getID(), ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
}
void cloneChildren();

View File

@ -22,8 +22,6 @@ bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected)
node = nullptr;
pos = begin;
}
else if (node)
node->range = StringRange(begin, pos);
return res;
}

View File

@ -4,6 +4,7 @@
#include <Interpreters/SyntaxAnalyzer.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/QueryNormalizer.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <Common/FieldVisitors.h>
@ -12,6 +13,8 @@
#include <Interpreters/Set.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTIdentifier.h>
namespace DB
@ -107,7 +110,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map
{
{
"notEquals",
[] (RPNElement & out, const Field & value, const ASTPtr &)
[] (RPNElement & out, const Field & value)
{
out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
out.range = Range(value);
@ -116,7 +119,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map
},
{
"equals",
[] (RPNElement & out, const Field & value, const ASTPtr &)
[] (RPNElement & out, const Field & value)
{
out.function = RPNElement::FUNCTION_IN_RANGE;
out.range = Range(value);
@ -125,7 +128,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map
},
{
"less",
[] (RPNElement & out, const Field & value, const ASTPtr &)
[] (RPNElement & out, const Field & value)
{
out.function = RPNElement::FUNCTION_IN_RANGE;
out.range = Range::createRightBounded(value, false);
@ -134,7 +137,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map
},
{
"greater",
[] (RPNElement & out, const Field & value, const ASTPtr &)
[] (RPNElement & out, const Field & value)
{
out.function = RPNElement::FUNCTION_IN_RANGE;
out.range = Range::createLeftBounded(value, false);
@ -143,7 +146,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map
},
{
"lessOrEquals",
[] (RPNElement & out, const Field & value, const ASTPtr &)
[] (RPNElement & out, const Field & value)
{
out.function = RPNElement::FUNCTION_IN_RANGE;
out.range = Range::createRightBounded(value, true);
@ -152,7 +155,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map
},
{
"greaterOrEquals",
[] (RPNElement & out, const Field & value, const ASTPtr &)
[] (RPNElement & out, const Field & value)
{
out.function = RPNElement::FUNCTION_IN_RANGE;
out.range = Range::createLeftBounded(value, true);
@ -161,25 +164,23 @@ const KeyCondition::AtomMap KeyCondition::atom_map
},
{
"in",
[] (RPNElement & out, const Field &, const ASTPtr & node)
[] (RPNElement & out, const Field &)
{
out.function = RPNElement::FUNCTION_IN_SET;
out.in_function = node;
return true;
}
},
{
"notIn",
[] (RPNElement & out, const Field &, const ASTPtr & node)
[] (RPNElement & out, const Field &)
{
out.function = RPNElement::FUNCTION_NOT_IN_SET;
out.in_function = node;
return true;
}
},
{
"like",
[] (RPNElement & out, const Field & value, const ASTPtr &)
[] (RPNElement & out, const Field & value)
{
if (value.getType() != Field::Types::String)
return false;
@ -458,71 +459,64 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
return found_transformation;
}
/// Tries to associate the tuple element at position `tuple_index` (expression `node`)
/// with a key column.
///
/// If isKeyPossiblyWrappedByMonotonicFunctions() accepts `node`, the resulting mapping
/// (tuple position, key column index and the chain of functions it filled in) is appended
/// to `indexes_mapping`, and `out_key_column_num` is raised to that key column index
/// when the index is larger than the current value. Otherwise nothing is modified.
void KeyCondition::getKeyTuplePositionMapping(
    const ASTPtr & node,
    const Context & context,
    std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
    const size_t tuple_index,
    size_t & out_key_column_num)
{
    MergeTreeSetIndex::KeyTuplePositionMapping mapping;
    mapping.tuple_index = tuple_index;

    /// The type is a required output of the check below, but it is not used here.
    DataTypePtr key_expr_type;

    const bool matches_key = isKeyPossiblyWrappedByMonotonicFunctions(
        node, context, mapping.key_index, key_expr_type, mapping.functions);

    /// Not a key column (possibly wrapped): leave both outputs untouched.
    if (!matches_key)
        return;

    indexes_mapping.push_back(mapping);

    /// Keep track of the largest key column index seen so far.
    if (mapping.key_index > out_key_column_num)
        out_key_column_num = mapping.key_index;
}
bool KeyCondition::tryPrepareSetIndex(
const ASTPtr & node,
const ASTs & args,
const Context & context,
RPNElement & out,
const SetPtr & prepared_set,
size_t & out_key_column_num)
{
/// The index can be prepared if the elements of the set were saved in advance.
if (!prepared_set->hasExplicitSetElements())
return false;
const ASTPtr & left_arg = args[0];
out_key_column_num = 0;
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> indexes_mapping;
DataTypes data_types;
size_t num_key_columns = prepared_set->getDataTypes().size();
const ASTFunction * node_tuple = typeid_cast<const ASTFunction *>(node.get());
if (node_tuple && node_tuple->name == "tuple")
auto get_key_tuple_position_mapping = [&](const ASTPtr & node, size_t tuple_index)
{
if (num_key_columns != node_tuple->arguments->children.size())
MergeTreeSetIndex::KeyTuplePositionMapping index_mapping;
index_mapping.tuple_index = tuple_index;
DataTypePtr data_type;
if (isKeyPossiblyWrappedByMonotonicFunctions(
node, context, index_mapping.key_index, data_type, index_mapping.functions))
{
std::stringstream message;
message << "Number of columns in section IN doesn't match. "
<< node_tuple->arguments->children.size() << " at left, " << num_key_columns << " at right.";
throw Exception(message.str(), ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
indexes_mapping.push_back(index_mapping);
data_types.push_back(data_type);
if (out_key_column_num < index_mapping.key_index)
out_key_column_num = index_mapping.key_index;
}
};
size_t current_tuple_index = 0;
for (const auto & arg : node_tuple->arguments->children)
{
getKeyTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_key_column_num);
++current_tuple_index;
}
const ASTFunction * left_arg_tuple = typeid_cast<const ASTFunction *>(left_arg.get());
if (left_arg_tuple && left_arg_tuple->name == "tuple")
{
const auto & tuple_elements = left_arg_tuple->arguments->children;
for (size_t i = 0; i < tuple_elements.size(); ++i)
get_key_tuple_position_mapping(tuple_elements[i], i);
}
else
{
getKeyTuplePositionMapping(node, context, indexes_mapping, 0, out_key_column_num);
}
get_key_tuple_position_mapping(left_arg, 0);
if (indexes_mapping.empty())
return false;
const ASTPtr & right_arg = args[1];
PreparedSetKey set_key;
if (typeid_cast<const ASTSubquery *>(right_arg.get()) || typeid_cast<const ASTIdentifier *>(right_arg.get()))
set_key = PreparedSetKey::forSubquery(*right_arg);
else
set_key = PreparedSetKey::forLiteral(*right_arg, data_types);
auto set_it = prepared_sets.find(set_key);
if (set_it == prepared_sets.end())
return false;
const SetPtr & prepared_set = set_it->second;
/// The index can be prepared if the elements of the set were saved in advance.
if (!prepared_set->hasExplicitSetElements())
return false;
out.set_index = std::make_shared<MergeTreeSetIndex>(prepared_set->getSetElements(), std::move(indexes_mapping));
return true;
@ -635,13 +629,13 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
DataTypePtr key_expr_type; /// Type of expression containing key column
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
size_t key_column_num; /// Number of a key column (inside key_column_names array)
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
MonotonicFunctionsChain chain;
bool is_set_const = false;
bool is_constant_transformed = false;
if (prepared_sets.count(args[1]->range)
&& tryPrepareSetIndex(args[0], context, out, prepared_sets[args[1]->range], key_column_num))
if (functionIsInOrGlobalInOperator(func->name)
&& tryPrepareSetIndex(args, context, out, key_column_num))
{
key_arg_pos = 0;
is_set_const = true;
@ -671,6 +665,9 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
else
return false;
if (key_column_num == static_cast<size_t>(-1))
throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
std::string func_name = func->name;
/// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
@ -714,7 +711,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
if (!cast_not_needed)
castValueToType(key_expr_type, const_value, const_type, node);
return atom_it->second(out, const_value, node);
return atom_it->second(out, const_value);
}
else if (getConstant(node, block_with_constants, const_value, const_type)) /// For cases where it says, for example, `WHERE 0 AND something`
{
@ -1013,17 +1010,12 @@ bool KeyCondition::mayBeTrueInParallelogram(const std::vector<Range> & parallelo
element.function == RPNElement::FUNCTION_IN_SET
|| element.function == RPNElement::FUNCTION_NOT_IN_SET)
{
auto in_func = typeid_cast<const ASTFunction *>(element.in_function.get());
const ASTs & args = typeid_cast<const ASTExpressionList &>(*in_func->arguments).children;
PreparedSets::const_iterator it = prepared_sets.find(args[1]->range);
if (in_func && it != prepared_sets.end())
{
rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(parallelogram, data_types));
if (element.function == RPNElement::FUNCTION_NOT_IN_SET)
rpn_stack.back() = !rpn_stack.back();
}
else
if (!element.set_index)
throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR);
rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(parallelogram, data_types));
if (element.function == RPNElement::FUNCTION_NOT_IN_SET)
rpn_stack.back() = !rpn_stack.back();
}
else if (element.function == RPNElement::FUNCTION_NOT)
{

View File

@ -310,7 +310,6 @@ private:
Range range;
size_t key_column = 0;
/// For FUNCTION_IN_SET, FUNCTION_NOT_IN_SET
ASTPtr in_function;
using MergeTreeSetIndexPtr = std::shared_ptr<MergeTreeSetIndex>;
MergeTreeSetIndexPtr set_index;
@ -320,7 +319,7 @@ private:
using RPN = std::vector<RPNElement>;
using ColumnIndices = std::map<String, size_t>;
using AtomMap = std::unordered_map<std::string, bool(*)(RPNElement & out, const Field & value, const ASTPtr & node)>;
using AtomMap = std::unordered_map<std::string, bool(*)(RPNElement & out, const Field & value)>;
public:
static const AtomMap atom_map;
@ -363,21 +362,13 @@ private:
Field & out_value,
DataTypePtr & out_type);
/// Checks whether `node` (the tuple element at position `tuple_index`) is a key column,
/// possibly wrapped by monotonic functions. If it is, appends the corresponding mapping
/// to `indexes_mapping` and raises `out_key_column_num` to the matched key column index
/// when that index is larger than the current value.
void getKeyTuplePositionMapping(
const ASTPtr & node,
const Context & context,
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
const size_t tuple_index,
size_t & out_key_column_num);
/// If it's possible to make an RPNElement
/// that will filter values (possibly tuples) by the content of 'prepared_set',
/// do it and return true.
bool tryPrepareSetIndex(
const ASTPtr & node,
const ASTs & args,
const Context & context,
RPNElement & out,
const SetPtr & prepared_set,
size_t & out_key_column_num);
RPN rpn;

View File

@ -8,6 +8,7 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/formatAST.h>
#include <Interpreters/QueryNormalizer.h>
#include <Common/escapeForFileName.h>
#include <Common/typeid_cast.h>
#include <DataTypes/NestedUtils.h>
@ -44,7 +45,6 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
table_columns{ext::map<std::unordered_set>(data.getColumns().getAllPhysical(),
[] (const NameAndTypePair & col) { return col.name; })},
block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)},
prepared_sets(query_info.sets),
log{log}
{
calculateColumnSizes(data, column_names);
@ -333,8 +333,7 @@ bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const IAST * const ast) const
if ((primary_key_columns.count(first_arg_name) && isConstant(args[1])) ||
(primary_key_columns.count(second_arg_name) && isConstant(args[0])) ||
(primary_key_columns.count(first_arg_name)
&& (prepared_sets.count(args[1]->range) || typeid_cast<const ASTSubquery *>(args[1].get()))))
(primary_key_columns.count(first_arg_name) && functionIsInOrGlobalInOperator(func->name)))
return true;
}

View File

@ -77,7 +77,6 @@ private:
const string_set_t primary_key_columns;
const string_set_t table_columns;
const Block block_with_constants;
const PreparedSets & prepared_sets;
Poco::Logger * log;
std::unordered_map<std::string, size_t> column_sizes{};
size_t total_column_size{};

View File

@ -4,6 +4,7 @@
#include <Core/Types.h>
#include <IO/WriteHelpers.h>
#include <Storages/MutationCommands.h>
#include <map>
namespace DB

View File

@ -1,24 +1,14 @@
#pragma once
#include <Interpreters/PreparedSets.h>
#include <memory>
#include <unordered_map>
#include <Parsers/StringRange.h>
namespace DB
{
class IAST;
using ASTPtr = std::shared_ptr<IAST>;
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
class Set;
using SetPtr = std::shared_ptr<Set>;
/// Information about calculated sets in right hand side of IN.
using PreparedSets = std::unordered_map<StringRange, SetPtr, StringRangePointersHash, StringRangePointersEqualTo>;
struct PrewhereInfo
{
/// Actions which are executed in order to alias columns are used for prewhere actions.