2020-08-13 20:17:18 +00:00
|
|
|
#include <Common/quoteString.h>
|
2019-08-15 13:54:59 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2022-05-12 16:39:50 +00:00
|
|
|
#include <Columns/ColumnArray.h>
|
|
|
|
#include <Columns/ColumnFixedString.h>
|
2022-05-13 14:55:50 +00:00
|
|
|
#include <Core/ColumnNumbers.h>
|
2022-05-12 16:39:50 +00:00
|
|
|
#include <Core/ColumnWithTypeAndName.h>
|
2019-08-15 13:54:59 +00:00
|
|
|
|
2018-10-12 15:41:28 +00:00
|
|
|
#include <Functions/FunctionFactory.h>
|
|
|
|
#include <Functions/FunctionsMiscellaneous.h>
|
|
|
|
|
|
|
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
|
|
|
|
|
|
|
#include <DataTypes/DataTypeSet.h>
|
|
|
|
#include <DataTypes/DataTypeFunction.h>
|
2022-05-13 14:55:50 +00:00
|
|
|
#include <DataTypes/DataTypeFixedString.h>
|
2019-12-18 20:36:51 +00:00
|
|
|
#include <DataTypes/DataTypeString.h>
|
2018-10-12 15:41:28 +00:00
|
|
|
#include <DataTypes/DataTypeTuple.h>
|
2020-01-24 00:33:38 +00:00
|
|
|
#include <DataTypes/DataTypeArray.h>
|
2018-10-12 15:41:28 +00:00
|
|
|
#include <DataTypes/DataTypeLowCardinality.h>
|
2022-05-12 16:39:50 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2018-10-12 15:41:28 +00:00
|
|
|
#include <DataTypes/FieldToDataType.h>
|
|
|
|
|
|
|
|
#include <Columns/ColumnSet.h>
|
|
|
|
#include <Columns/ColumnConst.h>
|
|
|
|
|
|
|
|
#include <Storages/StorageSet.h>
|
|
|
|
|
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTIdentifier.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
|
|
|
#include <Parsers/ASTSubquery.h>
|
|
|
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
|
|
|
|
2021-09-08 18:29:38 +00:00
|
|
|
#include <Processors/QueryPlan/QueryPlan.h>
|
|
|
|
|
2020-05-20 20:16:32 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2018-10-12 15:41:28 +00:00
|
|
|
#include <Interpreters/ExpressionActions.h>
|
2019-10-23 13:59:03 +00:00
|
|
|
#include <Interpreters/misc.h>
|
2018-10-12 15:41:28 +00:00
|
|
|
#include <Interpreters/ActionsVisitor.h>
|
|
|
|
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
|
|
|
#include <Interpreters/Set.h>
|
|
|
|
#include <Interpreters/evaluateConstantExpression.h>
|
|
|
|
#include <Interpreters/convertFieldToType.h>
|
|
|
|
#include <Interpreters/interpretSubquery.h>
|
2019-02-11 14:19:09 +00:00
|
|
|
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
2019-12-18 20:36:51 +00:00
|
|
|
#include <Interpreters/IdentifierSemantic.h>
|
2021-09-09 13:47:48 +00:00
|
|
|
#include <Interpreters/UserDefinedExecutableFunctionFactory.h>
|
|
|
|
|
2018-10-12 15:41:28 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2020-02-25 18:02:41 +00:00
|
|
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
2018-10-12 15:41:28 +00:00
|
|
|
extern const int UNKNOWN_IDENTIFIER;
|
|
|
|
extern const int NOT_AN_AGGREGATE;
|
|
|
|
extern const int UNEXPECTED_EXPRESSION;
|
|
|
|
extern const int TYPE_MISMATCH;
|
|
|
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
2020-01-24 00:33:38 +00:00
|
|
|
extern const int INCORRECT_ELEMENT_OF_SET;
|
2020-09-13 13:51:31 +00:00
|
|
|
extern const int BAD_ARGUMENTS;
|
2021-03-03 20:01:07 +00:00
|
|
|
extern const int DUPLICATE_COLUMN;
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
|
2019-12-15 06:34:43 +00:00
|
|
|
/// Returns an iterator to the first element of `cols` whose `name` equals `name`,
/// or `cols.end()` if there is no such element.
static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols)
{
    auto it = cols.begin();
    const auto last = cols.end();

    while (it != last && !(it->name == name))
        ++it;

    return it;
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-07-31 11:39:09 +00:00
|
|
|
/// Recursion is limited in query parser and we did not check for too large depth here.
|
2020-07-22 23:33:14 +00:00
|
|
|
static size_t getTypeDepth(const DataTypePtr & type)
|
|
|
|
{
|
|
|
|
if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get()))
|
|
|
|
return 1 + getTypeDepth(array_type->getNestedType());
|
|
|
|
else if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(type.get()))
|
|
|
|
return 1 + (tuple_type->getElements().empty() ? 0 : getTypeDepth(tuple_type->getElements().at(0)));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-01-24 21:36:28 +00:00
|
|
|
template<typename Collection>
|
2020-10-07 15:36:34 +00:00
|
|
|
static Block createBlockFromCollection(const Collection & collection, const DataTypes & types, bool transform_null_in)
|
2020-01-24 00:33:38 +00:00
|
|
|
{
|
|
|
|
size_t columns_num = types.size();
|
|
|
|
MutableColumns columns(columns_num);
|
|
|
|
for (size_t i = 0; i < columns_num; ++i)
|
2022-03-16 11:21:18 +00:00
|
|
|
{
|
2020-01-24 00:33:38 +00:00
|
|
|
columns[i] = types[i]->createColumn();
|
2022-03-16 11:21:18 +00:00
|
|
|
columns[i]->reserve(collection.size());
|
|
|
|
}
|
2020-01-24 00:33:38 +00:00
|
|
|
|
|
|
|
Row tuple_values;
|
2020-01-24 21:36:28 +00:00
|
|
|
for (const auto & value : collection)
|
2020-01-24 00:33:38 +00:00
|
|
|
{
|
|
|
|
if (columns_num == 1)
|
|
|
|
{
|
|
|
|
auto field = convertFieldToType(value, *types[0]);
|
2020-10-07 15:36:34 +00:00
|
|
|
bool need_insert_null = transform_null_in && types[0]->isNullable();
|
|
|
|
if (!field.isNull() || need_insert_null)
|
2020-01-24 00:33:38 +00:00
|
|
|
columns[0]->insert(std::move(field));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (value.getType() != Field::Types::Tuple)
|
|
|
|
throw Exception("Invalid type in set. Expected tuple, got "
|
|
|
|
+ String(value.getTypeName()), ErrorCodes::INCORRECT_ELEMENT_OF_SET);
|
|
|
|
|
|
|
|
const auto & tuple = DB::get<const Tuple &>(value);
|
|
|
|
size_t tuple_size = tuple.size();
|
|
|
|
|
|
|
|
if (tuple_size != columns_num)
|
|
|
|
throw Exception("Incorrect size of tuple in set: " + toString(tuple_size)
|
|
|
|
+ " instead of " + toString(columns_num), ErrorCodes::INCORRECT_ELEMENT_OF_SET);
|
|
|
|
|
|
|
|
if (tuple_values.empty())
|
|
|
|
tuple_values.resize(tuple_size);
|
|
|
|
|
|
|
|
size_t i = 0;
|
|
|
|
for (; i < tuple_size; ++i)
|
|
|
|
{
|
|
|
|
tuple_values[i] = convertFieldToType(tuple[i], *types[i]);
|
2020-10-07 15:36:34 +00:00
|
|
|
bool need_insert_null = transform_null_in && types[i]->isNullable();
|
|
|
|
if (tuple_values[i].isNull() && !need_insert_null)
|
2020-01-24 00:33:38 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i == tuple_size)
|
|
|
|
for (i = 0; i < tuple_size; ++i)
|
2022-03-02 17:22:12 +00:00
|
|
|
columns[i]->insert(tuple_values[i]);
|
2020-01-24 00:33:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Block res;
|
|
|
|
for (size_t i = 0; i < columns_num; ++i)
|
|
|
|
res.insert(ColumnWithTypeAndName{std::move(columns[i]), types[i], "_" + toString(i)});
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
/// Converts one element of the right-hand side of IN into a Field of type `type`.
/// A literal is converted directly; a function is first evaluated as a constant expression
/// and its result is then converted. Any other node throws INCORRECT_ELEMENT_OF_SET.
static Field extractValueFromNode(const ASTPtr & node, const IDataType & type, ContextPtr context)
{
    if (const auto * literal = node->as<ASTLiteral>())
        return convertFieldToType(literal->value, type);

    if (!node->as<ASTFunction>())
        throw Exception("Incorrect element of set. Must be literal or constant expression.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);

    auto [evaluated_value, evaluated_type] = evaluateConstantExpression(node, context);
    return convertFieldToType(evaluated_value, type, evaluated_type.get());
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
/** Builds a block of set elements from the expression list on the right-hand side of IN.
  *
  * `node` is cast to ASTExpressionList; `types` holds one type per resulting column
  * (columns are named "_0", "_1", ...).
  *
  * With a single column every list element is converted to types[0].
  * With several columns every list element must be a function or a literal producing a tuple
  * of exactly types.size() elements; otherwise INCORRECT_ELEMENT_OF_SET is thrown.
  *
  * A value (or a whole tuple) is skipped when conversion yields NULL, unless the
  * `transform_null_in` setting is enabled and the corresponding type is Nullable.
  */
static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, ContextPtr context)
{
    /// Will form a block with values from the set.

    const size_t num_columns = types.size();

    Block header;
    for (size_t pos = 0; pos < num_columns; ++pos)
        header.insert(ColumnWithTypeAndName(types[pos]->createColumn(), types[pos], "_" + toString(pos)));

    MutableColumns columns = header.cloneEmptyColumns();

    /// Tuple type made of `types`; created on first use.
    DataTypePtr tuple_type;
    /// Scratch row reused between iterations; sized on first use.
    Row row_values;

    const auto & list = node->as<ASTExpressionList &>();
    const bool transform_null_in = context->getSettingsRef().transform_null_in;

    for (const auto & elem : list.children)
    {
        if (num_columns == 1)
        {
            /// One column at the left of IN.

            Field value = extractValueFromNode(elem, *types[0], context);
            const bool keep_null = transform_null_in && types[0]->isNullable();

            if (value.isNull() && !keep_null)
                continue;

            columns[0]->insert(value);
            continue;
        }

        /// Multiple columns at the left of IN.
        /// The right hand side of in should be a set of tuples.

        auto * func = elem->as<ASTFunction>();
        auto * literal = elem->as<ASTLiteral>();

        if (!func && !literal)
            throw Exception("Incorrect element of set", ErrorCodes::INCORRECT_ELEMENT_OF_SET);

        /// Storage for a tuple produced by a constant expression; `tuple` may point into it.
        Field function_result;
        const Tuple * tuple = nullptr;

        /// Tuple can be represented as a function in AST.
        if (func && func->name != "tuple")
        {
            if (!tuple_type)
                tuple_type = std::make_shared<DataTypeTuple>(types);

            /// If the function is not a tuple, treat it as a constant expression that returns tuple and extract it.
            function_result = extractValueFromNode(elem, *tuple_type, context);

            if (function_result.getType() != Field::Types::Tuple)
                throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET,
                    "Invalid type of set. Expected tuple, got {}",
                    function_result.getTypeName());

            tuple = &function_result.get<Tuple>();
        }

        /// Tuple can be represented as a literal in AST.
        if (literal)
        {
            /// The literal must be tuple.
            if (literal->value.getType() != Field::Types::Tuple)
                throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET,
                    "Invalid type in set. Expected tuple, got {}",
                    literal->value.getTypeName());

            tuple = &literal->value.get<Tuple>();
        }

        /// No tuple value at this point means the element is a `tuple(...)` function,
        /// whose arguments are converted one by one below.
        assert(tuple || func);

        const size_t tuple_size = tuple ? tuple->size() : func->arguments->children.size(); //-V1004
        if (tuple_size != num_columns)
            throw Exception("Incorrect size of tuple in set: " + toString(tuple_size) + " instead of " + toString(num_columns),
                ErrorCodes::INCORRECT_ELEMENT_OF_SET);

        if (row_values.empty())
            row_values.resize(tuple_size);

        /// Fill tuple values by evaluation of constant expressions.
        bool row_accepted = true;
        for (size_t pos = 0; pos < tuple_size; ++pos)
        {
            Field value = tuple ? convertFieldToType((*tuple)[pos], *types[pos])
                                : extractValueFromNode(func->arguments->children[pos], *types[pos], context);

            const bool keep_null = transform_null_in && types[pos]->isNullable();

            /// If at least one of the elements of the tuple has an impossible (outside the range of the type) value,
            /// then the entire tuple too.
            if (value.isNull() && !keep_null)
            {
                row_accepted = false;
                break;
            }

            row_values[pos] = value;
        }

        if (!row_accepted)
            continue;

        for (size_t pos = 0; pos < tuple_size; ++pos)
            columns[pos]->insert(row_values[pos]);
    }

    return header.cloneWithColumns(std::move(columns));
}
|
|
|
|
|
2022-04-15 21:59:49 +00:00
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
/** Create a block for set from expression.
|
|
|
|
* 'set_element_types' - types of what are on the left hand side of IN.
|
|
|
|
* 'right_arg' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6).
|
|
|
|
*
|
|
|
|
* We need special implementation for ASTFunction, because in case, when we interpret
|
|
|
|
* large tuple or array as function, `evaluateConstantExpression` works extremely slow.
|
|
|
|
*/
|
2020-09-25 07:31:06 +00:00
|
|
|
Block createBlockForSet(
|
2020-07-22 23:33:14 +00:00
|
|
|
const DataTypePtr & left_arg_type,
|
2020-07-23 10:49:50 +00:00
|
|
|
const ASTPtr & right_arg,
|
2020-07-22 23:33:14 +00:00
|
|
|
const DataTypes & set_element_types,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr context)
|
2020-07-22 23:33:14 +00:00
|
|
|
{
|
|
|
|
auto [right_arg_value, right_arg_type] = evaluateConstantExpression(right_arg, context);
|
|
|
|
|
|
|
|
const size_t left_type_depth = getTypeDepth(left_arg_type);
|
|
|
|
const size_t right_type_depth = getTypeDepth(right_arg_type);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-01-24 21:36:28 +00:00
|
|
|
auto throw_unsupported_type = [](const auto & type)
|
|
|
|
{
|
2020-02-02 01:33:13 +00:00
|
|
|
throw Exception("Unsupported value type at the right-side of IN: "
|
2020-01-24 21:36:28 +00:00
|
|
|
+ type->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
};
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-05-19 19:22:51 +00:00
|
|
|
Block block;
|
2021-04-10 23:33:54 +00:00
|
|
|
bool tranform_null_in = context->getSettingsRef().transform_null_in;
|
2020-10-07 15:36:34 +00:00
|
|
|
|
2018-10-12 15:41:28 +00:00
|
|
|
/// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc.
|
2020-01-24 00:33:38 +00:00
|
|
|
if (left_type_depth == right_type_depth)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2020-01-24 00:33:38 +00:00
|
|
|
Array array{right_arg_value};
|
2020-10-07 15:36:34 +00:00
|
|
|
block = createBlockFromCollection(array, set_element_types, tranform_null_in);
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
/// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc.
|
2020-01-24 00:33:38 +00:00
|
|
|
else if (left_type_depth + 1 == right_type_depth)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2020-01-24 21:36:28 +00:00
|
|
|
auto type_index = right_arg_type->getTypeId();
|
|
|
|
if (type_index == TypeIndex::Tuple)
|
2020-10-07 15:36:34 +00:00
|
|
|
block = createBlockFromCollection(DB::get<const Tuple &>(right_arg_value), set_element_types, tranform_null_in);
|
2020-01-24 21:36:28 +00:00
|
|
|
else if (type_index == TypeIndex::Array)
|
2020-10-07 15:36:34 +00:00
|
|
|
block = createBlockFromCollection(DB::get<const Array &>(right_arg_value), set_element_types, tranform_null_in);
|
2020-01-24 21:36:28 +00:00
|
|
|
else
|
|
|
|
throw_unsupported_type(right_arg_type);
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
else
|
2020-01-24 21:36:28 +00:00
|
|
|
throw_unsupported_type(right_arg_type);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-07-22 23:33:14 +00:00
|
|
|
return block;
|
|
|
|
}
|
|
|
|
|
2022-04-15 21:59:49 +00:00
|
|
|
/** Create a block for set from literal.
|
|
|
|
* 'set_element_types' - types of what are on the left hand side of IN.
|
|
|
|
* 'right_arg' - Literal - Tuple or Array.
|
|
|
|
*/
|
2020-09-25 07:31:06 +00:00
|
|
|
Block createBlockForSet(
|
2020-07-22 23:33:14 +00:00
|
|
|
const DataTypePtr & left_arg_type,
|
2020-07-23 10:49:50 +00:00
|
|
|
const std::shared_ptr<ASTFunction> & right_arg,
|
2020-07-22 23:33:14 +00:00
|
|
|
const DataTypes & set_element_types,
|
2021-04-10 23:33:54 +00:00
|
|
|
ContextPtr context)
|
2020-07-22 23:33:14 +00:00
|
|
|
{
|
2021-04-10 23:33:54 +00:00
|
|
|
auto get_tuple_type_from_ast = [context](const auto & func) -> DataTypePtr
|
2020-07-22 23:33:14 +00:00
|
|
|
{
|
|
|
|
if (func && (func->name == "tuple" || func->name == "array") && !func->arguments->children.empty())
|
|
|
|
{
|
|
|
|
/// Won't parse all values of outer tuple.
|
|
|
|
auto element = func->arguments->children.at(0);
|
|
|
|
std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(element, context);
|
|
|
|
return std::make_shared<DataTypeTuple>(DataTypes({value_raw.second}));
|
|
|
|
}
|
|
|
|
|
2020-07-23 10:49:50 +00:00
|
|
|
return evaluateConstantExpression(func, context).second;
|
2020-07-22 23:33:14 +00:00
|
|
|
};
|
2019-05-19 19:22:51 +00:00
|
|
|
|
2020-07-22 23:33:14 +00:00
|
|
|
const DataTypePtr & right_arg_type = get_tuple_type_from_ast(right_arg);
|
|
|
|
|
|
|
|
size_t left_tuple_depth = getTypeDepth(left_arg_type);
|
|
|
|
size_t right_tuple_depth = getTypeDepth(right_arg_type);
|
|
|
|
ASTPtr elements_ast;
|
|
|
|
|
|
|
|
/// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc.
|
|
|
|
if (left_tuple_depth == right_tuple_depth)
|
|
|
|
{
|
|
|
|
ASTPtr exp_list = std::make_shared<ASTExpressionList>();
|
|
|
|
exp_list->children.push_back(right_arg);
|
|
|
|
elements_ast = exp_list;
|
|
|
|
}
|
|
|
|
/// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc.
|
|
|
|
else if (left_tuple_depth + 1 == right_tuple_depth)
|
|
|
|
{
|
|
|
|
const auto * set_func = right_arg->as<ASTFunction>();
|
|
|
|
if (!set_func || (set_func->name != "tuple" && set_func->name != "array"))
|
|
|
|
throw Exception("Incorrect type of 2nd argument for function 'in'"
|
|
|
|
". Must be subquery or set of elements with type " + left_arg_type->getName() + ".",
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
|
|
|
elements_ast = set_func->arguments;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
throw Exception("Invalid types for IN function: "
|
|
|
|
+ left_arg_type->getName() + " and " + right_arg_type->getName() + ".",
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
|
|
|
return createBlockFromAST(elements_ast, set_element_types, context);
|
|
|
|
}
|
|
|
|
|
2022-04-15 21:59:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-07-22 23:33:14 +00:00
|
|
|
/** Builds (or fetches from `prepared_sets`) the Set for an IN function whose right-hand side
  * is given explicitly in the query.
  *
  * `node` is the IN function and must have exactly two arguments, otherwise
  * NUMBER_OF_ARGUMENTS_DOESNT_MATCH is thrown. The type of the left argument is looked up
  * in `actions` by its column name. A newly built set is stored in `prepared_sets`
  * under a key made from the right argument and the element types.
  */
SetPtr makeExplicitSet(
    const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set,
    ContextPtr context, const SizeLimits & size_limits, PreparedSets & prepared_sets)
{
    const IAST & args = *node->arguments;

    if (args.children.size() != 2)
        throw Exception("Wrong number of arguments passed to function in", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    const ASTPtr & left_arg = args.children.at(0);
    const ASTPtr & right_arg = args.children.at(1);

    auto left_column_name = left_arg->getColumnName();
    const auto & left_dag_node = actions.findInIndex(left_column_name);
    const DataTypePtr & left_arg_type = left_dag_node.result_type;

    /// A tuple on the left is split into its elements, except a tuple of exactly one element,
    /// which is kept as a single set element.
    DataTypes set_element_types = {left_arg_type};
    if (const auto * left_tuple_type = typeid_cast<const DataTypeTuple *>(left_arg_type.get()))
    {
        if (left_tuple_type->getElements().size() != 1)
            set_element_types = left_tuple_type->getElements();
    }

    /// LowCardinality element types are replaced by their dictionary types.
    for (auto & element_type : set_element_types)
    {
        const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(element_type.get());
        if (low_cardinality_type)
            element_type = low_cardinality_type->getDictionaryType();
    }

    auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types);

    auto prepared = prepared_sets.find(set_key);
    if (prepared != prepared_sets.end())
        return prepared->second; /// Already prepared.

    /// An explicit tuple(...) / array(...) goes through the AST-based overload; anything else
    /// is evaluated as a constant expression.
    const auto & right_arg_func = std::dynamic_pointer_cast<ASTFunction>(right_arg);
    const bool is_tuple_or_array = right_arg_func && (right_arg_func->name == "tuple" || right_arg_func->name == "array");

    Block block = is_tuple_or_array
        ? createBlockForSet(left_arg_type, right_arg_func, set_element_types, context)
        : createBlockForSet(left_arg_type, right_arg, set_element_types, context);

    SetPtr set = std::make_shared<Set>(size_limits, create_ordered_set, context->getSettingsRef().transform_null_in);

    set->setHeader(block.cloneEmpty().getColumnsWithTypeAndName());
    set->insertFromBlock(block.getColumnsWithTypeAndName());
    set->finishInsert();

    prepared_sets.emplace(set_key, set);
    return set;
}
|
|
|
|
|
2021-03-03 20:01:07 +00:00
|
|
|
/// Special member functions of ScopeStack::Level, defaulted here rather than in the class.
/// NOTE(review): presumably they are defined out of line because Level owns a ScopeStack::Index,
/// which is only defined in this file - confirm against the header.
ScopeStack::Level::Level() = default;
ScopeStack::Level::Level(Level &&) noexcept = default;
ScopeStack::Level::~Level() = default;
|
2021-03-03 20:01:07 +00:00
|
|
|
|
|
|
|
class ScopeStack::Index
|
|
|
|
{
|
|
|
|
/// Map column name -> Node.
|
|
|
|
/// Use string_view as key which always points to Node::result_name.
|
|
|
|
std::unordered_map<std::string_view, const ActionsDAG::Node *> map;
|
|
|
|
ActionsDAG::NodeRawConstPtrs & index;
|
|
|
|
|
|
|
|
public:
|
2021-03-04 17:38:12 +00:00
|
|
|
explicit Index(ActionsDAG::NodeRawConstPtrs & index_) : index(index_)
|
|
|
|
{
|
|
|
|
for (const auto * node : index)
|
|
|
|
map.emplace(node->result_name, node);
|
|
|
|
}
|
2021-03-03 20:01:07 +00:00
|
|
|
|
|
|
|
void addNode(const ActionsDAG::Node * node)
|
|
|
|
{
|
|
|
|
bool inserted = map.emplace(node->result_name, node).second;
|
|
|
|
if (!inserted)
|
|
|
|
throw Exception("Column '" + node->result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
|
|
|
|
|
|
|
|
index.push_back(node);
|
|
|
|
}
|
|
|
|
|
|
|
|
const ActionsDAG::Node * tryGetNode(const std::string & name) const
|
|
|
|
{
|
|
|
|
auto it = map.find(name);
|
|
|
|
if (it == map.end())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
return it->second;
|
|
|
|
}
|
|
|
|
|
|
|
|
const ActionsDAG::Node & getNode(const std::string & name) const
|
|
|
|
{
|
|
|
|
const auto * node = tryGetNode(name);
|
|
|
|
if (!node)
|
|
|
|
throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
|
|
|
|
|
|
|
|
return *node;
|
|
|
|
}
|
|
|
|
|
2022-04-18 10:18:43 +00:00
|
|
|
bool contains(const std::string & name) const { return map.contains(name); }
|
2021-03-03 20:01:07 +00:00
|
|
|
};
|
|
|
|
|
2020-09-30 12:32:37 +00:00
|
|
|
/// Stores the visitor parameters as given and wraps `actions_dag` into the scope stack.
/// The visit depth starts at zero; the unique-suffix counter starts at the size of the
/// index of the initial actions plus one.
ActionsMatcher::Data::Data(
    ContextPtr context_,
    SizeLimits set_size_limit_,
    size_t subquery_depth_,
    const NamesAndTypesList & source_columns_,
    const NamesAndTypesList & aggregation_keys_,
    const ColumnNumbersList & grouping_set_keys_,
    ActionsDAGPtr actions_dag,
    PreparedSets & prepared_sets_,
    SubqueriesForSets & subqueries_for_sets_,
    bool no_subqueries_,
    bool no_makeset_,
    bool only_consts_,
    bool create_source_for_in_)
    : WithContext(context_)
    , set_size_limit(set_size_limit_)
    , subquery_depth(subquery_depth_)
    , source_columns(source_columns_)
    , aggregation_keys(aggregation_keys_)
    , grouping_set_keys(grouping_set_keys_)
    , prepared_sets(prepared_sets_)
    , subqueries_for_sets(subqueries_for_sets_)
    , no_subqueries(no_subqueries_)
    , no_makeset(no_makeset_)
    , only_consts(only_consts_)
    , create_source_for_in(create_source_for_in_)
    , visit_depth(0)
    , actions_stack(std::move(actions_dag), context_)
    /// NOTE(review): reads actions_stack, so actions_stack must be declared before
    /// next_unique_suffix in the class - confirm against the header.
    , next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1)
{
}
|
|
|
|
|
|
|
|
bool ActionsMatcher::Data::hasColumn(const String & column_name) const
|
|
|
|
{
|
2021-03-03 20:01:07 +00:00
|
|
|
return actions_stack.getLastActionsIndex().contains(column_name);
|
2020-09-30 12:32:37 +00:00
|
|
|
}
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
/// Creates the stack with a single level that takes ownership of `actions_dag`.
/// Every INPUT node already present in the DAG index is recorded as an input of that level.
ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, ContextPtr context_) : WithContext(context_)
{
    auto & root = stack.emplace_back();
    root.actions_dag = std::move(actions_dag);

    auto & dag_index = root.actions_dag->getIndex();
    root.index = std::make_unique<ScopeStack::Index>(dag_index);

    for (const auto & node : dag_index)
    {
        if (node->type != ActionsDAG::ActionType::INPUT)
            continue;

        root.inputs.emplace(node->result_name);
    }
}
|
|
|
|
|
|
|
|
void ScopeStack::pushLevel(const NamesAndTypesList & input_columns)
|
|
|
|
{
|
2020-09-13 13:51:31 +00:00
|
|
|
auto & level = stack.emplace_back();
|
2020-11-11 16:52:27 +00:00
|
|
|
level.actions_dag = std::make_shared<ActionsDAG>();
|
2021-03-04 17:38:12 +00:00
|
|
|
level.index = std::make_unique<ScopeStack::Index>(level.actions_dag->getIndex());
|
2020-09-10 07:30:03 +00:00
|
|
|
const auto & prev = stack[stack.size() - 2];
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-03-08 23:48:08 +00:00
|
|
|
for (const auto & input_column : input_columns)
|
2020-09-13 13:51:31 +00:00
|
|
|
{
|
2021-03-03 20:01:07 +00:00
|
|
|
const auto & node = level.actions_dag->addInput(input_column.name, input_column.type);
|
|
|
|
level.index->addNode(&node);
|
2020-09-13 13:51:31 +00:00
|
|
|
level.inputs.emplace(input_column.name);
|
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-11-11 16:52:27 +00:00
|
|
|
for (const auto & node : prev.actions_dag->getIndex())
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2021-03-03 20:01:07 +00:00
|
|
|
if (!level.index->contains(node->result_name))
|
|
|
|
{
|
|
|
|
const auto & input = level.actions_dag->addInput({node->column, node->result_type, node->result_name});
|
|
|
|
level.index->addNode(&input);
|
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t ScopeStack::getColumnLevel(const std::string & name)
|
|
|
|
{
|
2020-09-30 11:45:47 +00:00
|
|
|
for (size_t i = stack.size(); i > 0;)
|
2020-09-13 13:51:31 +00:00
|
|
|
{
|
2020-09-30 11:45:47 +00:00
|
|
|
--i;
|
|
|
|
|
2022-04-18 10:18:43 +00:00
|
|
|
if (stack[i].inputs.contains(name))
|
2018-10-12 15:41:28 +00:00
|
|
|
return i;
|
|
|
|
|
2021-03-03 20:01:07 +00:00
|
|
|
const auto * node = stack[i].index->tryGetNode(name);
|
|
|
|
if (node && node->type != ActionsDAG::ActionType::INPUT)
|
2020-09-13 13:51:31 +00:00
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
2018-10-12 15:41:28 +00:00
|
|
|
throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER);
|
|
|
|
}
|
|
|
|
|
2020-09-10 16:01:41 +00:00
|
|
|
/// Adds `column` to level 0 and exposes the resulting node as an input on every higher level.
void ScopeStack::addColumn(ColumnWithTypeAndName column)
{
    auto & bottom = stack[0];
    const auto & node = bottom.actions_dag->addColumn(std::move(column));
    bottom.index->addNode(&node);

    for (size_t upper = 1; upper < stack.size(); ++upper)
    {
        auto & level = stack[upper];
        const auto & input = level.actions_dag->addInput({node.column, node.result_type, node.result_name});
        level.index->addNode(&input);
    }
}
|
|
|
|
|
2020-09-10 07:30:03 +00:00
|
|
|
void ScopeStack::addAlias(const std::string & name, std::string alias)
|
|
|
|
{
|
2020-09-13 13:51:31 +00:00
|
|
|
auto level = getColumnLevel(name);
|
2021-03-03 20:01:07 +00:00
|
|
|
const auto & source = stack[level].index->getNode(name);
|
|
|
|
const auto & node = stack[level].actions_dag->addAlias(source, std::move(alias));
|
|
|
|
stack[level].index->addNode(&node);
|
2020-09-10 07:30:03 +00:00
|
|
|
|
|
|
|
for (size_t j = level + 1; j < stack.size(); ++j)
|
2021-03-03 20:01:07 +00:00
|
|
|
{
|
|
|
|
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
|
|
|
|
stack[j].index->addNode(&input);
|
|
|
|
}
|
2020-09-10 07:30:03 +00:00
|
|
|
}
|
|
|
|
|
2020-11-03 11:28:28 +00:00
|
|
|
void ScopeStack::addArrayJoin(const std::string & source_name, std::string result_name)
|
2020-09-10 07:30:03 +00:00
|
|
|
{
|
2020-09-13 13:51:31 +00:00
|
|
|
getColumnLevel(source_name);
|
2020-09-10 07:30:03 +00:00
|
|
|
|
2021-03-03 20:01:07 +00:00
|
|
|
const auto * source_node = stack.front().index->tryGetNode(source_name);
|
|
|
|
if (!source_node)
|
2020-09-13 13:51:31 +00:00
|
|
|
throw Exception("Expression with arrayJoin cannot depend on lambda argument: " + source_name,
|
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
2021-03-03 20:01:07 +00:00
|
|
|
const auto & node = stack.front().actions_dag->addArrayJoin(*source_node, std::move(result_name));
|
|
|
|
stack.front().index->addNode(&node);
|
2020-09-13 13:51:31 +00:00
|
|
|
|
|
|
|
for (size_t j = 1; j < stack.size(); ++j)
|
2021-03-03 20:01:07 +00:00
|
|
|
{
|
|
|
|
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
|
|
|
|
stack[j].index->addNode(&input);
|
|
|
|
}
|
2020-09-10 07:30:03 +00:00
|
|
|
}
|
|
|
|
|
2020-09-10 16:01:41 +00:00
|
|
|
void ScopeStack::addFunction(
|
|
|
|
const FunctionOverloadResolverPtr & function,
|
|
|
|
const Names & argument_names,
|
2020-11-03 11:28:28 +00:00
|
|
|
std::string result_name)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
|
|
|
size_t level = 0;
|
2020-09-10 16:01:41 +00:00
|
|
|
for (const auto & argument : argument_names)
|
|
|
|
level = std::max(level, getColumnLevel(argument));
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2021-03-03 20:01:07 +00:00
|
|
|
ActionsDAG::NodeRawConstPtrs children;
|
|
|
|
children.reserve(argument_names.size());
|
|
|
|
for (const auto & argument : argument_names)
|
|
|
|
children.push_back(&stack[level].index->getNode(argument));
|
|
|
|
|
|
|
|
const auto & node = stack[level].actions_dag->addFunction(function, std::move(children), std::move(result_name));
|
|
|
|
stack[level].index->addNode(&node);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-09-10 16:01:41 +00:00
|
|
|
for (size_t j = level + 1; j < stack.size(); ++j)
|
2021-03-03 20:01:07 +00:00
|
|
|
{
|
|
|
|
const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name});
|
|
|
|
stack[j].index->addNode(&input);
|
|
|
|
}
|
2020-08-25 18:06:21 +00:00
|
|
|
}
|
|
|
|
|
2020-09-10 07:30:03 +00:00
|
|
|
/// Removes the last level from the stack and returns its ActionsDAG.
ActionsDAGPtr ScopeStack::popLevel()
{
    ActionsDAGPtr popped_dag = std::move(stack.back().actions_dag);
    stack.pop_back();
    return popped_dag;
}
|
|
|
|
|
2020-09-10 16:01:41 +00:00
|
|
|
std::string ScopeStack::dumpNames() const
|
|
|
|
{
|
2020-11-11 16:52:27 +00:00
|
|
|
return stack.back().actions_dag->dumpNames();
|
2020-09-10 16:01:41 +00:00
|
|
|
}
|
|
|
|
|
2020-09-30 11:45:47 +00:00
|
|
|
/// Returns a reference to the actions DAG of the last level of the stack.
const ActionsDAG & ScopeStack::getLastActions() const
{
    const auto & top = stack.back();
    return *top.actions_dag;
}
|
|
|
|
|
2021-03-03 20:01:07 +00:00
|
|
|
/// Returns a reference to the node index of the last level of the stack.
const ScopeStack::Index & ScopeStack::getLastActionsIndex() const
{
    const auto & top = stack.back();
    return *top.index;
}
|
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
|
|
|
|
{
|
|
|
|
/// Visit children themself
|
|
|
|
if (node->as<ASTIdentifier>() ||
|
2020-11-13 14:13:27 +00:00
|
|
|
node->as<ASTTableIdentifier>() ||
|
2019-08-15 13:54:59 +00:00
|
|
|
node->as<ASTFunction>() ||
|
2020-10-21 18:17:27 +00:00
|
|
|
node->as<ASTLiteral>() ||
|
|
|
|
node->as<ASTExpressionList>())
|
2019-08-15 13:54:59 +00:00
|
|
|
return false;
|
|
|
|
|
|
|
|
/// Do not go to FROM, JOIN, UNION.
|
|
|
|
if (child->as<ASTTableExpression>() ||
|
|
|
|
child->as<ASTSelectQuery>())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
void ActionsMatcher::visit(const ASTPtr & ast, Data & data)
|
2018-10-16 12:34:20 +00:00
|
|
|
{
|
2019-08-15 13:54:59 +00:00
|
|
|
if (const auto * identifier = ast->as<ASTIdentifier>())
|
|
|
|
visit(*identifier, ast, data);
|
2020-11-13 14:13:27 +00:00
|
|
|
else if (const auto * table = ast->as<ASTTableIdentifier>())
|
|
|
|
visit(*table, ast, data);
|
2019-08-15 13:54:59 +00:00
|
|
|
else if (const auto * node = ast->as<ASTFunction>())
|
|
|
|
visit(*node, ast, data);
|
|
|
|
else if (const auto * literal = ast->as<ASTLiteral>())
|
|
|
|
visit(*literal, ast, data);
|
2020-10-21 18:17:27 +00:00
|
|
|
else if (auto * expression_list = ast->as<ASTExpressionList>())
|
|
|
|
visit(*expression_list, ast, data);
|
2020-10-22 06:58:20 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
for (auto & child : ast->children)
|
2020-10-22 08:25:24 +00:00
|
|
|
if (needChildVisit(ast, child))
|
|
|
|
visit(child, data);
|
2020-10-22 06:58:20 +00:00
|
|
|
}
|
2020-10-21 18:17:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Looks up the column produced for `ast` in the index of the last actions level.
/// Returns its name and type when found. When not found, returns an empty optional
/// if data.only_consts is set and throws UNKNOWN_IDENTIFIER otherwise.
std::optional<NameAndTypePair> ActionsMatcher::getNameAndTypeFromAST(const ASTPtr & ast, Data & data)
{
    // A literal is looked up by its unique column name
    // rather than by the generic display name.
    auto column_name = ast->getColumnName();
    if (const auto * literal = ast->as<ASTLiteral>())
    {
        assert(!literal->unique_column_name.empty());
        column_name = literal->unique_column_name;
    }

    const auto * found = data.actions_stack.getLastActionsIndex().tryGetNode(column_name);
    if (found)
        return NameAndTypePair(column_name, found->result_type);

    if (data.only_consts)
        return {};

    throw Exception("Unknown identifier: " + column_name + "; there are columns: " + data.actions_stack.dumpNames(),
                    ErrorCodes::UNKNOWN_IDENTIFIER);
}
|
|
|
|
|
|
|
|
/// Expands `untuple(t)` into one `tupleElement(t, N)` AST per tuple element (N starts at 1)
/// and registers each of those functions in the actions.
/// Returns the list of generated ASTs; the list is empty when the name and type of the
/// argument could not be obtained (getNameAndTypeFromAST returned nothing).
ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Data & data)
{
    const auto & untuple_args = function->arguments->children;
    if (untuple_args.size() != 1)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                        "Number of arguments for function untuple doesn't match. Passed {}, should be 1",
                        untuple_args.size());

    const auto & tuple_argument = untuple_args[0];

    /// The argument itself has to be calculated before its type can be looked up.
    visit(tuple_argument, data);

    auto tuple_name_type = getNameAndTypeFromAST(tuple_argument, data);
    if (!tuple_name_type)
        return {};

    const auto * tuple_type = typeid_cast<const DataTypeTuple *>(tuple_name_type->type.get());
    if (!tuple_type)
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                        "Function untuple expect tuple argument, got {}",
                        tuple_name_type->type->getName());

    ASTs columns;
    const auto func_alias = function->tryGetAlias();
    const size_t num_elements = tuple_type->getElementNames().size();

    for (size_t position = 1; position <= num_elements; ++position)
    {
        auto tuple_ast = function->arguments->children[0];

        /// This transformation can lead to exponential growth of AST size, let's check it.
        tuple_ast->checkSize(data.getContext()->getSettingsRef().max_ast_elements);

        /// The first element reuses the original argument AST, every following one gets a clone.
        if (position != 1)
            tuple_ast = tuple_ast->clone();

        auto literal = std::make_shared<ASTLiteral>(UInt64{position});
        visit(*literal, literal, data);

        auto func = makeASTFunction("tupleElement", tuple_ast, literal);
        if (!func_alias.empty())
            func->setAlias(func_alias + "." + toString(position));

        auto function_builder = FunctionFactory::instance().get(func->name, data.getContext());
        data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName()}, func->getColumnName());

        columns.push_back(std::move(func));
    }

    return columns;
}
|
|
|
|
|
|
|
|
/// Visits every element of an expression list.
/// An `untuple(...)` element is replaced in place by the ASTs returned from doUntuple();
/// those generated elements are not visited again.
void ActionsMatcher::visit(ASTExpressionList & expression_list, const ASTPtr &, Data & data)
{
    auto & children = expression_list.children;
    size_t end_pos = children.size();

    for (size_t pos = 0; pos < end_pos; ++pos)
    {
        const auto * function = children[pos]->as<ASTFunction>();
        const bool is_untuple = function && function->name == "untuple";

        if (!is_untuple)
        {
            visit(children[pos], data);
            continue;
        }

        auto expanded = doUntuple(function, data);
        if (expanded.empty())
            continue;

        /// Swap the single untuple() element for its expansion and skip over the inserted elements.
        children.erase(children.begin() + pos);
        children.insert(children.begin() + pos, expanded.begin(), expanded.end());

        const size_t growth = expanded.size() - 1;
        end_pos += growth;
        pos += growth;
    }
}
|
|
|
|
|
2020-11-13 14:13:27 +00:00
|
|
|
void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr &, Data & data)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2021-06-16 18:10:56 +00:00
|
|
|
|
2020-11-13 14:13:27 +00:00
|
|
|
auto column_name = identifier.getColumnName();
|
2020-10-23 16:56:11 +00:00
|
|
|
if (data.hasColumn(column_name))
|
2019-08-15 14:22:33 +00:00
|
|
|
return;
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-08-15 14:22:33 +00:00
|
|
|
if (!data.only_consts)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2019-08-15 13:54:59 +00:00
|
|
|
/// The requested column is not in the block.
|
|
|
|
/// If such a column exists in the table, then the user probably forgot to surround it with an aggregate function or add it to GROUP BY.
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
for (const auto & column_name_type : data.source_columns)
|
2020-10-01 11:35:56 +00:00
|
|
|
{
|
2020-10-23 16:56:11 +00:00
|
|
|
if (column_name_type.name == column_name)
|
2020-10-01 11:35:56 +00:00
|
|
|
{
|
2020-10-23 16:56:11 +00:00
|
|
|
throw Exception("Column " + backQuote(column_name) + " is not under aggregate function and not in GROUP BY",
|
2021-02-18 11:49:32 +00:00
|
|
|
ErrorCodes::NOT_AN_AGGREGATE);
|
2020-10-01 09:03:19 +00:00
|
|
|
}
|
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
/// Special check for WITH statement alias. Add alias action to be able to use this alias.
|
|
|
|
if (identifier.prefer_alias_to_column_name && !identifier.alias.empty())
|
2020-10-24 18:46:10 +00:00
|
|
|
data.addAlias(identifier.name(), identifier.alias);
|
2019-08-15 13:54:59 +00:00
|
|
|
}
|
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
|
|
|
|
{
|
2021-08-03 18:03:24 +00:00
|
|
|
auto column_name = ast->getColumnName();
|
2020-10-23 16:56:11 +00:00
|
|
|
if (data.hasColumn(column_name))
|
2019-08-15 13:54:59 +00:00
|
|
|
return;
|
2018-11-12 18:19:16 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
if (node.name == "lambda")
|
|
|
|
throw Exception("Unexpected lambda expression", ErrorCodes::UNEXPECTED_EXPRESSION);
|
|
|
|
|
|
|
|
/// Function arrayJoin.
|
|
|
|
if (node.name == "arrayJoin")
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2019-08-15 13:54:59 +00:00
|
|
|
if (node.arguments->children.size() != 1)
|
|
|
|
throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
ASTPtr arg = node.arguments->children.at(0);
|
|
|
|
visit(arg, data);
|
|
|
|
if (!data.only_consts)
|
2021-08-03 18:03:24 +00:00
|
|
|
data.addArrayJoin(arg->getColumnName(), column_name);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
return;
|
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2022-05-12 16:39:50 +00:00
|
|
|
if (node.name == "grouping")
|
|
|
|
{
|
|
|
|
auto arguments_column_name = data.getUniqueName("__grouping_args");
|
|
|
|
{
|
2022-05-13 14:55:50 +00:00
|
|
|
if (!data.hasColumn("__grouping_set_map"))
|
|
|
|
{
|
|
|
|
ColumnWithTypeAndName column;
|
|
|
|
column.name = "__grouping_set_map";
|
|
|
|
size_t map_size = data.aggregation_keys.size() + 1;
|
|
|
|
column.type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeFixedString>(map_size));
|
|
|
|
Array maps_per_set;
|
|
|
|
for (auto & grouping_set : data.grouping_set_keys)
|
|
|
|
{
|
|
|
|
std::string key_map(map_size, '0');
|
|
|
|
for (auto index : grouping_set)
|
|
|
|
key_map[index] = '1';
|
|
|
|
maps_per_set.push_back(key_map);
|
|
|
|
}
|
|
|
|
auto grouping_set_map_column = ColumnArray::create(ColumnFixedString::create(map_size));
|
|
|
|
grouping_set_map_column->insert(maps_per_set);
|
|
|
|
column.column = ColumnConst::create(std::move(grouping_set_map_column), 1);
|
|
|
|
|
|
|
|
data.addColumn(column);
|
|
|
|
}
|
2022-05-12 16:39:50 +00:00
|
|
|
ColumnWithTypeAndName column;
|
|
|
|
column.name = arguments_column_name;
|
|
|
|
column.type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
|
|
|
|
Array arguments_to_keys_map;
|
|
|
|
for (auto const & arg : node.arguments->children)
|
|
|
|
{
|
|
|
|
size_t pos = data.aggregation_keys.getPosByName(arg->getColumnName());
|
|
|
|
arguments_to_keys_map.push_back(pos);
|
|
|
|
}
|
|
|
|
auto arguments_column = ColumnArray::create(ColumnUInt64::create());
|
|
|
|
arguments_column->insert(Field{arguments_to_keys_map});
|
|
|
|
|
|
|
|
column.column = ColumnConst::create(ColumnPtr(std::move(arguments_column)), 1);
|
|
|
|
|
|
|
|
data.addColumn(column);
|
|
|
|
}
|
|
|
|
|
|
|
|
data.addFunction(
|
|
|
|
FunctionFactory::instance().get("grouping", data.getContext()),
|
2022-05-13 14:55:50 +00:00
|
|
|
{ "__grouping_set", "__grouping_set_map", arguments_column_name },
|
2022-05-12 16:39:50 +00:00
|
|
|
column_name
|
|
|
|
);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
SetPtr prepared_set;
|
2020-07-15 14:22:54 +00:00
|
|
|
if (checkFunctionIsInOrGlobalInOperator(node))
|
2019-08-15 13:54:59 +00:00
|
|
|
{
|
|
|
|
/// Let's find the type of the first argument (then getActionsImpl will be called again and will not affect anything).
|
|
|
|
visit(node.arguments->children.at(0), data);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-01-27 21:57:44 +00:00
|
|
|
if (!data.no_makeset && (prepared_set = makeSet(node, data, data.no_subqueries)))
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2019-08-15 13:54:59 +00:00
|
|
|
/// Transform tuple or subquery into a set.
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (!data.only_consts)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2019-08-15 13:54:59 +00:00
|
|
|
/// We are in the part of the tree that we are not going to compute. You just need to define types.
|
2022-03-18 07:32:42 +00:00
|
|
|
/// Do not evaluate subquery and create sets. We replace "in*" function to "in*IgnoreSet".
|
2020-05-14 14:21:38 +00:00
|
|
|
|
2021-08-03 18:03:24 +00:00
|
|
|
auto argument_name = node.arguments->children.at(0)->getColumnName();
|
2020-09-10 16:01:41 +00:00
|
|
|
data.addFunction(
|
2022-03-18 07:32:42 +00:00
|
|
|
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
|
|
|
|
{argument_name, argument_name},
|
|
|
|
column_name);
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2019-08-15 13:54:59 +00:00
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2021-02-28 07:25:56 +00:00
|
|
|
/// A special function `indexHint`. Everything that is inside it is not calculated
|
|
|
|
if (node.name == "indexHint")
|
|
|
|
{
|
|
|
|
// Arguments are removed. We add function instead of constant column to avoid constant folding.
|
|
|
|
data.addFunction(FunctionFactory::instance().get("indexHint", data.getContext()), {}, column_name);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-12-16 21:44:05 +00:00
|
|
|
if (node.is_window_function)
|
2020-12-09 11:14:40 +00:00
|
|
|
{
|
2020-12-16 21:44:05 +00:00
|
|
|
// Also add columns from PARTITION BY and ORDER BY of window functions.
|
2021-01-13 19:29:52 +00:00
|
|
|
if (node.window_definition)
|
2020-12-18 00:21:23 +00:00
|
|
|
{
|
2021-01-13 19:29:52 +00:00
|
|
|
visit(node.window_definition, data);
|
2020-12-18 00:21:23 +00:00
|
|
|
}
|
2020-12-16 21:44:05 +00:00
|
|
|
|
2020-12-24 08:49:55 +00:00
|
|
|
// Also manually add columns for arguments of the window function itself.
|
|
|
|
// ActionVisitor is written in such a way that this method must itself
|
|
|
|
// descend into all needed function children. Window functions can't have
|
|
|
|
// any special functions as argument, so the code below that handles
|
|
|
|
// special arguments is not needed. This is analogous to the
|
|
|
|
// appendWindowFunctionsArguments() in SelectQueryExpressionAnalyzer and
|
|
|
|
// partially duplicates its code. Probably we can remove most of the
|
|
|
|
// logic from that function, but I don't yet have it all figured out...
|
|
|
|
for (const auto & arg : node.arguments->children)
|
|
|
|
{
|
|
|
|
visit(arg, data);
|
|
|
|
}
|
|
|
|
|
2020-12-16 21:44:05 +00:00
|
|
|
// Don't need to do anything more for window functions here -- the
|
|
|
|
// resulting column is added in ExpressionAnalyzer, similar to the
|
|
|
|
// aggregate functions.
|
2020-12-09 11:14:40 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-12-16 21:44:05 +00:00
|
|
|
// An aggregate function can also be calculated as a window function, but we
|
|
|
|
// checked for it above, so no need to do anything more.
|
|
|
|
if (AggregateFunctionFactory::instance().isAggregateFunctionName(node.name))
|
|
|
|
return;
|
|
|
|
|
2021-09-09 13:47:48 +00:00
|
|
|
FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(node.name, data.getContext());
|
|
|
|
|
|
|
|
if (!function_builder)
|
2019-08-15 13:54:59 +00:00
|
|
|
{
|
2021-09-09 13:47:48 +00:00
|
|
|
try
|
|
|
|
{
|
|
|
|
function_builder = FunctionFactory::instance().get(node.name, data.getContext());
|
|
|
|
}
|
|
|
|
catch (Exception & e)
|
|
|
|
{
|
|
|
|
auto hints = AggregateFunctionFactory::instance().getHints(node.name);
|
|
|
|
if (!hints.empty())
|
|
|
|
e.addMessage("Or unknown aggregate function " + node.name + ". Maybe you meant: " + toString(hints));
|
|
|
|
throw;
|
|
|
|
}
|
2019-08-15 13:54:59 +00:00
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
Names argument_names;
|
|
|
|
DataTypes argument_types;
|
|
|
|
bool arguments_present = true;
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
/// If the function has an argument-lambda expression, you need to determine its type before the recursive call.
|
|
|
|
bool has_lambda_arguments = false;
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
if (node.arguments)
|
|
|
|
{
|
|
|
|
size_t num_arguments = node.arguments->children.size();
|
|
|
|
for (size_t arg = 0; arg < num_arguments; ++arg)
|
2019-08-15 13:54:59 +00:00
|
|
|
{
|
2020-12-04 02:15:44 +00:00
|
|
|
auto & child = node.arguments->children[arg];
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
const auto * function = child->as<ASTFunction>();
|
2020-12-16 11:50:36 +00:00
|
|
|
const auto * identifier = child->as<ASTTableIdentifier>();
|
2020-12-04 02:15:44 +00:00
|
|
|
if (function && function->name == "lambda")
|
|
|
|
{
|
|
|
|
/// If the argument is a lambda expression, just remember its approximate type.
|
|
|
|
if (function->arguments->children.size() != 2)
|
|
|
|
throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
const auto * lambda_args_tuple = function->arguments->children.at(0)->as<ASTFunction>();
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
|
|
|
|
throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);
|
2020-10-21 18:17:27 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
has_lambda_arguments = true;
|
|
|
|
argument_types.emplace_back(std::make_shared<DataTypeFunction>(DataTypes(lambda_args_tuple->arguments->children.size())));
|
|
|
|
/// Select the name in the next cycle.
|
|
|
|
argument_names.emplace_back();
|
2020-10-21 18:17:27 +00:00
|
|
|
}
|
2020-12-04 02:15:44 +00:00
|
|
|
else if (function && function->name == "untuple")
|
|
|
|
{
|
|
|
|
auto columns = doUntuple(function, data);
|
2020-10-21 18:17:27 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
if (columns.empty())
|
|
|
|
continue;
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
for (const auto & column : columns)
|
|
|
|
{
|
|
|
|
if (auto name_type = getNameAndTypeFromAST(column, data))
|
|
|
|
{
|
|
|
|
argument_types.push_back(name_type->type);
|
|
|
|
argument_names.push_back(name_type->name);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
arguments_present = false;
|
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
node.arguments->children.erase(node.arguments->children.begin() + arg);
|
|
|
|
node.arguments->children.insert(node.arguments->children.begin() + arg, columns.begin(), columns.end());
|
|
|
|
num_arguments += columns.size() - 1;
|
|
|
|
arg += columns.size() - 1;
|
|
|
|
}
|
|
|
|
else if (checkFunctionIsInOrGlobalInOperator(node) && arg == 1 && prepared_set)
|
2019-08-15 13:54:59 +00:00
|
|
|
{
|
2020-12-04 02:15:44 +00:00
|
|
|
ColumnWithTypeAndName column;
|
|
|
|
column.type = std::make_shared<DataTypeSet>();
|
|
|
|
|
|
|
|
/// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
|
|
|
|
/// so that sets with the same literal representation do not fuse together (they can have different types).
|
2019-10-27 18:12:40 +00:00
|
|
|
if (!prepared_set->empty())
|
2020-12-04 02:15:44 +00:00
|
|
|
column.name = data.getUniqueName("__set");
|
2019-10-27 18:12:40 +00:00
|
|
|
else
|
2021-08-03 18:03:24 +00:00
|
|
|
column.name = child->getColumnName();
|
2019-08-15 13:54:59 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
if (!data.hasColumn(column.name))
|
|
|
|
{
|
|
|
|
auto column_set = ColumnSet::create(1, prepared_set);
|
|
|
|
/// If prepared_set is not empty, we have a set made with literals.
|
|
|
|
/// Create a const ColumnSet to make constant folding work
|
|
|
|
if (!prepared_set->empty())
|
|
|
|
column.column = ColumnConst::create(std::move(column_set), 1);
|
|
|
|
else
|
|
|
|
column.column = std::move(column_set);
|
|
|
|
data.addColumn(column);
|
|
|
|
}
|
2020-03-30 18:00:38 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
argument_types.push_back(column.type);
|
|
|
|
argument_names.push_back(column.name);
|
|
|
|
}
|
|
|
|
else if (identifier && (functionIsJoinGet(node.name) || functionIsDictGet(node.name)) && arg == 0)
|
2020-03-30 18:00:38 +00:00
|
|
|
{
|
2020-12-16 11:50:36 +00:00
|
|
|
auto table_id = identifier->getTableId();
|
2021-04-10 23:33:54 +00:00
|
|
|
table_id = data.getContext()->resolveStorageID(table_id, Context::ResolveOrdinary);
|
2020-12-04 02:15:44 +00:00
|
|
|
auto column_string = ColumnString::create();
|
|
|
|
column_string->insert(table_id.getDatabaseName() + "." + table_id.getTableName());
|
|
|
|
ColumnWithTypeAndName column(
|
|
|
|
ColumnConst::create(std::move(column_string), 1),
|
|
|
|
std::make_shared<DataTypeString>(),
|
|
|
|
data.getUniqueName("__" + node.name));
|
|
|
|
data.addColumn(column);
|
|
|
|
argument_types.push_back(column.type);
|
|
|
|
argument_names.push_back(column.name);
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
else
|
2020-12-04 02:15:44 +00:00
|
|
|
{
|
|
|
|
/// If the argument is not a lambda expression, call it recursively and find out its type.
|
|
|
|
visit(child, data);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
if (auto name_type = getNameAndTypeFromAST(child, data))
|
|
|
|
{
|
|
|
|
argument_types.push_back(name_type->type);
|
|
|
|
argument_names.push_back(name_type->name);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
arguments_present = false;
|
|
|
|
}
|
|
|
|
}
|
2019-08-15 13:54:59 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
if (data.only_consts && !arguments_present)
|
|
|
|
return;
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
if (has_lambda_arguments && !data.only_consts)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2020-12-04 02:15:44 +00:00
|
|
|
function_builder->getLambdaArgumentTypes(argument_types);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
/// Call recursively for lambda expressions.
|
|
|
|
for (size_t i = 0; i < node.arguments->children.size(); ++i)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2020-12-04 02:15:44 +00:00
|
|
|
ASTPtr child = node.arguments->children[i];
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
const auto * lambda = child->as<ASTFunction>();
|
|
|
|
if (lambda && lambda->name == "lambda")
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2020-12-04 02:15:44 +00:00
|
|
|
const DataTypeFunction * lambda_type = typeid_cast<const DataTypeFunction *>(argument_types[i].get());
|
|
|
|
const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as<ASTFunction>();
|
|
|
|
const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children;
|
|
|
|
NamesAndTypesList lambda_arguments;
|
2019-08-15 13:54:59 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
|
|
|
|
{
|
|
|
|
auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts[j]);
|
|
|
|
if (!opt_arg_name)
|
|
|
|
throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);
|
|
|
|
|
|
|
|
lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]);
|
|
|
|
}
|
2019-08-15 13:54:59 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
data.actions_stack.pushLevel(lambda_arguments);
|
|
|
|
visit(lambda->arguments->children.at(1), data);
|
|
|
|
auto lambda_dag = data.actions_stack.popLevel();
|
2019-08-15 13:54:59 +00:00
|
|
|
|
2021-08-03 18:03:24 +00:00
|
|
|
String result_name = lambda->arguments->children.at(1)->getColumnName();
|
2020-12-04 02:15:44 +00:00
|
|
|
lambda_dag->removeUnusedActions(Names(1, result_name));
|
2020-11-03 11:28:28 +00:00
|
|
|
|
2021-03-04 17:38:12 +00:00
|
|
|
auto lambda_actions = std::make_shared<ExpressionActions>(
|
|
|
|
lambda_dag,
|
2021-05-19 14:32:07 +00:00
|
|
|
ExpressionActionsSettings::fromContext(data.getContext(), CompileExpressions::yes));
|
2020-11-03 11:28:28 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;
|
2019-08-15 13:54:59 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
Names captured;
|
|
|
|
Names required = lambda_actions->getRequiredColumns();
|
|
|
|
for (const auto & required_arg : required)
|
|
|
|
if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
|
|
|
|
captured.push_back(required_arg);
|
2019-08-15 13:54:59 +00:00
|
|
|
|
2021-08-03 18:03:24 +00:00
|
|
|
/// We can not name `getColumnName()`,
|
2020-12-04 02:15:44 +00:00
|
|
|
/// because it does not uniquely define the expression (the types of arguments can be different).
|
|
|
|
String lambda_name = data.getUniqueName("__lambda");
|
2019-08-15 13:54:59 +00:00
|
|
|
|
2021-05-15 17:33:15 +00:00
|
|
|
auto function_capture = std::make_shared<FunctionCaptureOverloadResolver>(
|
2020-12-04 02:15:44 +00:00
|
|
|
lambda_actions, captured, lambda_arguments, result_type, result_name);
|
2021-05-15 17:33:15 +00:00
|
|
|
data.addFunction(function_capture, captured, lambda_name);
|
2019-08-15 13:54:59 +00:00
|
|
|
|
2020-12-04 02:15:44 +00:00
|
|
|
argument_types[i] = std::make_shared<DataTypeFunction>(lambda_type->getArgumentTypes(), result_type);
|
|
|
|
argument_names[i] = lambda_name;
|
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
}
|
2019-08-15 13:54:59 +00:00
|
|
|
}
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
if (data.only_consts)
|
|
|
|
{
|
|
|
|
for (const auto & argument_name : argument_names)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2019-08-15 14:22:33 +00:00
|
|
|
if (!data.hasColumn(argument_name))
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2019-08-15 13:54:59 +00:00
|
|
|
arguments_present = false;
|
|
|
|
break;
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
if (arguments_present)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2020-10-23 16:56:11 +00:00
|
|
|
/// Calculate column name here again, because AST may be changed here (in case of untuple).
|
2021-08-03 18:03:24 +00:00
|
|
|
data.addFunction(function_builder, argument_names, ast->getColumnName());
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-30 18:00:38 +00:00
|
|
|
void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
|
|
|
|
Data & data)
|
2019-08-15 13:54:59 +00:00
|
|
|
{
|
|
|
|
DataTypePtr type = applyVisitor(FieldToDataType(), literal.value);
|
2020-04-01 12:16:39 +00:00
|
|
|
const auto value = convertFieldToType(literal.value, *type);
|
|
|
|
|
|
|
|
// FIXME why do we have a second pass with a clean sample block over the same
|
|
|
|
// AST here? Anyway, do not modify the column name if it is set already.
|
|
|
|
if (literal.unique_column_name.empty())
|
|
|
|
{
|
2021-08-03 18:03:24 +00:00
|
|
|
const auto default_name = literal.getColumnName();
|
2021-03-03 20:01:07 +00:00
|
|
|
const auto & index = data.actions_stack.getLastActionsIndex();
|
|
|
|
const auto * existing_column = index.tryGetNode(default_name);
|
2020-04-01 12:16:39 +00:00
|
|
|
|
|
|
|
/*
|
2020-04-01 12:21:13 +00:00
|
|
|
* To approximate CSE, bind all identical literals to a single temporary
|
2020-04-01 12:16:39 +00:00
|
|
|
* columns. We try to find the column by its default name, but after that
|
|
|
|
* we have to check that it contains the correct data. This might not be
|
|
|
|
* the case if it is a user-supplied column, or it is from under a join,
|
|
|
|
* etc.
|
|
|
|
* Overall, this is a hack around a generally poor name-based notion of
|
|
|
|
* column identity we currently use.
|
|
|
|
*/
|
|
|
|
if (existing_column
|
|
|
|
&& existing_column->column
|
|
|
|
&& isColumnConst(*existing_column->column)
|
|
|
|
&& existing_column->column->size() == 1
|
2020-04-15 14:10:33 +00:00
|
|
|
&& existing_column->column->operator[](0) == value)
|
2020-04-01 12:16:39 +00:00
|
|
|
{
|
|
|
|
const_cast<ASTLiteral &>(literal).unique_column_name = default_name;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const_cast<ASTLiteral &>(literal).unique_column_name
|
2020-04-14 17:41:06 +00:00
|
|
|
= data.getUniqueName(default_name);
|
2020-04-01 12:16:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (data.hasColumn(literal.unique_column_name))
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
2019-08-15 13:54:59 +00:00
|
|
|
|
|
|
|
ColumnWithTypeAndName column;
|
2020-04-01 12:16:39 +00:00
|
|
|
column.name = literal.unique_column_name;
|
|
|
|
column.column = type->createColumnConst(1, value);
|
2019-08-15 13:54:59 +00:00
|
|
|
column.type = type;
|
2020-03-30 18:00:38 +00:00
|
|
|
|
2020-09-10 16:01:41 +00:00
|
|
|
data.addColumn(std::move(column));
|
2019-08-15 13:54:59 +00:00
|
|
|
}
|
|
|
|
|
2019-10-27 18:12:40 +00:00
|
|
|
SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_subqueries)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
|
|
|
/** You need to convert the right argument to a set.
|
|
|
|
* This can be a table name, a value, a value enumeration, or a subquery.
|
|
|
|
* The enumeration of values is parsed as a function `tuple`.
|
|
|
|
*/
|
2019-08-15 13:54:59 +00:00
|
|
|
const IAST & args = *node.arguments;
|
2019-10-27 18:12:40 +00:00
|
|
|
const ASTPtr & left_in_operand = args.children.at(0);
|
|
|
|
const ASTPtr & right_in_operand = args.children.at(1);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
|
|
|
/// If the subquery or table name for SELECT.
|
2020-11-13 14:13:27 +00:00
|
|
|
const auto * identifier = right_in_operand->as<ASTTableIdentifier>();
|
2019-10-27 18:12:40 +00:00
|
|
|
if (right_in_operand->as<ASTSubquery>() || identifier)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2019-10-27 18:12:40 +00:00
|
|
|
if (no_subqueries)
|
|
|
|
return {};
|
|
|
|
auto set_key = PreparedSetKey::forSubquery(*right_in_operand);
|
2022-03-18 07:32:42 +00:00
|
|
|
if (auto it = data.prepared_sets.find(set_key); it != data.prepared_sets.end())
|
|
|
|
return it->second;
|
2019-01-22 12:33:56 +00:00
|
|
|
|
2018-10-12 15:41:28 +00:00
|
|
|
/// A special case is if the name of the table is specified on the right side of the IN statement,
|
|
|
|
/// and the table has the type Set (a previously prepared set).
|
|
|
|
if (identifier)
|
|
|
|
{
|
2021-04-10 23:33:54 +00:00
|
|
|
auto table_id = data.getContext()->resolveStorageID(right_in_operand);
|
|
|
|
StoragePtr table = DatabaseCatalog::instance().tryGetTable(table_id, data.getContext());
|
2018-10-12 15:41:28 +00:00
|
|
|
|
|
|
|
if (table)
|
|
|
|
{
|
|
|
|
StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get());
|
|
|
|
if (storage_set)
|
|
|
|
{
|
2022-03-18 07:32:42 +00:00
|
|
|
data.prepared_sets.emplace(set_key, storage_set->getSet());
|
2019-01-22 12:33:56 +00:00
|
|
|
return storage_set->getSet();
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-16 12:58:27 +00:00
|
|
|
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
|
2021-08-03 18:03:24 +00:00
|
|
|
String set_id = right_in_operand->getColumnName();
|
2019-01-16 12:58:27 +00:00
|
|
|
|
2019-08-15 13:54:59 +00:00
|
|
|
SubqueryForSet & subquery_for_set = data.subqueries_for_sets[set_id];
|
2018-10-12 15:41:28 +00:00
|
|
|
|
|
|
|
/// If a Set has already been created for the same subquery / table, reuse it.
|
|
|
|
if (subquery_for_set.set)
|
|
|
|
{
|
2022-03-18 07:32:42 +00:00
|
|
|
data.prepared_sets.emplace(set_key, subquery_for_set.set);
|
2019-01-22 12:33:56 +00:00
|
|
|
return subquery_for_set.set;
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
SetPtr set = std::make_shared<Set>(data.set_size_limit, false, data.getContext()->getSettingsRef().transform_null_in);
|
2018-10-12 15:41:28 +00:00
|
|
|
|
2020-10-23 19:08:38 +00:00
|
|
|
/** The following happens for GLOBAL INs or INs:
|
2018-10-12 15:41:28 +00:00
|
|
|
* - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1,
|
|
|
|
* in the subquery_for_set object, this subquery is set as source and the temporary table _data1 as the table.
|
|
|
|
* - this function then sees the expression IN _data1.
|
2020-11-03 16:07:27 +00:00
|
|
|
*
|
2020-10-23 19:08:38 +00:00
|
|
|
* In case that we have HAVING with IN subquery, we have to force creating set for it.
|
2020-11-12 23:27:18 +00:00
|
|
|
* In that case it does not matter whether it is GLOBAL IN or an ordinary IN.
|
2018-10-12 15:41:28 +00:00
|
|
|
*/
|
2020-11-02 12:07:01 +00:00
|
|
|
if (!subquery_for_set.source && data.create_source_for_in)
|
2018-10-12 15:41:28 +00:00
|
|
|
{
|
2021-04-10 23:33:54 +00:00
|
|
|
auto interpreter = interpretSubquery(right_in_operand, data.getContext(), data.subquery_depth, {});
|
2020-09-15 13:25:14 +00:00
|
|
|
subquery_for_set.source = std::make_unique<QueryPlan>();
|
|
|
|
interpreter->buildQueryPlan(*subquery_for_set.source);
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
subquery_for_set.set = set;
|
2022-03-18 07:32:42 +00:00
|
|
|
data.prepared_sets.emplace(set_key, set);
|
2019-01-22 12:33:56 +00:00
|
|
|
return set;
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2020-09-30 11:45:47 +00:00
|
|
|
const auto & last_actions = data.actions_stack.getLastActions();
|
2021-03-03 20:01:07 +00:00
|
|
|
const auto & index = data.actions_stack.getLastActionsIndex();
|
2021-08-03 18:03:24 +00:00
|
|
|
if (index.contains(left_in_operand->getColumnName()))
|
2019-10-27 18:12:40 +00:00
|
|
|
/// An explicit enumeration of values in parentheses.
|
2021-04-10 23:33:54 +00:00
|
|
|
return makeExplicitSet(&node, last_actions, false, data.getContext(), data.set_size_limit, data.prepared_sets);
|
2019-10-27 18:12:40 +00:00
|
|
|
else
|
|
|
|
return {};
|
2018-10-12 15:41:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|