// NOTE(review): this copy of the file appears to have lost every angle-bracket span during
// extraction: the #include targets below are missing and template arguments are absent
// (e.g. `node->as()`, `std::make_shared(...)`, `typeid_cast(...)`, `InDepthNodeVisitor, true>`).
// The code tokens are kept exactly as found; restore the missing pieces from the upstream
// file before compiling.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include


namespace DB
{

/// Error codes thrown from this file; they are declared here and defined elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int INCORRECT_QUERY;
    extern const int UNSUPPORTED_METHOD;
}

namespace
{

/// Matcher for InDepthNodeVisitor that replaces a node by a literal when the node's column name
/// is found in the block of constants as a constant column.
/// Everything except numbers is put as string literal.
class ReplacingConstantExpressionsMatcherNumOrStr
{
public:
    /// The visitor's data is the block of precomputed constant columns.
    using Data = Block;

    /// Always descend into children.
    static bool needChildVisit(ASTPtr &, const ASTPtr &)
    {
        return true;
    }

    /// Replaces `node` in place if `block_with_constants` has a constant column named like the node.
    static void visit(ASTPtr & node, Block & block_with_constants)
    {
        /// NOTE(review): the node type tested here was lost with the template argument
        /// (presumably only function nodes are considered - confirm against upstream).
        if (!node->as())
            return;

        std::string name = node->getColumnName();
        if (block_with_constants.has(name))
        {
            auto result = block_with_constants.getByName(name);
            /// Only columns that are constant can be turned into a literal.
            if (!isColumnConst(*result.column))
                return;

            if (result.column->isNullAt(0))
            {
                /// NULL constant: the replacement node is built from a default-constructed Field.
                node = std::make_shared(Field());
            }
            else if (isNumber(result.type))
            {
                /// Numeric constant: the replacement node is built from the column's field value.
                node = std::make_shared(assert_cast(*result.column).getField());
            }
            else
            {
                /// Everything except numbers is put as string literal. This is important for Date, DateTime, UUID.

                const IColumn & inner_column = assert_cast(*result.column).getDataColumn();

                /// Serialize row 0 of the underlying data column as text, using the type's default
                /// serialization and default format settings.
                WriteBufferFromOwnString out;
                result.type->getDefaultSerialization()->serializeText(inner_column, 0, out, FormatSettings());
                node = std::make_shared(out.str());
            }
        }
    }
};

/// Visitor data that rewrites 0/1 arguments of `and` / `or` functions into comparisons.
struct ReplaceLiteralToExprVisitorData
{
    using TypeToVisit = ASTFunction;

    /// For a function named "and" or "or", replaces every argument that casts successfully below
    /// and holds a UInt64 equal to 0 or 1 with `equals(1, 1)` or `equals(1, 0)` respectively.
    void visit(ASTFunction & func, ASTPtr &) const
    {
        if (func.name == "and" || func.name == "or")
        {
            for (auto & argument : func.arguments->children)
            {
                /// NOTE(review): the cast target type was lost; the `->value` access below
                /// suggests a literal node - confirm against upstream.
                auto * literal_expr = typeid_cast(argument.get());
                UInt64 value;
                /// `value` is only read after tryGet has succeeded (short-circuit evaluation).
                if (literal_expr && literal_expr->value.tryGet(value) && (value == 0 || value == 1))
                {
                    /// 1 -> 1=1, 0 -> 1=0.
                    if (value)
                        argument = makeASTFunction("equals", std::make_shared(1), std::make_shared(1));
                    else
                        argument = makeASTFunction("equals", std::make_shared(1), std::make_shared(0));
                }
            }
        }
    }
};

/// NOTE(review): the template argument list of this alias is garbled in this copy.
using ReplaceLiteralToExprVisitor = InDepthNodeVisitor, true>;

/// Matcher for InDepthNodeVisitor that clears the alias of every visited node that has one.
class DropAliasesMatcher
{
public:
    struct Data {};
    /// Not referenced anywhere in this file; dropAliases() creates its own Data instance.
    Data data;

    /// Always descend into children.
    static bool needChildVisit(ASTPtr &, const ASTPtr &)
    {
        return true;
    }

    /// Resets the alias to an empty value when the node currently has a non-empty alias.
    static void visit(ASTPtr & node, Data)
    {
        if (!node->tryGetAlias().empty())
            node->setAlias({});
    }
};

/// Analyzes `node` against `all_columns`, obtains the block of constants for it from KeyCondition,
/// and then runs a visitor over `node` with that block as its data.
void replaceConstantExpressions(ASTPtr & node, ContextPtr context, const NamesAndTypesList & all_columns)
{
    auto syntax_result = TreeRewriter(context).analyze(node, all_columns);
    Block block_with_constants = KeyCondition::getBlockWithConstants(node, syntax_result, context);

    /// NOTE(review): the matcher template argument is missing here; since the data is a Block,
    /// presumably this is ReplacingConstantExpressionsMatcherNumOrStr - confirm.
    InDepthNodeVisitor visitor(block_with_constants);
    visitor.visit(node);
}

/// Runs a visitor with DropAliasesMatcher::Data over the tree rooted at `node`.
void dropAliases(ASTPtr & node)
{
    DropAliasesMatcher::Data data;
    /// NOTE(review): matcher template argument missing; presumably DropAliasesMatcher - confirm.
    InDepthNodeVisitor visitor(data);
    visitor.visit(node);
}

/// Decides whether the expression rooted at `node` may be copied into the generated query.
///
/// Not a pure predicate - it can modify the tree:
///  - a function named "tuple" with at most one argument gets its name cleared;
///  - a Tuple literal with exactly one element is replaced by an empty-named function node.
/// Throws LOGICAL_ERROR if a function node has no `arguments`.
bool isCompatible(ASTPtr & node)
{
    if (auto * function = node->as())
    {
        if (function->parameters)   /// Parametric aggregate functions
            return false;

        if (!function->arguments)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: function->arguments is not set");

        /// Copy of the name: the checks below must still see the original name
        /// after `function->name` has possibly been cleared.
        String name = function->name;

        /// Only this fixed set of function names is accepted.
        if (!(name == "and"
            || name == "or"
            || name == "not"
            || name == "equals"
            || name == "notEquals"
            || name == "less"
            || name == "greater"
            || name == "lessOrEquals"
            || name == "greaterOrEquals"
            || name == "like"
            || name == "notLike"
            || name == "in"
            || name == "notIn"
            || name == "isNull"
            || name == "isNotNull"
            || name == "tuple"))
            return false;

        /// A tuple with zero or one elements is represented by a function tuple(x) and is not compatible,
        /// but a normal tuple with more than one element is represented as a parenthesized expression (x, y) and is perfectly compatible.
        /// So to support tuple with zero or one elements we can clear function name to get (x) instead of tuple(x)
        /// Note that the name is cleared before the arguments are checked, so the change stays
        /// in the tree even if this function returns false further down.
        if (name == "tuple")
        {
            if (function->arguments->children.size() <= 1)
            {
                function->name.clear();
            }
        }

        /// If the right hand side of IN is a table identifier (example: x IN table), then it's not compatible.
        /// An IN / notIn whose argument count is not exactly 2 is rejected as well.
        if ((name == "in" || name == "notIn")
            && (function->arguments->children.size() != 2 || function->arguments->children[1]->as()))
            return false;

        /// A function is compatible only if every argument is.
        for (auto & expr : function->arguments->children)
            if (!isCompatible(expr))
                return false;

        return true;
    }

    if (const auto * literal = node->as())
    {
        if (literal->value.getType() == Field::Types::Tuple)
        {
            /// Represent a tuple with exactly one element as (x) instead of tuple(x):
            /// the literal is replaced by a function node with an empty name wrapping that element.
            /// (Only the size == 1 case is rewritten here.)
            auto tuple_value = literal->value.safeGet();
            if (tuple_value.size() == 1)
            {
                node = makeASTFunction("", std::make_shared(tuple_value[0]));
                return true;
            }
        }
        /// Foreign databases often have no support for Array. But Tuple literals are passed to support IN clause.
        return literal->value.getType() != Field::Types::Array;
    }

    /// Any other node is compatible only if this cast succeeds.
    /// NOTE(review): the cast target type was lost (presumably identifiers) - confirm.
    return node->as();
}

bool removeUnknownSubexpressions(ASTPtr & node, const NameSet & known_names);

/// Keeps only those children for which removeUnknownSubexpressions() returns true;
/// the relative order of the remaining children is preserved.
void removeUnknownChildren(ASTs & children, const NameSet & known_names)
{
    ASTs new_children;
    for (auto & child : children)
    {
        bool leave_child = removeUnknownSubexpressions(child, known_names);
        if (leave_child)
            new_children.push_back(child);
    }
    children = std::move(new_children);
}

/// return `true` if we should leave node in tree
/// May replace `node` itself: an `and` / `or` left with a single argument is replaced by that argument.
bool removeUnknownSubexpressions(ASTPtr & node, const NameSet & known_names)
{
    /// A node that has a name (first cast succeeds) is kept only if the name is in `known_names`.
    if (const auto * ident = node->as())
        return known_names.contains(ident->name());

    /// NOTE(review): the node type tested here was lost (presumably literals, which are always kept) - confirm.
    if (node->as() != nullptr)
        return true;

    auto * func = node->as();
    if (func && (func->name == "and" || func->name == "or"))
    {
        /// For `and` / `or` the arguments are filtered individually.
        removeUnknownChildren(func->arguments->children, known_names);
        if (func->arguments->children.size() == 1)
        {
            /// if only one child left, pull it on top level
            node = func->arguments->children[0];
            return true;
        }
        /// all children removed, current node can be removed too; otherwise it is kept
        return !func->arguments->children.empty();
    }

    /// Any other node is kept only if all of its children are kept; stop at the first child that is not.
    bool leave_child = true;
    for (auto & child : node->children)
    {
        leave_child = leave_child && removeUnknownSubexpressions(child, known_names);
        if (!leave_child)
            break;
    }
    return leave_child;
}

// When a query references an external table such as table from MySQL database,
// the corresponding table storage has to execute the relevant part of the query. We
// send the query to the storage as AST. Before that, we have to remove the conditions
// that reference other tables from `WHERE`, so that the external engine is not confused
// by the unknown columns.
//
// Returns false when `node` is null or when nothing should be left of the expression;
// otherwise returns true. `node` may be modified in place.
bool removeUnknownSubexpressionsFromWhere(ASTPtr & node, const NamesAndTypesList & available_columns)
{
    if (!node)
        return false;

    /// Names of the available columns are the only names treated as known.
    NameSet known_names;
    for (const auto & col : available_columns)
        known_names.insert(col.name);

    if (auto * expr_list = node->as(); expr_list && !expr_list->children.empty())
    {
        /// traverse expression list on top level
        removeUnknownChildren(expr_list->children, known_names);
        return !expr_list->children.empty();
    }
    return removeUnknownSubexpressions(node, known_names);
}

/// Builds a new query that selects `used_columns` from `database`.`table`, carries over the part
/// of the WHERE clause of `clone_query` that is judged compatible, and returns it formatted as text.
///
/// Throws INCORRECT_QUERY when the setting external_table_strict_query is enabled and the WHERE
/// clause cannot be passed as a whole.
String transformQueryForExternalDatabaseImpl(
    ASTPtr clone_query,
    Names used_columns,
    const NamesAndTypesList & available_columns,
    IdentifierQuotingStyle identifier_quoting_style,
    LiteralEscapingStyle literal_escaping_style,
    const String & database,
    const String & table,
    ContextPtr context)
{
    bool strict = context->getSettingsRef().external_table_strict_query;

    auto select = std::make_shared();

    select->replaceDatabaseAndTable(database, table);

    /// The select list contains one node per used column name.
    auto select_expr_list = std::make_shared();
    for (const auto & name : used_columns)
        select_expr_list->children.push_back(std::make_shared(name));

    select->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expr_list));

    /** If there was WHERE,
      * copy it to transformed query if it is compatible,
      * or if it is AND expression,
      * copy only compatible parts of it.
      */

    ASTPtr original_where = clone_query->as().where();
    bool where_has_known_columns = removeUnknownSubexpressionsFromWhere(original_where, available_columns);

    if (original_where && where_has_known_columns)
    {
        replaceConstantExpressions(original_where, context, available_columns);

        /// Replace like WHERE 1 AND 1 to WHERE 1 = 1 AND 1 = 1
        ReplaceLiteralToExprVisitor::Data replace_literal_to_expr_data;
        ReplaceLiteralToExprVisitor(replace_literal_to_expr_data).visit(original_where);

        if (isCompatible(original_where))
        {
            /// The whole condition is compatible: pass it as is.
            select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(original_where));
        }
        else if (strict)
        {
            throw Exception(ErrorCodes::INCORRECT_QUERY, "Query contains non-compatible expressions (and external_table_strict_query=true)");
        }
        else if (auto * function = original_where->as())
        {
            if (function->name == "and" || function->name == "tuple")
            {
                /// Collect the compatible operands into a new `and`. An incompatible operand that is
                /// itself an "and" / "tuple" function is queued so that its own operands get examined;
                /// every other incompatible operand is dropped.
                auto new_function_and = makeASTFunction("and");
                std::queue predicates;
                predicates.push(function);

                while (!predicates.empty())
                {
                    const auto * func = predicates.front();
                    predicates.pop();

                    for (auto & elem : func->arguments->children)
                    {
                        if (isCompatible(elem))
                            new_function_and->arguments->children.push_back(elem);
                        else if (const auto * child = elem->as(); child && (child->name == "and" || child->name == "tuple"))
                            predicates.push(child);
                    }
                }

                /// One operand: use it directly. Several: use the new `and`. None: no WHERE is set here.
                if (new_function_and->arguments->children.size() == 1)
                    select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(new_function_and->arguments->children[0]));
                else if (new_function_and->arguments->children.size() > 1)
                    select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(new_function_and));
            }
        }
    }
    else if (strict && original_where)
    {
        /// Strict mode and removeUnknownSubexpressionsFromWhere() returned false for a non-null WHERE.
        throw Exception(ErrorCodes::INCORRECT_QUERY, "Query contains non-compatible expressions '{}' (and external_table_strict_query=true)", original_where->formatForErrorMessage());
    }

    /// NOTE(review): `original_where` has been passed through std::move in the compatible branch above.
    /// Whether it is still non-null at this point depends on setExpression(), which is not visible
    /// in this file - confirm that this check behaves as intended on that path.
    auto * literal_expr = typeid_cast(original_where.get());
    UInt64 value;
    if (literal_expr && literal_expr->value.tryGet(value) && (value == 0 || value == 1))
    {
        /// WHERE 1 -> WHERE 1=1, WHERE 0 -> WHERE 1=0.
        if (value)
            original_where = makeASTFunction("equals", std::make_shared(1), std::make_shared(1));
        else
            original_where = makeASTFunction("equals", std::make_shared(1), std::make_shared(0));
        select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(original_where));
    }

    /// Clear aliases in the generated query before formatting it.
    ASTPtr select_ptr = select;
    dropAliases(select_ptr);

    /// Format on one line, without highlighting; identifiers are always quoted unless the
    /// quoting style is None.
    WriteBufferFromOwnString out;
    IAST::FormatSettings settings(
            out, /*one_line*/ true, /*hilite*/ false,
            /*always_quote_identifiers*/ identifier_quoting_style != IdentifierQuotingStyle::None,
            /*identifier_quoting_style*/ identifier_quoting_style, /*show_secrets_*/ true,
            /*literal_escaping_style*/ literal_escaping_style);

    select->format(settings);

    return out.str();
}

}

/// Entry point: produces the text of the query to send to an external database.
///
/// If `query_info.syntax_analyzer_result` is not set, the query tree path is used: the query tree,
/// planner context and table expression must all be present and `column_names` must be non-empty
/// (otherwise UNSUPPORTED_METHOD is thrown), and `column_names` become the selected columns.
/// Otherwise a clone of `query_info.query` is used together with the required source columns of
/// the syntax analyzer result; `column_names` is not used on that path.
String transformQueryForExternalDatabase(
    const SelectQueryInfo & query_info,
    const Names & column_names,
    const NamesAndTypesList & available_columns,
    IdentifierQuotingStyle identifier_quoting_style,
    LiteralEscapingStyle literal_escaping_style,
    const String & database,
    const String & table,
    ContextPtr context)
{
    if (!query_info.syntax_analyzer_result)
    {
        if (!query_info.query_tree)
            throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Query is not analyzed: no query tree");
        if (!query_info.planner_context)
            throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Query is not analyzed: no planner context");
        if (!query_info.table_expression)
            throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Query is not analyzed: no table expression");

        if (column_names.empty())
            throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "No column names for query '{}' to external table '{}.{}'",
                            query_info.query_tree->formatASTForErrorMessage(), database, table);

        /// Obtain an AST for the external database from the query tree.
        auto clone_query = getASTForExternalDatabaseFromQueryTree(query_info.query_tree);

        return transformQueryForExternalDatabaseImpl(
            clone_query,
            column_names,
            available_columns,
            identifier_quoting_style,
            literal_escaping_style,
            database,
            table,
            context);
    }

    /// Work on a clone of the query AST.
    auto clone_query = query_info.query->clone();
    return transformQueryForExternalDatabaseImpl(
        clone_query,
        query_info.syntax_analyzer_result->requiredSourceColumns(),
        available_columns,
        identifier_quoting_style,
        literal_escaping_style,
        database,
        table,
        context);
}

}