#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std::literals; namespace DB { namespace ErrorCodes { extern const int UNEXPECTED_EXPRESSION; extern const int UNEXPECTED_AST_STRUCTURE; } void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const { if (name == "view") throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION); writeString(name, ostr); if (parameters) { writeChar('(', ostr); for (auto it = parameters->children.begin(); it != parameters->children.end(); ++it) { if (it != parameters->children.begin()) writeCString(", ", ostr); (*it)->appendColumnName(ostr); } writeChar(')', ostr); } writeChar('(', ostr); if (arguments) { for (auto it = arguments->children.begin(); it != arguments->children.end(); ++it) { if (it != arguments->children.begin()) writeCString(", ", ostr); (*it)->appendColumnName(ostr); } } writeChar(')', ostr); if (is_window_function) { writeCString(" OVER ", ostr); if (!window_name.empty()) { ostr << window_name; } else { FormatSettings format_settings{ostr, true /* one_line */}; FormatState state; FormatStateStacked frame; writeCString("(", ostr); window_definition->formatImpl(format_settings, state, frame); writeCString(")", ostr); } } } /** Get the text that identifies this element. */ String ASTFunction::getID(char delim) const { return "Function" + (delim + name); } ASTPtr ASTFunction::clone() const { auto res = std::make_shared(*this); res->children.clear(); if (arguments) { res->arguments = arguments->clone(); res->children.push_back(res->arguments); } if (parameters) { res->parameters = parameters->clone(); res->children.push_back(res->parameters); } if (window_definition) { res->window_definition = window_definition->clone(); res->children.push_back(res->window_definition); } return res; } void ASTFunction::updateTreeHashImpl(SipHash & hash_state) const { hash_state.update(name.size()); hash_state.update(name); IAST::updateTreeHashImpl(hash_state); } ASTPtr ASTFunction::toLiteral() const { if (!arguments) return {}; if (name == "array") { Array array; for (const auto & arg : arguments->children) { if (auto * literal = arg->as()) array.push_back(literal->value); else if (auto * func = arg->as()) { if (auto func_literal = func->toLiteral()) array.push_back(func_literal->as()->value); } else /// Some of the Array arguments is not literal return {}; } return std::make_shared(array); } return {}; } /** A special hack. If it's [I]LIKE or NOT [I]LIKE expression and the right hand side is a string literal, * we will highlight unescaped metacharacters % and _ in string literal for convenience. * Motivation: most people are unaware that _ is a metacharacter and forgot to properly escape it with two backslashes. * With highlighting we make it clearly obvious. * * Another case is regexp match. Suppose the user types match(URL, 'www.yandex.ru'). It often means that the user is unaware that . is a metacharacter. */ static bool highlightStringLiteralWithMetacharacters(const ASTPtr & node, const IAST::FormatSettings & settings, const char * metacharacters) { if (const auto * literal = node->as()) { if (literal->value.getType() == Field::Types::String) { auto string = applyVisitor(FieldVisitorToString(), literal->value); unsigned escaping = 0; for (auto c : string) { if (c == '\\') { settings.ostr << c; if (escaping == 2) escaping = 0; ++escaping; } else if (nullptr != strchr(metacharacters, c)) { if (escaping == 2) /// Properly escaped metacharacter settings.ostr << c; else /// Unescaped metacharacter settings.ostr << "\033[1;35m" << c << "\033[0m"; escaping = 0; } else { settings.ostr << c; escaping = 0; } } return true; } } return false; } ASTSelectWithUnionQuery * ASTFunction::tryGetQueryArgument() const { if (arguments && arguments->children.size() == 1) { return arguments->children[0]->as(); } return nullptr; } void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { frame.expression_list_prepend_whitespace = false; FormatStateStacked nested_need_parens = frame; FormatStateStacked nested_dont_need_parens = frame; nested_need_parens.need_parens = true; nested_dont_need_parens.need_parens = false; if (auto * query = tryGetQueryArgument()) { std::string nl_or_nothing = settings.one_line ? "" : "\n"; std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' '); settings.ostr << (settings.hilite ? hilite_function : "") << name << "(" << nl_or_nothing; FormatStateStacked frame_nested = frame; frame_nested.need_parens = false; ++frame_nested.indent; query->formatImpl(settings, state, frame_nested); settings.ostr << nl_or_nothing << indent_str << ")"; return; } /// Should this function to be written as operator? bool written = false; if (arguments && !parameters) { /// Unary prefix operators. if (arguments->children.size() == 1) { const char * operators[] = { "negate", "-", "not", "NOT ", nullptr }; for (const char ** func = operators; *func; func += 2) { if (strcasecmp(name.c_str(), func[0]) != 0) { continue; } const auto * literal = arguments->children[0]->as(); const auto * function = arguments->children[0]->as(); bool negate = name == "negate"; bool is_tuple = literal && literal->value.getType() == Field::Types::Tuple; // do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))` bool literal_need_parens = literal && !is_tuple; // negate always requires parentheses, otherwise -(-1) will be printed as --1 bool negate_need_parens = negate && (literal_need_parens || (function && function->name == "negate")); // We don't need parentheses around a single literal. bool need_parens = !literal && frame.need_parens && !negate_need_parens; // do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1))) if (negate_need_parens) nested_need_parens.need_parens = false; if (need_parens) settings.ostr << '('; settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); if (negate_need_parens) settings.ostr << '('; arguments->formatImpl(settings, state, nested_need_parens); written = true; if (negate_need_parens) settings.ostr << ')'; if (need_parens) settings.ostr << ')'; break; } } /// Unary postfix operators. if (!written && arguments->children.size() == 1) { const char * operators[] = { "isNull", " IS NULL", "isNotNull", " IS NOT NULL", nullptr }; for (const char ** func = operators; *func; func += 2) { if (strcasecmp(name.c_str(), func[0]) != 0) { continue; } if (frame.need_parens) settings.ostr << '('; arguments->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); if (frame.need_parens) settings.ostr << ')'; written = true; break; } } /** need_parens - do we need parentheses around the expression with the operator. * They are needed only if this expression is included in another expression with the operator. */ if (!written && arguments->children.size() == 2) { const char * operators[] = { "multiply", " * ", "divide", " / ", "modulo", " % ", "plus", " + ", "minus", " - ", "notEquals", " != ", "lessOrEquals", " <= ", "greaterOrEquals", " >= ", "less", " < ", "greater", " > ", "equals", " = ", "like", " LIKE ", "ilike", " ILIKE ", "notLike", " NOT LIKE ", "notILike", " NOT ILIKE ", "in", " IN ", "notIn", " NOT IN ", "globalIn", " GLOBAL IN ", "globalNotIn", " GLOBAL NOT IN ", nullptr }; for (const char ** func = operators; *func; func += 2) { if (name == std::string_view(func[0])) { if (frame.need_parens) settings.ostr << '('; arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); bool special_hilite = settings.hilite && (name == "like" || name == "notLike" || name == "ilike" || name == "notILike") && highlightStringLiteralWithMetacharacters(arguments->children[1], settings, "%_"); /// Format x IN 1 as x IN (1): put parens around rhs even if there is a single element in set. const auto * second_arg_func = arguments->children[1]->as(); const auto * second_arg_literal = arguments->children[1]->as(); bool extra_parents_around_in_rhs = (name == "in" || name == "notIn" || name == "globalIn" || name == "globalNotIn") && !second_arg_func && !(second_arg_literal && (second_arg_literal->value.getType() == Field::Types::Tuple || second_arg_literal->value.getType() == Field::Types::Array)) && !arguments->children[1]->as(); if (extra_parents_around_in_rhs) { settings.ostr << '('; arguments->children[1]->formatImpl(settings, state, nested_dont_need_parens); settings.ostr << ')'; } if (!special_hilite && !extra_parents_around_in_rhs) arguments->children[1]->formatImpl(settings, state, nested_need_parens); if (frame.need_parens) settings.ostr << ')'; written = true; } } if (!written && name == "arrayElement"sv) { if (frame.need_parens) settings.ostr << '('; arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); arguments->children[1]->formatImpl(settings, state, nested_dont_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); written = true; if (frame.need_parens) settings.ostr << ')'; } if (!written && name == "tupleElement"sv) { // fuzzer sometimes may insert tupleElement() created from ASTLiteral: // // Function_tupleElement, 0xx // -ExpressionList_, 0xx // --Literal_Int64_255, 0xx // --Literal_Int64_100, 0xx // // And in this case it will be printed as "255.100", which // later will be parsed as float, and formatting will be // inconsistent. // // So instead of printing it as regular tuple, // let's print it as ExpressionList instead (i.e. with ", " delimiter). bool tuple_arguments_valid = true; const auto * lit_left = arguments->children[0]->as(); const auto * lit_right = arguments->children[1]->as(); if (lit_left) { Field::Types::Which type = lit_left->value.getType(); if (type != Field::Types::Tuple && type != Field::Types::Array) { tuple_arguments_valid = false; } } // It can be printed in a form of 'x.1' only if right hand side // is an unsigned integer lineral. We also allow nonnegative // signed integer literals, because the fuzzer sometimes inserts // them, and we want to have consistent formatting. if (tuple_arguments_valid && lit_right) { if (isInt64OrUInt64FieldType(lit_right->value.getType()) && lit_right->value.get() >= 0) { if (frame.need_parens) settings.ostr << '('; arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? hilite_none : ""); arguments->children[1]->formatImpl(settings, state, nested_dont_need_parens); written = true; if (frame.need_parens) settings.ostr << ')'; } } } if (!written && name == "lambda"sv) { /// Special case: zero elements tuple in lhs of lambda is printed as (). /// Special case: one-element tuple in lhs of lambda is printed as its element. if (frame.need_parens) settings.ostr << '('; const auto * first_arg_func = arguments->children[0]->as(); if (first_arg_func && first_arg_func->name == "tuple" && first_arg_func->arguments && (first_arg_func->arguments->children.size() == 1 || first_arg_func->arguments->children.empty())) { if (first_arg_func->arguments->children.size() == 1) first_arg_func->arguments->children[0]->formatImpl(settings, state, nested_need_parens); else settings.ostr << "()"; } else arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << " -> " << (settings.hilite ? hilite_none : ""); arguments->children[1]->formatImpl(settings, state, nested_need_parens); if (frame.need_parens) settings.ostr << ')'; written = true; } } if (!written && arguments->children.size() >= 2) { const char * operators[] = { "and", " AND ", "or", " OR ", nullptr }; for (const char ** func = operators; *func; func += 2) { if (name == std::string_view(func[0])) { if (frame.need_parens) settings.ostr << '('; for (size_t i = 0; i < arguments->children.size(); ++i) { if (i != 0) settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); arguments->children[i]->formatImpl(settings, state, nested_need_parens); } if (frame.need_parens) settings.ostr << ')'; written = true; } } } if (!written && name == "array"sv) { settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); for (size_t i = 0; i < arguments->children.size(); ++i) { if (i != 0) settings.ostr << ", "; arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); } settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); written = true; } if (!written && arguments->children.size() >= 2 && name == "tuple"sv) { settings.ostr << (settings.hilite ? hilite_operator : "") << '(' << (settings.hilite ? hilite_none : ""); for (size_t i = 0; i < arguments->children.size(); ++i) { if (i != 0) settings.ostr << ", "; arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); } settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); written = true; } if (!written && name == "map"sv) { settings.ostr << (settings.hilite ? hilite_operator : "") << "map(" << (settings.hilite ? hilite_none : ""); for (size_t i = 0; i < arguments->children.size(); ++i) { if (i != 0) settings.ostr << ", "; arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); } settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); written = true; } } if (written) { return; } settings.ostr << (settings.hilite ? hilite_function : "") << name; if (parameters) { settings.ostr << '(' << (settings.hilite ? hilite_none : ""); parameters->formatImpl(settings, state, nested_dont_need_parens); settings.ostr << (settings.hilite ? hilite_function : "") << ')'; } if ((arguments && !arguments->children.empty()) || !no_empty_args) settings.ostr << '(' << (settings.hilite ? hilite_none : ""); if (arguments) { bool special_hilite_regexp = settings.hilite && (name == "match" || name == "extract" || name == "extractAll" || name == "replaceRegexpOne" || name == "replaceRegexpAll"); for (size_t i = 0, size = arguments->children.size(); i < size; ++i) { if (i != 0) settings.ostr << ", "; bool special_hilite = false; if (i == 1 && special_hilite_regexp) special_hilite = highlightStringLiteralWithMetacharacters(arguments->children[i], settings, "|()^$.[]?*+{:-"); if (!special_hilite) arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); } } if ((arguments && !arguments->children.empty()) || !no_empty_args) settings.ostr << (settings.hilite ? hilite_function : "") << ')'; settings.ostr << (settings.hilite ? hilite_none : ""); if (!is_window_function) { return; } settings.ostr << " OVER "; if (!window_name.empty()) { settings.ostr << backQuoteIfNeed(window_name); } else { settings.ostr << "("; window_definition->formatImpl(settings, state, frame); settings.ostr << ")"; } } String getFunctionName(const IAST * ast) { String res; if (tryGetFunctionNameInto(ast, res)) return res; throw Exception(ast ? queryToString(*ast) + " is not an function" : "AST node is nullptr", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } std::optional tryGetFunctionName(const IAST * ast) { String res; if (tryGetFunctionNameInto(ast, res)) return res; return {}; } bool tryGetFunctionNameInto(const IAST * ast, String & name) { if (ast) { if (const auto * node = ast->as()) { name = node->name; return true; } } return false; } }