From 2919f6710cc3b020f8ede883a9d309c0d918b3ee Mon Sep 17 00:00:00 2001 From: potya Date: Tue, 26 May 2020 23:58:51 +0300 Subject: [PATCH 01/11] Add some multiword data types --- src/DataTypes/DataTypeString.cpp | 1 + src/Parsers/ParserCreateQuery.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index efaf844a845..5762d5d7055 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -376,6 +376,7 @@ void registerDataTypeString(DataTypeFactory & factory) /// These synonyms are added for compatibility. factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 19410a78dd2..930dc0fec35 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { @@ -163,6 +165,36 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (require_type && !type && !default_expression) return false; /// reject column name without type + auto first_word = type->getID(); + + if (boost::algorithm::to_lower_copy(first_word) == "function_double") { + ParserKeyword s_presicion{"PRESICION"}; + s_presicion.ignore(pos); + } else if (boost::algorithm::to_lower_copy(first_word) == "function_char") { + ParserKeyword s_varying{"VARYING"}; + s_varying.ignore(pos); + } else if (boost::algorithm::to_lower_copy(first_word) == "function_native") { + ParserIdentifierWithOptionalParameters tmp; + ASTPtr second_word; + if (!tmp.parse(pos, second_word, expected)) { + 
return false; + } + if (boost::algorithm::to_lower_copy(second_word->getID()) != "function_character") { + return false; + } + + type = second_word; + } else if (boost::algorithm::to_lower_copy(first_word) == "function_varying") + { + ParserIdentifierWithOptionalParameters tmp; + ASTPtr second_word; + if (!tmp.parse(pos, second_word, expected)) + return false; + if (boost::algorithm::to_lower_copy(second_word->getID()) != "function_char") + return false; + type = second_word; + } + if (s_comment.ignore(pos, expected)) { From 80496628e33a2af30a2635af0f43b527b7dd9fad Mon Sep 17 00:00:00 2001 From: potya Date: Sun, 7 Jun 2020 23:02:57 +0300 Subject: [PATCH 02/11] Add better code --- src/DataTypes/DataTypeString.cpp | 3 ++ src/DataTypes/DataTypesNumber.cpp | 1 + src/Parsers/ParserCreateQuery.h | 62 +++++++++++++++++-------------- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 5762d5d7055..d02f11d3602 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -376,7 +376,10 @@ void registerDataTypeString(DataTypeFactory & factory) /// These synonyms are added for compatibility. 
factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("CHAR VARYING", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("VARYING CHAR", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NATIVE CHARACTER", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive); diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 5739a64d815..801213236fa 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -29,6 +29,7 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerAlias("BIGINT", "Int64", DataTypeFactory::CaseInsensitive); factory.registerAlias("FLOAT", "Float32", DataTypeFactory::CaseInsensitive); factory.registerAlias("DOUBLE", "Float64", DataTypeFactory::CaseInsensitive); + factory.registerAlias("DOUBLE PRECISION", "Float64", DataTypeFactory::CaseInsensitive); } } diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 930dc0fec35..b661dd51d16 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -150,6 +151,39 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E { if (!type_parser.parse(pos, type, expected)) return false; + + ASTFunction * type_func = type->as(); + if (!type_func->arguments) + { + if (boost::algorithm::iequals(type_func->name, "DOUBLE")) + { + if (ParserKeyword{"PRESICION"}.ignore(pos)) + { + type_func->name += " PRESICION"; + } + } + else if (boost::algorithm::iequals(type_func->name, "CHAR")) + { + if 
(ParserKeyword{"VARYING"}.ignore(pos)) + { + type_func->name += " VARYING"; + } + } + else if (boost::algorithm::iequals(type_func->name, "NATIVE")) + { + if (ParserKeyword{"CHARACTER"}.ignore(pos)) + { + type_func->name += " CHARACTER"; + } + } + else if (boost::algorithm::iequals(type_func->name, "VARYING")) + { + if (ParserKeyword{"CHAR"}.ignore(pos)) + { + type_func->name += " CHAR"; + } + } + } } Pos pos_before_specifier = pos; @@ -165,35 +199,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (require_type && !type && !default_expression) return false; /// reject column name without type - auto first_word = type->getID(); - if (boost::algorithm::to_lower_copy(first_word) == "function_double") { - ParserKeyword s_presicion{"PRESICION"}; - s_presicion.ignore(pos); - } else if (boost::algorithm::to_lower_copy(first_word) == "function_char") { - ParserKeyword s_varying{"VARYING"}; - s_varying.ignore(pos); - } else if (boost::algorithm::to_lower_copy(first_word) == "function_native") { - ParserIdentifierWithOptionalParameters tmp; - ASTPtr second_word; - if (!tmp.parse(pos, second_word, expected)) { - return false; - } - if (boost::algorithm::to_lower_copy(second_word->getID()) != "function_character") { - return false; - } - - type = second_word; - } else if (boost::algorithm::to_lower_copy(first_word) == "function_varying") - { - ParserIdentifierWithOptionalParameters tmp; - ASTPtr second_word; - if (!tmp.parse(pos, second_word, expected)) - return false; - if (boost::algorithm::to_lower_copy(second_word->getID()) != "function_char") - return false; - type = second_word; - } if (s_comment.ignore(pos, expected)) From acd0d3c528d58432d9bf201f77283b13d817c325 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 18 Jun 2020 15:52:05 +0300 Subject: [PATCH 03/11] separate parser for data type --- src/DataTypes/DataTypeFactory.cpp | 2 +- src/DataTypes/DataTypeString.cpp | 20 +++-- src/DataTypes/DataTypesNumber.cpp | 1 + 
src/Parsers/ExpressionElementParsers.cpp | 4 +- src/Parsers/ExpressionElementParsers.h | 9 +- src/Parsers/ParserCreateQuery.cpp | 11 +-- src/Parsers/ParserCreateQuery.h | 47 ++-------- src/Parsers/ParserDataType.cpp | 90 +++++++++++++++++++ src/Parsers/ParserDataType.h | 18 ++++ .../ParserDictionaryAttributeDeclaration.cpp | 3 +- src/Parsers/ya.make | 1 + 11 files changed, 140 insertions(+), 66 deletions(-) create mode 100644 src/Parsers/ParserDataType.cpp create mode 100644 src/Parsers/ParserDataType.h diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 880f25d009d..8babcbd31d6 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -27,7 +27,7 @@ namespace ErrorCodes DataTypePtr DataTypeFactory::get(const String & full_name) const { - ParserIdentifierWithOptionalParameters parser; + ParserDataType parser; ASTPtr ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); return get(ast); } diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 99eb1108f1f..dc2eeb7f72c 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -379,14 +379,14 @@ bool DataTypeString::equals(const IDataType & rhs) const static DataTypePtr create(const ASTPtr & arguments) { - if (arguments) + if (arguments && !arguments->children.empty()) { if (arguments->children.size() > 1) - throw Exception("String data type family mustnt have more than one argument - size in characters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception("String data type family mustn't have more than one argument - size in characters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto * argument = arguments->children[0]->as(); if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) - throw Exception("FixedString data type family may have only a number 
(positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + throw Exception("String data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } return std::make_shared(); @@ -416,8 +416,18 @@ void registerDataTypeString(DataTypeFactory & factory) factory.registerAlias("LONGBLOB", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("BYTEA", "String", DataTypeFactory::CaseInsensitive); /// PostgreSQL + factory.registerAlias("CHARACTER LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("CHARACTER VARYING", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("CHAR LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("CHAR VARYING", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("VARYING CHAR", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NATIVE CHARACTER", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NATIONAL CHAR", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NATIONAL CHARACTER", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NATIONAL CHARACTER LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NATIONAL CHARACTER VARYING", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NATIONAL CHAR VARYING", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NCHAR VARYING", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NCHAR LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("BINARY LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("BINARY VARYING", "String", DataTypeFactory::CaseInsensitive); } } diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index ee8d590d2d1..e0f3d2829d1 100644 --- 
a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -63,6 +63,7 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerAlias("REAL", "Float32", DataTypeFactory::CaseInsensitive); factory.registerAlias("SINGLE", "Float32", DataTypeFactory::CaseInsensitive); /// MS Access factory.registerAlias("DOUBLE", "Float64", DataTypeFactory::CaseInsensitive); + factory.registerAlias("DOUBLE PRECISION", "Float64", DataTypeFactory::CaseInsensitive); } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 785e6b25f98..0466c6f96f9 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -352,7 +352,7 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected) && ParserExpression().parse(pos, expr_node, expected) && ParserKeyword("AS").ignore(pos, expected) - && ParserIdentifierWithOptionalParameters().parse(pos, type_node, expected) + && ParserDataType().parse(pos, type_node, expected) && ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) { /// Convert to canonical representation in functional form: CAST(expr, 'type') @@ -1233,7 +1233,7 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ++pos; auto old_pos = pos; - ParserIdentifierWithOptionalParameters type_parser; + ParserDataType type_parser; if (!type_parser.ignore(pos, expected)) { expected.add(pos, "substitution type"); diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index e6aff9d5349..f5cd705bb83 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -48,13 +48,6 @@ protected: }; -class ParserBareWord : public IParserBase -{ -protected: - const char * getName() const override { return "bare word"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & 
expected) override; -}; - /** An identifier, possibly containing a dot, for example, x_yz123 or `something special` or Hits.EventTime, * possibly with UUID clause like `db name`.`table name` UUID 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' */ @@ -363,7 +356,7 @@ protected: bool brackets_can_be_omitted; }; -/** Data type or table engine, possibly with parameters. For example, UInt8 or see examples from ParserIdentifierWithParameters +/** Table engine, possibly with parameters. See examples from ParserIdentifierWithParameters * Parse result is ASTFunction, with or without arguments. */ class ParserIdentifierWithOptionalParameters : public IParserBase diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index c54033bd27d..1658d935fdd 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -53,12 +53,7 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserIdentifierWithParameters::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserFunction function_or_array; - if (function_or_array.parse(pos, node, expected)) - return true; - - ParserNestedTable nested; - return nested.parse(pos, node, expected); + return ParserFunction().parse(pos, node, expected); } bool ParserNameTypePairList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -85,7 +80,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ParserKeyword s_granularity("GRANULARITY"); ParserIdentifier name_p; - ParserIdentifierWithOptionalParameters ident_with_optional_params_p; + ParserDataType data_type_p; ParserExpression expression_p; ParserUnsignedInteger granularity_p; @@ -103,7 +98,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!s_type.ignore(pos, expected)) return false; - if (!ident_with_optional_params_p.parse(pos, type, expected)) + if (!data_type_p.parse(pos, type, expected)) return false; if 
(!s_granularity.ignore(pos, expected)) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index b661dd51d16..e8b5a08692f 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -27,10 +28,9 @@ protected: }; -/** Parametric type or Storage. For example: - * FixedString(10) or - * Partitioned(Log, ChunkID) or - * Nested(UInt32 CounterID, FixedString(2) UserAgentMajor) +/** Storage engine or Codec. For example: + * Memory() + * ReplicatedMergeTree('/path', 'replica') * Result of parsing - ASTFunction with or without parameters. */ class ParserIdentifierWithParameters : public IParserBase @@ -50,14 +50,12 @@ protected: /** The name and type are separated by a space. For example, URL String. */ using ParserNameTypePair = IParserNameTypePair; -/** Name and type separated by a space. The name can contain a dot. For example, Hits.URL String. */ -using ParserCompoundNameTypePair = IParserNameTypePair; template bool IParserNameTypePair::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { NameParser name_parser; - ParserIdentifierWithOptionalParameters type_parser; + ParserDataType type_parser; ASTPtr name, type; if (name_parser.parse(pos, name, expected) @@ -116,7 +114,7 @@ template bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { NameParser name_parser; - ParserIdentifierWithOptionalParameters type_parser; + ParserDataType type_parser; ParserKeyword s_default{"DEFAULT"}; ParserKeyword s_materialized{"MATERIALIZED"}; ParserKeyword s_alias{"ALIAS"}; @@ -151,39 +149,6 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E { if (!type_parser.parse(pos, type, expected)) return false; - - ASTFunction * type_func = type->as(); - if (!type_func->arguments) - { - if (boost::algorithm::iequals(type_func->name, "DOUBLE")) - { - if (ParserKeyword{"PRESICION"}.ignore(pos)) - { - type_func->name 
+= " PRESICION"; - } - } - else if (boost::algorithm::iequals(type_func->name, "CHAR")) - { - if (ParserKeyword{"VARYING"}.ignore(pos)) - { - type_func->name += " VARYING"; - } - } - else if (boost::algorithm::iequals(type_func->name, "NATIVE")) - { - if (ParserKeyword{"CHARACTER"}.ignore(pos)) - { - type_func->name += " CHARACTER"; - } - } - else if (boost::algorithm::iequals(type_func->name, "VARYING")) - { - if (ParserKeyword{"CHAR"}.ignore(pos)) - { - type_func->name += " CHAR"; - } - } - } } Pos pos_before_specifier = pos; diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp new file mode 100644 index 00000000000..9ccbe86c064 --- /dev/null +++ b/src/Parsers/ParserDataType.cpp @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserNestedTable nested; + if (nested.parse(pos, node, expected)) + return true; + + String type_name; + + ParserIdentifier name_parser; + ASTPtr identifier; + if (!name_parser.parse(pos, identifier, expected)) + return false; + tryGetIdentifierNameInto(identifier, type_name); + + String type_name_upper = Poco::toUpper(type_name); + String type_name_suffix; + + /// Special cases for compatibility with SQL standard. 
We can parse several words as type name + /// only for certain first words, otherwise we don't know how many words to parse + if (type_name_upper == "NATIONAL") + { + if (ParserKeyword("CHARACTER LARGE OBJECT").ignore(pos)) + type_name_suffix = "CHARACTER LARGE OBJECT"; + else if (ParserKeyword("CHARACTER VARYING").ignore(pos)) + type_name_suffix = "CHARACTER VARYING"; + else if (ParserKeyword("CHAR VARYING").ignore(pos)) + type_name_suffix = "CHAR VARYING"; + else if (ParserKeyword("CHARACTER").ignore(pos)) + type_name_suffix = "CHARACTER"; + else if (ParserKeyword("CHAR").ignore(pos)) + type_name_suffix = "CHAR"; + } + else if (type_name_upper == "BINARY" || + type_name_upper == "CHARACTER" || + type_name_upper == "CHAR" || + type_name_upper == "NCHAR") + { + if (ParserKeyword("LARGE OBJECT").ignore(pos)) + type_name_suffix = "LARGE OBJECT"; + else if (ParserKeyword("VARYING").ignore(pos)) + type_name_suffix = "VARYING"; + } + else if (type_name_upper == "DOUBLE") + { + if (ParserKeyword("PRECISION").ignore(pos)) + type_name_suffix = "PRECISION"; + } + + if (!type_name_suffix.empty()) + type_name = type_name_upper + " " + type_name_suffix; + + auto function_node = std::make_shared(); + function_node->name = type_name; + + if (pos->type != TokenType::OpeningRoundBracket) + { + node = function_node; + return true; + } + ++pos; + + /// Parse optional parameters + ParserList args_parser(std::make_unique(), std::make_unique(TokenType::Comma)); + ASTPtr expr_list_args; + + if (!args_parser.parse(pos, expr_list_args, expected)) + return false; + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + + function_node->arguments = expr_list_args; + function_node->children.push_back(function_node->arguments); + + node = function_node; + return true; +} + +} + diff --git a/src/Parsers/ParserDataType.h b/src/Parsers/ParserDataType.h new file mode 100644 index 00000000000..89a43003910 --- /dev/null +++ b/src/Parsers/ParserDataType.h @@ -0,0 +1,18 @@ 
+#pragma once +#include + + +namespace DB +{ + +/// Parses data type as ASTFunction +/// Examples: Int8, Array(Nullable(FixedString(16))), DOUBLE PRECISION, Nested(UInt32 CounterID, FixedString(2) UserAgentMajor) +class ParserDataType : public IParserBase +{ +protected: + const char * getName() const override { return "data type"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/ParserDictionaryAttributeDeclaration.cpp b/src/Parsers/ParserDictionaryAttributeDeclaration.cpp index d64e4b8dec0..9cd1cebe4e0 100644 --- a/src/Parsers/ParserDictionaryAttributeDeclaration.cpp +++ b/src/Parsers/ParserDictionaryAttributeDeclaration.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -9,7 +10,7 @@ namespace DB bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserIdentifier name_parser; - ParserIdentifierWithOptionalParameters type_parser; + ParserDataType type_parser; ParserKeyword s_default{"DEFAULT"}; ParserKeyword s_expression{"EXPRESSION"}; ParserKeyword s_hierarchical{"HIERARCHICAL"}; diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index 8c7e4ff68af..f74faef1702 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -75,6 +75,7 @@ SRCS( ParserCreateSettingsProfileQuery.cpp ParserCreateUserQuery.cpp ParserDescribeTableQuery.cpp + ParserDataType.cpp ParserDictionary.cpp ParserDictionaryAttributeDeclaration.cpp ParserDropAccessEntityQuery.cpp From 5e42441a722396ac630a1467d981abb53594c4f0 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 18 Jun 2020 17:19:14 +0300 Subject: [PATCH 04/11] add test --- .../01144_multiword_data_types.reference | 3 ++ .../01144_multiword_data_types.sql | 29 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 tests/queries/0_stateless/01144_multiword_data_types.reference create mode 100644 tests/queries/0_stateless/01144_multiword_data_types.sql diff --git 
a/tests/queries/0_stateless/01144_multiword_data_types.reference b/tests/queries/0_stateless/01144_multiword_data_types.reference new file mode 100644 index 00000000000..0f3b4691734 --- /dev/null +++ b/tests/queries/0_stateless/01144_multiword_data_types.reference @@ -0,0 +1,3 @@ +CREATE TABLE default.multiword_types\n(\n `a` Float64,\n `b` Float64,\n `c` String DEFAULT \'str\',\n `d` String,\n `e` String COMMENT \'comment\',\n `f` String,\n `g` String,\n `h` String DEFAULT toString(a) COMMENT \'comment\',\n `i` String,\n `j` String,\n `k` String,\n `l` String,\n `m` String,\n `n` String,\n `o` String,\n `p` String\n)\nENGINE = Memory +Tuple(Float64, Float64, String, String, String, String, String, String, String, String, String, String, String, String, String, String) +42 42 diff --git a/tests/queries/0_stateless/01144_multiword_data_types.sql b/tests/queries/0_stateless/01144_multiword_data_types.sql new file mode 100644 index 00000000000..7d96c5a1d74 --- /dev/null +++ b/tests/queries/0_stateless/01144_multiword_data_types.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS multiword_types; + +CREATE TABLE multiword_types ( + a DOUBLE, + b DOUBLE PRECISION, + c CHAR DEFAULT 'str', + d CHAR VARYING, + e CHAR LARGE OBJECT COMMENT 'comment', + f CHARACTER VARYING(123), + g ChArAcTeR large OBJECT, + h nchar varying (456) default toString(a) comment 'comment', + i NCHAR LARGE OBJECT, + j BINARY LARGE OBJECT, + k BINARY VARYING, + l NATIONAL CHAR, + m NATIONAL CHARACTER, + n NATIONAL CHARACTER LARGE OBJECT, + o NATIONAL CHARACTER VARYING, + p NATIONAL CHAR VARYING +) ENGINE=Memory; + +SHOW CREATE TABLE multiword_types; + +INSERT INTO multiword_types(a) VALUES (1); +SELECT toTypeName((*,)) FROM multiword_types; + +SELECT CAST('42' AS DOUBLE PRECISION), CAST(42, 'NATIONAL CHARACTER VARYING'); + +DROP TABLE multiword_types; From a541b5164731092e4cb5959faa2f4334087e9a1a Mon Sep 17 00:00:00 2001 From: Tom Bombadil <565258751@qq.com> Date: Thu, 18 Jun 2020 22:27:43 +0800 Subject: 
[PATCH 05/11] Update graphitemergetree.md translate the doc to Chinese version --- .../mergetree-family/graphitemergetree.md | 97 +++++++++---------- 1 file changed, 45 insertions(+), 52 deletions(-) diff --git a/docs/zh/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/zh/engines/table-engines/mergetree-family/graphitemergetree.md index 1ca762e8537..9af39bcf964 100644 --- a/docs/zh/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/graphitemergetree.md @@ -1,17 +1,16 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 38 toc_title: GraphiteMergeTree --- # GraphiteMergeTree {#graphitemergetree} -此引擎专为细化和聚合/平均(rollup) [石墨](http://graphite.readthedocs.io/en/latest/index.html) 戴达 对于想要使用ClickHouse作为Graphite的数据存储的开发人员来说,这可能会有所帮助。 +该引擎用来对 [Graphite](http://graphite.readthedocs.io/en/latest/index.html)数据进行瘦身及汇总。对于想使用CH来存储Graphite数据的开发者来说可能有用。 -您可以使用任何ClickHouse表引擎来存储石墨数据,如果你不需要汇总,但如果你需要一个汇总使用 `GraphiteMergeTree`. 该引擎减少了存储量,并提高了Graphite查询的效率。 -引擎继承从属性 [MergeTree](mergetree.md). +如果不需要对Graphite数据做汇总,那么可以使用任意的CH表引擎;但若需要,那就采用 `GraphiteMergeTree` 引擎。它能减少存储空间,同时能提高Graphite数据的查询效率。 + +该引擎继承自 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md). ## 创建表 {#creating-table} @@ -30,36 +29,32 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [SETTINGS name=value, ...] ``` -请参阅的详细说明 [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) 查询。 +建表语句的详细说明请参见 [创建表](../../../sql-reference/statements/create.md#create-table-query) -Graphite数据的表应具有以下数据的列: +含有Graphite数据集的表应该包含以下的数据列: +- 指标名称(Graphite sensor),数据类型:`String` +- 指标的时间度量,数据类型: `DateTime` +- 指标的值,数据类型:任意数值类型 +- 指标的版本号,数据类型: 任意数值类型 -- 公制名称(石墨传感器)。 数据类型: `String`. + CH以最大的版本号保存行记录,若版本号相同,保留最后写入的数据。 -- 测量度量的时间。 数据类型: `DateTime`. 
+以上列必须设置在汇总参数配置中。 -- 度量值。 数据类型:任何数字。 -- 指标的版本。 数据类型:任何数字。 +**GraphiteMergeTree 参数** +- `config_section` - 配置文件中标识汇总规则的节点名称 - 如果版本相同,ClickHouse会保存版本最高或最后写入的行。 其他行在数据部分合并期间被删除。 +**建表语句** -应在汇总配置中设置这些列的名称。 - -**GraphiteMergeTree参数** - -- `config_section` — Name of the section in the configuration file, where are the rules of rollup set. - -**查询子句** - -当创建一个 `GraphiteMergeTree` 表,相同 [条款](mergetree.md#table_engine-mergetree-creating-a-table) 是必需的,因为当创建 `MergeTree` 桌子 +在创建 `GraphiteMergeTree` 表时,需要采用和 [clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) 相同的语句,就像创建 `MergeTree` 一样。
-不推荐使用的创建表的方法 +已废弃的建表语句 -!!! attention "注意" - 不要在新项目中使用此方法,如果可能的话,请将旧项目切换到上述方法。 +!!! attention "注意" + 请不要在新项目中使用;如有可能,请将旧的项目按上述的方法进行替换。 ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -73,31 +68,30 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ) ENGINE [=] GraphiteMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, config_section) ``` -所有参数除外 `config_section` 具有相同的含义 `MergeTree`. +除了`config_section`,其它所有参数和`MergeTree`的相应参数一样. -- `config_section` — Name of the section in the configuration file, where are the rules of rollup set. +- `config_section` —配置文件中设置汇总规则的节点 
-## 汇总配置 {#rollup-configuration} +## 汇总配置的参数 {#rollup-configuration} +汇总的配置参数由服务器配置的 [graphite\_rollup](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 参数定义。参数名称可以是任意的。允许为多个不同表创建多组配置并使用。 -汇总的设置由 [graphite\_rollup](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 服务器配置中的参数。 参数的名称可以是any。 您可以创建多个配置并将它们用于不同的表。 -汇总配置结构: +汇总配置的结构如下: + 所需的列 + 模式Patterns - required-columns - patterns -### 必填列 {#required-columns} +### 所需的列 {#required-columns} +- `path_column_name` — 保存指标名称的列名 (Graphite sensor). 默认值: `Path`. +- `time_column_name` — 保存指标时间度量的列名. 默认值: `Time`. +- `value_column_name` — 保存在 `time_column_name` 所设时间点的指标值的列名. 默认值: `Value`. +- `version_column_name` - 保存指标的版本号列. 默认值: `Timestamp`. -- `path_column_name` — The name of the column storing the metric name (Graphite sensor). Default value: `Path`. -- `time_column_name` — The name of the column storing the time of measuring the metric. Default value: `Time`. -- `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. 默认值: `Value`. -- `version_column_name` — The name of the column storing the version of the metric. Default value: `Timestamp`. -### 模式 {#patterns} - -的结构 `patterns` 科: +### 模式Patterns {#patterns} +`patterns` 的结构: ``` text pattern @@ -120,21 +114,20 @@ default ... ``` -!!! warning "注意" - 模式必须严格排序: +!!! warning "注意" + 模式必须严格按顺序配置: + 1. 不含`function` or `retention`的Patterns + 1. 同时含有`function` and `retention`的Patterns + 1. `default`的Patterns. - 1. Patterns without `function` or `retention`. - 1. Patterns with both `function` and `retention`. - 1. Pattern `default`. 
+CH在处理行记录时,会检查 `pattern`节点的规则。每个 `pattern`(含`default`)节点可以包含 `function` 用于聚合操作,或`retention`参数,或者两者都有。如果指标名称和 `regexp`相匹配,相应 `pattern`的规则会生效;否则,使用 `default` 节点的规则。 -在处理行时,ClickHouse会检查以下内容中的规则 `pattern` 部分。 每个 `pattern` (包括 `default`)部分可以包含 `function` 聚合参数, `retention` 参数或两者兼而有之。 如果指标名称匹配 `regexp`,从规则 `pattern` 部分(sections节)的应用;否则,从规则 `default` 部分被使用。 +`pattern` 和 `default` 节点的字段设置: -字段为 `pattern` 和 `default` 科: - -- `regexp`– A pattern for the metric name. -- `age` – The minimum age of the data in seconds. -- `precision`– How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day). -- `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. +- `regexp`– 指标名的pattern. +- `age` – 数据的最小存活时间(按秒算). +- `precision`– 按秒来衡量数据存活时间时的精确程度. 必须能被86400整除 (一天的秒数). +- `function` – 对于存活时间在 `[age, age + precision]`之内的数据,需要使用的聚合函数 ### 配置示例 {#configuration-example} @@ -171,4 +164,4 @@ default ``` -[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/graphitemergetree/) +[原始文档](https://clickhouse.tech/docs/en/operations/table_engines/graphitemergetree/) From b125ebd2c156ebf1a3481d4662334c1d6353a977 Mon Sep 17 00:00:00 2001 From: Tom Bombadil <565258751@qq.com> Date: Thu, 18 Jun 2020 22:38:25 +0800 Subject: [PATCH 06/11] Update jdbc.md translate the doc to Chinese version. 
--- .../table-engines/integrations/jdbc.md | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/docs/zh/engines/table-engines/integrations/jdbc.md b/docs/zh/engines/table-engines/integrations/jdbc.md index 774afcc56bc..179d78e5a3b 100644 --- a/docs/zh/engines/table-engines/integrations/jdbc.md +++ b/docs/zh/engines/table-engines/integrations/jdbc.md @@ -1,19 +1,19 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 34 -toc_title: JDBC +toc_title: JDBC表引擎 --- # JDBC {#table-engine-jdbc} -允许ClickHouse通过以下方式连接到外部数据库 [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity). +允许CH通过 [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity) 连接到外部数据库。 -要实现JDBC连接,ClickHouse使用单独的程序 [ツ暗ェツ氾环催ツ団ツ法ツ人](https://github.com/alex-krash/clickhouse-jdbc-bridge) 这应该作为守护进程运行。 -该引擎支持 [可为空](../../../sql-reference/data-types/nullable.md) 数据类型。 +要实现JDBC连接,CH需要使用以后台进程运行的程序 [clickhouse-jdbc-bridge](https://github.com/alex-krash/clickhouse-jdbc-bridge)。 -## 创建表 {#creating-a-table} +该引擎支持 [Nullable](../../../sql-reference/data-types/nullable.md) 数据类型。 + + +## 建表 {#creating-a-table} ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name @@ -23,20 +23,22 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = JDBC(dbms_uri, external_database, external_table) ``` -**发动机参数** +**引擎参数** -- `dbms_uri` — URI of an external DBMS. +- `dbms_uri` — 外部DBMS的uri. 格式: `jdbc:://:/?user=&password=`. - Mysql的示例: `jdbc:mysql://localhost:3306/?user=root&password=root`. + MySQL示例: `jdbc:mysql://localhost:3306/?user=root&password=root`. -- `external_database` — Database in an external DBMS. +- `external_database` — 外部DBMS的数据库名. -- `external_table` — Name of the table in `external_database`. +- `external_table` — `external_database`中的外部表名. 
## 用法示例 {#usage-example} -通过直接与它的控制台客户端连接在MySQL服务器中创建一个表: +通过mysql控制台客户端来创建表 + +Creating a table in MySQL server by connecting directly with its console client: ``` text mysql> CREATE TABLE `test`.`test` ( @@ -59,7 +61,7 @@ mysql> select * from test; 1 row in set (0,00 sec) ``` -在ClickHouse服务器中创建表并从中选择数据: +在CH服务端创建表,并从中查询数据: ``` sql CREATE TABLE jdbc_table @@ -83,8 +85,8 @@ FROM jdbc_table └────────┴──────────────┴───────┴────────────────┘ ``` -## 另请参阅 {#see-also} +## 参见 {#see-also} - [JDBC表函数](../../../sql-reference/table-functions/jdbc.md). -[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/jdbc/) +[原始文档](https://clickhouse.tech/docs/en/operations/table_engines/jdbc/) From 1292cddab5c7f0a50effe4225fef57a43f82939f Mon Sep 17 00:00:00 2001 From: Tom Bombadil <565258751@qq.com> Date: Thu, 18 Jun 2020 22:42:10 +0800 Subject: [PATCH 07/11] Update lazy.md optimize toc title for this doc --- docs/zh/engines/database-engines/lazy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/engines/database-engines/lazy.md b/docs/zh/engines/database-engines/lazy.md index 700eb4b3b25..bca7eaeda6c 100644 --- a/docs/zh/engines/database-engines/lazy.md +++ b/docs/zh/engines/database-engines/lazy.md @@ -1,6 +1,6 @@ --- toc_priority: 31 -toc_title: "\u61D2\u60F0" +toc_title: "延时引擎" --- # 延时引擎Lazy {#lazy} From 42de5ac8f06e3c1ec5de3b4880ac6f5b7778191b Mon Sep 17 00:00:00 2001 From: Tom Bombadil <565258751@qq.com> Date: Thu, 18 Jun 2020 23:03:32 +0800 Subject: [PATCH 08/11] Update generate.md provide Chinese version of the doc --- .../engines/table-engines/special/generate.md | 36 +++++++++---------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/docs/zh/engines/table-engines/special/generate.md b/docs/zh/engines/table-engines/special/generate.md index 41bd6d66918..80966767462 100644 --- a/docs/zh/engines/table-engines/special/generate.md +++ b/docs/zh/engines/table-engines/special/generate.md @@ -1,35 +1,31 @@ --- 
-machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 46 -toc_title: GenerateRandom +toc_title: 随机数生成 --- -# Generaterandom {#table_engines-generate} +# 随机数生成表引擎 {#table_engines-generate} -GenerateRandom表引擎为给定的表架构生成随机数据。 +随机数生成表引擎为指定的表模式生成随机数 使用示例: +- 测试时生成可重复的大表 +- 为模糊测试生成随机输入 -- 在测试中使用填充可重复的大表。 -- 为模糊测试生成随机输入。 - -## 在ClickHouse服务器中的使用 {#usage-in-clickhouse-server} +## CH服务端的用法 {#usage-in-clickhouse-server} ``` sql ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length) ``` -该 `max_array_length` 和 `max_string_length` 参数指定所有的最大长度 -数组列和字符串相应地在生成的数据中。 +生成数据时,通过`max_array_length` 设置array列的最大长度, `max_string_length`设置string数据的最大长度 -生成表引擎仅支持 `SELECT` 查询。 +该引擎仅支持 `SELECT` 查询语句. -它支持所有 [数据类型](../../../sql-reference/data-types/index.md) 可以存储在一个表中,除了 `LowCardinality` 和 `AggregateFunction`. +该引擎支持能在表中存储的所有数据类型 [DataTypes](../../../sql-reference/data-types/index.md) ,除了 `LowCardinality` 和 `AggregateFunction`. -**示例:** +## 示例 {#example} -**1.** 设置 `generate_engine_table` 表: +**1.** 设置 `generate_engine_table` 引擎表: ``` sql CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE = GenerateRandom(1, 5, 3) @@ -49,13 +45,13 @@ SELECT * FROM generate_engine_table LIMIT 3 └──────┴────────────┘ ``` -## 实施细节 {#details-of-implementation} +## 实现细节 {#details-of-implementation} -- 不支持: +- 以下特性不支持: - `ALTER` - `SELECT ... 
SAMPLE` - `INSERT` - - 指数 - - 复制 + - Indices + - Replication -[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/generate/) +[原始文档](https://clickhouse.tech/docs/en/operations/table_engines/generate/) From e42c07954dc91429d2e3d02a00079695bd6e63d9 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 18 Jun 2020 19:01:49 +0300 Subject: [PATCH 09/11] fix --- src/Parsers/ParserCreateQuery.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 84bab5bf7e8..1e75a2cd1ca 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -7,13 +7,10 @@ #include #include #include -#include #include #include #include -#include - namespace DB { @@ -181,8 +178,6 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E null_modifier.emplace(true); } - - if (s_comment.ignore(pos, expected)) { /// should be followed by a string literal From bd28b7e1c22fae6061a94f6d70e00f9c5ece4c63 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 18 Jun 2020 20:05:26 +0300 Subject: [PATCH 10/11] better code --- src/Interpreters/AnyInputOptimize.cpp | 41 ++++++++++--------- .../01322_any_input_optimize.reference | 2 + .../0_stateless/01322_any_input_optimize.sql | 4 ++ 3 files changed, 28 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/01322_any_input_optimize.reference create mode 100644 tests/queries/0_stateless/01322_any_input_optimize.sql diff --git a/src/Interpreters/AnyInputOptimize.cpp b/src/Interpreters/AnyInputOptimize.cpp index 14448e85d90..13246fd8172 100644 --- a/src/Interpreters/AnyInputOptimize.cpp +++ b/src/Interpreters/AnyInputOptimize.cpp @@ -18,8 +18,8 @@ namespace ErrorCodes namespace { - constexpr const char * any = "any"; - constexpr const char * anyLast = "anyLast"; + constexpr auto * any = "any"; + constexpr auto * anyLast = "anyLast"; } ASTPtr * getExactChild(const ASTPtr & ast, const size_t ind) @@ -30,12 +30,12 @@ 
ASTPtr * getExactChild(const ASTPtr & ast, const size_t ind) } ///recursive searching of identifiers -void changeAllIdentifiers(ASTPtr & ast, size_t ind, std::string& mode) +void changeAllIdentifiers(ASTPtr & ast, size_t ind, const std::string & name) { - const char * name = mode.c_str(); ASTPtr * exact_child = getExactChild(ast, ind); if (!exact_child) return; + if ((*exact_child)->as()) { ///put new any @@ -43,14 +43,15 @@ void changeAllIdentifiers(ASTPtr & ast, size_t ind, std::string& mode) *exact_child = makeASTFunction(name); (*exact_child)->as()->arguments->children.push_back(old_ast); } - else if ((*exact_child)->as() && - !AggregateFunctionFactory::instance().isAggregateFunctionName((*exact_child)->as()->name)) + else if ((*exact_child)->as()) + { + if (AggregateFunctionFactory::instance().isAggregateFunctionName((*exact_child)->as()->name)) + throw Exception("Aggregate function " + (*exact_child)->as()->name + + " is found inside aggregate function " + name + " in query", ErrorCodes::ILLEGAL_AGGREGATION); + for (size_t i = 0; i < (*exact_child)->as()->arguments->children.size(); i++) - changeAllIdentifiers(*exact_child, i, mode); - else if ((*exact_child)->as() && - AggregateFunctionFactory::instance().isAggregateFunctionName((*exact_child)->as()->name)) - throw Exception("Aggregate function " + (*exact_child)->as()->name + - " is found inside aggregate function " + name + " in query", ErrorCodes::ILLEGAL_AGGREGATION); + changeAllIdentifiers(*exact_child, i, name); + } } @@ -62,18 +63,20 @@ void AnyInputMatcher::visit(ASTPtr & current_ast, Data data) return; auto * function_node = current_ast->as(); - if (function_node && (function_node->name == any || function_node->name == anyLast) - && !function_node->arguments->children.empty() && function_node->arguments->children[0] && - function_node->arguments->children[0]->as()) + if (!function_node || function_node->arguments->children.empty()) + return; + + const auto & function_argument = 
function_node->arguments->children[0]; + if ((function_node->name == any || function_node->name == anyLast) + && function_argument && function_argument->as()) { - std::string mode = function_node->name; + auto name = function_node->name; ///cut any or anyLast - if (function_node->arguments->children[0]->as() && - !function_node->arguments->children[0]->as()->arguments->children.empty()) + if (!function_argument->as()->arguments->children.empty()) { - current_ast = (function_node->arguments->children[0])->clone(); + current_ast = function_argument->clone(); for (size_t i = 0; i < current_ast->as()->arguments->children.size(); ++i) - changeAllIdentifiers(current_ast, i, mode); + changeAllIdentifiers(current_ast, i, name); } } } diff --git a/tests/queries/0_stateless/01322_any_input_optimize.reference b/tests/queries/0_stateless/01322_any_input_optimize.reference new file mode 100644 index 00000000000..5b724453c71 --- /dev/null +++ b/tests/queries/0_stateless/01322_any_input_optimize.reference @@ -0,0 +1,2 @@ +9 +SELECT any(number) + (any(number) * 2)\nFROM numbers(3, 10) diff --git a/tests/queries/0_stateless/01322_any_input_optimize.sql b/tests/queries/0_stateless/01322_any_input_optimize.sql new file mode 100644 index 00000000000..1094db00cbc --- /dev/null +++ b/tests/queries/0_stateless/01322_any_input_optimize.sql @@ -0,0 +1,4 @@ +SET optimize_any_input=1; +SET enable_debug_queries=1; +SELECT any(number + number * 2) FROM numbers(3, 10); +ANALYZE SELECT any(number + number * 2) FROM numbers(3, 10); From f44dbcd2ab3f0b9d50e8b175c13c457d345c5b7f Mon Sep 17 00:00:00 2001 From: Ruslan <49528413+kamalov-ruslan@users.noreply.github.com> Date: Thu, 18 Jun 2020 22:25:28 +0300 Subject: [PATCH 11/11] Fix crash in `min(multiply(1))` with optimize_arithmetic_operations_in_aggregate_functions (#11756) --- .../ArithmeticOperationsInAgrFuncOptimize.cpp | 38 ++++++++++++------- ...bad_arg_in_arithmetic_operations.reference | 0 ...01323_bad_arg_in_arithmetic_operations.sql | 15 
++++++++ 3 files changed, 39 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/01323_bad_arg_in_arithmetic_operations.reference create mode 100644 tests/queries/0_stateless/01323_bad_arg_in_arithmetic_operations.sql diff --git a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp index 665c2febd9d..37109e444e3 100644 --- a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp +++ b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp @@ -41,29 +41,31 @@ bool onlyConstsInside(const ASTFunction * func_node) bool inappropriateNameInside(const ASTFunction * func_node, const char * inter_func_name) { return (func_node->arguments->children[0]->as() && - inter_func_name != func_node->arguments->children[0]->as()->name) || + strcmp(inter_func_name, func_node->arguments->children[0]->as()->name.c_str()) != 0) || (func_node->arguments->children.size() == 2 && func_node->arguments->children[1]->as() && - inter_func_name != func_node->arguments->children[1]->as()->name); + strcmp(inter_func_name, func_node->arguments->children[1]->as()->name.c_str()) != 0); } bool isInappropriate(const ASTPtr & node, const char * inter_func_name) { - return !node->as() || inter_func_name != node->as()->name; + return !node->as() || (strcmp(inter_func_name, node->as()->name.c_str()) != 0); } ASTFunction * getInternalFunction(const ASTFunction * f_n) { const auto * function_args = f_n->arguments->as(); if (!function_args || function_args->children.size() != 1) - throw Exception("Wrong number of arguments for function" + f_n->name + "(" + toString(function_args->children.size()) + " instead of 1)", + throw Exception("Wrong number of arguments for function " + f_n->name + "(" + toString(function_args->children.size()) + " instead of 1)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); return f_n->arguments->children[0]->as(); } -ASTFunction * treeFiller(ASTFunction * old_tree, const ASTs & 
nodes_array, size_t size, const char * name) +ASTFunction * treeFiller(ASTFunction * old_tree, const ASTs & nodes_array, size_t size, const char * name, bool flag) { + if (flag) + --size; for (size_t i = 0; i < size; ++i) { old_tree->arguments->children = {}; @@ -94,19 +96,23 @@ std::pair tryGetConst(const char * name, const ASTs & arguments) not_const.push_back(arg); } - if ((name == plus || name == mul) && const_num.size() + not_const.size() != 2) - { + if ((strcmp(name, plus) == 0 || strcmp(name, mul) == 0) && const_num.size() + not_const.size() != 2) throw Exception("Wrong number of arguments for function 'plus' or 'multiply' (" + toString(const_num.size() + not_const.size()) + " instead of 2)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - } return {const_num, not_const}; } std::pair findAllConsts(const ASTFunction * func_node, const char * inter_func_name) { - if (!func_node->arguments) - return {}; + if (func_node->arguments->children.empty()) + { + if (strcmp(func_node->name.c_str(), plus) == 0 || strcmp(func_node->name.c_str(), mul) == 0) + throw Exception("Wrong number of arguments for function" + func_node->name + "(0 instead of 2)", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + else + return {}; + } if (onlyConstsInside(func_node)) return tryGetConst(func_node->name.c_str(), func_node->arguments->children); @@ -139,7 +145,7 @@ std::pair findAllConsts(const ASTFunction * func_node, const char * std::pair fl = tryGetConst(func_node->name.c_str(), func_node->arguments->children); ASTs first_lvl_consts = fl.first; ASTs first_lvl_not_consts = fl.second; - if (!first_lvl_not_consts[0]->as()) + if (first_lvl_not_consts.empty() || !first_lvl_not_consts[0]->as()) return {first_lvl_consts, first_lvl_not_consts}; std::pair ans = findAllConsts(first_lvl_not_consts[0]->as(), inter_func_name); @@ -176,17 +182,21 @@ void buildTree(ASTFunction * cur_node, const char * func_name, const char * intr ASTs cons_val = tree_comp.first; ASTs non_cons = 
tree_comp.second; + bool not_const_empty = non_cons.empty(); + cur_node->name = intro_func; - cur_node = treeFiller(cur_node, cons_val, cons_val.size(), intro_func); + cur_node = treeFiller(cur_node, cons_val, cons_val.size(), intro_func, not_const_empty); cur_node->name = func_name; - if (non_cons.size() == 1) + if (non_cons.empty()) + cur_node->arguments->children.push_back(cons_val[cons_val.size() - 1]); + else if (non_cons.size() == 1) cur_node->arguments->children.push_back(non_cons[0]); else { cur_node->arguments->children.push_back(makeASTFunction(intro_func)); cur_node = cur_node->arguments->children[0]->as(); - cur_node = treeFiller(cur_node, non_cons, non_cons.size() - 2, intro_func); + cur_node = treeFiller(cur_node, non_cons, non_cons.size() - 2, intro_func, not_const_empty); cur_node->arguments->children = {non_cons[non_cons.size() - 2], non_cons[non_cons.size() - 1]}; } } diff --git a/tests/queries/0_stateless/01323_bad_arg_in_arithmetic_operations.reference b/tests/queries/0_stateless/01323_bad_arg_in_arithmetic_operations.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01323_bad_arg_in_arithmetic_operations.sql b/tests/queries/0_stateless/01323_bad_arg_in_arithmetic_operations.sql new file mode 100644 index 00000000000..1c4bfc8f091 --- /dev/null +++ b/tests/queries/0_stateless/01323_bad_arg_in_arithmetic_operations.sql @@ -0,0 +1,15 @@ +SET optimize_arithmetic_operations_in_aggregate_functions = 1; + +SELECT max(multiply(1)); -- { serverError 42 } +SELECT min(multiply(2));-- { serverError 42 } +SELECT sum(multiply(3)); -- { serverError 42 } + +SELECT max(plus(1)); -- { serverError 42 } +SELECT min(plus(2)); -- { serverError 42 } +SELECT sum(plus(3)); -- { serverError 42 } + +SELECT max(multiply()); -- { serverError 42 } +SELECT min(multiply(1, 2 ,3)); -- { serverError 42 } +SELECT sum(plus() + multiply()); -- { serverError 42 } + +SELECT sum(plus(multiply(42, 3), multiply(42))); -- { serverError 
42 }