From 80f3de1359e119851f1de45e4250f4cf5f87c63d Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 15 Oct 2020 20:39:04 +0800 Subject: [PATCH 1/6] ISSUES-15883 try fix collate name --- src/Parsers/MySQL/ASTAlterCommand.cpp | 6 +-- src/Parsers/MySQL/ASTDeclareColumn.cpp | 4 +- src/Parsers/MySQL/ASTDeclareOption.cpp | 39 +++++--------------- src/Parsers/MySQL/ASTDeclareOption.h | 4 +- src/Parsers/MySQL/ASTDeclareTableOptions.cpp | 8 ++-- 5 files changed, 21 insertions(+), 40 deletions(-) diff --git a/src/Parsers/MySQL/ASTAlterCommand.cpp b/src/Parsers/MySQL/ASTAlterCommand.cpp index b6f2b925de0..92461635265 100644 --- a/src/Parsers/MySQL/ASTAlterCommand.cpp +++ b/src/Parsers/MySQL/ASTAlterCommand.cpp @@ -303,9 +303,9 @@ static inline bool parseOtherCommand(IParser::Pos & pos, ASTPtr & node, Expected OptionDescribe("ENABLE KEYS", "enable_keys", std::make_shared()), OptionDescribe("DISABLE KEYS", "enable_keys", std::make_shared()), /// TODO: with collate - OptionDescribe("CONVERT TO CHARACTER SET", "charset", std::make_shared()), - OptionDescribe("CHARACTER SET", "charset", std::make_shared()), - OptionDescribe("DEFAULT CHARACTER SET", "charset", std::make_shared()), + OptionDescribe("CONVERT TO CHARACTER SET", "charset", std::make_shared()), + OptionDescribe("CHARACTER SET", "charset", std::make_shared()), + OptionDescribe("DEFAULT CHARACTER SET", "charset", std::make_shared()), OptionDescribe("LOCK", "lock", std::make_shared()) } }; diff --git a/src/Parsers/MySQL/ASTDeclareColumn.cpp b/src/Parsers/MySQL/ASTDeclareColumn.cpp index 6d21f934858..3913c828ec3 100644 --- a/src/Parsers/MySQL/ASTDeclareColumn.cpp +++ b/src/Parsers/MySQL/ASTDeclareColumn.cpp @@ -51,8 +51,8 @@ static inline bool parseColumnDeclareOptions(IParser::Pos & pos, ASTPtr & node, OptionDescribe("UNIQUE", "unique_key", std::make_unique()), OptionDescribe("KEY", "primary_key", std::make_unique()), OptionDescribe("COMMENT", "comment", std::make_unique()), - OptionDescribe("CHARACTER SET", "charset_name", std::make_unique()), - OptionDescribe("COLLATE", "collate", std::make_unique()), + OptionDescribe("CHARACTER SET", "charset_name", std::make_unique()), + OptionDescribe("COLLATE", "collate", std::make_unique()), OptionDescribe("COLUMN_FORMAT", "column_format", std::make_unique()), OptionDescribe("STORAGE", "storage", std::make_unique()), OptionDescribe("AS", "generated", std::make_unique()), diff --git a/src/Parsers/MySQL/ASTDeclareOption.cpp b/src/Parsers/MySQL/ASTDeclareOption.cpp index 92ac5f0343e..17be639b630 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.cpp +++ b/src/Parsers/MySQL/ASTDeclareOption.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -94,41 +95,21 @@ bool ParserAlwaysFalse::parseImpl(IParser::Pos & /*pos*/, ASTPtr & node, Expecte return true; } -bool ParserCharsetName::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &) +bool ParserCharsetOrCollateName::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) { - /// Identifier in backquotes or in double quotes - if (pos->type == TokenType::QuotedIdentifier) - { - ReadBufferFromMemory buf(pos->begin, pos->size()); - String s; + ParserIdentifier p_identifier; + ParserStringLiteral p_string_literal; - if (*pos->begin == '`') - readBackQuotedStringWithSQLStyle(s, buf); - else - readDoubleQuotedStringWithSQLStyle(s, buf); - - if (s.empty()) /// Identifiers "empty string" are not allowed. - return false; - - node = std::make_shared(s); - ++pos; + if (p_identifier.parse(pos, node, expected)) return true; - } - else if (pos->type == TokenType::BareWord) + else { - const char * begin = pos->begin; - - while (true) + if (p_string_literal.parse(pos, node, expected)) { - if (!isWhitespaceASCII(*pos->end) && pos->type != TokenType::EndOfStream) - ++pos; - else - break; + const auto & string_value = node->as()->value.safeGet(); + node = std::make_shared(string_value); + return true; } - - node = std::make_shared(String(begin, pos->end)); - ++pos; - return true; } return false; diff --git a/src/Parsers/MySQL/ASTDeclareOption.h b/src/Parsers/MySQL/ASTDeclareOption.h index 24800371061..2502618b209 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.h +++ b/src/Parsers/MySQL/ASTDeclareOption.h @@ -61,10 +61,10 @@ public: /// Copy and paste from ParserIdentifier, /// the difference is that multiple tokens are glued if there is no whitespace ASCII between them -struct ParserCharsetName : public IParserBase +struct ParserCharsetOrCollateName : public IParserBase { protected: - const char * getName() const override { return "charset name"; } + const char * getName() const override { return "charset or collate name"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected &) override; }; diff --git a/src/Parsers/MySQL/ASTDeclareTableOptions.cpp b/src/Parsers/MySQL/ASTDeclareTableOptions.cpp index 87b99cdf1ac..c903c7d2fa7 100644 --- a/src/Parsers/MySQL/ASTDeclareTableOptions.cpp +++ b/src/Parsers/MySQL/ASTDeclareTableOptions.cpp @@ -68,12 +68,12 @@ bool ParserDeclareTableOptions::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp { OptionDescribe("AUTO_INCREMENT", "auto_increment", std::make_shared()), OptionDescribe("AVG_ROW_LENGTH", "avg_row_length", std::make_shared()), - OptionDescribe("CHARSET", "character_set", std::make_shared()), - OptionDescribe("DEFAULT CHARSET", "character_set", std::make_shared()), - OptionDescribe("CHARACTER SET", "character_set", std::make_shared()), + OptionDescribe("CHARSET", "character_set", std::make_shared()), + OptionDescribe("DEFAULT CHARSET", "character_set", std::make_shared()), + OptionDescribe("CHARACTER SET", "character_set", std::make_shared()), OptionDescribe("DEFAULT CHARACTER SET", "character_set", std::make_shared()), OptionDescribe("CHECKSUM", "checksum", std::make_shared>()), - OptionDescribe("COLLATE", "collate", std::make_shared()), + OptionDescribe("COLLATE", "collate", std::make_shared()), OptionDescribe("DEFAULT COLLATE", "collate", std::make_shared()), OptionDescribe("COMMENT", "comment", std::make_shared()), OptionDescribe("COMPRESSION", "compression", std::make_shared()), From c8aa007a455372ab0b71a60f3405aea9beb5c5d6 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 15 Oct 2020 20:42:10 +0800 Subject: [PATCH 2/6] ISSUES-15883 modify comment --- src/Parsers/MySQL/ASTDeclareOption.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Parsers/MySQL/ASTDeclareOption.h b/src/Parsers/MySQL/ASTDeclareOption.h index 2502618b209..a9529924567 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.h +++ b/src/Parsers/MySQL/ASTDeclareOption.h @@ -59,8 +59,7 @@ public: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/// Copy and paste from ParserIdentifier, -/// the difference is that multiple tokens are glued if there is no whitespace ASCII between them +/// identifier, string literal, binary keyword struct ParserCharsetOrCollateName : public IParserBase { protected: From 4e285168dfa3cdcc6fad815fdde8e311af53275c Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Fri, 16 Oct 2020 09:11:05 +0800 Subject: [PATCH 3/6] ISSUES-15883 try fix test failure --- src/Parsers/MySQL/tests/gtest_column_parser.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/MySQL/tests/gtest_column_parser.cpp b/src/Parsers/MySQL/tests/gtest_column_parser.cpp index ef6371f71d9..9f1f61c8c47 100644 --- a/src/Parsers/MySQL/tests/gtest_column_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_column_parser.cpp @@ -15,7 +15,7 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) { ParserDeclareColumn p_column; - String input = "col_01 VARCHAR(100) NOT NULL DEFAULT NULL AUTO_INCREMENT UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf-8 " + String input = "col_01 VARCHAR(100) NOT NULL DEFAULT NULL AUTO_INCREMENT UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf8 " "COLUMN_FORMAT FIXED STORAGE MEMORY REFERENCES tbl_name (col_01) CHECK 1"; ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); @@ -29,7 +29,7 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) EXPECT_EQ(declare_options->changes["unique_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["primary_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "column comment"); - EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf-8"); + EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf8"); EXPECT_EQ(declare_options->changes["column_format"]->as()->name, "FIXED"); EXPECT_EQ(declare_options->changes["storage"]->as()->name, "MEMORY"); EXPECT_TRUE(declare_options->changes["reference"]->as()); @@ -40,7 +40,7 @@ TEST(ParserColumn, AllGeneratedColumnOption) { ParserDeclareColumn p_column; - String input = "col_01 VARCHAR(100) NULL UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf-8 " + String input = "col_01 VARCHAR(100) NULL UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf8 " "REFERENCES tbl_name (col_01) CHECK 1 GENERATED ALWAYS AS (1) STORED"; ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); @@ -52,7 +52,7 @@ TEST(ParserColumn, AllGeneratedColumnOption) EXPECT_EQ(declare_options->changes["unique_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["primary_key"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "column comment"); - EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf-8"); + EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf8"); EXPECT_EQ(declare_options->changes["generated"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["is_stored"]->as()->value.safeGet(), 1); EXPECT_TRUE(declare_options->changes["reference"]->as()); From 5207be9b32afcc5e780572a31bf17be1ac573da2 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 17 Oct 2020 12:36:08 +0800 Subject: [PATCH 4/6] ISSUES-15883 try fix test failure --- src/Parsers/MySQL/tests/gtest_table_options_parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp index b051f6149bb..656b59fe6f3 100644 --- a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp @@ -11,7 +11,7 @@ using namespace DB::MySQLParser; TEST(ParserTableOptions, AllSubpatitionOptions) { - String input = "AUTO_INCREMENt = 1 AVG_ROW_LENGTh 3 CHARACTER SET utf-8 CHECKSUM 1 COLLATE utf8_bin" + String input = "AUTO_INCREMENt = 1 AVG_ROW_LENGTh 3 CHARACTER SET utf8 CHECKSUM 1 COLLATE utf8_bin" " COMMENT 'table option comment' COMPRESSION 'LZ4' CONNECTION 'connect_string' DATA DIRECTORY 'data_directory'" " INDEX DIRECTORY 'index_directory' DELAY_KEY_WRITE 0 ENCRYPTION 'Y' ENGINE INNODB INSERT_METHOD NO KEY_BLOCK_SIZE 3" " MAX_ROWS 1000 MIN_ROWS 0 PACK_KEYS DEFAULT PASSWORD 'password' ROW_FORMAT DYNAMIC STATS_AUTO_RECALC DEFAULT " @@ -23,7 +23,7 @@ TEST(ParserTableOptions, AllSubpatitionOptions) ASTDeclareOptions * declare_options = ast->as(); EXPECT_EQ(declare_options->changes["auto_increment"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["avg_row_length"]->as()->value.safeGet(), 3); - EXPECT_EQ(declare_options->changes["character_set"]->as()->name, "utf-8"); + EXPECT_EQ(declare_options->changes["character_set"]->as()->name, "utf8"); EXPECT_EQ(declare_options->changes["checksum"]->as()->value.safeGet(), 1); EXPECT_EQ(declare_options->changes["collate"]->as()->name, "utf8_bin"); EXPECT_EQ(declare_options->changes["comment"]->as()->value.safeGet(), "table option comment"); From d084e05aba63c6039db5524a4bf3ce33aa8f2d94 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 17 Oct 2020 13:15:00 +0800 Subject: [PATCH 5/6] ISSUES-15883 support zero length argument with string type --- src/DataTypes/DataTypeString.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 9d563ee836c..141f896cfc2 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -384,7 +384,7 @@ static DataTypePtr create(const ASTPtr & arguments) throw Exception("String data type family mustn't have more than one argument - size in characters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + if (!argument || argument->value.getType() != Field::Types::UInt64) throw Exception("String data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); } From e354108e532b59c98f61ca4e72415543178a9451 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 24 Oct 2020 10:54:02 +0800 Subject: [PATCH 6/6] ISSUES-15883 trigger CI