From d18aba1194cb1a48560315d420404783b418e439 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 27 Mar 2024 19:26:19 +0000 Subject: [PATCH] Done --- src/Client/Suggest.cpp | 19 +- src/Parsers/obfuscateQueries.cpp | 454 ++---------------- ...6_clickhouse_client_autocomplete.reference | 1 + .../01676_clickhouse_client_autocomplete.sh | 2 + 4 files changed, 51 insertions(+), 425 deletions(-) diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 03df582de10..7bbd45ed1d8 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -30,24 +30,6 @@ namespace ErrorCodes Suggest::Suggest() { - /// Keywords may be not up to date with ClickHouse parser. - addWords({"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", - "CLUSTER", "DEFAULT", "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", - "SETTINGS", "ATTACH", "DETACH", "DROP", "RENAME", "TO", "ALTER", "ADD", - "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", "PRIMARY", "KEY", - "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO", - "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", - "THEN", "ELSE", "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", - "FINAL", "DEDUPLICATE", "INSERT", "VALUES", "SELECT", "DISTINCT", "SAMPLE", "ARRAY", - "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", "LEFT", "RIGHT", - "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", - "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", - "OR", "ASC", "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", - "BETWEEN", "TRUNCATE", "USER", "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW", - "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST", - "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", "INTERVAL", - "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP", "APPEND", - "IGNORE NULLS", "RESPECT NULLS", "OVER", "PASTE"}); } static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggestion) @@ -82,6 +64,7 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti add_column("name", "data_type_families", false, {}); add_column("name", "merge_tree_settings", false, {}); add_column("name", "settings", false, {}); + add_column("keyword", "keywords", false, {}); if (!basic_suggestion) { diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index 8012dbb37c6..49af925212d 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -1,3 +1,5 @@ +#include "Parsers/CommonParsers.h" +#include #include #include @@ -10,6 +12,7 @@ #include #include #include +#include namespace DB @@ -24,413 +27,50 @@ namespace ErrorCodes namespace { -const std::unordered_set keywords +const std::unordered_set & getObfuscateKeywords() { - "!=", - "", - "%", - "*", - "+", - "-", - "->", - ".", - "/", - ":", - "::", - "<", - "<=", - "<>", - "=", - "==", - "<=>", - ">", - ">=", - "?", - "[", - "]+", - "]+|[", - "^[", - "||", - "]+$", - "ACCESS", - "ACTION", - "ADD", - "ADMIN", - "AFTER", - "ALGORITHM", - "ALIAS", - "ALL", - "ALLOWED_LATENESS", - "ALTER", - "AND", - "ANTI", - "ANY", - "APPLY", - "ARRAY", - "AS", - "ASC", - "ASCENDING", - "ASOF", - "ASSUME", - "AST", - "ASYNC", - "ATTACH", - "AUTO_INCREMENT", - "BACKUP", - "BASE_BACKUP", - "BEGIN", - "BETWEEN", - "BIDIRECTIONAL", - "BOTH", - "BY", - "CACHE", - "CACHES", - "CASCADE", - "CASE", - "CASEWITHEXPRESSION", - "CAST", - "CHANGE", - "CHANGEABLE_IN_READONLY", - "CHANGED", - "CHAR", - "CHARACTER", - "CHECK", - "CLEANUP", - "CLEAR", - "CLUSTER", - "CLUSTER_HOST_IDS", - "CLUSTERS", - "CN", - "CODEC", - "COLLATE", - "COLLECTION", - "COLUMN", - "COLUMNS", - "COMMENT", - "COMMIT", - "COMPRESSION", - "CONCAT", - "CONSTRAINT", - "CREATE", - "CROSS", - "CUBE", - "CURRENT", - "CURRENT_USER", - "DATABASE", - "DATABASES", - "DATE", - "DATE_ADD", - "DATEADD", - "DATE_DIFF", - "DATEDIFF", - "DATE_SUB", - "DATESUB", - "DAY", - "DD", - "DDL", - "DEDUPLICATE", - "DEFAULT", - "DELAY", - "DELETE", - "DESC", - "DESCENDING", - "DESCRIBE", - "DETACH", - "DETACHED", - "DICTIONARIES", - "DICTIONARY", - "DISK", - "DISTINCT", - "DIV", - "DOUBLE_SHA1_HASH", - "DROP", - "ELSE", - "EMPTY", - "ENABLED", - "END", - "ENFORCED", - "ENGINE", - "EPHEMERAL", - "EQUALS", - "ESTIMATE", - "EVENT", - "EVENTS", - "EXCEPT", - "EXCHANGE", - "EXISTS", - "EXPLAIN", - "EXPRESSION", - "EXTERNAL", - "EXTRACT", - "FALSE", - "FETCH", - "FILE", - "FILESYSTEM", - "FILL", - "FILTER", - "FINAL", - "FIRST", - "FOLLOWING", - "FOR", - "FOREIGN", - "FORMAT", - "FREEZE", - "FROM", - "FULL", - "FULLTEXT", - "FUNCTION", - "GLOBAL", - "GRANT", - "GRANTEES", - "GRANTS", - "GRANULARITY", - "GREATER", - "GREATEROREQUALS", - "GROUP", - "GROUPING", - "GROUPS", - "HASH", - "HAVING", - "HDFS", - "HH", - "HIERARCHICAL", - "HOST", - "HOUR", - "ID", - "IDENTIFIED", - "IF", - "ILIKE", - "IN", - "INDEX", - "INFILE", - "INHERIT", - "INJECTIVE", - "INNER", - "INSERT", - "INTERPOLATE", - "INTERSECT", - "INTERVAL", - "INTO", - "INVISIBLE", - "IP", - "IS", - "IS_OBJECT_ID", - "JOIN", - "KEY", - "KEYED", - "KILL", - "LAMBDA", - "LARGE", - "LAST", - "LAYOUT", - "LEADING", - "LEFT", - "LESS", - "LESSOREQUALS", - "LEVEL", - "LIFETIME", - "LIKE", - "LIMIT", - "LIMITS", - "LINEAR", - "LIST", - "LITERAL", - "LIVE", - "LOCAL", - "LTRIM", - "MATCH", - "MATERIALIZE", - "MATERIALIZED", - "MAX", - "MCS", - "MEMORY", - "MI", - "MICROSECOND", - "MILLISECOND", - "MIN", - "MINUS", - "MINUTE", - "MM", - "MOD", - "MODIFY", - "MONTH", - "MOVE", - "MS", - "MULTIIF", - "MUTATION", - "NAME", - "NAMED", - "NANOSECOND", - "NEXT", - "NO", - "NONE", - "NOT", - "NOTEQUALS", - "NOTIN", - "NS", - "NULL", - "NULLS", - "OBJECT", - "OFFSET", - "ON", - "ONLY", - "OPTIMIZE", - "OPTION", - "OR", - "ORDER", - "OUTER", - "OUTFILE", - "OVER", - "OVERRIDE", - "PART", - "PARTIAL", - "PARTITION", - "PARTITIONS", - "PART_MOVE_TO_SHARD", - "PERMANENTLY", - "PERMISSIVE", - "PIPELINE", - "PLAN", - "PLUS", - "POLICY", - "POPULATE", - "POSITION", - "PRECEDING", - "PRECISION", - "PREWHERE", - "PRIMARY", - "PRIVILEGES", - "PROCESSLIST", - "PROFILE", - "PROJECTION", - "QQ", - "QUARTER", - "QUERY", - "QUOTA", - "RANDOMIZED", - "RANGE", - "READONLY", - "REALM", - "RECOMPRESS", - "REFERENCES", - "REFRESH", - "REGEXP", - "REGEXPQUOTEMETA", - "REMOVE", - "RENAME", - "REPLACE", - "REPLACEREGEXPALL", - "REPLACEREGEXPONE", - "RESET", - "RESTORE", - "RESTRICT", - "RESTRICTIVE", - "RESUME", - "REVOKE", - "RIGHT", - "ROLE", - "ROLES", - "ROLLBACK", - "ROLLUP", - "ROW", - "ROWS", - "RTRIM", - "S3", - "SALT", - "SAMPLE", - "SECOND", - "SELECT", - "SEMI", - "SERVER", - "SET", - "SETS", - "SETTING", - "SETTINGS", - "SHA256_HASH", - "SHARD", - "SHOW", - "SIGNED", - "SIMPLE", - "SINGLEVALUEORNULL", - "SNAPSHOT", - "SOURCE", - "SPATIAL", - "SS", - "STDOUT", - "STEP", - "STORAGE", - "STRICT", - "STRICTLY_ASCENDING", - "SUBPARTITION", - "SUBPARTITIONS", - "SUBSTRING", - "SUSPEND", - "SYNC", - "SYNTAX", - "SYSTEM", - "TABLE", - "TABLES", - "TEMPORARY", - "TEST", - "THAN", - "THEN", - "TIES", - "TIMESTAMP", - "TIMESTAMP_ADD", - "TIMESTAMPADD", - "TIMESTAMP_DIFF", - "TIMESTAMPDIFF", - "TIMESTAMP_SUB", - "TIMESTAMPSUB", - "TO", - "TODATE", - "TODATETIME", - "TOP", - "TOTALS", - "TRACKING", - "TRAILING", - "TRANSACTION", - "TREE", - "TRIGGER", - "TRIM", - "TRIMBOTH", - "TRIMLEFT", - "TRIMRIGHT", - "TRUE", - "TRUNCATE", - "TTL", - "TUPLE", - "TYPE", - "UNBOUNDED", - "UNFREEZE", - "UNION", - "UNIQUE", - "UNSIGNED", - "UNTUPLE", - "UPDATE", - "URL", - "USE", - "USER", - "USING", - "UUID", - "VALUES", - "VARYING", - "VIEW", - "VIEWIFPERMITTED", - "VISIBLE", - "VOLUME", - "WATCH", - "WATERMARK", - "WEEK", - "WHEN", - "WHERE", - "WINDOW", - "WITH", - "WK", - "WRITABLE", - "YEAR", - "YYYY", - "ZKPATH" + static std::unordered_set instance; + + auto initialize = [&]() mutable + { + instance = { + "!=", + "", + "%", + "*", + "+", + "-", + "->", + ".", + "/", + ":", + "::", + "<", + "<=", + "<>", + "=", + "==", + "<=>", + ">", + ">=", + "?", + "[", + "]+", + "]+|[", + "^[", + "||", + "]+$" + }; + + auto & global_keywords = getAllKeyWords(); + std::copy(global_keywords.begin(), global_keywords.end(), std::inserter(instance, instance.begin())); + return true; + }; + + static bool initialized = initialize(); + (void) initialized; + + return instance; }; /// We want to keep some words inside quotes. For example we want to keep HOUR inside: @@ -1312,7 +952,7 @@ void obfuscateQueries( std::string whole_token_uppercase(whole_token); Poco::toUpperInPlace(whole_token_uppercase); - if (keywords.contains(whole_token_uppercase) + if (getObfuscateKeywords().contains(whole_token_uppercase) || known_identifier_func(whole_token)) { /// Keep keywords as is. diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference index cf3e942adfe..78f8967263d 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference @@ -9,6 +9,7 @@ test_shard_localhost: OK default_path_test: OK default: OK uniqCombined64ForEach: OK +CHANGEABLE_IN_READONLY: OK system: OK aggregate_function_combinators: OK primary_key_bytes_in_memory_allocated: OK diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index db62dedb5b4..f757e32c56c 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -99,6 +99,8 @@ client_compwords_positive=( default # system.aggregate_function_combinators uniqCombined64ForEach + # system.keywords + CHANGEABLE_IN_READONLY # FIXME: one may add separate case for suggestion_limit # system.databases