diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f9996cbfb0b..07abd77fed0 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -519,6 +519,33 @@ Possible values: Default value: `1`. +## allow_settings_after_format_in_insert {#allow_settings_after_format_in_insert} + +Control whether `SETTINGS` after `FORMAT` in `INSERT` queries is allowed or not. It is not recommended to use this, since this may interpret part of `SETTINGS` as values. + +Example: + +```sql +INSERT INTO FUNCTION null('foo String') SETTINGS max_threads=1 VALUES ('bar'); +``` + +But the following query will work only with `allow_settings_after_format_in_insert`: + +```sql +SET allow_settings_after_format_in_insert=1; +INSERT INTO FUNCTION null('foo String') VALUES ('bar') SETTINGS max_threads=1; +``` + +Possible values: + +- 0 — Disallow. +- 1 — Allow. + +Default value: `0`. + +!!! note "Warning" + Use this setting only for backward compatibility if your use cases depend on old syntax. + ## input_format_skip_unknown_fields {#settings-input-format-skip-unknown-fields} Enables or disables skipping insertion of extra data. diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 835afcdb2ed..50d85cdd43d 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -54,6 +54,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ("multiquery,n", "allow multiple queries in the same file") ("obfuscate", "obfuscate instead of formatting") ("backslash", "add a backslash at the end of each line of the formatted query") + ("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe") ("seed", po::value(), "seed (arbitrary string) that determines the result of obfuscation") ; @@ -83,6 +84,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) bool multiple = options.count("multiquery"); bool obfuscate = options.count("obfuscate"); bool backslash = options.count("backslash"); + bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert"); if (quiet && (hilite || oneline || obfuscate)) { @@ -154,7 +156,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) const char * pos = query.data(); const char * end = pos + query.size(); - ParserQuery parser(end); + ParserQuery parser(end, allow_settings_after_format_in_insert); do { ASTPtr res = parseQueryAndMovePosition( diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index dced4e60dd2..391d758e6dc 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -268,7 +268,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - ParserQuery parser(end); + ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); ASTPtr res; const auto & settings = global_context->getSettingsRef(); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2cbfe97cde5..0015fa73784 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -465,6 +465,7 @@ class IColumn; M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ + M(Bool, allow_settings_after_format_in_insert, false, "Allow SETTINGS after FORMAT, but note, that this is not always safe (note: this is a compatibility setting).", 0) \ M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \ M(Seconds, periodic_live_view_refresh, DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC, "Interval after which periodically refreshed live view is forced to refresh.", 0) \ M(Bool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \ diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index a490d7bed43..476da294789 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -142,10 +142,11 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context) { const char * begin = entry.query.data(); const char * end = begin + entry.query.size(); + const auto & settings = context->getSettingsRef(); - ParserQuery parser_query(end); + ParserQuery parser_query(end, settings.allow_settings_after_format_in_insert); String description; - query = parseQuery(parser_query, begin, end, description, 0, context->getSettingsRef().max_parser_depth); + query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth); } ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & /*zookeeper*/) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index a3349f12f8f..dbd7063f3b3 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -423,7 +423,7 @@ static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end); + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); diff --git a/src/Parsers/ParserExplainQuery.cpp b/src/Parsers/ParserExplainQuery.cpp index e072f6a14d7..63314452447 100644 --- a/src/Parsers/ParserExplainQuery.cpp +++ b/src/Parsers/ParserExplainQuery.cpp @@ -58,11 +58,11 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserCreateTableQuery create_p; ParserSelectWithUnionQuery select_p; - ParserInsertQuery insert_p(end); + ParserInsertQuery insert_p(end, allow_settings_after_format_in_insert); ASTPtr query; if (kind == ASTExplainQuery::ExplainKind::ParsedAST) { - ParserQuery p(end); + ParserQuery p(end, allow_settings_after_format_in_insert); if (p.parse(pos, query, expected)) explain_query->setExplainedQuery(std::move(query)); else diff --git a/src/Parsers/ParserExplainQuery.h b/src/Parsers/ParserExplainQuery.h index ba30e97a58f..1a415a04dde 100644 --- a/src/Parsers/ParserExplainQuery.h +++ b/src/Parsers/ParserExplainQuery.h @@ -10,11 +10,15 @@ class ParserExplainQuery : public IParserBase { protected: const char * end; + bool allow_settings_after_format_in_insert; const char * getName() const override { return "EXPLAIN"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - explicit ParserExplainQuery(const char* end_) : end(end_) {} + explicit ParserExplainQuery(const char* end_, bool allow_settings_after_format_in_insert_) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} }; } diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index b77c0bf5709..b0ca361155f 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -186,6 +186,31 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } + /// Read SETTINGS after FORMAT. + /// + /// Note, that part of SETTINGS can be interpreted as values, + /// hence it is done only under option. + /// + /// Refs: https://github.com/ClickHouse/ClickHouse/issues/35100 + if (allow_settings_after_format_in_insert && s_settings.ignore(pos, expected)) + { + if (settings_ast) + throw Exception("You have SETTINGS before and after FORMAT, " + "this is not allowed. " + "Consider switching to SETTINGS before FORMAT " + "and disable allow_settings_after_format_in_insert.", + ErrorCodes::SYNTAX_ERROR); + + /// Settings are written like SET query, so parse them with ParserSetQuery + ParserSetQuery parser_settings(true); + if (!parser_settings.parse(pos, settings_ast, expected)) + return false; + /// In case of INSERT INTO ... VALUES SETTINGS ... (...), (...), ... + /// we should move data pointer after all settings. + if (data != nullptr) + data = pos->begin; + } + if (select) { /// Copy SETTINGS from the INSERT ... SELECT ... SETTINGS diff --git a/src/Parsers/ParserInsertQuery.h b/src/Parsers/ParserInsertQuery.h index f98e433551d..0d7ce25e09d 100644 --- a/src/Parsers/ParserInsertQuery.h +++ b/src/Parsers/ParserInsertQuery.h @@ -26,11 +26,15 @@ class ParserInsertQuery : public IParserBase { private: const char * end; + bool allow_settings_after_format_in_insert; const char * getName() const override { return "INSERT query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - explicit ParserInsertQuery(const char * end_) : end(end_) {} + explicit ParserInsertQuery(const char * end_, bool allow_settings_after_format_in_insert_) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} }; /** Insert accepts an identifier and an asterisk with variants. diff --git a/src/Parsers/ParserQuery.cpp b/src/Parsers/ParserQuery.cpp index 7677efd9415..78d8854f298 100644 --- a/src/Parsers/ParserQuery.cpp +++ b/src/Parsers/ParserQuery.cpp @@ -30,8 +30,8 @@ namespace DB bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserQueryWithOutput query_with_output_p(end); - ParserInsertQuery insert_p(end); + ParserQueryWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserInsertQuery insert_p(end, allow_settings_after_format_in_insert); ParserUseQuery use_p; ParserSetQuery set_p; ParserSystemQuery system_p; diff --git a/src/Parsers/ParserQuery.h b/src/Parsers/ParserQuery.h index be72a436be8..a2d4e6e04df 100644 --- a/src/Parsers/ParserQuery.h +++ b/src/Parsers/ParserQuery.h @@ -10,12 +10,16 @@ class ParserQuery : public IParserBase { private: const char * end; + bool allow_settings_after_format_in_insert; const char * getName() const override { return "Query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - explicit ParserQuery(const char * end_) : end(end_) {} + explicit ParserQuery(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} }; } diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index f1e007948f9..6041f986a49 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -49,7 +49,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserShowCreateAccessEntityQuery show_create_access_entity_p; ParserShowGrantsQuery show_grants_p; ParserShowPrivilegesQuery show_privileges_p; - ParserExplainQuery explain_p(end); + ParserExplainQuery explain_p(end, allow_settings_after_format_in_insert); ASTPtr query; diff --git a/src/Parsers/ParserQueryWithOutput.h b/src/Parsers/ParserQueryWithOutput.h index 1fd7bec1eea..dba420a077a 100644 --- a/src/Parsers/ParserQueryWithOutput.h +++ b/src/Parsers/ParserQueryWithOutput.h @@ -12,10 +12,16 @@ class ParserQueryWithOutput : public IParserBase { protected: const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "Query with output"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + public: - explicit ParserQueryWithOutput(const char * end_) : end(end_) {} + explicit ParserQueryWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} }; } diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index ed09d648477..af8c9dc58a6 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -386,7 +386,8 @@ std::pair splitMultipartQuery( const std::string & queries, std::vector & queries_list, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + bool allow_settings_after_format_in_insert) { ASTPtr ast; @@ -394,7 +395,7 @@ std::pair splitMultipartQuery( const char * pos = begin; /// parser moves pos from begin to the end of current query const char * end = begin + queries.size(); - ParserQuery parser(end); + ParserQuery parser(end, allow_settings_after_format_in_insert); queries_list.clear(); diff --git a/src/Parsers/parseQuery.h b/src/Parsers/parseQuery.h index d8d7426872b..cc077bbdab2 100644 --- a/src/Parsers/parseQuery.h +++ b/src/Parsers/parseQuery.h @@ -61,6 +61,7 @@ std::pair splitMultipartQuery( const std::string & queries, std::vector & queries_list, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + bool allow_settings_after_format_in_insert); } diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index eeaf5b32a92..7578f8afc1d 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -868,7 +868,7 @@ namespace query_text = std::move(*(query_info.mutable_query())); const char * begin = query_text.data(); const char * end = begin + query_text.size(); - ParserQuery parser(end); + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); ast = parseQuery(parser, begin, end, "", settings.max_query_size, settings.max_parser_depth); /// Choose input format. diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 04e43ed63aa..489c47b3c31 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -275,7 +275,10 @@ void PostgreSQLHandler::processQuery() const auto & settings = session->sessionContext()->getSettingsRef(); std::vector queries; - auto parse_res = splitMultipartQuery(query->query, queries, settings.max_query_size, settings.max_parser_depth); + auto parse_res = splitMultipartQuery(query->query, queries, + settings.max_query_size, + settings.max_parser_depth, + settings.allow_settings_after_format_in_insert); if (!parse_res.second) throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index 1df8b43515e..111ea343398 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -71,13 +71,18 @@ static String clusterNameFromDDLQuery(ContextPtr context, const DDLTask & task) { const char * begin = task.entry.query.data(); const char * end = begin + task.entry.query.size(); - String cluster_name; - ParserQuery parser_query(end); + const auto & settings = context->getSettingsRef(); + String description = fmt::format("from {}", task.entry_path); + ParserQuery parser_query(end, settings.allow_settings_after_format_in_insert); ASTPtr query = parseQuery(parser_query, begin, end, description, - context->getSettingsRef().max_query_size, context->getSettingsRef().max_parser_depth); + settings.max_query_size, + settings.max_parser_depth); + + String cluster_name; if (const auto * query_on_cluster = dynamic_cast(query.get())) cluster_name = query_on_cluster->cluster; + return cluster_name; } diff --git a/tests/queries/0_stateless/02263_format_insert_settings.reference b/tests/queries/0_stateless/02263_format_insert_settings.reference index 1c189b2cc4b..721e7960875 100644 --- a/tests/queries/0_stateless/02263_format_insert_settings.reference +++ b/tests/queries/0_stateless/02263_format_insert_settings.reference @@ -2,6 +2,17 @@ insert into foo settings max_threads=1 Syntax error (query): failed at position 40 (end of query): insert into foo format tsv settings max_threads=1 Can't format ASTInsertQuery with data, since data will be lost. +[multi] insert into foo format tsv settings max_threads=1 +INSERT INTO foo +SETTINGS max_threads = 1 +FORMAT tsv +[oneline] insert into foo format tsv settings max_threads=1 +INSERT INTO foo SETTINGS max_threads = 1 FORMAT tsv +insert into foo settings max_threads=1 format tsv settings max_threads=1 +You have SETTINGS before and after FORMAT +Cannot parse input: expected '\n' before: 'settings max_threads=1 1' +1 +You have SETTINGS before and after FORMAT [multi] insert into foo values INSERT INTO foo FORMAT Values [oneline] insert into foo values diff --git a/tests/queries/0_stateless/02263_format_insert_settings.sh b/tests/queries/0_stateless/02263_format_insert_settings.sh index 830faad9f86..3d5f780a38c 100755 --- a/tests/queries/0_stateless/02263_format_insert_settings.sh +++ b/tests/queries/0_stateless/02263_format_insert_settings.sh @@ -9,21 +9,32 @@ function run_format() local q="$1" && shift echo "$q" - $CLICKHOUSE_FORMAT <<<"$q" + $CLICKHOUSE_FORMAT "$@" <<<"$q" } function run_format_both() { local q="$1" && shift echo "[multi] $q" - $CLICKHOUSE_FORMAT <<<"$q" + $CLICKHOUSE_FORMAT "$@" <<<"$q" echo "[oneline] $q" - $CLICKHOUSE_FORMAT --oneline <<<"$q" + $CLICKHOUSE_FORMAT --oneline "$@" <<<"$q" } # NOTE: that those queries may work slow, due to stack trace obtaining run_format 'insert into foo settings max_threads=1' 2> >(grep -m1 -o "Syntax error (query): failed at position .* (end of query):") +# compatibility run_format 'insert into foo format tsv settings max_threads=1' 2> >(grep -m1 -F -o "Can't format ASTInsertQuery with data, since data will be lost.") +run_format_both 'insert into foo format tsv settings max_threads=1' --allow_settings_after_format_in_insert +run_format 'insert into foo settings max_threads=1 format tsv settings max_threads=1' --allow_settings_after_format_in_insert 2> >(grep -m1 -F -o "You have SETTINGS before and after FORMAT") +# and via server (since this is a separate code path) +$CLICKHOUSE_CLIENT -q 'drop table if exists data_02263' +$CLICKHOUSE_CLIENT -q 'create table data_02263 (key Int) engine=Memory()' +$CLICKHOUSE_CLIENT -q 'insert into data_02263 format TSV settings max_threads=1 1' 2> >(grep -m1 -F -o "Cannot parse input: expected '\n' before: 'settings max_threads=1 1'") +$CLICKHOUSE_CLIENT --allow_settings_after_format_in_insert=1 -q 'insert into data_02263 format TSV settings max_threads=1 1' +$CLICKHOUSE_CLIENT -q 'select * from data_02263' +$CLICKHOUSE_CLIENT --allow_settings_after_format_in_insert=1 -q 'insert into data_02263 settings max_threads=1 format tsv settings max_threads=1' 2> >(grep -m1 -F -o "You have SETTINGS before and after FORMAT") +$CLICKHOUSE_CLIENT -q 'drop table data_02263' run_format_both 'insert into foo values' run_format_both 'insert into foo select 1'