diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6988a66cf1e..0d498ce7699 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1127,6 +1127,7 @@ class IColumn; M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ M(Bool, input_format_json_case_insensitive_column_matching, false, "Ignore case when matching JSON keys with CH columns", 0) \ + M(UInt64, input_format_json_max_depth, 1000, "Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely.", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b6ef654438e..5b94391bade 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -75,6 +75,7 @@ static std::initializer_list #include #include +#include #include #include #include #include #include #include -#include #include #include @@ -39,6 +39,7 @@ namespace ErrorCodes extern const int ATTEMPT_TO_READ_AFTER_EOF; extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; + extern const int TOO_DEEP_RECURSION; } template @@ -1494,10 +1495,20 @@ template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const template -ReturnType skipJSONFieldImpl(ReadBuffer & buf, StringRef name_of_field, const FormatSettings::JSON & settings) +ReturnType skipJSONFieldImpl(ReadBuffer & buf, StringRef name_of_field, const FormatSettings::JSON & settings, size_t current_depth) { static constexpr bool throw_exception = std::is_same_v; + if (unlikely(current_depth > settings.max_depth)) + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "JSON is too deep for key '{}'", name_of_field.toString()); + return ReturnType(false); + } + + if (unlikely(current_depth > 0 && current_depth % 1024 == 0)) + checkStackSize(); + if (buf.eof()) { if constexpr (throw_exception) @@ -1560,8 +1571,8 @@ ReturnType skipJSONFieldImpl(ReadBuffer & buf, StringRef name_of_field, const Fo while (true) { if constexpr (throw_exception) - skipJSONFieldImpl(buf, name_of_field, settings); - else if (!skipJSONFieldImpl(buf, name_of_field, settings)) + skipJSONFieldImpl(buf, name_of_field, settings, current_depth + 1); + else if (!skipJSONFieldImpl(buf, name_of_field, settings, current_depth + 1)) return ReturnType(false); skipWhitespaceIfAny(buf); @@ -1619,8 +1630,8 @@ ReturnType skipJSONFieldImpl(ReadBuffer & buf, StringRef name_of_field, const Fo skipWhitespaceIfAny(buf); if constexpr (throw_exception) - skipJSONFieldImpl(buf, name_of_field, settings); - else if (!skipJSONFieldImpl(buf, name_of_field, settings)) + skipJSONFieldImpl(buf, name_of_field, settings, current_depth + 1); + else if (!skipJSONFieldImpl(buf, name_of_field, settings, current_depth + 1)) return ReturnType(false); skipWhitespaceIfAny(buf); @@ -1659,12 +1670,12 @@ ReturnType skipJSONFieldImpl(ReadBuffer & buf, StringRef name_of_field, const Fo void skipJSONField(ReadBuffer & buf, StringRef name_of_field, const FormatSettings::JSON & settings) { - skipJSONFieldImpl(buf, name_of_field, settings); + skipJSONFieldImpl(buf, name_of_field, settings, 0); } bool trySkipJSONField(ReadBuffer & buf, StringRef name_of_field, const FormatSettings::JSON & settings) { - return skipJSONFieldImpl(buf, name_of_field, settings); + return skipJSONFieldImpl(buf, name_of_field, settings, 0); } diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index c89a70be541..39edb1e6516 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -113,13 +113,13 @@ std::stack getSupportingParallelReplicasQuery(const IQueryTre return res; } -class ReplaceTableNodeToDummyVisitor : public InDepthQueryTreeVisitor +class ReplaceTableNodeToDummyVisitor : public InDepthQueryTreeVisitorWithContext { public: - using Base = InDepthQueryTreeVisitor; + using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(const QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * table_node = node->as(); auto * table_function_node = node->as(); @@ -134,21 +134,19 @@ public: ColumnsDescription(storage_snapshot->getColumns(get_column_options)), storage_snapshot); - auto dummy_table_node = std::make_shared(std::move(storage_dummy), context); + auto dummy_table_node = std::make_shared(std::move(storage_dummy), getContext()); dummy_table_node->setAlias(node->getAlias()); replacement_map.emplace(node.get(), std::move(dummy_table_node)); } } - ContextPtr context; std::unordered_map replacement_map; }; -QueryTreeNodePtr replaceTablesWithDummyTables(const QueryTreeNodePtr & query, const ContextPtr & context) +QueryTreeNodePtr replaceTablesWithDummyTables(QueryTreeNodePtr query, const ContextPtr & context) { - ReplaceTableNodeToDummyVisitor visitor; - visitor.context = context; + ReplaceTableNodeToDummyVisitor visitor(context); visitor.visit(query); return query->cloneAndReplace(visitor.replacement_map); diff --git a/src/Planner/findQueryForParallelReplicas.h b/src/Planner/findQueryForParallelReplicas.h index f5dc69dfa0e..cdce4ad0b47 100644 --- a/src/Planner/findQueryForParallelReplicas.h +++ b/src/Planner/findQueryForParallelReplicas.h @@ -13,7 +13,7 @@ using QueryTreeNodePtr = std::shared_ptr; struct SelectQueryOptions; -/// Find a qury which can be executed with parallel replicas up to WithMergableStage. +/// Find a query which can be executed with parallel replicas up to WithMergableStage. /// Returned query will always contain some (>1) subqueries, possibly with joins. const QueryNode * findQueryForParallelReplicas(const QueryTreeNodePtr & query_tree_node, SelectQueryOptions & select_query_options); diff --git a/tests/queries/0_stateless/03213_deep_json.reference b/tests/queries/0_stateless/03213_deep_json.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03213_deep_json.sql b/tests/queries/0_stateless/03213_deep_json.sql new file mode 100644 index 00000000000..2a9476381ff --- /dev/null +++ b/tests/queries/0_stateless/03213_deep_json.sql @@ -0,0 +1,5 @@ +-- The default limit works. +SELECT * FROM format("JSONCompactEachRow", 'x UInt32, y UInt32', REPEAT('[1,1,', 100000)) SETTINGS input_format_json_compact_allow_variable_number_of_columns = 1; -- { serverError TOO_DEEP_RECURSION, INCORRECT_DATA } +-- Even if we relax the limit, it is also safe. +SET input_format_json_max_depth = 100000; +SELECT * FROM format("JSONCompactEachRow", 'x UInt32, y UInt32', REPEAT('[1,1,', 100000)) SETTINGS input_format_json_compact_allow_variable_number_of_columns = 1; -- { serverError TOO_DEEP_RECURSION, INCORRECT_DATA } diff --git a/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.reference b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql new file mode 100644 index 00000000000..6d084c2ac50 --- /dev/null +++ b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql @@ -0,0 +1,15 @@ +create table t (number UInt64) engine MergeTree order by number; + +SELECT 1 +FROM +( + SELECT number IN ( + SELECT number + FROM view( + SELECT number + FROM numbers(1) + ) + ) + FROM t +) +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, allow_experimental_analyzer = 1; -- { serverError CLUSTER_DOESNT_EXIST }