From 9c20519f33f155c69676e6ad1657d6279fe5d3e6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 Sep 2021 01:17:56 +0300 Subject: [PATCH 1/2] Add ability to disable converting expressions to local filter for external queries Sometimes it is undesirable to remove any expressions from WHERE, since this may lead to reading the whole table, and this is pretty heavy job for MySQL and PostgreSQL (even if you read only one column). So external_table_strict_query had been introduced to prohibit this and fail the query instead. --- src/Core/Settings.h | 1 + ...test_transform_query_for_external_database.cpp | 15 +++++++++++++++ .../transformQueryForExternalDatabase.cpp | 11 +++++++++++ src/Storages/transformQueryForExternalDatabase.h | 3 +++ 4 files changed, 30 insertions(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4980897965f..8151ca3a0f1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -386,6 +386,7 @@ class IColumn; M(Bool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \ M(Bool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.", 0) \ M(Bool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only by 'mysql', 'postgresql' and 'odbc' table functions.", 0) \ + M(Bool, external_table_strict_query, false, "If it is set to true, transforming expression to local filter is forbidden for queries to external tables.", 0) \ \ M(Bool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.", 0) \ M(UInt64, max_hyperscan_regexp_length, 0, "Max length of regexp than can be used in hyperscan multi-match functions. Zero means unlimited.", 0) \ diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 9501be73a38..cdd4f6eafdd 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -219,3 +219,18 @@ TEST(TransformQueryForExternalDatabase, ForeignColumnInWhere) "WHERE column > 2 AND (apply_id = 1 OR table2.num = 1) AND table2.attr != ''", R"(SELECT "column", "apply_id" FROM "test"."table" WHERE ("column" > 2) AND ("apply_id" = 1))"); } + +TEST(TransformQueryForExternalDatabase, Strict) +{ + const State & state = State::instance(); + + check(state, 1, + "SELECT field FROM table WHERE field IN (SELECT attr FROM table2)", + R"(SELECT "field" FROM "test"."table")"); + + state.context->setSetting("external_table_strict_query", true); + /// removeUnknownSubexpressionsFromWhere() takes place + EXPECT_THROW(check(state, 1, "SELECT field FROM table WHERE field IN (SELECT attr FROM table2)", ""), Exception); + /// isCompatible() takes place + EXPECT_THROW(check(state, 1, "SELECT column FROM test.table WHERE left(column, 10) = RIGHT(column, 10) AND SUBSTRING(column FROM 1 FOR 2) = 'Hello'", ""), Exception); +} diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 1bd665de460..96510585600 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; } namespace @@ -248,6 +250,7 @@ String transformQueryForExternalDatabase( { auto clone_query = query_info.query->clone(); const Names used_columns = query_info.syntax_analyzer_result->requiredSourceColumns(); + bool strict = context->getSettingsRef().external_table_strict_query; auto select = std::make_shared(); @@ -275,6 +278,10 @@ String transformQueryForExternalDatabase( { select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(original_where)); } + else if (strict) + { + throw Exception("Query contains non-compatible expressions (and external_table_strict_query=true)", ErrorCodes::INCORRECT_QUERY); + } else if (const auto * function = original_where->as()) { if (function->name == "and") @@ -292,6 +299,10 @@ String transformQueryForExternalDatabase( } } } + else if (strict && original_where) + { + throw Exception("Query contains non-compatible expressions (and external_table_strict_query=true)", ErrorCodes::INCORRECT_QUERY); + } ASTPtr select_ptr = select; dropAliases(select_ptr); diff --git a/src/Storages/transformQueryForExternalDatabase.h b/src/Storages/transformQueryForExternalDatabase.h index 215afab8b30..6f7d6af5319 100644 --- a/src/Storages/transformQueryForExternalDatabase.h +++ b/src/Storages/transformQueryForExternalDatabase.h @@ -22,6 +22,9 @@ class IAST; * that contain only compatible expressions. * * Compatible expressions are comparisons of identifiers, constants, and logical operations on them. + * + * Throws INCORRECT_QUERY if external_table_strict_query (from context settings) + * is set and some expression from WHERE is not compatible. */ String transformQueryForExternalDatabase( const SelectQueryInfo & query_info, From 23b3085c8b6c1783ea317725db7329d48e5e4114 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 Sep 2021 10:48:46 +0300 Subject: [PATCH 2/2] Add more tests for external_table_strict_query --- ..._transform_query_for_external_database.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index cdd4f6eafdd..13e1ca882af 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -220,17 +220,32 @@ TEST(TransformQueryForExternalDatabase, ForeignColumnInWhere) R"(SELECT "column", "apply_id" FROM "test"."table" WHERE ("column" > 2) AND ("apply_id" = 1))"); } -TEST(TransformQueryForExternalDatabase, Strict) +TEST(TransformQueryForExternalDatabase, NoStrict) { const State & state = State::instance(); check(state, 1, "SELECT field FROM table WHERE field IN (SELECT attr FROM table2)", R"(SELECT "field" FROM "test"."table")"); +} +TEST(TransformQueryForExternalDatabase, Strict) +{ + const State & state = State::instance(); state.context->setSetting("external_table_strict_query", true); + + check(state, 1, + "SELECT field FROM table WHERE field = '1'", + R"(SELECT "field" FROM "test"."table" WHERE "field" = '1')"); + check(state, 1, + "SELECT field FROM table WHERE field IN ('1', '2')", + R"(SELECT "field" FROM "test"."table" WHERE "field" IN ('1', '2'))"); + check(state, 1, + "SELECT field FROM table WHERE field LIKE '%test%'", + R"(SELECT "field" FROM "test"."table" WHERE "field" LIKE '%test%')"); + /// removeUnknownSubexpressionsFromWhere() takes place EXPECT_THROW(check(state, 1, "SELECT field FROM table WHERE field IN (SELECT attr FROM table2)", ""), Exception); - /// isCompatible() takes place + /// !isCompatible() takes place EXPECT_THROW(check(state, 1, "SELECT column FROM test.table WHERE left(column, 10) = RIGHT(column, 10) AND SUBSTRING(column FROM 1 FOR 2) = 'Hello'", ""), Exception); }